### Stemming 

In [1]:
from nltk.stem import PorterStemmer

stemmer = PorterStemmer()

In [2]:
example = "Cats Running Was"
example = [stemmer.stem(token) for token in example.split(" ")]
print(" ".join(example))

cat run wa


In [3]:
lyrics = "You better lose yourself in the music, the moment "\
+ "You own it, you better never let it go "\
+ "You only get one shot, do not miss your chance to blow "\
+ "This opportunity comes once in a lifetime "
lyrics = [stemmer.stem(token) for token in lyrics.split(" ")]
print(" ".join(lyrics))

you better lose yourself in the music, the moment you own it, you better never let it go you onli get one shot, do not miss your chanc to blow thi opportun come onc in a lifetim 


In [4]:
review = "Bromwell High is a cartoon comedy. "\
+ "It ran at the same time as some other programs about school life, such as \"Teachers\". "\
+ "My 35 years in the teaching profession lead me to believe that Bromwell High's satire is much "\
+ "closer to reality than is \"Teachers\". The scramble to survive financially, the insightful "\
+ "students who can see right through their pathetic teachers' pomp, the pettiness of the whole situation "\
+ ", all remind me of the schools I knew and their students. When I saw the episode in which a student "\
+ "repeatedly tried to burn down the school, I immediately recalled ......... at .......... High. "\
+ "A classic line: INSPECTOR: I'm here to sack one of your teachers. STUDENT: Welcome to Bromwell High. "\
+ "I expect that many adults of my age think that Bromwell High is far fetched. What a pity that it isn't!"

review= [stemmer.stem(token) for token in review.split(" ")]
print(" ".join(review))

bromwel high is a cartoon comedy. It ran at the same time as some other program about school life, such as "teachers". My 35 year in the teach profess lead me to believ that bromwel high' satir is much closer to realiti than is "teachers". the scrambl to surviv financially, the insight student who can see right through their pathet teachers' pomp, the petti of the whole situat , all remind me of the school I knew and their students. when I saw the episod in which a student repeatedli tri to burn down the school, I immedi recal ......... at .......... high. A classic line: inspector: i'm here to sack one of your teachers. student: welcom to bromwel high. I expect that mani adult of my age think that bromwel high is far fetched. what a piti that it isn't!


### Vectorization

In [5]:
from sklearn.feature_extraction.text import CountVectorizer

vectorizer = CountVectorizer(binary=True, token_pattern=r'\b[^\d\W]+\b')

In [6]:
corpus = ["The dog is on the table", "the cats now are on the table"]
vectorizer.fit(corpus)


CountVectorizer(analyzer='word', binary=True, decode_error='strict',
                dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
                lowercase=True, max_df=1.0, max_features=None, min_df=1,
                ngram_range=(1, 1), preprocessor=None, stop_words=None,
                strip_accents=None, token_pattern='\\b[^\\d\\W]+\\b',
                tokenizer=None, vocabulary=None)

In [7]:
vocab = vectorizer.vocabulary_
for key in sorted(vocab.keys()):
    print("{}: {}".format(key, vocab[key]))

are: 0
cats: 1
dog: 2
is: 3
now: 4
on: 5
table: 6
the: 7


In [8]:
corpus2 = ["I am jack", "You are john", "I am john"]
vectorizer.fit(corpus2)
# print(vectorizer.transform(corpus2))
print(vectorizer.transform(corpus2).toarray())

[[1 0 1 1 0 0]
 [0 1 0 0 1 1]
 [1 0 1 0 1 0]]


In [9]:
vocab = vectorizer.vocabulary_

for key in sorted(vocab.keys()):
    print("{}: {}".format(key, vocab[key]))

am: 0
are: 1
i: 2
jack: 3
john: 4
you: 5


### Named Entity Recognition

In [17]:
pip install spacy

SyntaxError: invalid syntax (<ipython-input-17-3fb90ab97b7f>, line 1)

In [20]:
pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz

Collecting https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz (12.0MB)
Building wheels for collected packages: en-core-web-sm
  Building wheel for en-core-web-sm (setup.py): started
  Building wheel for en-core-web-sm (setup.py): finished with status 'done'
  Stored in directory: C:\Users\srivutta\AppData\Local\pip\Cache\wheels\48\5c\1c\15f9d02afc8221a668d2172446dd8467b20cdb9aef80a172a4
Successfully built en-core-web-sm
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-2.2.0
Note: you may need to restart the kernel to use updated packages.


In [23]:
python -m spacy download en_core_web_sm

SyntaxError: invalid syntax (<ipython-input-23-e8a31c0c54ec>, line 1)

In [11]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [12]:
import warnings
warnings.filterwarnings("ignore")

In [15]:
example1 = "man woman king queen"
tokens = nlp(example1)
for token1 in tokens:
    for token2 in tokens:
        print(token1.text, token2.text, token1.similarity(token2))

man man 1.0
man woman 0.53572804
man king 0.45626906
man queen 0.27471867
woman man 0.53572804
woman woman 1.0
woman king 0.49596605
woman queen 0.33717477
king man 0.45626906
king woman 0.49596605
king king 1.0
king queen 0.47533715
queen man 0.27471867
queen woman 0.33717477
queen king 0.47533715
queen queen 1.0


In [16]:
example1 = "walking walked swimming swam"
tokens = nlp(example1)
for token1 in tokens:
    for token2 in tokens:
        if(token1.text == token2.text):
            continue
        print(token1.text, token2.text, token1.similarity(token2))

walking walked 0.03879928
walking swimming 0.399566
walking swam 0.27984843
walked walking 0.03879928
walked swimming 0.18434295
walked swam -0.0038515006
swimming walking 0.399566
swimming walked 0.18434295
swimming swam 0.2591489
swam walking 0.27984843
swam walked -0.0038515006
swam swimming 0.2591489


In [None]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [None]:
example = "Google, a company founded by Larry Page and Sergey Brin in the United States of America "\
+ "has one of the world’s most advanced search engines."

doc = nlp(example)

for ent in doc.ents:
    print(ent.text, ent.label_)

In [None]:
example = "U.S. officials are meeting with former Taliban members "\
+ "amid intensifying efforts to wind down America's longest war, three of the "\
+ "militant group's commanders told NBC News."

doc = nlp(example)

for ent in doc.ents:
    print(ent.text, ent.label_)

In [None]:
example = "It’s been an arduous year for German chancellor Angela Merkel, so far. "\
+ "She has battled through coalition negotiations to form a government, chivvied "\
+ "the European Union into a loose agreement on migrants, weathered insults from "\
+ "US president Donald Trump, and headed off a revolt from her interior minister. No wonder "\
+ "then one journalist at her summer news conference in Berlin today (July 20) asked if "\
+ "she was, honestly, just exhausted. “I can’t complain,” Merkel said, “I have a few days "\
+ "holiday now and am looking forward to sleeping a bit longer.”"

doc = nlp(example)

for ent in doc.ents:
    print(ent.text, ent.label_)

### Chatbots

In [None]:
# Load data preprocessing libs
import pandas as pd
import numpy as np

# Load vectorizer and similarity measure
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [None]:
# Read data and drop examples that has no answer
df = pd.read_csv("aws_faq.csv")
df.dropna(inplace=True)

In [None]:
vectorizer = TfidfVectorizer()
vectorizer.fit(np.concatenate((df.Question, df.Answer)))

In [None]:
Question_vectors = vectorizer.transform(df.Question)

In [None]:
print("You can start chatting with me now.")
# Read user input
input_question = input()
# Locate the closest question
input_question_vector = vectorizer.transform([input_question])
# Compute similarities
similarities = cosine_similarity(input_question_vector, Question_vectors)
# Find the closest question
closest = np.argmax(similarities, axis=1)
# Print the correct answer
text =df.Answer.iloc[closest].values[0]
print(text)
from gtts import gTTS
import os
mytext = text
language = 'en'
myobj = gTTS(text=mytext, lang=language, slow=False)
myobj.save("welcome.mp3")
os.system("welcome.mp3")     

In [None]:
pip install gTTS

In [None]:
# Import the required module for text  
# to speech conversion
from gtts import gTTS
 
# This module is imported so that we can  
# play the converted audio
import os
 
# The text that you want to convert to audio
mytext = 'hi this is bharath'
 
# Language in which you want to convert
language = 'en'
 
# Passing the text and language to the engine,  
# here we have marked slow=False. Which tells  
# the module that the converted audio should  
# have a high speed
myobj = gTTS(text=mytext, lang=language, slow=False)
 
# Saving the converted audio in a mp3 file named
# welcome  
myobj.save("welcome.mp3")
 
# Playing the converted file
os.system("welcome.mp3") 

In [13]:
pip install SpeechRecognition

Collecting SpeechRecognition
  Downloading https://files.pythonhosted.org/packages/26/e1/7f5678cd94ec1234269d23756dbdaa4c8cfaed973412f88ae8adf7893a50/SpeechRecognition-3.8.1-py2.py3-none-any.whl (32.8MB)
Installing collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.8.1
Note: you may need to restart the kernel to use updated packages.


In [14]:
import win32com.client 
import speech_recognition as sr

In [16]:
pip install PyAudio

Collecting PyAudio
  Downloading https://files.pythonhosted.org/packages/ab/42/b4f04721c5c5bfc196ce156b3c768998ef8c0ae3654ed29ea5020c749a6b/PyAudio-0.2.11.tar.gz
Building wheels for collected packages: PyAudio
  Building wheel for PyAudio (setup.py): started
  Building wheel for PyAudio (setup.py): finished with status 'error'
  Running setup.py clean for PyAudio
Failed to build PyAudio
Installing collected packages: PyAudio
  Running setup.py install for PyAudio: started
    Running setup.py install for PyAudio: finished with status 'error'
Note: you may need to restart the kernel to use updated packages.


  ERROR: Complete output from command 'd:\Users\hh\Anaconda3\python.exe' -u -c 'import setuptools, tokenize;__file__='"'"'C:\\Users\\hh\\AppData\\Local\\Temp\\pip-install-6shkkrvp\\PyAudio\\setup.py'"'"';f=getattr(tokenize, '"'"'open'"'"', open)(__file__);code=f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' bdist_wheel -d 'C:\Users\hh\AppData\Local\Temp\pip-wheel-jy92ao3v' --python-tag cp37:
  ERROR: running bdist_wheel
  running build
  running build_py
  creating build
  creating build\lib.win-amd64-3.7
  copying src\pyaudio.py -> build\lib.win-amd64-3.7
  running build_ext
  building '_portaudio' extension
  error: Microsoft Visual C++ 14.0 is required. Get it with "Microsoft Visual C++ Build Tools": https://visualstudio.microsoft.com/downloads/
  ----------------------------------------
  ERROR: Failed building wheel for PyAudio
    ERROR: Complete output from command 'd:\Users\hh\Anaconda3\python.exe' -u -c 'import setuptools

In [17]:
i="yes"
a=[]
while (i=="yes"): 
    speaker = win32com.client.Dispatch("SAPI.SpVoice") 
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print("hey, Tell me about your phone ? ")
        speaker.Speak("hey, Tell me about your phone ?  ")
        print("Listining now ..... ")
        #print("Speak now :")
        #speaker.Speak("Speak now :")
        audio = r.listen(source)
        try:
            text = r.recognize_google(audio)
            print("You said : {}".format(text))
            a.append(text)
        except:
            print("Sorry could not recognize what you said")
    #i=1
    r1 = sr.Recognizer()
    #i=="no"
    with sr.Microphone() as source:
        print("should we continue ?")
        speaker.Speak("should we continue ? ")
        print("Listining now ..... ")
        audio1 = r1.listen(source)
        try:
            i = r1.recognize_google(audio1)
            print("You said : {}".format(i))
            #a.append(text)
        except:
            print("Sorry could not recognize what you said")
            

print(a)
y=model.predict(vect.transform(a))
print(y)
r=0
for t in y:
    r+=1
    if t==0:
        speaker.Speak("your Feedback for phone {} is recognized as Bad Review!".format(r))
    else:
        speaker.Speak("your Feedback for phone {} is recognized as Good Review!".format(r))
# if y[0]==0:
#     speaker.Speak("your Feedback is recognized as Bad Review!")
# else:
#     speaker.Speak("your Feedback is recognized as Good Review!")

AttributeError: Could not find PyAudio; check installation

In [18]:
# Text Summarization using NLP

# Install BeautifulSoup 4 - pip install beautifulsoup4
# Install lxml - pip install lxml

# Importing the libraries
import bs4 as bs
import urllib.request
import re
import nltk
nltk.download('stopwords')
import heapq

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\hh\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\stopwords.zip.


In [29]:
# Gettings the data source
source = urllib.request.urlopen(input()).read()

# Parsing the data/ creating BeautifulSoup object
soup = bs.BeautifulSoup(source,'lxml')


https://www.shabdkosh.com/dictionary/english-kannada/vaanijya/vaanijya-meaning-in-kannada


In [30]:
# Fetching the data
text = ""
for paragraph in soup.find_all('p'):
    text += paragraph.text

In [31]:
text

'English Kannada Dictionary | ಇಂಗ್ಲೀಶ್ ಕನ್ನಡ ನಿಘಂಟು\nKeyboard: Off\nLanguage: English\nInterpreted your input "vaanijya" as "ವಾಣಿಜ್ಯ".Meaning and definitions of vaanijya, vaanijya meaning, translation in kannada language for vaanijya with similar and opposite words. Also find spoken pronunciation of vaanijya in kannada and in English language. What vaanijya means in kannada, vaanijya meaning in kannada, vaanijya definition, examples and pronunciation of vaanijya in kannada language.Also see: vaanijya in HindiNew!'

In [24]:
>>> import nltk
>>> nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\hh\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


True

In [33]:
sentences = nltk.sent_tokenize(text)

# Stopword list
stop_words = nltk.corpus.stopwords.words('english')

# Word counts 
word2count = {}
for word in nltk.word_tokenize(text):
    if word not in stop_words:
        if word not in word2count.keys():
            word2count[word] = 1
        else:
            word2count[word] += 1
# Converting counts to weights
max_count = max(word2count.values())
for key in word2count.keys():
    word2count[key] = word2count[key]/max_count
    
# Product sentence scores    
sent2score = {}
for sentence in sentences:
    for word in nltk.word_tokenize(sentence.lower()):
        if word in word2count.keys():
            if len(sentence.split(' ')) < 25:
                if sentence not in sent2score.keys():
                    sent2score[sentence] = word2count[word]
                else:
                    sent2score[sentence] += word2count[word]
                    
# Gettings best 5 lines             
best_sentences = heapq.nlargest(5, sent2score, key=sent2score.get)

print('---------------------------------------------------------')
for sentence in best_sentences:
    print(sentence)

---------------------------------------------------------
What vaanijya means in kannada, vaanijya meaning in kannada, vaanijya definition, examples and pronunciation of vaanijya in kannada language.Also see: vaanijya in HindiNew!
Also find spoken pronunciation of vaanijya in kannada and in English language.


In [35]:
text.split(".")

['English Kannada Dictionary | ಇಂಗ್ಲೀಶ್ ಕನ್ನಡ ನಿಘಂಟು\nKeyboard: Off\nLanguage: English\nInterpreted your input "vaanijya" as "ವಾಣಿಜ್ಯ"',
 'Meaning and definitions of vaanijya, vaanijya meaning, translation in kannada language for vaanijya with similar and opposite words',
 ' Also find spoken pronunciation of vaanijya in kannada and in English language',
 ' What vaanijya means in kannada, vaanijya meaning in kannada, vaanijya definition, examples and pronunciation of vaanijya in kannada language',
 'Also see: vaanijya in HindiNew!']