# Speech Recognition

In [1]:
import speech_recognition as sr
import nltk

In [33]:
r = sr.Recognizer()
with sr.Microphone() as source:
    print("Speak Now")
    audio = r.listen(source)
    user_input = r.recognize_google(audio)
    
if 'hello caddie' or 'hey caddie' in user_input:
    try:
        print(user_input)
    except:
        print('error')


Speak Now
India to France


In [34]:
from nltk.tokenize import word_tokenize

In [35]:
input_tokens = word_tokenize(user_input)
print(input_tokens)

['India', 'to', 'France']


# Find word count

In [36]:
from nltk.probability import FreqDist
fdist = FreqDist()

In [37]:
for word in input_tokens:
    fdist[word.lower()]+=1
fdist

FreqDist({'india': 1, 'to': 1, 'france': 1})

# Stemming

In [38]:
from nltk.stem import PorterStemmer

pst = PorterStemmer()

pst.stem('go going gone gave given')

'go going gone gave given'

In [39]:
pst.stem(user_input)

'india to fr'

In [40]:
from nltk.stem import wordnet

from nltk.stem import WordNetLemmatizer
word_lem = WordNetLemmatizer()

In [41]:
import re
puncutation = re.compile(user_input)

In [42]:
for token in input_tokens:
    print(nltk.pos_tag([token]))

[('India', 'NNP')]
[('to', 'TO')]
[('France', 'NNP')]


### Remove stop words

In [43]:
from nltk.corpus import stopwords

stopwords_set = set(stopwords.words('english'))
print(stopwords_set)

{"wouldn't", 'isn', 'yourselves', 'but', 'such', 'before', 't', 'there', 'into', 're', "mustn't", 'yourself', 'because', 'haven', 'or', "couldn't", 'their', 'mustn', "wasn't", 'where', 'y', 'between', 'was', "don't", 'am', 'weren', 'the', 'under', 'down', 'having', 'on', 'nor', 'wasn', 'doing', 'these', 'until', "won't", 'further', 'each', 'not', 'of', 'some', 'while', 'just', 'out', "shouldn't", 'with', 'those', 'few', "that'll", 'other', 'own', 'hadn', 'than', 'me', 'so', 'he', 'in', 'at', 'myself', 'too', 'themselves', 'been', "should've", 'for', 'then', 'himself', 'are', 'her', 'can', "haven't", 'who', 'i', "hasn't", 'my', 'itself', 'only', "you're", 'herself', 'did', 'more', 'as', 'aren', 'below', 'whom', 'same', "hadn't", 'when', 'o', 'here', "she's", 'mightn', 'doesn', 'again', 'she', 'an', 'ours', 'do', 'm', 'will', 'why', 'ain', "weren't", 've', 'a', 'hasn', 'during', "shan't", 'our', 'll', 'they', 'had', "mightn't", 'wouldn', 'its', 'your', 'by', 'over', 'once', 'which', 'abo

In [44]:
def remove_stopwords(tokenized_text):
    text_clean = [word for word in tokenized_text if word not in stopwords_set]
    return text_clean

In [45]:
print(remove_stopwords(input_tokens))

['India', 'France']


# Name Entity Recognition

In [46]:
from nltk import ne_chunk

In [47]:
input_NER = ne_chunk(nltk.pos_tag(input_tokens))
print(input_NER)

(S (GPE India/NNP) to/TO (GPE France/NNP))


# spaCY 

It has different models.

In [48]:
#pip install spacy

#pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.0/en_core_web_sm-2.2.0.tar.gz

In [49]:
import spacy

In [50]:
nlp = spacy.load('en_core_web_sm')

In [51]:
doc = nlp(user_input)

### Tokenization

In [52]:
for token in doc:
    print(token.text)

India
to
France


In [53]:
token = doc[2]
token

France

In [54]:
span = doc[2:5]
span

France

In [55]:
for token in doc:
    print(token.i, token.text)

0 India
1 to
2 France


# POS Tagging

In [56]:
for token in doc:
    print(token.i, token.text, token.pos_)

0 India PROPN
1 to ADP
2 France PROPN


# NER

In [57]:
locations=[]
for ent in doc.ents:
    print(ent.text, ent.label_)
    if str(ent.label_) == 'GPE':
        locations.append(ent.text)
print(locations)

India GPE
France GPE
['India', 'France']


# Calculate the distance

In [58]:
#pip install geopy

In [59]:
from geopy.geocoders import Nominatim
from geopy.distance import distance

In [60]:
def calculate_distance(point1, point2):
    
    geolocator = Nominatim(user_agent="Golf Caddie App")
    
    location1 = geolocator.geocode(point1)
    print("Point A: ", location1.address)
    print("latitude is :-" ,location1.latitude,"\nlongtitude is:-" ,location1.longitude,"\n")
    location1_coord = location1.latitude, location1.longitude
    
    location2 = geolocator.geocode(point2)
    print("Point B:", location2.address)
    print("latitude is :-" ,location2.latitude,"\nlongtitude is:-" ,location2.longitude,"\n")
    location2_coord = location2.latitude, location2.longitude
    
    d = distance(location1_coord, location2_coord)
    
    return d

In [61]:
print(locations[0], locations[1])
calculated_distance = calculate_distance(locations[0] ,locations[1])

print(calculated_distance)

India France
Point A:  India
latitude is :- 22.3511148 
longtitude is:- 78.6677428 

Point B: France
latitude is :- 46.603354 
longtitude is:- 1.8883335 

7245.67339386197 km


# Text to speech 

In [62]:
#pip install gtts
approx_distance = round(int(str(calculated_distance).split('.').pop(0)))
print(str(approx_distance) +' km(s)')
#

7245 km(s)


In [63]:
from gtts import gTTS

In [64]:
reply_message = 'The distance between ' + str(locations[0]) + ' and ' + str(locations[1]) + ' is ' + str(approx_distance) + (' kilometers')

In [65]:
obj = gTTS(text=reply_message, lang = "fr" , slow = False)

In [66]:
#pip install google-cloud-texttospeech

In [67]:
from google.cloud import texttospeech
import os

In [68]:
def text_to_speech(text_message):
    #set up authorization
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r'/Users/priyamchadha/Desktop/Shubham/PRI/Hello Birdie/EPITA-PRI-6664052d2f22.json'

    # Instantiates a client
    client = texttospeech.TextToSpeechClient()

    # Set the text input to be synthesized
    synthesis_input = texttospeech.types.SynthesisInput(text=text_message)

    # Build the voice request, select the language code ("en-US") and the ssml
    # voice gender ("neutral/MALE/FEMALE")
    voice = texttospeech.types.VoiceSelectionParams(
        language_code='en-US',
        ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)

    # Select the type of audio file you want returned
    audio_config = texttospeech.types.AudioConfig(
        audio_encoding=texttospeech.enums.AudioEncoding.MP3)

    # Perform the text-to-speech request on the text input with the selected
    # voice parameters and audio file type
    response = client.synthesize_speech(synthesis_input, voice, audio_config)

    # The response's audio_content is binary.
    with open('output.mp3', 'wb') as out:
        # Write the response to the output file.
        out.write(response.audio_content)
        
        return 'Audio content written to file "output.mp3"'

In [69]:
print(text_to_speech(reply_message))

Audio content written to file "output.mp3"
