## Speech Recognition using Python

In [2]:
!apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
!pip install pyaudio
!pip install ConfigParser
!apt-get install python-gnuradio-audio-portaudio
!python -m pip install pyaudio
!pip install SpeechRecognition


Reading package lists... Done
Building dependency tree       
Reading state information... Done
libasound2-dev is already the newest version (1.1.3-5ubuntu0.2).
ffmpeg is already the newest version (7:3.4.6-0ubuntu0.18.04.1).
The following package was automatically installed and is no longer required:
  libnvidia-common-430
Use 'apt autoremove' to remove it.
Suggested packages:
  portaudio19-doc
The following NEW packages will be installed:
  libportaudio2 libportaudiocpp0 portaudio19-dev
0 upgraded, 3 newly installed, 0 to remove and 7 not upgraded.
Need to get 184 kB of archives.
After this operation, 891 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libportaudio2 amd64 19.6.0-1 [64.6 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libportaudiocpp0 amd64 19.6.0-1 [15.1 kB]
Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 portaudio19-dev amd64 19.6.0-1 [104 kB]
Fetched 184 kB in 1s (303 kB/s)
Selecti

In [0]:
# Import the SpeechRecognition package
import speech_recognition as spr

In [7]:
# Validate the installation: displaying the package's version string confirms
# that the import in the previous cell succeeded.
spr.__version__

'3.8.1'

In [0]:
# Create the Recognizer instance that the following cells use to record and
# transcribe audio.
recog = spr.Recognizer()

In [9]:
# recognize_google() must be given the captured audio; calling it with no
# argument raises TypeError. The error is caught and displayed so that this
# demonstration does not stop a "Restart & Run All".
try:
    recog.recognize_google()
except TypeError as err:
    print("TypeError:", err)

TypeError: ignored

### Convert Speech to Text

In [0]:
# Open the sample WAV file as an audio source and read it into `audio`.
# record() is called without offset/duration here, so nothing limits how much
# of the file is read.
# NOTE(review): the absolute /content/... path looks Colab-specific - confirm
# the file exists before running in another environment.
speech = spr.AudioFile('/content/sample_data/maria.wav')
with speech as filesource:
    audio = recog.record(filesource)

In [16]:
# Transcribe the recorded audio; the resulting text is shown as the cell output.
# NOTE(review): recognize_google() calls an online service, so this presumably
# needs network access - confirm.
recog.recognize_google(audio)

'I like I like you but I lied'

### Convert Speech to Text - Capture only particular segments of audio using offset and duration

In [23]:
# Record only the beginning of the file: `duration=10` limits the capture to
# the first 10 seconds (duration is expressed in seconds), then transcribe it.
with speech as filesource:
    audio = recog.record(filesource, duration=10)
    
recog.recognize_google(audio)

'I like I like you but I lied'

In [18]:
# Capture multiple portions of speech one after another. Both record() calls
# read from the same open source, so the second call continues from where the
# first one stopped: audio_1 is the first 5 seconds, audio_2 the next 5.
with speech as filesource:
    audio_1 = recog.record(filesource, duration=5)
    audio_2 = recog.record(filesource, duration=5)

# Transcribe the first segment here; the second is transcribed in the next cell.
recog.recognize_google(audio_1)

'I like I like you'

In [19]:
# Transcribe the second 5-second segment (audio_2) captured in the previous cell.
recog.recognize_google(audio_2)

'but I like Venezuela'

In [0]:
# Capture a later portion of the speech with the offset argument: skip the
# first 5 seconds of the file, then record the following 7 seconds.
with speech as filesource:
    audio = recog.record(filesource, offset=5, duration=7)

# Transcribe just that segment.
recog.recognize_google(audio)

'proceed to the dark blue background it is easy to tell the depth of a well'

### Convert Speech to Text - Effect of Noise

In [0]:
# Baseline for the noise experiment: read the noisy recording and transcribe
# it without any noise compensation.
# NOTE(review): 'noisy_speech.wav' is a relative path - the file must be in
# the notebook's working directory.
noisyspeech = spr.AudioFile('noisy_speech.wav')

with noisyspeech as noisesource:
    audio = recog.record(noisesource)

recog.recognize_google(audio)

'the snail smelling old beer drinkers'

In [0]:
# Same recording, but first let the recognizer calibrate itself against the
# background noise at the start of the stream, then record and transcribe.
# NOTE(review): adjust_for_ambient_noise() reads from the source, so the audio
# it consumes is presumably not included in the record() call that follows -
# confirm against the SpeechRecognition documentation.
with noisyspeech as noisesource:
    recog.adjust_for_ambient_noise(noisesource)
    audio = recog.record(noisesource)

recog.recognize_google(audio)

'still smell like old beer drinkers'

In [0]:
# show_all=True returns the full response - a dict listing every candidate
# transcript (with a confidence score where one is provided) - instead of
# only a single transcript string.
recog.recognize_google(audio, show_all=True)

{'alternative': [{'transcript': 'the snail smelly old gear vendors',
   'confidence': 0.81596899},
  {'transcript': 'the snail smell of old gear vendors'},
  {'transcript': 'the snail smelled old gear vendors'},
  {'transcript': 'the snail smell old gear vendors'},
  {'transcript': 'the snails smell of old gear vendors'}],
 'final': True}

### Convert Speech to Text in Real Time using Microphone

In [0]:
# Create a Microphone audio source without choosing a specific device.
# NOTE(review): presumably this selects the system default input and needs
# PyAudio plus real audio hardware (not available on a hosted runtime) - confirm.
mc = spr.Microphone()

In [0]:
# List the names of the audio devices available on this machine. A name's
# position in the list is the value to pass as device_index to spr.Microphone().
# The same listing can be obtained from the class itself:
# spr.Microphone.list_microphone_names()
mc.list_microphone_names()

['Microsoft Sound Mapper - Input',
 'Stereo Mix (Realtek High Defini',
 'Microphone Array (Realtek High ',
 'Microsoft Sound Mapper - Output',
 'Speaker/Headphone (Realtek High']

In [0]:
# Re-create the microphone source, explicitly selecting device index 0 from
# the list shown by the previous cell.
mc = spr.Microphone(device_index=0)

In [0]:
# Open the microphone and capture spoken input into `audio`.
# NOTE(review): listen() presumably blocks until it detects the end of a
# phrase - confirm against the SpeechRecognition documentation.
with mc as source:
    audio = recog.listen(source)

In [0]:
# Transcribe the phrase captured from the microphone in the previous cell.
recog.recognize_google(audio)

'hello how are you'

In [0]:
# Reducing the effect of noise: calibrate the recognizer against the ambient
# sound picked up by the microphone before listening for the phrase.
with mc as source:
    recog.adjust_for_ambient_noise(source)
    audio = recog.listen(source)

## Speech Recognition-Based Project

In [0]:
#Import Necessary Libraries
import time
import urllib.parse
import urllib.request
import webbrowser as wb

import pafy
import speech_recognition as spr
import vlc
from bs4 import BeautifulSoup

# Voice-driven YouTube search: listen for a command containing the word
# "search", listen again for the search keyword, open the results page in the
# browser, scrape the video links from it and play one of them with VLC.

#Create an empty list to store all the video URLs from the youtube.com page
linklist = []

#Create Recognizer() class objects: recog1 handles the trigger command,
#recog2 handles the search keyword
recog1 = spr.Recognizer()
recog2 = spr.Recognizer()

#Create microphone instance with device microphone chosen whose index value is 0
mc = spr.Microphone(device_index=0)

#Capture voice
with mc as source:
    print("Search Youtube video to play")
    print("----------------------------")
    print("You can speak now")
    audio = recog1.listen(source)

#Transcribe the command. Recognition failures are reported instead of being
#left to crash the cell; an empty command simply skips the search below.
try:
    command = recog1.recognize_google(audio)
except spr.UnknownValueError:
    print("Unable to understand the input")
    command = ''
except spr.RequestError as e:
    print("Unable to provide required output: {0}".format(e))
    command = ''

#Based on speech, open youtube search page in a browser, pick a video link and play it in VLC media player
#(case-insensitive match, so a capitalised "Search" also triggers it)
if 'search' in command.lower():
    url = 'https://www.youtube.com/results?search_query='
    with mc as source:
        print('Searching for the video(s)...')
        audio = recog2.listen(source)

    #The microphone is released before the network and playback work starts
    try:
        get_keyword = recog2.recognize_google(audio)
        print(get_keyword)
        #URL-encode the keyword: spoken phrases contain spaces and other
        #characters that are not valid in a raw query string
        search_url = url + urllib.parse.quote_plus(get_keyword)
        wb.get().open_new(search_url)
        #Close the HTTP response as soon as the page has been read
        with urllib.request.urlopen(search_url) as response:
            html = response.read()
        soup = BeautifulSoup(html, 'html.parser')
        for vid in soup.find_all(attrs={'class':'yt-uix-tile-link'}):
            linklist.append('https://www.youtube.com' + vid['href'])
        if not linklist:
            #Nothing scraped (e.g. the page markup changed) - report it rather
            #than failing with an IndexError
            print("No video links found in the search results")
        else:
            #NOTE(review): the original indexes linklist[1] (the second scraped
            #link) although its comment says "first"; index 1 is kept when
            #available, falling back to index 0 - confirm which is intended.
            chosen_link = linklist[1] if len(linklist) > 1 else linklist[0]
            videolink = pafy.new(chosen_link)
            bestlink = videolink.getbest()
            media = vlc.MediaPlayer(bestlink.url)
            #Playback keeps running after this cell finishes; media.stop() in
            #the following cell ends it
            media.play()
    except spr.UnknownValueError:
        print("Unable to understand the input")
    except spr.RequestError as e:
        print("Unable to provide required output: {0}".format(e))

Search Youtube video to play
----------------------------
You can speak now
Searching for the video(s)...
coding


In [0]:
# Stop the VLC playback started by the project cell. `media` only exists when
# a video was actually found and played, so look it up defensively instead of
# raising NameError when there is nothing to stop.
active_player = globals().get('media')
if active_player is not None:
    active_player.stop()
else:
    print("Nothing is playing")