### **Reading simple audio files**

source: https://www.thepythoncode.com/article/using-speech-recognition-to-convert-speech-to-text-python

In [2]:
#!pip3 install SpeechRecognition pydub

Collecting SpeechRecognition
  Downloading SpeechRecognition-3.9.0-py2.py3-none-any.whl (32.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub, SpeechRecognition
Successfully installed SpeechRecognition-3.9.0 pydub-0.25.1


In [3]:
import speech_recognition as sr

In [4]:
# WAV file to transcribe; a relative path, so it is resolved against the current working directory
filename = "16-122828-0002.wav"

In [5]:
# create the Recognizer instance that the following cells use to load and transcribe audio
r = sr.Recognizer()

In [6]:
# read the complete recording into memory; the `with` block only wraps the file access
with sr.AudioFile(filename) as source:
    audio_data = r.record(source)

# convert the captured speech to text and show the transcript
text = r.recognize_google(audio_data)
print(text)

result2:
{   'alternative': [   {   'confidence': 0.93603683,
                           'transcript': "I believe you're just talking "
                                         'nonsense'},
                       {   'transcript': 'I believe you are just talking '
                                         'nonsense'},
                       {'transcript': 'I believe your just talking nonsense'},
                       {   'transcript': 'I believe you were just talking '
                                         'nonsense'}],
    'final': True}
I believe you're just talking nonsense


### **Reading large audio files**

In [1]:
# importing libraries 
import speech_recognition as sr 
import os 
from pydub import AudioSegment
from pydub.silence import split_on_silence

# module-level speech recognition object; get_large_audio_transcription below
# reads it as a global for both loading and recognizing every chunk
r = sr.Recognizer()

# a function that splits the audio file into chunks
# and applies speech recognition
def get_large_audio_transcription(path, min_silence_len=500, silence_margin_db=14,
                                  keep_silence=500, folder_name="audio-chunks"):
    """
    Transcribe a long WAV file by splitting it on silence and running
    speech recognition on each resulting chunk.

    Every chunk is exported to `folder_name` as `chunk<i>.wav` (numbered
    from 1), loaded with the module-level recognizer `r`, and sent to
    `r.recognize_google`. Each recognized chunk is capitalized, suffixed
    with ". ", printed together with its file name, and appended to the
    result.

    Parameters
    ----------
    path : str
        Path of the WAV file to transcribe.
    min_silence_len : int, optional
        Minimum length of silence, in milliseconds, at which the audio is
        split (default 500). Experiment with this value per recording.
    silence_margin_db : float, optional
        How far below the recording's average loudness (`sound.dBFS`) a
        segment must be to count as silence (default 14).
    keep_silence : int, optional
        Amount of silence, in milliseconds, kept around each chunk
        (default 500).
    folder_name : str, optional
        Directory the chunk files are written to; created if missing
        (default "audio-chunks").

    Returns
    -------
    str
        Concatenation of the text of all recognized chunks; an empty
        string when no chunk was recognized.

    Notes
    -----
    Chunks that raise `sr.UnknownValueError` are reported with
    `print("Error:", ...)` and skipped. Any other exception (for example
    `sr.RequestError` from the recognition service) is not caught here and
    propagates to the caller.
    """
    # open the audio file using pydub
    sound = AudioSegment.from_wav(path)
    # split the audio wherever the silence lasts `min_silence_len` ms or more
    chunks = split_on_silence(
        sound,
        min_silence_len=min_silence_len,
        # threshold is relative to this recording's average loudness
        silence_thresh=sound.dBFS - silence_margin_db,
        keep_silence=keep_silence,
    )
    # create the directory that stores the audio chunks (no-op if it exists)
    os.makedirs(folder_name, exist_ok=True)
    # collect the pieces and join once at the end instead of repeated `+=`
    recognized = []
    for i, audio_chunk in enumerate(chunks, start=1):
        # export the chunk so that sr.AudioFile can read it back from disk
        chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
        audio_chunk.export(chunk_filename, format="wav")
        # load the chunk; the file is released before the network request
        with sr.AudioFile(chunk_filename) as source:
            audio_listened = r.record(source)
        # try converting it to text
        try:
            text = r.recognize_google(audio_listened)
        except sr.UnknownValueError as e:
            # unintelligible chunk: report it and carry on with the next one
            print("Error:", str(e))
            continue
        text = f"{text.capitalize()}. "
        print(chunk_filename, ":", text)
        recognized.append(text)
    # return the text for all chunks detected
    return "".join(recognized)



In [2]:
# long recording to transcribe chunk by chunk
path = "7601-291468-0006.wav"
# run the transcription first (it prints per-chunk progress), then show the combined text
full_text = get_large_audio_transcription(path)
print("\nFull text:", full_text)

result2:
{   'alternative': [   {   'confidence': 0.84238338,
                           'transcript': 'his abode which he had fixed at '
                                         'about 3 or country see'},
                       {   'transcript': 'his abode which he had fixed in a '
                                         'bowery or countryside'},
                       {   'transcript': 'his abode which he had fixed at '
                                         'about 3 or country seat'},
                       {   'transcript': 'his abode which he had fixed at '
                                         'about 3 or countryside'},
                       {   'transcript': 'his abode which he had fixed at a '
                                         'bowery or countryside'}],
    'final': True}
audio-chunks/chunk1.wav : His abode which he had fixed at about 3 or country see. 
result2:
{   'alternative': [   {   'confidence': 0.91398591,
                           'transcript': 'had a sh

### **Reading from the microphone**

In [12]:
#brew install portaudio
#!pip3 install pyaudio
#import pyaudio as pa
import speech_recognition as sr

In [13]:
# Ali's code
# Listen on the default microphone until the speaker pauses, then transcribe.
r = sr.Recognizer()
with sr.Microphone() as source:
    print("Please say something")
    audio = r.listen(source)
    print("Time over, thanks")

# Recognition happens after the microphone has been released. Failures are
# reported instead of being silently swallowed, and unrelated exceptions
# (e.g. KeyboardInterrupt) are no longer hidden by a bare `except`.
try:
    print("I think you said: " + r.recognize_google(audio))
except sr.UnknownValueError:
    # the service returned no usable transcript for this recording
    print("Sorry, I could not understand what you said")
except sr.RequestError as e:
    # the recognition service could not be reached or rejected the request
    print("Could not get results from the speech recognition service:", e)

Please say something
Time over, thanks
result2:
{   'alternative': [   {   'confidence': 0.85474348,
                           'transcript': "hi I'm idiot song"},
                       {'transcript': 'hi I made a song'},
                       {'transcript': "hi I'm hideous song"},
                       {'transcript': "hi I'm a da song"},
                       {'transcript': "hi I'm a deer song"}],
    'final': True}
I think you said: hi I'm idiot song


In [11]:
# capture a fixed-length recording (duration=5) from the default microphone
with sr.Microphone() as source:
    audio_data = r.record(source, duration=5)

# the microphone is only needed for the capture; transcribe afterwards
print("Recognizing...")
text = r.recognize_google(audio_data)
print(text)

Recognizing...
result2:
{   'alternative': [   {   'confidence': 0.9284699,
                           'transcript': "now it's working it's working now"},
                       {'transcript': "no it's working it's working now"},
                       {'transcript': "I know it's working it's working now"},
                       {'transcript': "now it's working it's party now"},
                       {'transcript': "now it's working it's starting now"}],
    'final': True}
now it's working it's working now
