In [None]:
import pyaudio
import wave
import speech_recognition as sr
from pydub import AudioSegment
from os import path 

def record_audio(filename, duration=5, channels=1, rate=44100, chunk=1024):
    """Record audio from the microphone and save it to a WAV file.

    The input stream and the PyAudio instance are always released, even if
    reading from the device fails or the recording is interrupted.

    Args:
        filename (str): The name of the output WAV file.
        duration (int): The duration of the recording in seconds.
        channels (int): The number of audio channels (1 for mono, 2 for stereo).
        rate (int): The sample rate in Hz.
        chunk (int): The number of frames per buffer.
    """
    sample_format = pyaudio.paInt16
    # Whole buffers only: any trailing partial buffer is not recorded.
    chunk_count = int(rate / chunk * duration)
    frames = []

    audio = pyaudio.PyAudio()
    try:
        # Resolve the sample width up front so writing the WAV header does not
        # rely on the PyAudio instance after it has been terminated.
        sample_width = audio.get_sample_size(sample_format)

        # Open the stream
        stream = audio.open(format=sample_format,
                            channels=channels,
                            rate=rate,
                            input=True,
                            frames_per_buffer=chunk)
        try:
            print("Recording...")

            # Record for the specified duration
            for _ in range(chunk_count):
                frames.append(stream.read(chunk))

            print("Finished recording.")
        finally:
            # Stop and close the stream, including when a read fails
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # Save the recorded data as a WAV file
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))

In [None]:
def analyse_audio(filename, language="en-EN"):
    """Transcribe a recorded WAV file with the Google Web Speech API.

    The transcript is printed and also returned. Recognition failures are
    reported on stdout instead of being raised.

    Args:
        filename (str): The name of the WAV file to analyze.
        language (str): Language tag passed to the recognizer.
            NOTE(review): the default "en-EN" is kept for backward
            compatibility, but it does not look like a standard tag;
            "en-US" or "en-GB" was probably intended -- confirm.

    Returns:
        str | None: The recognized text, or None if recognition failed.
    """
    print(f"Analyzing {filename}...")

    recognizer = sr.Recognizer()
    # Load the whole file first so it is closed before the network request.
    with sr.AudioFile(filename) as source:
        audio = recognizer.record(source)

    text = None
    try:
        text = recognizer.recognize_google(audio, language=language)
        print(text)
    except sr.UnknownValueError:
        # This exception carries no message, so say what happened explicitly.
        print("Please try again")
        print("Sorry, I could not understand the audio.")
    except sr.RequestError as e:
        print("Please try again")
        print(f"Could not request results; {e}")
    except Exception as e:
        # Best-effort fallback: report anything unexpected rather than crash.
        print("Please try again")
        print(e)
    print("Analysis complete.")
    return text
    
# Record five seconds of mono audio into output.wav, then transcribe that file.
record_audio(
    "output.wav",
    duration=5,
    channels=1,
    rate=44100,
    chunk=1024,
)
analyse_audio("output.wav")

In [None]:
# Initialize the recognizer and microphone
micro = sr.Microphone()
r = sr.Recognizer()

# Keep the microphone open only while listening; it is released before the
# network request below so the device is not held during recognition.
with micro as source:
    print("Please say something...")
    audio = r.listen(source)

print("Recognizing...")
try:
    # Recognize speech using Google Web Speech API
    # NOTE(review): "en-EN" does not look like a standard language tag;
    # "en-US" or "en-GB" was probably intended -- confirm before changing.
    text = r.recognize_google(audio, language="en-EN")
    print("You said: " + text)
except sr.UnknownValueError:
    print("Sorry, I could not understand the audio.")
except sr.RequestError as e:
    print(f"Could not request results; {e}")

In [None]:
import vosk
import pyaudio
import json

# The model was downloaded beforehand, extracted, and saved in a local
# directory; model_path points at that directory.
model_path = "vosk-model-small-fr-0.22"
# Initialize the model from the local path
model = vosk.Model(model_path)

# If you don't want to download the model manually, pass the "lang" argument
# to vosk.Model() instead and it will download the right model, e.g. for
# US English:
# model = vosk.Model(lang="en-us")

# The recognizer and the microphone stream must use the same sample rate.
SAMPLE_RATE = 16000
# Frames requested per read (16-bit mono, so 8192 bytes per read).
FRAMES_PER_READ = 4096
# Saying this word on its own ends the session.
STOP_WORD = "fin"

# Create a recognizer
rec = vosk.KaldiRecognizer(model, SAMPLE_RATE)

# Specify the path for the output text file
output_file_path = "recognized_text.txt"

p = pyaudio.PyAudio()
stream = None
try:
    # Open the microphone stream
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=SAMPLE_RATE,
                    input=True,
                    frames_per_buffer=8192)

    # UTF-8 so accented characters are written the same way on every platform.
    with open(output_file_path, "w", encoding="utf-8") as output_file:
        print("Listening for speech. Say 'fin' to stop.")
        # Start streaming and recognize speech
        while True:
            data = stream.read(FRAMES_PER_READ)
            # AcceptWaveform is falsy until the recognizer has a result ready.
            if not rec.AcceptWaveform(data):
                continue

            # Parse the JSON result and get the recognized text
            result = json.loads(rec.Result())
            recognized_text = result.get("text", "")
            if not recognized_text:
                # Nothing was recognized; keep blank lines out of the transcript.
                continue

            # Write recognized text to the file
            output_file.write(recognized_text + "\n")
            print(recognized_text)

            # Match the stop word as a whole word: a substring test would
            # also trigger on words such as "enfin" or "finalement".
            if STOP_WORD in recognized_text.lower().split():
                print("Termination keyword detected. Stopping...")
                break
finally:
    # Release the audio resources even if the loop fails or is interrupted.
    if stream is not None:
        stream.stop_stream()
        stream.close()
    p.terminate()

In [8]:
# Scratch cell: a bare hexadecimal literal; the notebook echoes its decimal value.
0xdeadbeef

3735928559