<a href="https://colab.research.google.com/github/Guhan2348519/SPR_labs/blob/main/2348519_SPR_lab3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:

# Record audio using PyAudio
def record_audio(filename="output.wav", duration=5):
    """Record mono 16-bit audio from the default input device to a WAV file.

    Audio is captured at 16 kHz in 1024-frame chunks for roughly
    ``duration`` seconds and then written to ``filename``.

    Args:
        filename: Path of the WAV file to create (overwritten if present).
        duration: Recording length in seconds.
    """
    chunk = 1024
    sample_format = pyaudio.paInt16
    channels = 1
    rate = 16000

    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(sample_format)

        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=rate,
                        input=True,
                        frames_per_buffer=chunk)
        try:
            print("Speak something...")  # Prompt the user to start speaking

            # Whole chunks only: any trailing partial chunk is dropped.
            frames = [
                stream.read(chunk)
                for _ in range(int(rate / chunk * duration))
            ]

            print("Recording finished.")  # Inform the user recording has ended
        finally:
            # Release the input device even if a read fails mid-recording.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # The context manager closes the file even if writing raises.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))

In [None]:
# Preprocess audio using Librosa (extract MFCC features)
def preprocess_audio(filename):
    """Load an audio file, extract its MFCCs, plot them and return them.

    The file is loaded at a 16 kHz sample rate and 13 MFCC coefficients are
    computed. The coefficients are shown in a matplotlib figure before being
    returned.

    Args:
        filename: Path of the audio file to load.

    Returns:
        The array produced by ``librosa.feature.mfcc``, or ``None`` if any
        step (import, loading, feature extraction or plotting) failed.
    """
    print("Recognizing...")  # Indicate that recognition is in progress
    try:
        # A plain `import librosa` does not load the display submodule on
        # librosa releases without lazy loading, so import it explicitly
        # before calling specshow.
        import librosa.display

        audio, sr = librosa.load(filename, sr=16000)
        mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)

        # Visualize MFCCs
        plt.figure(figsize=(10, 4))
        librosa.display.specshow(mfccs, sr=sr, x_axis='time')
        plt.colorbar()
        plt.title('MFCC')
        plt.tight_layout()
        plt.show()

        return mfccs
    except Exception as e:
        # Best-effort by design: report the problem and let the caller
        # decide what to do with a missing result.
        print("Error during recognition:", str(e))
        return None

In [10]:
import pyaudio
import wave
import librosa
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tkinter import Tk, filedialog

In [None]:

# Recognize speech using a pre-trained KNN model
def recognize_speech(mfccs, knn):
    """Predict a label for one utterance and report it on stdout.

    Args:
        mfccs: MFCC matrix exposing ``mean(axis=1)``; assumed to be shaped
            (n_mfcc, frames) so the mean yields one value per coefficient
            -- TODO confirm against the feature extractor.
        knn: Fitted classifier exposing ``predict``.

    Returns:
        The predicted label, or ``None`` if prediction failed.
    """
    try:
        # Average each coefficient over axis 1 to get a single
        # fixed-length feature vector for the classifier.
        feature_vector = mfccs.mean(axis=1)
        predicted_text = knn.predict([feature_vector])[0]
    except Exception as e:
        print(f"Error in recognizing speech: {e}")
        print("Speech Recognition could not understand audio. Please try speaking more clearly.")
        return None

    print(f"Speech recognized: '{predicted_text}'")
    print("Speech successfully converted to text!")
    return predicted_text

In [None]:
def train_model():
    """Fit and return a 3-nearest-neighbour classifier on simulated data.

    The features are 100 random 13-dimensional vectors standing in for
    MFCC means, labelled cyclically with four example words. The accuracy
    on a 20% hold-out split is printed before the fitted model is returned.
    """
    vocabulary = ["hello", "lights", "off", "on"]

    # Simulated stand-ins for real MFCC features and their labels.
    features = np.random.rand(100, 13)
    labels = np.array(vocabulary * 25)

    train_x, test_x, train_y, test_y = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    classifier = KNeighborsClassifier(n_neighbors=3)
    classifier.fit(train_x, train_y)

    # Evaluate on the held-out portion.
    score = accuracy_score(test_y, classifier.predict(test_x))
    print(f"Training complete. Model accuracy: {score * 100:.2f}%")

    return classifier

In [None]:
# Function to upload an audio file
def upload_audio_file():
    """Ask the user to pick a .wav or .mp3 file via a native file dialog.

    Returns:
        The value returned by ``filedialog.askopenfilename``: the selected
        path, or a falsy value when the dialog is cancelled.
    """
    root = Tk()
    root.withdraw()  # Hide the main window
    try:
        return filedialog.askopenfilename(
            title="Select an Audio File",
            filetypes=[("Audio Files", "*.wav *.mp3")],
        )
    finally:
        # Tear down the hidden root so repeated calls do not accumulate
        # Tk interpreters / invisible windows.
        root.destroy()

def main():
    """Run the interactive demo: train, acquire audio, extract MFCCs, predict.

    The user chooses between recording from the microphone and selecting an
    existing audio file. Any invalid choice or cancelled selection ends the
    run with a message.
    """
    # Train the model (for demonstration purposes)
    model = train_model()

    # Present the two input options.
    for line in (
        "Choose an option:",
        "1. Record speech using microphone",
        "2. Upload an audio file",
    ):
        print(line)

    selection = input("Enter 1 or 2: ")

    if selection not in ('1', '2'):
        print("Invalid choice. Exiting.")
        return

    if selection == '1':
        # The recorder writes to its default output path, output.wav.
        record_audio()
        audio_path = "output.wav"
    else:
        audio_path = upload_audio_file()
        if not audio_path:
            print("No file selected. Exiting.")
            return

    # Preprocess audio and extract MFCCs
    features = preprocess_audio(audio_path)
    if features is None:
        return

    # Recognize speech based on MFCCs
    recognize_speech(features, model)

# Run the interactive demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()