In [None]:
#IMPORTING THE LIBRARIES.
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import librosa

In [None]:
# Root of the training set; later cells read one sub-folder per class from here.
parent_directory = "/kaggle/input/speechdataset/SpeechCommand-20231108T141130Z-001/SpeechCommand"

# Names of every entry found directly under the training root.
folder_list = os.listdir(parent_directory)

In [None]:
# Maps each spoken word to its integer class id. The id of a word is simply
# its position in the ordered list below (0-based).
class_mapping = {
    word: index
    for index, word in enumerate([
        'right', 'eight', 'cat', 'tree', 'bed', 'happy', 'go', 'dog', 'no', 'wow',
        'nine', 'left', 'stop', 'three', 'sheila', 'one', 'bird', 'zero',
        'seven', 'up', 'marvin', 'two', 'house', 'down', 'six', 'yes',
        'on', 'five', 'off', 'four',
    ])
}

In [None]:
# Sample Audio file for MFCC extraction
# No `sr` is passed here, so librosa's default target sampling rate applies.
audio_data, sample_rate = librosa.load(
    "/kaggle/input/speechdataset/SpeechCommand-20231108T141130Z-001/SpeechCommand/bed/00176480_nohash_0.wav"
)

# Analysis window (25 ms) and hop (10 ms), converted from seconds to samples.
frame_length = int(sample_rate * 0.025)
hop_length = int(sample_rate * 0.010)

# 13 MFCCs per frame, followed by their first- and second-order deltas.
mfccs = librosa.feature.mfcc(
    y=audio_data,
    sr=sample_rate,
    n_mfcc=13,
    n_fft=frame_length,
    hop_length=hop_length,
)
deltas = librosa.feature.delta(mfccs, order=1)
deltas_deltas = librosa.feature.delta(mfccs, order=2)

# One shape per line: MFCCs, deltas, delta-deltas.
print(mfccs.shape, deltas.shape, deltas_deltas.shape, sep="\n")

In [None]:
# Build the training features.
#
# For every class in class_mapping, each .wav file in that class's folder is
# turned into per-frame MFCC, delta and delta-delta features. The frames of all
# files of a class are stacked row-wise, so each list below receives exactly
# one entry per class, in the iteration order of class_mapping:
#   class_mfcc_list          -> (total_frames, n_mfcc) MFCCs
#   delta_mfcc_list          -> (total_frames, n_mfcc) first-order deltas
#   delta_delta_mfcc_list    -> (total_frames, n_mfcc) second-order deltas
#   combined_train_data_list -> the three blocks above side by side
#   class_label_list         -> list with the class id repeated once per FILE
#                               (not per frame)
class_mfcc_list = []
delta_mfcc_list = []
delta_delta_mfcc_list = []
combined_train_data_list = []
class_label_list = []

n_mfcc = 13  # number of cepstral coefficients kept per frame

# Iterate through each class
for class_name, class_label in class_mapping.items():
    class_directory = os.path.join(parent_directory, class_name)

    # Per-class accumulators: one (frames, n_mfcc) array / one label per file.
    mfccs_list = []
    delta_list = []
    delta_delta_list = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the stacked feature matrices reproducible between runs.
    for filename in sorted(os.listdir(class_directory)):
        if not filename.endswith(".wav"):
            continue

        file_path = os.path.join(class_directory, filename)
        # Every training clip is resampled to 22050 Hz on load.
        audio_data, sample_rate = librosa.load(file_path, sr=22050)

        frame_length = int(sample_rate * 0.025)  # Frame length (25 ms)
        hop_length = int(sample_rate * 0.010)  # Hop length (10 ms)

        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=n_mfcc,
                                     n_fft=frame_length, hop_length=hop_length)
        delta = librosa.feature.delta(mfccs)
        delta_delta = librosa.feature.delta(mfccs, order=2)

        # librosa returns (n_mfcc, frames); transpose so each row is one frame.
        mfccs_list.append(mfccs.T)
        delta_list.append(delta.T)
        delta_delta_list.append(delta_delta.T)
        labels.append(class_label)

    # np.concatenate fails with an opaque message on an empty list; report the
    # offending class folder instead.
    if not mfccs_list:
        raise ValueError(f"No .wav files found for class '{class_name}' in {class_directory}")

    mfcc_features = np.concatenate(mfccs_list)
    delta_features = np.concatenate(delta_list)
    delta_delta_features = np.concatenate(delta_delta_list)
    combined_features = np.hstack((mfcc_features, delta_features, delta_delta_features))
    print(f"Shape of mfcc_features {mfcc_features.shape}")

    class_mfcc_list.append(mfcc_features)
    delta_mfcc_list.append(delta_features)
    delta_delta_mfcc_list.append(delta_delta_features)
    combined_train_data_list.append(combined_features)
    class_label_list.append(labels)

In [None]:
from sklearn.mixture import GaussianMixture

# One single-component, full-covariance Gaussian model per class, keyed by the
# integer class id.
class_gmms = {}

for per_file_labels, class_features in zip(class_label_list, combined_train_data_list):
    # Every entry of a class's label list is the same id, so the first one
    # identifies the class.
    label_id = per_file_labels[0]

    # fit() returns the fitted estimator itself, so it can be stored directly.
    class_gmms[label_id] = GaussianMixture(
        n_components=1,
        covariance_type='full',
        random_state=42,
    ).fit(class_features)

    print(f" Done Class {label_id}")

In [None]:
# Folder that holds the test clips; file names are joined onto this path later.
test_data_directory = "/kaggle/input/speechdataset/SpeechCommandTest-20231108T114713Z-001/SpeechCommandTest"

In [None]:
import pandas as pd

# Load the test manifest and preview its first five rows.
data = pd.read_csv("/kaggle/input/test-gmm/test.csv")
data.head(n=5)

In [None]:
# Pull the AUDIO_FILE column out of the manifest and turn it into a plain
# Python list; the cell then displays how many entries it has.
audio_file_columns = data.loc[:, 'AUDIO_FILE']
test_data_list = audio_file_columns.tolist()
len(test_data_list)

In [None]:
# Build the test features.
#
# Each test clip is converted to per-frame MFCC, delta and delta-delta
# features. Every list below gets one (frames, ...) array per clip, in the same
# order as test_data_list.
test_mfcc_list = []
test_delta_list = []
test_delta_delta_list = []
combined_test_data_list = []

n_mfcc = 13  # number of cepstral coefficients kept per frame

for audio in test_data_list:
    file_path = os.path.join(test_data_directory, audio)
    # Load at 22050 Hz, the same rate the training clips are resampled to.
    # Loading at the file's native rate (sr=None) would change the frame size
    # in samples and the mel filter bank, producing features that are not
    # comparable with the ones the class models were fitted on.
    audio_data, sample_rate = librosa.load(file_path, sr=22050)

    frame_length = int(sample_rate * 0.025)  # Frame length (25 ms)
    hop_length = int(sample_rate * 0.010)  # Hop length (10 ms)

    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=n_mfcc,
                                 n_fft=frame_length, hop_length=hop_length)
    delta = librosa.feature.delta(mfccs)
    delta_delta = librosa.feature.delta(mfccs, order=2)

    # librosa returns (n_mfcc, frames); transpose so each row is one frame,
    # then place MFCC | delta | delta-delta side by side.
    combined_features = np.hstack((mfccs.T, delta.T, delta_delta.T))

    test_mfcc_list.append(mfccs.T)
    test_delta_list.append(delta.T)
    test_delta_delta_list.append(delta_delta.T)
    combined_test_data_list.append(combined_features)

In [None]:
# Sanity check: dimensions of the feature matrix built for the first test clip.
np.shape(combined_test_data_list[0])

In [None]:
# Classify every test clip: score its frames under each class model and keep
# the class id whose model gives the highest score.
predicted_labels_list = []

for features in combined_test_data_list:
    # GaussianMixture.score gives the average per-sample log-likelihood of the
    # clip's frames under that class's model.
    likelihoods = {
        label_id: model.score(features)
        for label_id, model in class_gmms.items()
    }
    best_label = max(likelihoods, key=likelihoods.get)
    predicted_labels_list.append(best_label)

In [None]:
# Re-read the manifest, attach the predictions as TARGET, and drop the
# AUDIO_FILE column before writing the result.
data = (
    pd.read_csv("/kaggle/input/test-gmm/test.csv")
    .assign(TARGET=predicted_labels_list)
    .drop(columns=['AUDIO_FILE'])
)

# Save DataFrame to a CSV file
# Change 'predicted_labels.csv' to your desired file name
data.to_csv('predicted_labels.csv', index=False)
