In [None]:
# Colab-only setup: mount Google Drive at /content/drive so that the dataset
# path configured in data_folder (under /content/drive/MyDrive) is readable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import librosa
import numpy as np
from sklearn.mixture import GaussianMixture
import pandas as pd

# Root directory of the dataset on the mounted Drive; load_data_labels treats
# every entry in it as a folder of recordings for one word.
data_folder = "/content/drive/MyDrive/KAGGLE-2/SpeechCommand"

# Word -> integer class id. Ids are assigned by position in the tuple below
# ('right' -> 0 ... 'four' -> 29), so the order of the words is significant.
Class_mapping = {
    word: class_id
    for class_id, word in enumerate((
        'right', 'eight', 'cat', 'tree', 'bed', 'happy', 'go', 'dog', 'no', 'wow',
        'nine', 'left', 'stop', 'three', 'sheila', 'one', 'bird', 'zero', 'seven', 'up',
        'marvin', 'two', 'house', 'down', 'six', 'yes', 'on', 'five', 'off', 'four',
    ))
}

# NOTE(review): no code visible in this notebook reads `order` or
# `frame_period`, and extract_features works with the sampling rate handed
# back by librosa.load rather than with `fs` — confirm whether these three
# settings are still needed.
order = 37
fs = 16000
frame_period = 5



In [None]:
def extract_features(file_path, sr=22050, n_mfcc=17):
    """Compute a per-frame feature matrix for a single audio file.

    The signal is loaded with librosa, pre-emphasised, and described by MFCCs,
    their first- and second-order deltas, chroma and spectral contrast.

    Args:
        file_path: Path of the audio file to read.
        sr: Target sampling rate handed to ``librosa.load``. The default of
            22050 is the rate ``librosa.load`` resamples to when none is
            given, so existing callers get exactly the same features as
            before. Pass ``None`` to keep the file's native rate.
        n_mfcc: Number of MFCC coefficients; the delta and delta-delta
            blocks have the same number of rows.

    Returns:
        The stacked feature matrix, transposed so that each row is one
        analysis frame and the columns are MFCC, delta, delta-delta, chroma
        and spectral-contrast values, in that order.
    """
    # The module-level `fs` setting is deliberately not consulted (and no
    # longer shadowed by a local of the same name): every feature below uses
    # the rate that librosa.load actually returned for this signal.
    signal, sample_rate = librosa.load(file_path, sr=sr)
    signal = librosa.effects.preemphasis(signal)

    # MFCC features and their temporal derivatives
    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    delta = librosa.feature.delta(mfcc)
    delta_delta = librosa.feature.delta(mfcc, order=2)

    # Chroma and spectral contrast features
    chroma = librosa.feature.chroma_stft(y=signal, sr=sample_rate)
    spectral_contrast = librosa.feature.spectral_contrast(y=signal, sr=sample_rate)

    # Stack feature-major, then transpose to frame-major for the caller.
    combined_features = np.vstack([mfcc, delta, delta_delta, chroma, spectral_contrast])
    return combined_features.T



In [None]:
def load_data_labels(path):
    """Collect per-class frame features and per-file labels from a dataset folder.

    Every sub-directory of ``path`` other than ``_background_noise_`` is
    treated as one class. Entries are visited in sorted name order so that
    the class index assigned here is the same on every run; ``os.listdir``
    on its own gives no ordering guarantee. Plain files in ``path`` are
    ignored instead of being mistaken for class folders.

    Args:
        path: Dataset root containing one sub-directory per word.

    Returns:
        A tuple ``(features, labels, dict2)``:

        * ``features`` - list with one entry per class directory; each entry
          is a list of the frame vectors of every ``.wav`` file in it.
        * ``labels`` - one ``Class_mapping`` id per ``.wav`` file (per file,
          not per frame), in visiting order.
        * ``dict2`` - dict mapping a position in ``features`` to the
          directory name. This position comes from the sorted directory
          order and is not necessarily the ``Class_mapping`` id of the word.

    Raises:
        KeyError: If a directory that contains ``.wav`` files is not a key
            of ``Class_mapping``.
    """
    features = []
    labels = []
    index_to_word = {}

    for word in sorted(os.listdir(path)):
        files_path = os.path.join(path, word)
        # Skip the noise folder and anything that is not a class directory
        # before an index is handed out, so indices stay contiguous.
        if word == "_background_noise_" or not os.path.isdir(files_path):
            continue

        index_to_word[len(features)] = word
        class_frames = []

        for file_name in sorted(os.listdir(files_path)):
            if file_name.endswith(".wav"):
                file_path = os.path.join(files_path, file_name)
                # extract_features returns one row per frame; extend keeps
                # the frames of all files of this class in one flat list.
                class_frames.extend(extract_features(file_path))
                labels.append(Class_mapping[word])

        features.append(class_frames)

    return features, labels, index_to_word



In [None]:
def train_models(features):
    """Fit one single-component Gaussian mixture per class.

    The number of models follows the length of ``features`` instead of a
    fixed class count, so the function works for any number of classes.

    Args:
        features: Sequence with one entry per class; each entry holds the
            feature vectors to fit that class's model on.

    Returns:
        List of fitted ``GaussianMixture`` objects, in the same order as
        ``features``. An empty input yields an empty list.

    Raises:
        ValueError: If a class has no feature vectors; the message names the
            offending position in ``features``.
    """
    models = []
    for class_index, class_frames in enumerate(features):
        # Fail early with the class position rather than with the generic
        # input-validation error the estimator would raise on empty data.
        if len(class_frames) == 0:
            raise ValueError(
                f"No feature vectors for class index {class_index}; cannot fit a model"
            )
        gm = GaussianMixture(n_components=1, random_state=0).fit(class_frames)
        models.append(gm)

    return models

import joblib



In [None]:
def save_models(models):
    """Serialise every model to its own ``model_<position>.pkl`` file.

    Args:
        models: Iterable of objects to write with ``joblib.dump``. The
            position of an object in the iteration becomes the number in
            its file name; the names are relative, so the files are created
            in the current working directory.
    """
    for position, fitted_model in enumerate(models):
        joblib.dump(fitted_model, f"model_{position}.pkl")



In [None]:
def save_dict_to_text(dict2, filename):
    """Dump a mapping to a text file, one ``key: value`` line per entry.

    Args:
        dict2: Mapping to write; entries appear in its iteration order.
        filename: Destination path. The file is opened in write mode, so
            existing content is replaced.
    """
    with open(filename, 'w') as out:
        # Lines are produced lazily inside the `with`, so the file is opened
        # before the mapping is iterated.
        out.writelines(f"{key}: {value}\n" for key, value in dict2.items())



In [None]:
def train_and_save_models():
    """Run the whole pipeline: load features, fit models, persist the results.

    Reads the dataset under ``data_folder``, fits one model per class, writes
    the models through ``save_models`` and stores the index -> word table
    returned by ``load_data_labels`` in ``dict2.txt``.
    """
    # The per-file labels are returned by the loader but not needed here.
    class_features, _labels, index_to_word = load_data_labels(data_folder)
    save_models(train_models(class_features))

    # Keep the index -> word table next to the models so a model number can
    # be translated back to its word.
    save_dict_to_text(index_to_word, "dict2.txt")

# Entry point: run the full training pipeline when this code is executed as
# the main module.
# NOTE(review): presumably this also fires when the cell is run in a notebook
# kernel, where __name__ is expected to be "__main__" — confirm.
if __name__ == "__main__":
    train_and_save_models()
