In [1]:
import os
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Flatten
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pickle

In [2]:
# Define dataset path
DATASET_PATH = "../data/speech_samples/1/"

In [3]:
def load_audio_files(dataset_path):
    data = []
    labels = []
    actors = os.listdir(dataset_path)
    
    for actor in actors:
        actor_path = os.path.join(dataset_path, actor)
        if os.path.isdir(actor_path):
            for file in os.listdir(actor_path):
                if file.endswith(".wav"):
                    file_path = os.path.join(actor_path, file)
                    signal, sr = librosa.load(file_path, sr=22050)
                    mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
                    mfccs = np.mean(mfccs, axis=1)  # Take mean across time axis
                    
                    data.append(mfccs)
                    labels.append(actor)  # Using actor ID as a label for now
    
    return np.array(data), np.array(labels)

In [4]:
# Load dataset
X, y = load_audio_files(DATASET_PATH)

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [5]:
with open('../model/speech_label_encoder.pkl', 'wb') as file:
    pickle.dump(label_encoder, file)

In [6]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


In [7]:
# Define LSTM Model
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    Dropout(0.3),
    LSTM(64),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(len(np.unique(y_encoded)), activation='softmax')
])

  super().__init__(**kwargs)


In [8]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [9]:
# Reshape input data
X_train = X_train[..., np.newaxis]
X_test = X_test[..., np.newaxis]

In [10]:
# Train Model
history = model.fit(X_train, y_train, epochs=30, batch_size=16, validation_data=(X_test, y_test))

Epoch 1/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.0551 - loss: 3.0895 - val_accuracy: 0.0729 - val_loss: 2.8737
Epoch 2/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.0939 - loss: 2.8801 - val_accuracy: 0.1181 - val_loss: 2.6916
Epoch 3/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.1273 - loss: 2.7418 - val_accuracy: 0.1389 - val_loss: 2.6301
Epoch 4/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.1508 - loss: 2.6696 - val_accuracy: 0.1840 - val_loss: 2.5476
Epoch 5/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.1730 - loss: 2.5577 - val_accuracy: 0.2292 - val_loss: 2.4946
Epoch 6/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.1982 - loss: 2.4753 - val_accuracy: 0.1875 - val_loss: 2.5070
Epoch 7/30
[1m72/72[0m [32m━━━━━━━

In [11]:
# Train Model
history = model.fit(X_train, y_train, epochs=30, batch_size=16, validation_data=(X_test, y_test))

Epoch 1/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.8086 - loss: 0.6259 - val_accuracy: 0.6875 - val_loss: 1.0687
Epoch 2/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8082 - loss: 0.5936 - val_accuracy: 0.6840 - val_loss: 1.0283
Epoch 3/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.8230 - loss: 0.5539 - val_accuracy: 0.7396 - val_loss: 0.9119
Epoch 4/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.8293 - loss: 0.5226 - val_accuracy: 0.7465 - val_loss: 0.9506
Epoch 5/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.8391 - loss: 0.5249 - val_accuracy: 0.7604 - val_loss: 0.8990
Epoch 6/30
[1m72/72[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.8237 - loss: 0.5466 - val_accuracy: 0.7153 - val_loss: 1.0056
Epoch 7/30
[1m72/72[0m [32m━━━━━━━━━━

In [13]:
from sklearn.metrics import classification_report
# Predict on test data
y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)

# Print classification report
print(classification_report(y_test, y_pred_labels, target_names=label_encoder.classes_))

[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
              precision    recall  f1-score   support

    Actor_01       0.73      0.73      0.73        11
    Actor_02       0.71      0.77      0.74        13
    Actor_03       0.83      0.83      0.83         6
    Actor_04       0.62      0.83      0.71        12
    Actor_05       0.73      0.57      0.64        14
    Actor_06       0.73      0.67      0.70        12
    Actor_07       0.67      0.86      0.75        14
    Actor_08       0.80      0.89      0.84         9
    Actor_09       0.69      0.90      0.78        10
    Actor_10       1.00      0.44      0.61        16
    Actor_11       1.00      0.94      0.97        16
    Actor_12       0.77      1.00      0.87        10
    Actor_13       0.67      0.92      0.77        13
    Actor_14       0.73      0.67      0.70        12
    Actor_15       1.00      0.83      0.91        12
    Actor_16       0.93      0.93      0.93        15
    Actor

In [15]:
print("Training Accuracy:", history.history['accuracy'][-1])
print("Validation Accuracy:", history.history['val_accuracy'][-1])

Training Accuracy: 0.9505208134651184
Validation Accuracy: 0.7708333134651184


In [16]:
# Save the trained model
model.save('../model/speech_model.keras')

In [17]:
# Save training history
with open('../model/speech_model_history.pkl', 'wb') as file:
    pickle.dump(history.history, file)

print("Model training complete and saved!")

Model training complete and saved!
