In [6]:
#Modules
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
import librosa
import librosa.display
from IPython.display import Audio
import warnings
warnings.filterwarnings("ignore")

In [7]:
# Build the dataset table: one row per recording with its path, emotion label
# and the time-averaged 13-coefficient MFCC vector.
paths = []
labels = []
mfccs = []
import scipy.fftpack

# Compatibility shim kept from the original notebook: expose SciPy's DCT under
# np.fft.dct ("redirecting librosa's incorrect call").
np.fft.dct = scipy.fftpack.dct


for dirname, _, filenames in os.walk('../../dataset/archive/TESS Toronto emotional speech set data'):
    for filename in filenames:
        # Only WAV recordings are audio samples; stray files in the tree
        # (e.g. OS metadata) would otherwise break librosa.load or add a
        # meaningless label.
        if not filename.lower().endswith('.wav'):
            continue
        file_path = os.path.join(dirname, filename)
        paths.append(file_path)
        # The label is the last "_"-separated token of the file name, cut at
        # the first "." and lower-cased.
        label = filename.split('_')[-1].split('.')[0]
        labels.append(label.lower())
        # sr=None keeps the file's native sampling rate.
        y, sr = librosa.load(file_path, sr=None)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        # Average over axis 1 so every file yields one 13-element vector.
        mfccs.append(np.mean(mfcc, axis=1))

df = pd.DataFrame({'speech': paths, 'label': labels, 'mfcc': mfccs})

df.head()

Unnamed: 0,speech,label,mfcc
0,../../dataset/archive/TESS Toronto emotional s...,angry,"[-390.03134, 55.23597, -15.03465, -10.704582, ..."
1,../../dataset/archive/TESS Toronto emotional s...,angry,"[-451.01947, 67.04883, -0.44825765, -16.81213,..."
2,../../dataset/archive/TESS Toronto emotional s...,angry,"[-406.56332, 28.609005, -5.3172164, 2.124631, ..."
3,../../dataset/archive/TESS Toronto emotional s...,angry,"[-386.34003, 53.439655, -8.663545, -13.309978,..."
4,../../dataset/archive/TESS Toronto emotional s...,angry,"[-404.78488, 54.462566, 3.3552904, 10.96758, 2..."


In [8]:
# Amplitude summary for the first 30 recordings listed in df["speech"].
summary_data = []

for file_path in df["speech"][:30]:
    y, sr = librosa.load(file_path, sr=None)
    # os.path.basename understands the platform's separator; splitting on "/"
    # returns more than the file name when os.path.join used "\\".
    file_name = os.path.basename(file_path)

    # Statistics of the raw (signed) waveform samples.
    mean_amp = y.mean()
    max_amp = y.max()

    summary_data.append([file_name, mean_amp, max_amp])
df_summary = pd.DataFrame(summary_data, columns=["file", "mean_amplitude", "max_amplitude"])


In [9]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Stack the per-file MFCC vectors into one array (one row per file), then
# standardise it with a scaler fitted on that same array.
X_mfcc = np.array(df['mfcc'].tolist())
scaler = StandardScaler().fit(X_mfcc)
X_scaled = scaler.transform(X_mfcc)

def compute_reconstruction_error(X, n_components):
    """Return the mean squared error of a PCA round trip on ``X``.

    A PCA with ``n_components`` components is fitted on ``X``; the data is
    projected, mapped back with ``inverse_transform``, and compared
    element-wise with the input.

    Parameters
    ----------
    X : array-like
        Data passed to ``PCA.fit_transform``.
    n_components : int
        Number of principal components to keep.

    Returns
    -------
    float
        Mean of the squared differences between ``X`` and its reconstruction.
    """
    reducer = PCA(n_components=n_components)
    projected = reducer.fit_transform(X)
    residual = X - reducer.inverse_transform(projected)
    return np.mean(residual ** 2)

# Reconstruction error when keeping one and two principal components.
mse_1, mse_2 = (compute_reconstruction_error(X_scaled, k) for k in (1, 2))

print(f"Reconstruction Error (1 component): {mse_1:.5f}")
print(f"Reconstruction Error (2 components): {mse_2:.5f}")


Reconstruction Error (1 component): 0.62042
Reconstruction Error (2 components): 0.45137


In [5]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from sklearn.preprocessing import LabelEncoder
# Encode the string labels as integers for LDA.
label_encoder = LabelEncoder()
y_labels = label_encoder.fit_transform(df['label'])

# Supervised projection of the standardised MFCC features onto two axes.
lda = LinearDiscriminantAnalysis(n_components=2)
X_lda = lda.fit_transform(X_scaled, y_labels)

# Reuse df's index so the projected rows line up with their source rows.
df_lda = pd.DataFrame(X_lda, columns=["LDA_1", "LDA_2"], index=df.index)
# Drop LDA columns left by an earlier run of this cell before concatenating;
# otherwise every re-run appends another pair of LDA_1/LDA_2 columns.
df = pd.concat([df.drop(columns=df_lda.columns, errors="ignore"), df_lda], axis=1)

print(df.head())


                                              speech  label  \
0  ../../dataset/archive/TESS Toronto emotional s...  angry   
1  ../../dataset/archive/TESS Toronto emotional s...  angry   
2  ../../dataset/archive/TESS Toronto emotional s...  angry   
3  ../../dataset/archive/TESS Toronto emotional s...  angry   
4  ../../dataset/archive/TESS Toronto emotional s...  angry   

                                                mfcc     LDA_1     LDA_2  
0  [-390.03134, 55.23597, -15.03465, -10.704582, ... -2.389107 -1.784898  
1  [-451.01947, 67.04883, -0.44825765, -16.81213,... -1.117853 -1.932096  
2  [-406.56332, 28.609005, -5.3172164, 2.124631, ... -2.732071 -0.397837  
3  [-386.34003, 53.439655, -8.663545, -13.309978,... -2.699012 -1.753967  
4  [-404.78488, 54.462566, 3.3552904, 10.96758, 2... -2.936682 -0.124494  


In [6]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve
from sklearn.preprocessing import LabelEncoder

# Feature matrix (one MFCC vector per row) and integer-encoded targets.
# Note: `label_encoder` and `scaler` are re-bound here; the objects created in
# this cell are the ones fitted for the train/test pipeline below.
X = np.vstack(df['mfcc'].to_numpy())
y = np.array(df['label'])
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Stratified 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# PCA is likewise fitted on the training split and reused on the test split.
pca = PCA(n_components=10)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

In [10]:
# Train the logistic-regression classifier on the PCA-reduced training set.
log_reg = LogisticRegression(max_iter=1000).fit(X_train_pca, y_train)

# Score the held-out split.
y_pred_log_reg = log_reg.predict(X_test_pca)
conf_matrix_log_reg = confusion_matrix(y_true=y_test, y_pred=y_pred_log_reg)

print(
    "Logistic Regression Report:\n",
    classification_report(y_true=y_test, y_pred=y_pred_log_reg),
)

Logistic Regression Report:
               precision    recall  f1-score   support

           0       0.81      0.80      0.80       160
           1       0.81      0.82      0.82       160
           2       0.84      0.88      0.86       160
           3       0.78      0.74      0.76       160
           4       0.76      0.76      0.76       160
           5       0.69      0.67      0.68       160
           6       0.81      0.82      0.81       160

    accuracy                           0.78      1120
   macro avg       0.78      0.78      0.78      1120
weighted avg       0.78      0.78      0.78      1120



In [8]:
import joblib

# Persist the fitted logistic-regression classifier to disk.
joblib.dump(value=log_reg, filename="logistic_regression_model.pkl")

['logistic_regression_model.pkl']

In [18]:
import joblib
# Reload the classifier from the file written by joblib.dump.
log_reg = joblib.load(filename="logistic_regression_model.pkl")
import librosa

def predict_emotion(audio_path, model, feature_scaler=None, pca_model=None, encoder=None):
    """Predict the emotion label for a single audio file.

    The file is loaded at its native sampling rate, reduced to a
    time-averaged 13-coefficient MFCC vector, scaled, projected with PCA and
    classified by ``model``.

    Parameters
    ----------
    audio_path : str
        Path of the audio file to classify.
    model : object
        Fitted classifier exposing ``predict``.
    feature_scaler : object, optional
        Fitted scaler exposing ``transform``. Defaults to the notebook-level
        ``scaler``.
    pca_model : object, optional
        Fitted PCA exposing ``transform``. Defaults to the notebook-level
        ``pca``.
    encoder : object, optional
        Fitted label encoder exposing ``inverse_transform``. Defaults to the
        notebook-level ``label_encoder``.

    Returns
    -------
    The decoded label for the file (first element of the encoder's output).
    """
    # Fall back to the notebook globals so existing two-argument calls keep
    # working; passing the objects explicitly lets the function run with
    # preprocessors reloaded from disk.
    if feature_scaler is None:
        feature_scaler = scaler
    if pca_model is None:
        pca_model = pca
    if encoder is None:
        encoder = label_encoder

    y, sr = librosa.load(audio_path, sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    # Average over axis 1, then shape as a single-row batch for transform().
    mfcc_mean = np.mean(mfcc, axis=1).reshape(1, -1)

    # Apply the same preprocessing chain used at training time.
    mfcc_scaled = feature_scaler.transform(mfcc_mean)
    mfcc_pca = pca_model.transform(mfcc_scaled)

    # Predict, then map the encoded class back to its original label.
    predicted_label = model.predict(mfcc_pca)
    return encoder.inverse_transform(predicted_label)[0]

# Demo: classify one sample file with the reloaded classifier.
audio_file = "sample.wav"
predicted_emotion = predict_emotion(audio_path=audio_file, model=log_reg)
print("Predicted Emotion:", predicted_emotion)

Predicted Emotion: angry


In [19]:
import joblib

# Persist the fitted preprocessing objects, in pipeline order: the feature
# scaler, the PCA projection, and the encoder that maps class indices back to
# label strings.
for artifact, target in (
    (scaler, 'scaler.pkl'),
    (pca, 'pca.pkl'),
    (label_encoder, 'label_encoder.pkl'),
):
    joblib.dump(artifact, target)

print("All models and preprocessors saved successfully!")


All models and preprocessors saved successfully!
