In [1]:
import os
import librosa
import numpy as np
import pandas as pd
import pandas as pd
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

In [None]:
import os
import librosa
import numpy as np
import pandas as pd

# Root folder of the RAVDESS audio files; it is walked recursively for .wav files.
root_dir = r'speech-emotion-recognition-ravdess-data'

# Emotion code (3rd dash-separated field of the filename) -> readable label.
emotion_dict = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprised'
}


def _extract_feature_vector(file_path):
    """Return a 1-D vector of time-averaged audio features for one file.

    The vector is the concatenation, in this order, of the per-row means
    (axis=1, i.e. averaged over frames) of: MFCC (n_mfcc=13), chroma STFT,
    mel spectrogram (n_mels=128), spectral contrast, and tonnetz computed on
    the harmonic component of the signal.
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    signal, sample_rate = librosa.load(file_path, sr=None)

    per_frame_features = (
        librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13),
        librosa.feature.chroma_stft(y=signal, sr=sample_rate),
        librosa.feature.melspectrogram(y=signal, sr=sample_rate, n_mels=128),
        librosa.feature.spectral_contrast(y=signal, sr=sample_rate),
        librosa.feature.tonnetz(y=librosa.effects.harmonic(signal), sr=sample_rate),
    )
    return np.concatenate([np.mean(feature, axis=1) for feature in per_frame_features])


data = []

for subdir, dirs, files in os.walk(root_dir):
    # Sorting (in place for dirs, so os.walk honours it) makes the row order
    # of the output CSV independent of the filesystem's listing order.
    dirs.sort()
    for file in sorted(files):
        if not file.endswith('.wav'):
            continue

        file_path = os.path.join(subdir, file)
        identifiers = file.split('-')

        try:
            emotion_code = identifiers[2]  # 3rd part of the filename
            # 7th part of the filename still carries the '.wav' extension.
            actor_id = identifiers[6].split('.')[0]
            actor_number = int(actor_id)
        except (IndexError, ValueError):
            # A stray .wav that does not follow the expected naming scheme
            # should not abort the whole extraction run.
            print(f"Skipping file with unexpected name format: {file_path}")
            continue

        emotion_label = emotion_dict.get(emotion_code, 'unknown')
        gender = 'male' if actor_number % 2 != 0 else 'female'  # odd actor id -> male

        feature_vector = _extract_feature_vector(file_path)
        data.append([file_path, *feature_vector, emotion_label, gender])

if not data:
    # Fail with a clear message instead of a NameError further down.
    raise FileNotFoundError(f"No usable .wav files found under '{root_dir}'")

# Each row is [file_path, *features, label, gender]; derive the feature count
# from the collected rows rather than from a leaked loop variable.
n_features = len(data[0]) - 3
columns = ['file_path'] + [f'feature_{i}' for i in range(n_features)] + ['label', 'gender']
df = pd.DataFrame(data, columns=columns)

df.to_csv('extracted_features_with_gender.csv', index=False)

print("Feature extraction completed and saved to 'extracted_features_with_gender.csv'")

In [None]:
# Reload the extracted features so this cell does not depend on kernel state.
df = pd.read_csv('extracted_features_with_gender.csv')

emotion_counts = df['label'].value_counts()
print("Sample counts for each emotion class:")
print(emotion_counts)

# Size of the smallest class; every class is reduced to this many rows.
min_samples = emotion_counts.min()
print(f"\nMinimum samples in the dataset: {min_samples}")

# min_samples is the minimum class count, so no class can ever be smaller than
# it: balancing is always undersampling (sampling without replacement).
# A fixed random_state makes the selected rows reproducible across runs.
balanced_data = [
    df[df['label'] == emotion].sample(min_samples, random_state=42)
    for emotion in emotion_counts.index
]

balanced_df = pd.concat(balanced_data, ignore_index=True)

balanced_df.to_csv('balanced_extracted_features.csv', index=False)
print("\nDataset balanced and saved to 'balanced_extracted_features.csv'")

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot styling is independent of the data, so configure it first.
sns.set(style="whitegrid")

# Read the balanced feature table back from disk for visualization.
balanced_df = pd.read_csv('balanced_extracted_features.csv')

# Number of columns each feature family contributes to the feature vector, in
# the order the extraction step concatenates them. The mel spectrogram is
# extracted with n_mels=128, so it spans 128 columns (not 100).
_feature_group_sizes = [
    ('MFCC', 13),
    ('Chroma', 12),
    ('Mel-Spectrogram', 128),
    ('Spectral Contrast', 7),
    ('Tonnetz', 6),
]

# Map each feature family to its positional column range in the CSV.
# Column 0 is 'file_path', so the first feature column is at position 1.
feature_indices = {}
_next_column = 1
for _group_name, _group_size in _feature_group_sizes:
    feature_indices[_group_name] = range(_next_column, _next_column + _group_size)
    _next_column += _group_size

def plot_feature_distribution(data, feature_indices, feature_name):
    """Show a box plot of the selected columns of ``data``.

    Parameters
    ----------
    data : DataFrame whose columns are selected positionally.
    feature_indices : positional column indices passed to ``data.iloc``.
    feature_name : label inserted into the figure title.
    """
    selected_columns = data.iloc[:, feature_indices]

    fig, ax = plt.subplots(figsize=(12, 6))
    sns.boxplot(data=selected_columns, palette='Set2', ax=ax)

    ax.set_title(f'Distribution of {feature_name} Features')
    ax.set_xlabel('Features')
    ax.set_ylabel('Values')
    # Tilt the column names so neighbouring labels do not overlap.
    plt.setp(ax.get_xticklabels(), rotation=45)

    fig.tight_layout()
    plt.show()

# One figure per feature family, in the order the families are declared.
for feature_name in feature_indices:
    indices = feature_indices[feature_name]
    plot_feature_distribution(balanced_df, indices, feature_name)

In [None]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Read the class-balanced feature table produced by the balancing step.
balanced_df = pd.read_csv('balanced_extracted_features.csv')

# Positional slice: drop the first column and the last two
# (assumed to be file_path, label and gender respectively).
features = balanced_df.iloc[:, 1:-2]
labels = balanced_df['label']

# Zero-mean / unit-variance scaling of every feature column.
# NOTE(review): the scaler is fit on all rows, before any train/test split
# performed downstream, so test-set statistics influence the scaling.
scaler = StandardScaler()
standardized_features = scaler.fit_transform(features)

# Turn the emotion names into integer class ids.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)

# Reassemble a frame: scaled features under their original column names,
# with the integer target appended as the last column.
processed_df = pd.DataFrame(standardized_features, columns=features.columns).assign(
    encoded_label=encoded_labels
)

# Optionally, you can also include gender if needed
# processed_df['gender'] = balanced_df['gender'].values

# Persist the model-ready table for the training cells.
processed_df.to_csv('processed_features_with_encoded_labels.csv', index=False)

print("Label encoding and feature standardization completed and saved to 'processed_features_with_encoded_labels.csv'")

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix
from scipy.stats import randint

# Load the processed dataset (standardized features + integer-encoded labels)
processed_df = pd.read_csv('processed_features_with_encoded_labels.csv')

# Separate features and labels
X = processed_df.drop(columns=['encoded_label'])  # Features
y = processed_df['encoded_label']  # Encoded emotion labels

# Split the data into training and testing sets (80% train, 20% test);
# stratify=y keeps the class proportions identical in both splits.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Candidate hyperparameter values sampled by the randomized search
param_dist = {
    'hidden_layer_sizes': [(50,), (100,), (50, 50), (100, 50)],  # Different layer configurations
    'activation': ['logistic', 'tanh', 'relu'],                   # Activation functions
    'solver': ['sgd', 'adam'],                                   # Optimization algorithms
    'alpha': [0.0001, 0.001, 0.01],                              # L2 regularization term
    'learning_rate': ['constant', 'invscaling', 'adaptive'],     # Learning rate strategies
    'max_iter': [200, 500, 1000]                                 # Number of iterations
}

# Set up the RandomizedSearchCV
mlp_random_search = RandomizedSearchCV(estimator=MLPClassifier(random_state=42),
                                       param_distributions=param_dist,
                                       n_iter=50,                       # Number of sampled configurations
                                       cv=3,                            # Number of cross-validation folds
                                       verbose=1,                       # Verbosity level
                                       random_state=42,                 # For reproducibility
                                       n_jobs=-1)                       # Use all available cores

# Fit the search object defined above. (The original cell referenced an
# undefined `dt_random_search`, which raised NameError on a fresh kernel.)
mlp_random_search.fit(X_train, y_train)

# Get the best model (refit on the full training split by the search)
best_mlp_model = mlp_random_search.best_estimator_

# Make predictions on the held-out test set
y_pred_mlp = best_mlp_model.predict(X_test)

# Evaluate the best model
mlp_conf_matrix = confusion_matrix(y_test, y_pred_mlp)
mlp_class_report = classification_report(y_test, y_pred_mlp)

# Display the results, labelled with the model that was actually trained
print("Best Parameters for MLP:\n", mlp_random_search.best_params_)
print("Confusion Matrix for MLP:\n", mlp_conf_matrix)
print("\nClassification Report for MLP:\n", mlp_class_report)

In [None]:
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV, KFold
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from scipy.stats import randint
import seaborn as sns
import matplotlib.pyplot as plt

# Read the model-ready table (scaled features plus integer target).
processed_df = pd.read_csv('processed_features_with_encoded_labels.csv')

# Target column vs. everything else.
y = processed_df['encoded_label']
X = processed_df.drop(columns=['encoded_label'])

# Hold out 20% of the rows for testing, preserving class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Search space for the SVM hyperparameters.
param_dist = dict(
    C=[0.1, 1, 10, 100],                         # regularization strength
    kernel=["linear", "poly", "rbf", "sigmoid"],  # kernel family
    gamma=["scale", "auto"],                      # kernel coefficient
    degree=randint(2, 5),                         # polynomial degree, drawn from {2, 3, 4}
)

# Randomized search: 50 sampled configurations, 3-fold CV, all cores.
svm_random_search = RandomizedSearchCV(
    SVC(random_state=42),
    param_dist,
    n_iter=50,
    cv=3,
    verbose=1,
    random_state=42,
    n_jobs=-1,
)
svm_random_search.fit(X_train, y_train)

# Best configuration found by the search.
best_svm_model = svm_random_search.best_estimator_

# Score it on the held-out split.
y_pred_svm = best_svm_model.predict(X_test)
svm_class_report = classification_report(y_test, y_pred_svm)
svm_conf_matrix = confusion_matrix(y_test, y_pred_svm)

# Report.
print("Best Parameters for SVM:\n", svm_random_search.best_params_)
print("Confusion Matrix for SVM:\n", svm_conf_matrix)
print("\nClassification Report for SVM:\n", svm_class_report)