In [None]:
import pandas as pd
import numpy as np
import os
import librosa
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.layers import Conv1D, MaxPooling1D, GlobalAveragePooling1D, Dense, Dropout
from keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler

# Import the Random Forest classifier from scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

#Import Gaussian Naive Bayes from scikit-learn
from sklearn.naive_bayes import GaussianNB
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingClassifier

# Folder locations, relative to the notebook's working directory.
# dataset_dir is the folder walked by the feature-extraction cell;
# sample_dataset_dir is defined here but not referenced by the visible cells.
dataset_dir = "Dataset"
sample_dataset_dir = "Sample Dataset"


In [None]:
# Metadata accumulators, one list per dash-separated field of a recording's
# file name (filled by the feature-extraction loop).
file_names = []
modalities = []
vocal_channels = []
emotions = []
intensities = []
statements = []
repetitions = []
actors = []

In [None]:
# Audio-feature accumulators: each list receives one scalar per processed file.

# Pitch and basic spectral descriptors
mean_pitches = []
pitch_ranges = []
spectral_centroids = []
zero_crossing_rates = []

# MFCC and chroma summary statistics
mfccs_mean = []
mfccs_var = []
chroma_mean = []
chroma_var = []

# Energy and chroma-CENS statistics
rms_energy = []
chroma_cens_mean = []
chroma_cens_var = []

# Spectral contrast statistics
spectral_contrast_mean = []
spectral_contrast_var = []

# Tonnetz statistics
tonnetz_mean = []
tonnetz_var = []

In [None]:
def _extract_audio_features(file_path):
    """Load one audio file and return its scalar summary features.

    Nothing outside this function is modified, so a failure part-way through
    cannot leave the notebook's feature lists with different lengths.

    Parameters
    ----------
    file_path : str
        Path of the audio file to analyse.

    Returns
    -------
    dict
        Maps the name of each feature list in this notebook
        (e.g. ``"mfccs_mean"``) to the scalar computed for this file.

    Raises
    ------
    Exception
        Whatever librosa raises while loading or analysing the file.
    """
    # sr=None keeps the file's native sampling rate instead of resampling
    audio, sample_rate = librosa.load(file_path, sr=None)

    # piptrack returns two (frequency_bin, frame) matrices. For every frame,
    # take the frequency stored at the bin with the largest magnitude; the
    # argmax alone is only a bin index, not a pitch.
    pitches, magnitudes = librosa.piptrack(y=audio, sr=sample_rate)
    strongest_bin = magnitudes.argmax(axis=0)
    frame_pitch = pitches[strongest_bin, np.arange(pitches.shape[1])]
    # Frames without a detected peak carry 0; leave them out of the statistics
    voiced_pitch = frame_pitch[frame_pitch > 0]
    if voiced_pitch.size:
        mean_pitch = float(np.mean(voiced_pitch))
        pitch_range = float(voiced_pitch.max() - voiced_pitch.min())
    else:
        mean_pitch = 0.0
        pitch_range = 0.0

    # Average over all frames (indexing [0, 0] would keep only the first frame)
    spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=audio, sr=sample_rate)))
    zero_crossing_rate = float(np.mean(librosa.feature.zero_crossing_rate(y=audio)))

    chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate)
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13)
    rms = librosa.feature.rms(y=audio)[0]
    cens = librosa.feature.chroma_cens(y=audio, sr=sample_rate)
    contrast = librosa.feature.spectral_contrast(y=audio, sr=sample_rate)
    tonnetz = librosa.feature.tonnetz(y=audio, sr=sample_rate)

    return {
        "mean_pitches": mean_pitch,
        "pitch_ranges": pitch_range,
        "spectral_centroids": spectral_centroid,
        "zero_crossing_rates": zero_crossing_rate,
        "chroma_mean": np.mean(chroma),
        "chroma_var": np.var(chroma),
        "mfccs_mean": np.mean(mfccs),
        "mfccs_var": np.var(mfccs),
        "rms_energy": np.mean(rms),
        "chroma_cens_mean": np.mean(cens),
        "chroma_cens_var": np.var(cens),
        "spectral_contrast_mean": np.mean(contrast),
        "spectral_contrast_var": np.var(contrast),
        "tonnetz_mean": np.mean(tonnetz),
        "tonnetz_var": np.var(tonnetz),
    }


# Walk every actor folder and collect features plus file-name metadata.
# Directory listings are sorted so that the row order (and therefore the
# seeded train/test split) is the same on every machine.
for actor_folder in sorted(os.listdir(dataset_dir)):
    actor_folder_path = os.path.join(dataset_dir, actor_folder)

    # Only sub-directories contain recordings
    if not os.path.isdir(actor_folder_path):
        continue

    for file in sorted(os.listdir(actor_folder_path)):
        if not file.endswith(".wav"):
            continue

        # File names are expected to hold seven dash-separated fields.
        # Validate first so a malformed name never contributes features
        # without a matching emotion label.
        parts = file.split("-")
        if len(parts) != 7:
            print(f"Skipping {file}: expected 7 dash-separated fields, found {len(parts)}")
            continue

        file_path = os.path.join(actor_folder_path, file)
        try:
            features = _extract_audio_features(file_path)
        except Exception as e:
            # Skip the file entirely; nothing has been appended for it yet
            print(f"Error processing {file}: {str(e)}")
            continue

        # All values are available: append features and metadata together so
        # every list keeps exactly one entry per processed file.
        mean_pitches.append(features["mean_pitches"])
        pitch_ranges.append(features["pitch_ranges"])
        spectral_centroids.append(features["spectral_centroids"])
        zero_crossing_rates.append(features["zero_crossing_rates"])
        chroma_mean.append(features["chroma_mean"])
        chroma_var.append(features["chroma_var"])
        mfccs_mean.append(features["mfccs_mean"])
        mfccs_var.append(features["mfccs_var"])
        rms_energy.append(features["rms_energy"])
        chroma_cens_mean.append(features["chroma_cens_mean"])
        chroma_cens_var.append(features["chroma_cens_var"])
        spectral_contrast_mean.append(features["spectral_contrast_mean"])
        spectral_contrast_var.append(features["spectral_contrast_var"])
        tonnetz_mean.append(features["tonnetz_mean"])
        tonnetz_var.append(features["tonnetz_var"])

        file_names.append(file)
        modalities.append(parts[0])
        vocal_channels.append(parts[1])
        emotions.append(parts[2])
        intensities.append(parts[3])
        statements.append(parts[4])
        repetitions.append(parts[5])
        actors.append(parts[6].split(".")[0])

In [None]:
# Map the emotion code strings to consecutive integer class ids.
# The fitted encoder is kept so the class order can be looked up later.
label_encoder = LabelEncoder().fit(emotions)
encoded_emotions = label_encoder.transform(emotions)

In [None]:
# Standardise each scalar feature to zero mean / unit variance.
# NOTE: the statistics are computed over every sample, i.e. before any
# train/test split performed in the later cells.
scaler = StandardScaler()


def _standardize_column(values):
    """Return `values` as a standardised (n_samples, 1) column.

    The shared `scaler` is re-fitted on every call, so each feature is
    scaled with its own mean and variance.
    """
    column = np.array(values).reshape(-1, 1)
    return scaler.fit_transform(column)


normalized_mean_pitches = _standardize_column(mean_pitches)
normalized_pitch_ranges = _standardize_column(pitch_ranges)
normalized_mfccs_mean = _standardize_column(mfccs_mean)
normalized_mfccs_var = _standardize_column(mfccs_var)
normalized_chroma_mean = _standardize_column(chroma_mean)
normalized_chroma_var = _standardize_column(chroma_var)
normalized_rms_energy = _standardize_column(rms_energy)
normalized_chroma_cens_mean = _standardize_column(chroma_cens_mean)
normalized_chroma_cens_var = _standardize_column(chroma_cens_var)
normalized_spectral_contrast_mean = _standardize_column(spectral_contrast_mean)
normalized_spectral_contrast_var = _standardize_column(spectral_contrast_var)
normalized_tonnetz_mean = _standardize_column(tonnetz_mean)
normalized_tonnetz_var = _standardize_column(tonnetz_var)

In [None]:
# The two features that are not standardised above are still plain Python
# lists; turn them into NumPy arrays so they can be reshaped into columns.
zero_crossing_rates = np.asarray(zero_crossing_rates)
spectral_centroids = np.asarray(spectral_centroids)

In [None]:
# Assemble the 15-column feature matrix, one row per recording.
# Column order: pitch (2), spectral centroid, zero-crossing rate, MFCC (2),
# chroma (2), RMS energy, chroma CENS (2), spectral contrast (2), tonnetz (2).
X = np.concatenate(
    [
        normalized_mean_pitches,
        normalized_pitch_ranges,
        spectral_centroids.reshape(-1, 1),
        zero_crossing_rates.reshape(-1, 1),
        normalized_mfccs_mean,
        normalized_mfccs_var,
        normalized_chroma_mean,
        normalized_chroma_var,
        normalized_rms_energy,
        normalized_chroma_cens_mean,
        normalized_chroma_cens_var,
        normalized_spectral_contrast_mean,
        normalized_spectral_contrast_var,
        normalized_tonnetz_mean,
        normalized_tonnetz_var,
    ],
    axis=1,
)

In [None]:
# Feature matrix for the convolutional network: the 13 standardised features
# (spectral centroid and zero-crossing rate are left out).
# It is stored as X_cnn rather than X so that the 15-column X assembled for
# the classical models is not overwritten; the later Naive Bayes / Random
# Forest cells and the 15-entry feature-importance plot rely on that matrix.
X_cnn = np.column_stack((normalized_mean_pitches, normalized_pitch_ranges, normalized_mfccs_mean,
                         normalized_mfccs_var, normalized_chroma_mean, normalized_chroma_var,
                         normalized_rms_energy, normalized_chroma_cens_mean, normalized_chroma_cens_var,
                         normalized_spectral_contrast_mean, normalized_spectral_contrast_var,
                         normalized_tonnetz_mean, normalized_tonnetz_var))

# Converts emotion labels to one-hot encoding
y = to_categorical(encoded_emotions)

# Splitting the dataset into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X_cnn, y, test_size=0.2, random_state=42)

num_classes = len(set(emotions))

# Define the 1D CNN model: the feature vector is treated as a length-13
# sequence with a single channel.
model = Sequential()
model.add(Conv1D(64, 3, activation='relu', input_shape=(X_train.shape[1], 1)))
model.add(MaxPooling1D(2))
model.add(Dropout(0.25))  # Adds dropout after the first convolutional layer

model.add(Conv1D(128, 3, activation='relu'))
model.add(MaxPooling1D(2))
model.add(Dropout(0.25))  # Adds dropout after the second convolutional layer

# padding='same' keeps the (by now very short) sequence from shrinking further
model.add(Conv1D(256, 3, activation='relu', padding='same'))
model.add(GlobalAveragePooling1D())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.25))  # Adds dropout before the final dense layer
model.add(Dense(num_classes, activation='softmax'))

# Compile the model with a specific learning rate (e.g., 0.001)
model.compile(loss='categorical_crossentropy', optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

# Reshape the input data to (samples, steps, channels) as expected by Conv1D
X_train_reshaped = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test_reshaped = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Train the model.
# NOTE: the held-out test split doubles as validation data here, so the
# per-epoch validation metrics are not independent of the final test score.
model.fit(X_train_reshaped, y_train, epochs=40, batch_size=128, validation_data=(X_test_reshaped, y_test))

# Evaluates the model on the test set
loss, accuracy = model.evaluate(X_test_reshaped, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy * 100:.2f}%")

In [None]:
# Confusion matrix for the CNN
y_pred = model.predict(X_test_reshaped)

# Convert one-hot encoded predictions and true labels back to class indices
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# Pass the full list of class ids explicitly: without it the matrix only
# covers classes that occur in this split, and its rows/columns would no
# longer line up with the tick labels taken from the encoder.
class_ids = np.arange(len(label_encoder.classes_))
conf_matrix = confusion_matrix(y_true_classes, y_pred_classes, labels=class_ids)

# Plot the confusion matrix using seaborn
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Hold out 20% of the rows for testing the classical models; the labels are
# the integer-encoded emotions and the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    encoded_emotions,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit the scaler on the training rows only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Gaussian Naive Bayes with default settings (class priors are estimated
# from the training labels), trained on the scaled training data
nb_classifier = GaussianNB()
nb_classifier.fit(X_train_scaled, y_train)

# Predict the held-out rows and report the share of correct predictions
y_pred = nb_classifier.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print("Naive Bayes Classifier Accuracy:", accuracy)


In [None]:
# Hyper-parameter grid for the Random Forest: 3 * 4 * 3 * 3 = 108 combinations
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Base estimator; the fixed seed makes every fit deterministic
rf_classifier = RandomForestClassifier(random_state=42)

# 5-fold cross-validated grid search (108 combinations x 5 folds = 540 fits).
# n_jobs=-1 spreads the fits over all available cores; it does not change
# which parameters are selected.
grid_search = GridSearchCV(estimator=rf_classifier, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Gets the best parameters
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# GridSearchCV refits the best configuration on the full training set by
# default (refit=True), so that fitted model is reused instead of training
# an identical forest a second time.
best_rf_classifier = grid_search.best_estimator_

# Make predictions on the testing data using the best classifier
y_pred = best_rf_classifier.predict(X_test)

# Evaluate the accuracy of the best model
accuracy_best = accuracy_score(y_test, y_pred)
print("Best Random Forest Classifier Accuracy:", accuracy_best)

In [None]:
# Human-readable names for the two-digit emotion codes used in the file names
# ('01' .. '08', in this order); used when labelling plots and reports.
emotion_mapping = {
    f"{code:02d}": name
    for code, name in enumerate(
        (
            'Neutral',
            'Calm',
            'Happy',
            'Sad',
            'Angry',
            'Fearful',
            'Disgust',
            'Surprised',
        ),
        start=1,
    )
}

In [None]:
# Table used by the plots below: the readable emotion name of every
# recording next to a selection of its raw (unscaled) features.
data = pd.DataFrame(
    dict(
        [
            ('Emotion', [emotion_mapping[code] for code in emotions]),
            ('Mean Pitch', mean_pitches),
            ('Spectral Centroid', spectral_centroids),
            ('MFCCs Mean', mfccs_mean),
            ('RMS energy', rms_energy),
            ('MFCCs variance', mfccs_var),
            ('Tonnetz variance', tonnetz_var),
        ]
    )
)

In [None]:
# RMS energy per emotion as a violin plot; the figure is also written to disk
fig, ax = plt.subplots(figsize=(8, 6))
sns.violinplot(data=data, x='Emotion', y='RMS energy', palette='viridis', ax=ax)
ax.set_title('Distribution of RMS Energy Across Emotions')
fig.tight_layout()
fig.savefig('rms_energy_plot.png')
plt.show()

In [None]:
# MFCC variance per emotion as a violin plot; the figure is also written to disk
fig, ax = plt.subplots(figsize=(8, 6))
sns.violinplot(data=data, x='Emotion', y='MFCCs variance', palette='viridis', ax=ax)
ax.set_title('Distribution of MFCCs Variance Across Emotions')
fig.tight_layout()
fig.savefig('mfccs_variance_plot.png')
plt.show()

In [None]:
# Tonnetz variance per emotion as a violin plot; the figure is also written to disk
fig, ax = plt.subplots(figsize=(8, 6))
sns.violinplot(data=data, x='Emotion', y='Tonnetz variance', palette='viridis', ax=ax)
ax.set_title('Distribution of Tonnetz Variance Across Emotions')
fig.tight_layout()
fig.savefig('tonnetz_variance_plot.png')
plt.show()

In [None]:
# Confusion matrix for the Random Forest predictions.
# Class ids and their display names are taken from the fitted label encoder
# so that rows, columns and tick labels always refer to the same classes,
# even when a class does not occur in this particular test split.
class_codes = list(label_encoder.classes_)
class_ids = np.arange(len(class_codes))
# Fall back to the raw code if it has no readable name in emotion_mapping
class_names = [emotion_mapping.get(code, code) for code in class_codes]

cm = confusion_matrix(y_test, y_pred, labels=class_ids)

# Plot the confusion matrix as a heatmap with emotion labels
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()

In [None]:
# Per-class precision / recall / F1 for the most recent predictions.
# The class ids and their names are derived from the fitted label encoder and
# passed explicitly, so the report rows cannot be mislabelled (or the call
# fail on a length mismatch) when a class is missing from the test split.
report_codes = list(label_encoder.classes_)
report_names = [emotion_mapping.get(code, code) for code in report_codes]
classification_rep = classification_report(
    y_test,
    y_pred,
    labels=np.arange(len(report_codes)),
    target_names=report_names,
)

# Prints the classification report
print(classification_rep)

In [None]:
# Getting feature importances from the trained Random Forest model
feature_importances = best_rf_classifier.feature_importances_

# Column names of the 15-feature matrix, in the order the columns are stacked
all_feature_names = [
    'Mean Pitches', 'Pitch Ranges',
    'Spectral Centroids', 'Zero Crossing Rates',
    'MFCCs Mean', 'MFCCs Variance',
    'Chroma Mean', 'Chroma Variance',
    'RMS Energy', 'Chroma CENS Mean',
    'Chroma CENS Variance', 'Spectral Contrast Mean',
    'Spectral Contrast Variance', 'Tonnetz Mean',
    'Tonnetz Variance'
]
# The 13-column variant of the matrix omits these two columns
reduced_feature_names = [
    name for name in all_feature_names
    if name not in ('Spectral Centroids', 'Zero Crossing Rates')
]

# The forest may have been fitted on either variant; choose the names whose
# count matches the importances instead of failing inside the DataFrame
# constructor with an unrelated-looking length error.
names_by_width = {
    len(all_feature_names): all_feature_names,
    len(reduced_feature_names): reduced_feature_names,
}
if len(feature_importances) not in names_by_width:
    raise ValueError(
        f"Random Forest was trained on {len(feature_importances)} features, "
        f"but names are only known for {sorted(names_by_width)} features"
    )
feature_names = names_by_width[len(feature_importances)]

# Create a DataFrame for better visualization
feature_importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importances})

# Sort the DataFrame by feature importance in descending order
feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)

# Plot the bar chart
plt.figure(figsize=(12, 8))
sns.barplot(x='Importance', y='Feature', data=feature_importance_df, palette='viridis')
plt.title('Random Forest Feature Importance')
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.show()