In [None]:
import pandas as pd
import numpy as np
import os
import librosa
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import to_categorical

# Import the Random Forest classifier from scikit-learn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

#Import Gaussian Naive Bayes from scikit-learn
from sklearn.naive_bayes import GaussianNB

# Paths to the dataset directories, relative to the notebook's working directory
sample_dataset_dir = "Sample Dataset"
dataset_dir = "Dataset"

# Show the contents of both directories to confirm the paths are right.
# A notebook cell only displays its *last* bare expression, so a bare
# os.listdir(...) in the middle of the cell is silently discarded; print
# each listing explicitly instead.
print("Files inside dataset_dir")
print(os.listdir(path=dataset_dir))

print("Files inside sample_dataset_dir")
print(os.listdir(path=sample_dataset_dir))

In [None]:
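# One-time script that built the sample dataset. It has already been run, so it is
# kept commented out for reference only.
# NOTE: re-enabling it also requires `import random` and `import shutil`, which the
# import cell above does not provide.

# Create the sample dataset here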
# original_dataset_dir = dataset_dir

# sample_fraction = 0.1

# os.makedirs(sample_dataset_dir, exist_ok=True)

# for actor_folder in os.listdir(original_dataset_dir):
#   actor_folder_path = os.path.join(original_dataset_dir, actor_folder)

#   if os.path.isdir(actor_folder_path):
#     actor_files = [file for file in os.listdir(actor_folder_path) if file.endswith(".wav")]
#     num_files_to_sample = int(sample_fraction * len(actor_files))

#     sampled_files = random.sample(actor_files, num_files_to_sample)

#     for file in sampled_files:
#       src_path = os.path.join(actor_folder_path, file)
#       dst_path = os.path.join(sample_dataset_dir, file)
#       shutil.copy(src_path, dst_path)
# print("sample dataset created with 10% of files from each actor")

In [None]:
# Metadata parsed from each file name: one independent, initially empty list per field
file_names = []
modalities = []
vocal_channels = []
emotions = []
intensities = []
statements = []
repetitions = []
actors = []

In [None]:
# Per-file pitch / spectral features: one independent, initially empty list each
mean_pitches = []
pitch_ranges = []
spectral_centroids = []
zero_crossing_rates = []

# Per-file MFCC and chroma summary statistics
mfccs_mean, mfccs_var, chroma_mean, chroma_var = ([] for _ in range(4))

In [None]:
# Load data
def _extract_audio_features(file_path):
    """Compute the scalar audio features used by the classifiers for one file.

    Parameters
    ----------
    file_path : str
        Path of the audio file to analyse.

    Returns
    -------
    dict
        One float per feature, keyed by ``mean_pitch``, ``pitch_range``,
        ``spectral_centroid``, ``zero_crossing_rate``, ``mfccs_mean``,
        ``mfccs_var``, ``chroma_mean`` and ``chroma_var``.

    Raises
    ------
    Exception
        Any error raised by librosa or NumPy while loading or analysing the
        file is propagated to the caller.
    """
    # sr=None keeps the file's own sampling rate instead of resampling.
    audio, sample_rate = librosa.load(file_path, sr=None)

    # piptrack returns two (frequency_bin, frame) arrays. The arg-max over the
    # magnitudes is only a *bin index*; the frequency itself has to be read
    # from `pitches` at that index.
    pitches, magnitudes = librosa.piptrack(y=audio, sr=sample_rate)
    strongest_bins = magnitudes.argmax(axis=0)
    pitch_track = pitches[strongest_bins, np.arange(pitches.shape[1])]

    # Summarise the frame-wise descriptors over the whole clip; a single frame
    # (e.g. the very first one) does not describe the recording.
    centroid_frames = librosa.feature.spectral_centroid(y=audio, sr=sample_rate)
    zcr_frames = librosa.feature.zero_crossing_rate(y=audio)

    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13)
    chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate)

    return {
        "mean_pitch": float(np.mean(pitch_track)),
        "pitch_range": float(pitch_track.max() - pitch_track.min()),
        "spectral_centroid": float(np.mean(centroid_frames)),
        "zero_crossing_rate": float(np.mean(zcr_frames)),
        "mfccs_mean": float(np.mean(mfccs)),
        "mfccs_var": float(np.var(mfccs)),
        "chroma_mean": float(np.mean(chroma)),
        "chroma_var": float(np.var(chroma)),
    }


# Start from empty lists so that re-running this cell does not append every
# file a second time.
file_names, modalities, vocal_channels, emotions = [], [], [], []
intensities, statements, repetitions, actors = [], [], [], []
mean_pitches, pitch_ranges, spectral_centroids, zero_crossing_rates = [], [], [], []
mfccs_mean, mfccs_var, chroma_mean, chroma_var = [], [], [], []

# Sorted so that the sample order (and therefore any seeded split of the
# samples) does not depend on the arbitrary order os.listdir returns.
for file in sorted(os.listdir(sample_dataset_dir)):
    if not file.endswith(".wav"):
        continue

    # File names carry seven "-"-separated metadata fields. Check this BEFORE
    # extracting features so a badly named file is skipped entirely instead of
    # contributing features without a label.
    parts = file.split("-")
    if len(parts) != 7:
        print(f"Skipping {file}: expected 7 '-'-separated fields, found {len(parts)}")
        continue

    file_path = os.path.join(sample_dataset_dir, file)
    try:
        features = _extract_audio_features(file_path)
    except Exception as e:
        # Best effort: report the file and move on. Nothing has been appended
        # yet, so every list stays aligned.
        print(f"Error processing {file}: {str(e)}")
        continue

    # Everything succeeded: append to all lists together so row i of every
    # list always refers to the same file.
    file_names.append(file)
    modalities.append(parts[0])
    vocal_channels.append(parts[1])
    emotions.append(parts[2])
    intensities.append(parts[3])
    statements.append(parts[4])
    repetitions.append(parts[5])
    actors.append(parts[6].split(".")[0])

    mean_pitches.append(features["mean_pitch"])
    pitch_ranges.append(features["pitch_range"])
    spectral_centroids.append(features["spectral_centroid"])
    zero_crossing_rates.append(features["zero_crossing_rate"])
    mfccs_mean.append(features["mfccs_mean"])
    mfccs_var.append(features["mfccs_var"])
    chroma_mean.append(features["chroma_mean"])
    chroma_var.append(features["chroma_var"])

In [None]:
# Map the emotion codes to consecutive integer class ids
label_encoder = LabelEncoder()
encoded_emotions = label_encoder.fit(emotions).transform(emotions)

In [None]:
def _column(values):
    """Return `values` as a 2-D single-column array, the layout StandardScaler takes."""
    return np.array(values).reshape(-1, 1)


# Standardize the pitch-related features.
# Each fit_transform call refits the scaler, so once this cell has run
# scaler_pitch holds only the statistics of the pitch ranges.
scaler_pitch = StandardScaler()
normalized_mean_pitches = scaler_pitch.fit_transform(_column(mean_pitches))
normalized_pitch_ranges = scaler_pitch.fit_transform(_column(pitch_ranges))

# Standardize the MFCC and chroma summaries the same way; scaler_other ends up
# fitted on chroma_var, the last feature passed to it.
scaler_other = StandardScaler()
normalized_mfccs_mean = scaler_other.fit_transform(_column(mfccs_mean))
normalized_mfccs_var = scaler_other.fit_transform(_column(mfccs_var))
normalized_chroma_mean = scaler_other.fit_transform(_column(chroma_mean))
normalized_chroma_var = scaler_other.fit_transform(_column(chroma_var))

In [None]:
# Turn the two remaining feature lists into NumPy arrays (same names, new type)
spectral_centroids, zero_crossing_rates = (
    np.array(values) for values in (spectral_centroids, zero_crossing_rates)
)

In [None]:
# Combines all features into one array, one column per feature.
# The spectral centroid and zero-crossing rate are standardized here so that
# all eight columns share the same scale, matching the other six features;
# left raw, their very different magnitudes would dominate the
# scale-sensitive neural network.
X = np.hstack((
    normalized_mean_pitches,
    normalized_pitch_ranges,
    StandardScaler().fit_transform(np.asarray(spectral_centroids, dtype=float).reshape(-1, 1)),
    StandardScaler().fit_transform(np.asarray(zero_crossing_rates, dtype=float).reshape(-1, 1)),
    normalized_mfccs_mean.reshape(-1, 1),
    normalized_mfccs_var.reshape(-1, 1),
    normalized_chroma_mean.reshape(-1, 1),
    normalized_chroma_var.reshape(-1, 1),
))

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X,
    encoded_emotions,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Gaussian Naive Bayes: fit on the training split, then predict the test split
nb_classifier = GaussianNB()
y_pred = nb_classifier.fit(X_train, y_train).predict(X_test)

# Fraction of test samples whose predicted emotion matches the true one
accuracy = accuracy_score(y_test, y_pred)
print("Naive Bayes Classifier Accuracy:", accuracy)

In [None]:
# Random Forest with 100 trees and a fixed random_state
rf_classifier = RandomForestClassifier(random_state=42, n_estimators=100)

# Fit on the training split, then predict the test split
y_pred = rf_classifier.fit(X_train, y_train).predict(X_test)

# Fraction of test samples whose predicted emotion matches the true one
accuracy = accuracy_score(y_test, y_pred)
print("Random Forest Classifier Accuracy:", accuracy)

In [None]:
# Feedforward network: two ReLU hidden layers, each followed by dropout, and a
# softmax output with one unit per distinct emotion class
n_features = X_train.shape[1]
n_classes = len(np.unique(encoded_emotions))

model = Sequential([
    Dense(128, input_dim=n_features, activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(n_classes, activation='softmax'),
])

In [None]:
# Compile the model: Adam optimizer, sparse categorical cross-entropy on the
# integer class labels, accuracy as the reported metric
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train for 50 epochs in batches of 64; the test split is passed as
# validation data so its metrics are reported alongside the training metrics
model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=50,
    validation_data=(X_test, y_test),
)

In [None]:
# Score the trained network on the test split and report loss and accuracy
loss, accuracy = model.evaluate(x=X_test, y=y_test)
summary = f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}"
print(summary)