Feature Extraction

In [None]:
# Directories containing the training and test .wav files (replace the placeholders).
data_path = 'YOUR DATA PATH'
test_path = 'YOUR DATA PATH'

# Number of Mel-Frequency Cepstral Coefficients (MFCCs) to extract.
num_mfcc = 20

# One feature vector and one label are appended per training clip.
X_train = []
y_train = []

# Sorting the directory listing keeps the sample order deterministic across runs.
for file in sorted(os.listdir(data_path)):
    if not file.endswith(".wav"):
        continue

    # Load the waveform together with the sampling rate it was loaded at.
    audio_data, sample_rate = librosa.load(os.path.join(data_path, file))

    # Compute the MFCCs of the clip.
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=num_mfcc)

    # Collapse axis 1 with mean and standard deviation so that every clip,
    # whatever its duration, yields one fixed-length vector (means followed by stds).
    mfccs_avg = np.mean(mfccs, axis=1)
    mfccs_std = np.std(mfccs, axis=1)
    X_train.append(np.concatenate((mfccs_avg, mfccs_std)))

    # The emotion label is the part of the file name that precedes the first
    # digit (e.g. "angry012.wav" -> "angry"). The search is limited to the stem,
    # so a name without any digit yields the whole stem instead of running past
    # the end of the string and raising IndexError.
    stem = os.path.splitext(file)[0]
    first_digit = next(
        (pos for pos, char in enumerate(stem) if char.isdigit()),
        len(stem),
    )
    label = stem[:first_digit]
    y_train.append(label)

In [None]:
Split Data into Training and Validation

In [None]:
# Materialise the accumulated feature vectors and labels as NumPy arrays.
X_train = np.array(X_train)
y_train = np.array(y_train)

# Hold out 20% of the samples for validation. Stratifying on the labels keeps the
# class proportions the same in both subsets, and the fixed seed makes the split
# repeatable.
X_train_new, X_val, y_train_new, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
    stratify=y_train,
)

GMM

In [None]:
# Train one Gaussian Mixture Model (GMM) per emotion class, then label every
# validation sample with the class whose GMM assigns it the highest
# log-likelihood.
n_components = 60
covariance_type = 'full'
emotion_classes = ['angry', 'disgust', 'fearful', 'happy', 'neutral', 'sad', 'surprise', 'calm']

# Keep only the classes that actually occur in the training split. This
# guarantees that models[i] was trained on emotion_classes[i]; skipping an empty
# class while still indexing the full list would shift every later label.
emotion_classes = [emotion for emotion in emotion_classes if np.any(y_train_new == emotion)]
if not emotion_classes:
    raise ValueError("None of the expected emotion labels occur in y_train_new.")

models = []
for emotion in emotion_classes:
    # Training data for the current emotion class.
    X_emotion = X_train_new[y_train_new == emotion]

    model = GaussianMixture(
        # A mixture cannot be fitted with more components than samples, so cap
        # the component count for sparsely populated classes.
        n_components=min(n_components, X_emotion.shape[0]),
        reg_covar=1e-06,
        covariance_type=covariance_type,
        # Fixed seed so repeated runs of the notebook give the same models.
        random_state=42,
    )
    model.fit(X_emotion)
    models.append(model)

# Classify the validation data. Column j holds the per-sample log-likelihood
# under the model of emotion_classes[j]. Log-likelihoods are compared directly:
# exponentiating them can underflow to 0.0 for every class, after which argmax
# would always return the first class.
log_likelihoods = np.column_stack([model.score_samples(X_val) for model in models])
y_pred = [emotion_classes[best] for best in np.argmax(log_likelihoods, axis=1)]

# Evaluate performance of the GMM classifier.
accuracy = accuracy_score(y_val, y_pred)
print('Accuracy:', accuracy)

# Print the classification report and confusion matrix for the validation set.
print(classification_report(y_val, y_pred))
print(confusion_matrix(y_val, y_pred))

Test: Predictions

In [None]:
# NOTE(review): this scaler is never fitted or applied in this cell, and the
# GMMs are trained on unscaled features. It is kept only in case a later cell
# refers to it; confirm and remove if unused.
scaler = StandardScaler()

# Build the test feature matrix exactly as the training features were built.
X_test = []
file_names = []
for file in sorted(os.listdir(test_path)):
    if not file.endswith(".wav"):
        continue

    # The text before the first "." is the identifier written to the CSV.
    file_names.append(file.split(".")[0])

    # Load the waveform together with the sampling rate it was loaded at.
    audio_data, sample_rate = librosa.load(os.path.join(test_path, file))

    # Compute the MFCCs of the clip.
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=num_mfcc)

    # Summarise the coefficients by their mean and standard deviation along axis 1.
    mfccs_avg = np.mean(mfccs, axis=1)
    mfccs_std = np.std(mfccs, axis=1)
    X_test.append(np.concatenate((mfccs_avg.reshape(-1), mfccs_std.reshape(-1))))

X_test = np.array(X_test)

# A model only exists for the classes that had training samples, so recover
# that list here; indexing the full emotion_classes list with a position in
# `models` would mislabel predictions whenever a class was skipped.
trained_classes = [emotion for emotion in emotion_classes if np.any(y_train_new == emotion)]
if len(trained_classes) != len(models):
    raise RuntimeError(
        "The number of trained GMMs does not match the number of emotion classes "
        "present in y_train_new; re-run the GMM training cell."
    )

# Classify the test data with the trained GMMs. Log-likelihoods are compared
# directly because exponentiating them can underflow to 0.0 for every class.
if X_test.shape[0] == 0:
    # No .wav files were found: nothing to score.
    y_pred_test = []
else:
    log_likelihoods = np.column_stack([model.score_samples(X_test) for model in models])
    y_pred_test = [trained_classes[best] for best in np.argmax(log_likelihoods, axis=1)]

# Pair each file name with its predicted label and order the rows by file name.
predictions = np.column_stack((file_names, y_pred_test))
predictions = predictions[np.argsort(predictions[:, 0], kind="stable")]

# Save the predictions to a CSV file. The header is written by savetxt itself
# (comments="" suppresses the default "# " prefix), so the file does not have
# to be reopened and rewritten.
np.savetxt(
    "predictions.csv",
    predictions,
    fmt='%s',
    delimiter=",",
    header="filename,Label",
    comments="",
    encoding='utf-8',
)