# Music Emotion Classification

* Install Miniconda from their website
* Put Conda on PATH during the install
* Update conda - Terminal Command: conda update conda
* Clone the GitHub repository (send Tim or me your GitHub username if you haven’t yet).
* Load environment.yaml into conda environment (this will take a while to download) - Terminal Command: conda env create -n mus2vid -f environment.yaml

## Download Music and Emotion data

In [None]:
# Jamendo/DEAM dataset

['maestro-v3.0.0-wav\\maestro-v3.0.0\\2018/MIDI-Unprocessed_Chamber3_MID--AUDIO_10_R3_2018_wav--1.wav', 'maestro-v3.0.0-wav\\maestro-v3.0.0\\2008/MIDI-Unprocessed_03_R2_2008_01-03_ORIG_MID--AUDIO_03_R2_2008_wav--2.wav', 'maestro-v3.0.0-wav\\maestro-v3.0.0\\2017/MIDI-Unprocessed_066_PIANO066_MID--AUDIO-split_07-07-17_Piano-e_3-02_wav--3.wav', 'maestro-v3.0.0-wav\\maestro-v3.0.0\\2004/MIDI-Unprocessed_XP_21_R1_2004_01_ORIG_MID--AUDIO_21_R1_2004_01_Track01_wav.wav', 'maestro-v3.0.0-wav\\maestro-v3.0.0\\2006/MIDI-Unprocessed_17_R1_2006_01-06_ORIG_MID--AUDIO_17_R1_2006_04_Track04_wav.wav', 'maestro-v3.0.0-wav\\maestro-v3.0.0\\2009/MIDI-Unprocessed_07_R1_2009_04-05_ORIG_MID--AUDIO_07_R1_2009_07_R1_2009_04_WAV.wav', 'maestro-v3.0.0-wav\\maestro-v3.0.0\\2009/MIDI-Unprocessed_11_R1_2009_06-09_ORIG_MID--AUDIO_11_R1_2009_11_R1_2009_07_WAV.wav', 'maestro-v3.0.0-wav\\maestro-v3.0.0\\2013/ORIG-MIDI_03_7_8_13_Group__MID--AUDIO_19_R2_2013_wav--3.wav', 'maestro-v3.0.0-wav\\maestro-v3.0.0\\2009/MIDI-Unp

## Process wav files and extract features


In [None]:
# Consider using MIDI data for this as well as librosa, or just librosa features that Palm found in a paper
import pandas as pd
import numpy as np
import librosa
import pickle
import lzma


def get_matched_librosa(audio, genres):
    """
    Load every audio clip with librosa and pair its waveform with its genre label.

    Parameters:
        audio (list of str): Clip filenames; each one is loaded from "wav_clips/" + filename.
        genres (sequence): One genre label per clip, in the same order as `audio`.

    Returns:
        pandas.DataFrame: One row per clip with columns 'y' (the audio time series returned by
        librosa.load), 'sr' (the sampling rate returned by librosa.load) and 'genre'.
    """
    ys = []
    srs = []

    total = len(audio)
    for iters, file in enumerate(audio):
        # Progress indicator: "<clips loaded so far>/<total clips>"
        print(str(iters) + "/" + str(total))

        y, sr = librosa.load("wav_clips/" + file)
        ys.append(y)
        # Record this clip's sampling rate (not the accumulator list itself).
        srs.append(sr)

    df = pd.DataFrame({'y': ys, 'sr': srs, 'genre': genres})

    return df

# Load every clip and pair it with its genre label.
matched_librosa_df = get_matched_librosa(audio_clips, clip_genres)

# Persist the DataFrame as an LZMA-compressed pickle.
with lzma.open("matched_librosa.xz", mode="wb") as archive:
    pickle.dump(matched_librosa_df, archive)


In [None]:
import pandas as pd
import numpy as np
import librosa
import pickle
import lzma

def get_features(y, sr):
    """
    Compute a fixed set of librosa descriptors for one audio signal.

    Parameters:
        y: Audio time series, passed straight through to the librosa calls.
        sr: Sampling rate of `y`.

    Returns:
        list: [mfcc, rolloff, centroid, rms, tempo, onset_env, zcr, chromagram, pitches],
        each entry being the unmodified return value of the corresponding librosa call.
    """
    features = [
        librosa.feature.mfcc(y=y, sr=sr),
        librosa.feature.spectral_rolloff(y=y, sr=sr),
        librosa.feature.spectral_centroid(y=y, sr=sr),
        librosa.feature.rms(y=y),
        librosa.feature.tempo(y=y, sr=sr),
        librosa.onset.onset_strength(y=y, sr=sr),
        librosa.feature.zero_crossing_rate(y),
        librosa.feature.chroma_stft(y=y, sr=sr),
    ]

    # piptrack returns (pitches, magnitudes); only the pitches are kept.
    pitches, _magnitudes = librosa.piptrack(y=y, sr=sr)
    features.append(pitches)
    return features

def extract_librosa_features(librosa_df):
    """
    Extracts features and labels from librosa y and sr listed in the DataFrame and concatenates the
    features with their labels into a matrix.

    Parameters:
        librosa_df (pandas.DataFrame): A DataFrame with 'y', 'sr' and 'genre' columns, one row
            per audio clip.

    Returns:
        numpy.ndarray: An object-dtype matrix with one row per clip. Each row holds the values
        returned by get_features followed by the clip's genre in the last column. An empty
        DataFrame yields an empty 1-D array.
    """
    all_features = []  # One list per clip: features followed by the genre label
    for iters, (_, row) in enumerate(librosa_df.iterrows()):
        print(iters)  # Progress indicator

        obj_features = get_features(row['y'], row['sr'])
        obj_features.append(row['genre'])
        all_features.append(obj_features)

    if not all_features:
        return np.array(all_features)

    # The per-clip features are arrays of different shapes, so the rows are ragged.
    # np.array() on ragged input raises ValueError on recent NumPy versions; allocate an
    # object array explicitly and fill it cell by cell so each feature is stored as-is.
    labeled = np.empty((len(all_features), len(all_features[0])), dtype=object)
    for i, feats in enumerate(all_features):
        for j, value in enumerate(feats):
            labeled[i, j] = value

    return labeled

# Build the labeled feature matrix from the loaded clips.
lib_features = extract_librosa_features(matched_librosa_df)

# Persist the matrix as an LZMA-compressed pickle.
with lzma.open("labeled_lib_features.xz", mode="wb") as archive:
    pickle.dump(lib_features, archive)

## Partition Dataset into Training, Validation, and Testing

In [None]:
from keras.utils import to_categorical
import numpy as np
import pandas as pd

def balance_data(data_array, genres=None):
    """
    Oversample every genre so each one has as many rows as the most frequent genre.

    Parameters:
        data_array (numpy.ndarray): 2-D array whose last column holds the integer genre index
            and whose remaining columns hold the features.
        genres (sequence, optional): Genre names; only its length is used, as the number of
            valid genre indices (0 .. len(genres) - 1). Defaults to the module-level
            `genre_list`, looked up when the function is called.

    Returns:
        numpy.ndarray: The balanced rows, grouped by genre index in ascending order. Genres
        that already have the maximum count are returned unchanged; smaller genres are
        sampled with replacement (fixed seed 42) up to that count; genres with no rows at all
        are skipped because there is nothing to sample from.

    Raises:
        IndexError: If any label lies outside 0 .. len(genres) - 1.
    """
    if genres is None:
        # Resolve the default at call time rather than freezing it at definition time.
        genres = genre_list
    num_genres = len(genres)

    data_df = pd.DataFrame(data_array)
    # The label is the last column; cast once so comparisons below are integer comparisons.
    labels = data_df[data_df.columns[-1]].astype(int)

    if ((labels < 0) | (labels >= num_genres)).any():
        raise IndexError(
            f"genre labels must be in the range 0..{num_genres - 1}"
        )

    # Split the rows by genre and find the size of the largest group.
    per_genre = [data_df[labels == genre_num] for genre_num in range(num_genres)]
    max_samples = max((len(df) for df in per_genre), default=0)

    balanced_list = []
    for df in per_genre:
        if df.empty:
            # A genre absent from this split cannot be oversampled.
            continue
        if len(df) != max_samples:
            df = df.sample(n=max_samples, replace=True, random_state=42)
        balanced_list.append(df)

    if not balanced_list:
        # No rows at all: return the (empty) input unchanged.
        return data_df.to_numpy()

    return pd.concat(balanced_list).to_numpy()

# Randomise the row order so the three splits below are random samples
labeled_features = np.random.permutation(labeled_features)

# Split boundaries: rows [0, num_training) are training (60%),
# rows [num_training, num_validation) are validation (20%), the rest is test (20%)
num = len(labeled_features)
num_training, num_validation = int(num * 0.6), int(num * 0.8)

# Training and validation splits are class-balanced; the test split is left as sampled
training_data = balance_data(labeled_features[:num_training])
validation_data = balance_data(labeled_features[num_training:num_validation])
test_data = labeled_features[num_validation:]

# The last column is the label; every column before it is a feature
num_cols = training_data.shape[1] - 1
num_classes = len(genre_list)

# Feature matrices for each split
training_features = training_data[:, :num_cols]
validation_features = validation_data[:, :num_cols]
test_features = test_data[:, :num_cols]

# Integer class labels for each split
training_labels = training_data[:, num_cols].astype(int)
validation_labels = validation_data[:, num_cols].astype(int)
test_labels = test_data[:, num_cols].astype(int)

print(test_labels)  # Print the test labels
print(to_categorical((test_labels)))  # Print the one-hot encoding of the test labels

## Train Neural Network (probably use PyTorch instead of Keras)

In [None]:
from tensorflow import keras
from keras import metrics
import numpy
import pandas as pd
import matplotlib.pyplot as plt
import pretty_midi as pm


# Define the model architecture
normalizer = keras.layers.BatchNormalization()
model = keras.Sequential([
    normalizer,
    keras.layers.Dense(21, activation='relu'), # There are 20 variables in the feature matrix
    keras.layers.Dense(15, activation='relu'), # 2/3 the input layer + 1
    keras.layers.Dense(num_classes, activation='softmax')
])

# Compile settings:
#   optimizer="adam": the Adam optimization algorithm.
#   loss='categorical_crossentropy': measures the discrepancy between the predicted class
#       distribution and the one-hot true labels; suitable for multi-class classification.
#   metrics=['accuracy']: accuracy is reported during training and evaluation.
model.compile(optimizer="adam", loss='categorical_crossentropy', metrics=['accuracy'])

# Encode the training and validation labels using one-hot encoding.
# num_classes is passed explicitly so the encoding always has one column per output unit of
# the softmax layer, even if a split happens not to contain the highest class index.
train_labels_encoded = to_categorical(training_labels, num_classes=num_classes)
val_labels_encoded = to_categorical(validation_labels, num_classes=num_classes)

# Fit settings:
#   x=training_features, y=train_labels_encoded: inputs and one-hot labels for training.
#   validation_data=(validation_features, val_labels_encoded): monitored during training.
#   batch_size=10: number of samples per gradient update.
#   epochs=200: maximum number of passes over the training data.
#   callbacks: EarlyStopping stops training once val_loss has not improved for 10 epochs;
#              ModelCheckpoint keeps only the best model so far in 'best_model.h5'.
history = model.fit(x=training_features, y=train_labels_encoded, 
                    validation_data=(validation_features, val_labels_encoded),
                    batch_size=10, epochs=200, verbose=2,
                    callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=10),
                               keras.callbacks.ModelCheckpoint('best_model.h5', save_best_only=True)])

# Plot training and validation loss per epoch
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.ylim([0, 10])
plt.xlabel('Epoch')
# The curves are the categorical cross-entropy loss from model.fit
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()
# Save the entire model (as it stands after the last epoch run) to an h5 file
model.save("my_model.h5")

# Use the in-memory model (as trained above) to predict the test set and tally, per class:
# how many samples truly belong to it, how many were predicted as it, and how many of those
# predictions were correct. The tallies are sized from num_classes so they always match the
# width of the model's softmax output.
real_labels = [0] * num_classes
predicted_labels = [0] * num_classes
correct = [0] * num_classes

preds = model.predict(test_features)

for real, pred_row in zip(test_labels, preds):
    # The predicted class is the index of the largest softmax output
    prediction = np.argmax(pred_row)
    real_labels[real] += 1
    predicted_labels[prediction] += 1

    print("Label:      " + str(real))
    print("Prediction: " + str(prediction))

    if (real == prediction):
        correct[real] += 1

print("Total labels in test data: ", real_labels)
print("Total labels in pred data: ", predicted_labels)
print("correct labels in test data: ", correct)
# Fraction of each class's predictions that were correct; NaN when a class was never
# predicted, instead of dividing by zero.
probabilities = [
    hits / predicted if predicted else float("nan")
    for hits, predicted in zip(correct, predicted_labels)
]
print("prediction probabilities: ", probabilities)
    
# Predict the class of a single piece, loaded from a MIDI file with pretty_midi.
# NOTE(review): get_features as defined in this file takes (y, sr) audio arguments and returns
# a list of librosa features, but here it is called with a single PrettyMIDI object. This call
# looks like it was written for a different, MIDI-based get_features; confirm which one is
# intended before running this cell.
danube_midi = pm.PrettyMIDI('The-Blue-Danube-Waltz.mid')
danube_feats = get_features(danube_midi)
danube_feats = np.asarray(danube_feats)
# Add a leading batch axis so the single sample has the shape model.predict expects
danube_feats = np.expand_dims(danube_feats, axis = 0)
print('danube: ', np.argmax(model.predict(danube_feats)))
# Evaluate the model on the test set.
# num_classes is passed explicitly so the one-hot labels have one column per softmax output
# even if the test split does not contain the highest class index.
test_loss, test_accuracy = model.evaluate(
    test_features, to_categorical(test_labels, num_classes=num_classes)
)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)