In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import pathlib
import pretty_midi, mido

In [3]:
cvs_path = 'H:/.shortcut-targets-by-id/1VjZVJYaetZxsv2VWM91CyhZqGyOeiDHX/Music Generator/clean/vgmidi_labelled.csv'

df = pd.read_csv(cvs_path)
df.head()


Unnamed: 0,id,series,console,game,piece,midi,valence,arousal
0,8013,Banjo-Kazooie,N64,Banjo-Kazooie,Boggys Igloo Happy,labelled/phrases/Banjo-Kazooie_N64_Banjo-Kazoo...,1,1
1,8073,Banjo-Kazooie,N64,Banjo-Kazooie,Boggys Igloo Sad,labelled/phrases/Banjo-Kazooie_N64_Banjo-Kazoo...,1,-1
2,8029,Banjo-Kazooie,N64,Banjo-Kazooie,Bubblegloop Swamp,labelled/phrases/Banjo-Kazooie_N64_Banjo-Kazoo...,1,1
3,8022,Banjo-Kazooie,N64,Banjo-Kazooie,Click Clock Wood,labelled/phrases/Banjo-Kazooie_N64_Banjo-Kazoo...,1,1
4,8066,Banjo-Kazooie,N64,Banjo-Kazooie,Ending,labelled/phrases/Banjo-Kazooie_N64_Banjo-Kazoo...,1,-1


In [4]:
trainFiles = glob.glob('H:/.shortcut-targets-by-id/1VjZVJYaetZxsv2VWM91CyhZqGyOeiDHX/Music Generator/clean/labelled/train/*.mid')
print(len(trainFiles))
testFiles = glob.glob('H:/.shortcut-targets-by-id/1VjZVJYaetZxsv2VWM91CyhZqGyOeiDHX/Music Generator/clean/labelled/test/*.mid')
print(len(testFiles))

110
26


In [141]:
def file_name_format(path, suffix):

    # Split the string using underscores
    parts = path.split('_')

    # Extract relevant parts and create the new string
    new_path = ''
    for part in parts[2:]:
        if '.mid' not in part:
            new_path += part + '_'
        else:
            new_path += part[:-4] + '_' + str(suffix) + part[-4:]
    new_path = 'labelled/phrases/' + new_path
    
    # new_path2 = f'labelled/phrases/{parts[2]}_{parts[3]}_{parts[4]}_{parts[5][:-4]}_{suffix}{parts[5][-4:]}'

    return new_path

def analyze_midi(midi_file):
    
    pm = pretty_midi.PrettyMIDI(midi_file)

    # get tempo changes
    tempo_changes = pm.get_tempo_changes()
    bpm = np.mean(tempo_changes)
    
    # get notes
    notes = []
    for instrument in pm.instruments:
        for note in instrument.notes:
            notes.append(note)
    
    # get notes steps
    sorted_notes = sorted(notes, key=lambda note: note.start)
    prev_start = sorted_notes[0].start
    steps = []
    for note in sorted_notes:
        start = note.start
        steps.append(start - prev_start)
        prev_start = start
    avg_step = np.mean(steps)
    max_step = np.max(steps)


    # get note durations
    durations = [note.end - note.start for note in notes]
    # get average duration
    avg_duration = np.mean(durations)
    max_duration = np.max(durations)

    # get note pitches
    pitches = [note.pitch for note in notes]
    # get average pitch
    avg_pitch = np.mean(pitches)
    max_pitch = np.max(pitches)

    for i in range(3):
        file_row = df.loc[df['midi'] == file_name_format(pathlib.Path(midi_file).name, i)]
        if file_row.shape[0] > 0:
            arousal = file_row['arousal'].values[0]
            valence = file_row['valence'].values[0]
            break

    return {
        'bpm': bpm,
        'avg_step': avg_step,
        'max_step': max_step,
        'avg_duration': avg_duration,
        'max_duration': max_duration,
        'avg_pitch': avg_pitch,
        'max_pitch': max_pitch,
        'valence': valence if valence is not None else 0,
        'arousal': arousal if arousal is not None else 0
    }

resut = analyze_midi(trainFiles[6])
print(resut)
# trainDataset = np.empty((len(trainFiles), 9), dtype=float)
# for file_ind in range(len(trainFiles)):
#     data = analyze_midi(trainFiles[file_ind])

#      # Convert the dictionary values to a NumPy array
#     data_array = np.array(list(data.values())).reshape(1, -1)

#     # Stack the data_array vertically to trainDataset
#     trainDataset[file_ind] = data_array

# print(trainDataset[:5])

{'bpm': 60.0, 'avg_step': 0.1299407114624506, 'max_step': 0.5, 'avg_duration': 0.4683794466403162, 'max_duration': 2.0, 'avg_pitch': 68.75494071146245, 'max_pitch': 78, 'valence': -1, 'arousal': 1}


In [6]:
trainDataset = np.empty((len(trainFiles), 9), dtype=float)
for file_ind in range(len(trainFiles)):
    data = analyze_midi(trainFiles[file_ind])

     # Convert the dictionary values to a NumPy array
    data_array = np.array(list(data.values())).reshape(1, -1)

    # Stack the data_array vertically to trainDataset
    trainDataset[file_ind] = data_array

print('trainDataset shape: ', trainDataset.shape)

testDataset = np.empty((len(testFiles), 9), dtype=float)
for file_ind in range(len(testFiles)):
    data = analyze_midi(testFiles[file_ind])

     # Convert the dictionary values to a NumPy array
    data_array = np.array(list(data.values())).reshape(1, -1)

    # Stack the data_array vertically to trainDataset
    testDataset[file_ind] = data_array

print('testDataset shape: ', testDataset.shape)

trainDataset shape:  (110, 9)
testDataset shape:  (26, 9)


In [7]:
from sklearn.preprocessing import StandardScaler
from  sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X = trainDataset[:, :-2]
y_valence = trainDataset[:, -2]
y_arousal = trainDataset[:, -1]

# print('X shape: ', X.shape)
# print('y shape: ', y_valence.shape)
# print('y shape: ', y_arousal.shape)
# print('X: ', X[:5])
# print('y: ', y_valence[:5])
# print('y: ', y_arousal[:5])

# Split the data into training and validation sets
X_train, X_val, y_valence_train, y_valence_val, y_arousal_train, y_arousal_val = train_test_split(
    X, y_valence, y_arousal, test_size=0.1, random_state=42
)

print("X_train shape:", X_train.shape)
print("X_val shape:", X_val.shape)
print("y_valence_train shape:", y_valence_train.shape)
print("y_arousal_val shape:", y_arousal_train.shape)

X_train shape: (99, 7)
X_val shape: (11, 7)
y_train shape: (99,)
y_val shape: (99,)


In [8]:
from sklearn.svm import SVC
from sklearn.multioutput import MultiOutputClassifier

# Create a classifier: a support vector classifier
svm_valence_classifier = SVC(kernel='linear', C =1.0, tol=1e-3)
svm_arousal_classifier = SVC(kernel='linear', C =1.0, tol=1e-3)

# Fit the classifier to the training data
svm_valence_classifier.fit(X_train, y_valence_train)
svm_arousal_classifier.fit(X_train, y_arousal_train)


# Predict the labels of the validation set
y_valence_pred = svm_valence_classifier.predict(X_val)
y_arousal_pred = svm_arousal_classifier.predict(X_val)

# Compute metrics
print("Accuracy valence: ", accuracy_score(y_valence_val, y_valence_pred))
print("Accuracy arousal: ", accuracy_score(y_arousal_val, y_arousal_pred))

Accuracy valence:  0.45454545454545453
Accuracy arousal:  0.8181818181818182


In [9]:
X_test = testDataset[:, :-2]
y_valence_test = testDataset[:, -2]
y_arousal_test = testDataset[:, -1]

# Predict the labels of the test set
y_valence_pred = svm_valence_classifier.predict(X_test)
y_arousal_pred = svm_arousal_classifier.predict(X_test)

# Compute metrics
print("Accuracy valence: ", accuracy_score(y_valence_test, y_valence_pred))
print("Accuracy arousal: ", accuracy_score(y_arousal_test, y_arousal_pred))

Accuracy valence:  0.5384615384615384
Accuracy arousal:  0.6923076923076923


In [10]:
ind = np.random.randint(0, X_test.shape[0])
sample_data = X_test[ind]
sample_valence = y_valence_test[ind]
sample_arousal = y_arousal_test[ind]
sample_file = testFiles[ind]
sample_valence_pred = svm_valence_classifier.predict(sample_data.reshape(1, -1))
sample_arousal_pred = svm_arousal_classifier.predict(sample_data.reshape(1, -1))

print('True values: ', sample_valence, sample_arousal)
print('Predicted values: ', sample_valence_pred, sample_arousal_pred)

True values:  1.0 1.0
Predicted values:  [1.] [1.]


## try augmenting the dataset

do small random variations in each midi file keeping the labels to have new files to analyze

In [128]:
augment_train_dataset = trainDataset.copy()

positive_positive = augment_train_dataset[(augment_train_dataset[:, -2] == 1) & (augment_train_dataset[:, -1] == 1)]
positive_negative = augment_train_dataset[(augment_train_dataset[:, -2] == 1) & (augment_train_dataset[:, -1] == -1)]
negative_positive = augment_train_dataset[(augment_train_dataset[:, -2] == -1) & (augment_train_dataset[:, -1] == 1)]
negative_negative = augment_train_dataset[(augment_train_dataset[:, -2] == -1) & (augment_train_dataset[:, -1] == -1)]

print(len(positive_positive), len(positive_negative), len(negative_positive), len(negative_negative))

44 30 21 15


In [135]:
target_size = 100

# Function to perform data augmentation
def augment_data(data, mean, std, labels, epsilon=0.1):
    num_samples = target_size // len(data)
    augmented_data = []
    noise = np.random.normal(loc=mean, scale=std, size=data.shape)
    for _ in range(num_samples):
        augmented_data.append(data + epsilon * noise)
        augmented_data.append(data - epsilon * noise)
    return np.vstack(augmented_data)[:target_size, :]

# Data augmentation for each category
augmented_positive_positive = augment_data(positive_positive, np.mean(positive_positive, axis=0), np.std(positive_positive, axis=0), [1, 1])
augmented_positive_negative = augment_data(positive_negative, np.mean(positive_negative, axis=0), np.std(positive_negative, axis=0), [1, -1])
augmented_negative_positive = augment_data(negative_positive, np.mean(negative_positive, axis=0), np.std(negative_positive, axis=0), [-1, 1])
augmented_negative_negative = augment_data(negative_negative, np.mean(negative_negative, axis=0), np.std(negative_negative, axis=0), [-1, -1])

# print(augmented_negative_negative[0])
# print(len(augmented_positive_positive), len(augmented_positive_negative), len(augmented_negative_positive), len(augmented_negative_negative))

In [136]:
augmented_data = np.vstack([augment_train_dataset, augmented_positive_positive, augmented_positive_negative, augmented_negative_positive, augmented_negative_negative])

# turn labels into integers again
augmented_data[:, -2:] = np.sign(augmented_data[:, -2:])

# Shuffle the augmented data
shuffled_indices = np.random.permutation(len(augmented_data))
augmented_train_dataset = augmented_data[shuffled_indices]
print(len(augment_train_dataset))
print(len(augmented_train_dataset))

110
510


In [137]:
pos_pos = augmented_train_dataset[(augmented_train_dataset[:, -2] == 1) & (augmented_train_dataset[:, -1] == 1)]
pos_neg = augmented_train_dataset[(augmented_train_dataset[:, -2] == 1) & (augmented_train_dataset[:, -1] == -1)]
neg_pos = augmented_train_dataset[(augmented_train_dataset[:, -2] == -1) & (augmented_train_dataset[:, -1] == 1)]
neg_neg = augmented_train_dataset[(augmented_train_dataset[:, -2] == -1) & (augmented_train_dataset[:, -1] == -1)]

print(len(pos_pos), len(pos_neg), len(neg_pos), len(neg_neg))

144 130 121 115


In [138]:
X = augmented_train_dataset[:, :-2]
y_valence = augmented_train_dataset[:, -2]
y_arousal = augmented_train_dataset[:, -1]

# Split the data into training and validation sets
X_train, X_val, y_valence_train, y_valence_val, y_arousal_train, y_arousal_val = train_test_split(
    X, y_valence, y_arousal, test_size=0.1, random_state=42
)

print("X_train shape:", X_train.shape)
print("X_val shape:", X_val.shape)
print("y_valence_train shape:", y_valence_train.shape)
print("y_valece_val shape:", y_valence_val.shape)

X_train shape: (459, 7)
X_val shape: (51, 7)
y_valence_train shape: (459,)
y_valece_val shape: (51,)


In [139]:
# Create a classifier: a support vector classifier
svm_valence_classifier = SVC(kernel='linear', C =1.0, tol=1e-3)
svm_arousal_classifier = SVC(kernel='linear', C =1.0, tol=1e-3)

# Fit the classifier to the training data
svm_valence_classifier.fit(X_train, y_valence_train)
svm_arousal_classifier.fit(X_train, y_arousal_train)


# Predict the labels of the validation set
y_valence_pred = svm_valence_classifier.predict(X_val)
y_arousal_pred = svm_arousal_classifier.predict(X_val)

# Compute metrics
print("Accuracy valence: ", accuracy_score(y_valence_val, y_valence_pred))
print("Accuracy arousal: ", accuracy_score(y_arousal_val, y_arousal_pred))

Accuracy valence:  0.6078431372549019
Accuracy arousal:  0.8431372549019608


In [140]:
X_test = testDataset[:, :-2]
y_valence_test = testDataset[:, -2]
y_arousal_test = testDataset[:, -1]

# Predict the labels of the test set
y_valence_pred = svm_valence_classifier.predict(X_test)
y_arousal_pred = svm_arousal_classifier.predict(X_test)

# Compute metrics
print("Accuracy valence: ", accuracy_score(y_valence_test, y_valence_pred))
print("Accuracy arousal: ", accuracy_score(y_arousal_test, y_arousal_pred))

Accuracy valence:  0.46153846153846156
Accuracy arousal:  0.7692307692307693


## try with random forest

In [157]:
from sklearn.ensemble import RandomForestClassifier 

X_train = trainDataset[:, :-2]
y_valence_train = trainDataset[:, -2]
y_arousal_train = trainDataset[:, -1]
# y_train = np.column_stack((y_valence_train, y_arousal_train))

X_test = testDataset[:, :-2]
y_valence_test = testDataset[:, -2]
y_arousal_test = testDataset[:, -1]
# y_test = np.column_stack((y_valence_test, y_arousal_test))

print("X_train shape:", X_train.shape)
# print("X_val shape:", X_val.shape)
print("y_train shape:", y_valence_train.shape)
# print("y_arousal_val shape:", y_arousal_train.shape)

print("X_test shape:", X_test.shape)
print("y_test shape:", y_valence_test.shape)

X_train shape: (110, 7)
y_train shape: (110,)
X_test shape: (26, 7)
y_test shape: (26,)


In [158]:
from sklearn.metrics import mean_squared_error

# Create a classifier: a random forest classifier
valence_model = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)
arousal_model = RandomForestClassifier(n_estimators=100, max_depth=2, random_state=0)

# Train the model
valence_model.fit(X_train, y_valence_train)
arousal_model.fit(X_train, y_arousal_train)

# Make predictions on the test set
valence_predictions = valence_model.predict(X_test)
arousal_predictions = arousal_model.predict(X_test)

# Evaluate the model
valence_mse = mean_squared_error(y_valence_test, valence_predictions)
arousal_mse = mean_squared_error(y_arousal_test, arousal_predictions)
print(f'Mean Squared Error valence: {valence_mse}')
print(f'Mean Squared Error arousal: {arousal_mse}')


Mean Squared Error valence: 1.5384615384615385
Mean Squared Error arousal: 1.0769230769230769
