In [112]:
%pip install music21 

Note: you may need to restart the kernel to use updated packages.


In [113]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.metrics import classification_report


In [114]:

from imblearn.over_sampling import SMOTE

In [115]:
df = pd.read_csv('./data/preprocessed_data_with_midi.csv')

In [116]:
len(df['tempos'].head(1).values[0])

42

## More preprocessing. Zero pad arrays

In [117]:
MAX_LEN = 500  

In [118]:
df.head()

Unnamed: 0,midi_file,composer,path,notes,chords,tempos
0,Bwv0997 Partita for Lute 1mov.mid,Bach,./data/Bach/,"['C3', 'C5', 'D5', 'E-5', 'G5', 'B5', 'C6', 'B...","['9.10', '7.8', '6.9', '0.3', '0.6', '2', '7.1...","[80, 80, 60, 60, 120, 120, 60, 60, 80, 80]"
1,Bwv0535 Prelude and Fugue.mid,Bach,./data/Bach/,"['G3', 'D3', 'B-2', 'D3', 'G2', 'A3', 'B-3', '...","['2.7', '0.6', '7.10', '9', '7.10', '7.11', '7...","[80, 80, 80, 50, 50, 50, 65, 65, 65, 60, 60, 6..."
2,Bwv0806 English Suite n1 05mov.mid,Bach,./data/Bach/,"['A4', 'A4', 'A2', 'E4', 'C#4', 'A3', 'G#3', '...","['5.6', '1.2', '11.4', '1.2', '4.6', '2.4', '4...","[144, 144]"
3,Bwv0998 Prelude Fugue Allegro for Lute 3mov.mid,Bach,./data/Bach/,"['E-2', 'E-4', 'D4', 'C4', 'B-3', 'G#3', 'G3',...","['2.5', '7.8', '7.10', '4.7', '5.8']","[100, 100, 100, 8, 8, 8]"
4,Jesu Joy of Man Desiring.mid,Bach,./data/Bach/,"['G2', 'G1', 'G1', 'G4', 'G4', 'A4', 'A4', 'B4...","['11.0', '11.0', '11.0', '11.0', '11.0', '11.0...","[65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 6..."


In [119]:
print(len(df['notes'].values[0]))

6024


In [120]:
df['composer'].value_counts()

Bach         925
Mozart       257
Beethoven    212
Chopin       136
Name: composer, dtype: int64

In [121]:
counts = ['notes', 'chords', 'tempos']
for item in counts:
    lengths = df[item].apply(len)
    top_lengths = lengths.value_counts().sort_index(ascending=False).head(10)
    print(f"for {item}, top_lengths: {top_lengths}")

for notes, top_lengths: 186381    1
179790    1
178186    1
155545    1
117069    1
113611    1
112641    1
109857    1
105973    1
105425    1
Name: notes, dtype: int64
for chords, top_lengths: 71207    1
63651    1
59707    1
57630    1
55964    1
50682    1
49945    1
46977    1
46138    1
42725    1
Name: chords, dtype: int64
for tempos, top_lengths: 247395    1
232080    1
202202    1
156143    1
140998    1
134680    1
129366    1
116160    1
115121    1
104706    1
Name: tempos, dtype: int64


In [122]:
def encode_notes(notes):
    encoded = []
    for note in notes:
        try:
            pitch = music21.note.Note(note).pitch.midi
            encoded.append(pitch)
        except:
            encoded.append(0) 
    return encoded

def encode_chords(chords):
    encoded = []
    for chord in chords:
        try:
            chord_pitches = [int(p) for p in chord.split('.')]
            root = chord_pitches[0]
            encoded.append(root)
        except:
            encoded.append(0)  
    return encoded


In [104]:
df['encoded_notes'] = df['notes'].apply(encode_notes)
df['encoded_chords'] = df['chords'].apply(encode_chords)

In [105]:
def pad_truncate_sequences(sequences, max_len=MAX_LEN):
    return pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')


In [106]:
df['tempos'] = df['tempos'].apply(eval)


In [107]:
X_notes = pad_truncate_sequences(df['encoded_notes'])
X_chords = pad_truncate_sequences(df['encoded_chords'])
X_tempos = pad_truncate_sequences(df['tempos'])


In [108]:
X = np.stack((X_notes, X_chords, X_tempos), axis=-1)


In [109]:
print(X.shape)

(1530, 500, 3)


In [110]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['composer'])


In [111]:
def apply_smote(X, y):
    smote = SMOTE(random_state=42)  

    num_samples, num_timesteps, num_features = X.shape
    X_flatten = X.reshape(num_samples, -1)

    smote = SMOTE(random_state=42)
    X_smote, y_smote = smote.fit_resample(X_flatten, y)

    print(pd.Series(y_smote).value_counts())
    X_smote_reshaped = X_smote.reshape(X_smote.shape[0], num_timesteps, num_features)

    return X_smote_reshaped, y_smote

In [133]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)

# Need to apply smote to training before we convert to categorical
y_temp = to_categorical(y_temp, num_classes=num_classes)

X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


In [134]:
X_train, y_train = apply_smote(X_train, y_train)

0    561
2    561
3    561
1    561
dtype: int64


In [135]:
num_classes = len(label_encoder.classes_)
y_train = to_categorical(y_train, num_classes=num_classes)

In [142]:
def build_cnn_model(input_shape, num_classes):
    model = Sequential([
        Conv1D(128, 3, activation='relu', input_shape=input_shape),
        MaxPooling1D(2),
        Dropout(0.5),
        Conv1D(128, 3, activation='relu'),
        MaxPooling1D(2),
        Dropout(0.5),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax')
    ])
    
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [143]:
input_shape = (MAX_LEN, 3)
cnn_model = None
cnn_model = build_cnn_model(input_shape, num_classes)
cnn_model.summary()


Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_10 (Conv1D)          (None, 498, 128)          1280      
                                                                 
 max_pooling1d_10 (MaxPooli  (None, 249, 128)          0         
 ng1D)                                                           
                                                                 
 dropout_15 (Dropout)        (None, 249, 128)          0         
                                                                 
 conv1d_11 (Conv1D)          (None, 247, 128)          49280     
                                                                 
 max_pooling1d_11 (MaxPooli  (None, 123, 128)          0         
 ng1D)                                                           
                                                                 
 dropout_16 (Dropout)        (None, 123, 128)         

In [144]:
history = cnn_model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=30, batch_size=32)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [145]:
test_loss, test_acc = cnn_model.evaluate(X_test, y_test, verbose=2)
print(f'Test accuracy: {test_acc}')

10/10 - 0s - loss: 2.2490 - accuracy: 0.6634 - 67ms/epoch - 7ms/step
Test accuracy: 0.6633986830711365


In [146]:
y_pred = cnn_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

print(classification_report(y_true, y_pred_classes, target_names=label_encoder.classes_))

              precision    recall  f1-score   support

        Bach       0.83      0.85      0.84       186
   Beethoven       0.51      0.55      0.53        38
      Chopin       0.44      0.20      0.27        35
      Mozart       0.29      0.36      0.32        47

    accuracy                           0.66       306
   macro avg       0.52      0.49      0.49       306
weighted avg       0.66      0.66      0.66       306

