In [35]:
pip install music21

Note: you may need to restart the kernel to use updated packages.


In [110]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.metrics import classification_report


In [42]:
df = pd.read_csv('./data/preprocessed_data_with_midi.csv')

In [57]:
len(df['tempos'].head(1).values[0])

42

## More preprocessing. Zero pad arrays

In [58]:
MAX_LEN = 500  

In [63]:
df.head()

Unnamed: 0,midi_file,composer,path,notes,chords,tempos,encoded_notes,encoded_chords
0,Bwv0997 Partita for Lute 1mov.mid,Bach,./data/Bach/,"['C3', 'C5', 'D5', 'E-5', 'G5', 'B5', 'C6', 'B...","['9.10', '7.8', '6.9', '0.3', '0.6', '2', '7.1...","[80, 80, 60, 60, 120, 120, 60, 60, 80, 80]","[0, 0, 60, 0, 0, 0, 0, 0, 60, 0, 0, 0, 0, 0, 6...","[0, 0, 9, 0, 1, 0, 0, 0, 0, 0, 7, 0, 8, 0, 0, ..."
1,Bwv0535 Prelude and Fugue.mid,Bach,./data/Bach/,"['G3', 'D3', 'B-2', 'D3', 'G2', 'A3', 'B-3', '...","['2.7', '0.6', '7.10', '9', '7.10', '7.11', '7...","[80, 80, 80, 50, 50, 50, 65, 65, 65, 60, 60, 6...","[0, 0, 67, 0, 0, 0, 0, 0, 62, 0, 0, 0, 0, 0, 7...","[0, 0, 2, 0, 7, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, ..."
2,Bwv0806 English Suite n1 05mov.mid,Bach,./data/Bach/,"['A4', 'A4', 'A2', 'E4', 'C#4', 'A3', 'G#3', '...","['5.6', '1.2', '11.4', '1.2', '4.6', '2.4', '4...","[144, 144]","[0, 0, 69, 0, 0, 0, 0, 0, 69, 0, 0, 0, 0, 0, 6...","[0, 0, 5, 0, 6, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, ..."
3,Bwv0998 Prelude Fugue Allegro for Lute 3mov.mid,Bach,./data/Bach/,"['E-2', 'E-4', 'D4', 'C4', 'B-3', 'G#3', 'G3',...","['2.5', '7.8', '7.10', '4.7', '5.8']","[100, 100, 100, 8, 8, 8]","[0, 0, 64, 0, 0, 0, 0, 0, 0, 64, 0, 0, 0, 0, 0...","[0, 0, 2, 0, 5, 0, 0, 0, 0, 7, 0, 8, 0, 0, 0, ..."
4,Jesu Joy of Man Desiring.mid,Bach,./data/Bach/,"['G2', 'G1', 'G1', 'G4', 'G4', 'A4', 'A4', 'B4...","['11.0', '11.0', '11.0', '11.0', '11.0', '11.0...","[65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 65, 6...","[0, 0, 67, 0, 0, 0, 0, 0, 67, 0, 0, 0, 0, 0, 6...","[0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, ..."


In [68]:
print(len(df['notes'].values[0]))

6024


In [76]:
counts = ['notes', 'chords', 'tempos']
for item in counts:
    lengths = df[item].apply(len)
    top_lengths = lengths.value_counts().sort_index(ascending=False).head(10)
    print(f"for {item}, top_lengths: {top_lengths}")

for notes, top_lengths: 186381    1
179790    1
178186    1
155545    1
117069    1
113611    1
112641    1
109857    1
105973    1
105425    1
Name: notes, dtype: int64
for chords, top_lengths: 71207    1
63651    1
59707    1
57630    1
55964    1
50682    1
49945    1
46977    1
46138    1
42725    1
Name: chords, dtype: int64
for tempos, top_lengths: 247395    1
232080    1
202202    1
156143    1
140998    1
134680    1
129366    1
116160    1
115121    1
104706    1
Name: tempos, dtype: int64


In [46]:
def encode_notes(notes):
    encoded = []
    for note in notes:
        try:
            pitch = music21.note.Note(note).pitch.midi
            encoded.append(pitch)
        except:
            encoded.append(0) 
    return encoded

def encode_chords(chords):
    encoded = []
    for chord in chords:
        try:
            chord_pitches = [int(p) for p in chord.split('.')]
            root = chord_pitches[0]
            encoded.append(root)
        except:
            encoded.append(0)  
    return encoded


In [47]:
df['encoded_notes'] = df['notes'].apply(encode_notes)
df['encoded_chords'] = df['chords'].apply(encode_chords)

In [80]:
def pad_truncate_sequences(sequences, max_len=MAX_LEN):
    return pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')


In [82]:
df['tempos'] = df['tempos'].apply(eval)


In [83]:
X_notes = pad_truncate_sequences(df['encoded_notes'])
X_chords = pad_truncate_sequences(df['encoded_chords'])
X_tempos = pad_truncate_sequences(df['tempos'])


In [84]:
X = np.stack((X_notes, X_chords, X_tempos), axis=-1)


In [101]:
X.shape

(1530, 500, 3)

In [102]:
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df['composer'])
num_classes = len(label_encoder.classes_)
y = to_categorical(y, num_classes=num_classes)

In [103]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)


In [92]:
def build_cnn_model(input_shape, num_classes):
    model = Sequential([
        Conv1D(32, 3, activation='relu', input_shape=input_shape),
        MaxPooling1D(2),
        Dropout(0.3),
        Conv1D(64, 3, activation='relu'),
        MaxPooling1D(2),
        Dropout(0.3),
        Conv1D(128, 3, activation='relu'),
        MaxPooling1D(2),
        Dropout(0.3),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.3),
        Dense(num_classes, activation='softmax')
    ])
    
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [107]:
input_shape = (MAX_LEN, 3)
cnn_model = build_cnn_model(input_shape, num_classes)
cnn_model.summary()


Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_6 (Conv1D)           (None, 498, 32)           320       
                                                                 
 max_pooling1d_6 (MaxPoolin  (None, 249, 32)           0         
 g1D)                                                            
                                                                 
 dropout_12 (Dropout)        (None, 249, 32)           0         
                                                                 
 conv1d_7 (Conv1D)           (None, 247, 64)           6208      
                                                                 
 max_pooling1d_7 (MaxPoolin  (None, 123, 64)           0         
 g1D)                                                            
                                                                 
 dropout_13 (Dropout)        (None, 123, 64)          

In [108]:
history = cnn_model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=20, batch_size=32)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [109]:
test_loss, test_acc = cnn_model.evaluate(X_test, y_test, verbose=2)
print(f'Test accuracy: {test_acc}')

8/8 - 0s - loss: 0.7810 - accuracy: 0.6304 - 52ms/epoch - 6ms/step
Test accuracy: 0.6304348111152649


In [111]:
y_pred = cnn_model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

print(classification_report(y_true, y_pred_classes, target_names=label_encoder.classes_))

              precision    recall  f1-score   support

        Bach       0.71      0.98      0.83       140
   Beethoven       0.00      0.00      0.00        33
      Chopin       0.00      0.00      0.00        24
      Mozart       0.21      0.24      0.23        33

    accuracy                           0.63       230
   macro avg       0.23      0.31      0.26       230
weighted avg       0.46      0.63      0.53       230



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
