In [None]:
import pandas as pd
import os
def _csv_paths(split):
    """Return the sorted paths of every ``.csv`` file inside ``dataset/<split>``."""
    folder = os.path.join("dataset", split)
    paths = []
    for filename in os.listdir(folder):
        if filename.endswith('.csv'):
            paths.append(os.path.join(folder, filename))
    paths.sort()
    return paths


# One sorted file list per dataset split.
train_files = _csv_paths("train")
test_files = _csv_paths("test")
valid_files = _csv_paths("valid")

In [None]:
def _load_chorales(paths):
    """Read each CSV in ``paths`` and return its rows as a nested list (one list of rows per file)."""
    return [pd.read_csv(path).values.tolist() for path in paths]


train_data = _load_chorales(train_files)
test_data = _load_chorales(test_files)
# The original cell loaded the test split twice and never read the validation files.
valid_data = _load_chorales(valid_files)


In [None]:
from music21 import stream, chord

# Listen to one training chorale: each CSV row becomes one chord in the stream.
chorale = train_data[20]
s = stream.Stream()
for row in chorale:
    # Zero entries are dropped before the chord is built.
    pitches = [n for n in row if n]
    # The keyword is `quarterLength`; the original misspelled it as `quaterLength`.
    s.append(chord.Chord(pitches, quarterLength=1))
s.show('midi')

In [None]:
import numpy as np 

# Note range used for the token encoding (0 is kept as a separate "silence" token).
min_note = 36
max_note = 81

# Windowing / training configuration.
window_size = 32
window_offset = 16
batch_size = 32


def make_xy(chorales):
    """Cut chorales into overlapping windows and build next-token training pairs.

    Every chorale is sliced into windows of ``window_size + 1`` chords, starting
    every ``window_offset`` chords. Note values are re-based so that ``min_note``
    maps to 1 while 0 stays 0, then clipped to ``[0, max_note - min_note + 1]``.
    Each window is flattened to a single token sequence.

    Returns:
        A tuple ``(x, y)`` of 2-D integer arrays: ``x`` is every flattened window
        without its last token, ``y`` is the same window without its first token.
    """
    span = window_size + 1
    windows = []
    for chorale in chorales:
        # Chorales with fewer than `span` chords yield no start positions.
        for start in range(0, len(chorale) - window_size, window_offset):
            windows.append(chorale[start:start + span])

    tokens = np.array(windows, dtype=int)
    tokens = np.where(tokens == 0, 0, tokens - min_note + 1)
    highest_token = max_note - min_note + 1
    tokens = np.clip(tokens, 0, highest_token)

    flat = tokens.reshape(tokens.shape[0], -1)
    inputs = flat[:, :-1]
    targets = flat[:, 1:]
    return inputs, targets
# Build (input, target) token arrays for each split.
x_train, Y_train = make_xy(train_data)
x_test, y_test = make_xy(test_data)
# The validation arrays were previously built from test_data, so the reported
# validation metrics were really test metrics. Read the validation files
# directly so this cell does not depend on how earlier cells named the data.
x_valid, y_valid = make_xy([pd.read_csv(f).values.tolist() for f in valid_files])

In [None]:
# Sanity check of the target array: one row per window, one column per
# flattened token of the window after dropping its first token.
Y_train.shape

(3111, 131)

In [None]:
### Training the model

In [None]:
from tensorflow.keras import Input
from tensorflow.keras.layers import Conv1D, Dense, Embedding, LSTM, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Nadam

In [None]:
# Vocabulary size: token 0 plus one token per note in [min_note, max_note].
# Derived from the encoding constants (47 with min_note=36, max_note=81) so the
# embedding and the softmax cannot drift apart from the data preparation.
n_tokens = max_note - min_note + 2

model = Sequential()

# Declare the variable-length integer sequence with an explicit Input layer;
# passing `input_shape` to the first layer is deprecated and emits a warning.
model.add(Input(shape=(None,)))
model.add(Embedding(input_dim=n_tokens, output_dim=5))

# Stack of causal convolutions with doubling dilation rates, each followed by
# batch normalization. (filters, dilation_rate) per layer:
for n_filters, dilation in [(32, 1), (48, 2), (64, 4), (96, 8), (128, 16)]:
    model.add(Conv1D(n_filters, kernel_size=2, padding='causal', activation='relu', dilation_rate=dilation))
    model.add(BatchNormalization())

model.add(Dropout(0.05))
# return_sequences=True: one prediction per input position, matching the
# per-position targets produced for training.
model.add(LSTM(256, return_sequences=True))
model.add(Dense(n_tokens, activation='softmax'))

model.summary()


  super().__init__(**kwargs)


In [None]:
optimizer = Nadam(learning_rate=1e-3)
# Targets are integer token ids, hence the sparse categorical loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
# validation_data is passed as the documented (inputs, targets) tuple, and the
# History object is kept so the training curves can be inspected afterwards.
history = model.fit(
    x_train,
    Y_train,
    epochs=20,
    validation_data=(x_valid, y_valid),
    batch_size=batch_size,
)

Epoch 1/20
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 155ms/step - accuracy: 0.5320 - loss: 1.8274 - val_accuracy: 0.0594 - val_loss: 3.8795
Epoch 2/20
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 165ms/step - accuracy: 0.7745 - loss: 0.8429 - val_accuracy: 0.0948 - val_loss: 3.9723
Epoch 3/20
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 162ms/step - accuracy: 0.8011 - loss: 0.7007 - val_accuracy: 0.1591 - val_loss: 3.9085
Epoch 4/20
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 163ms/step - accuracy: 0.8162 - loss: 0.6307 - val_accuracy: 0.2519 - val_loss: 3.1218
Epoch 5/20
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 161ms/step - accuracy: 0.8277 - loss: 0.5793 - val_accuracy: 0.4458 - val_loss: 1.9546
Epoch 6/20
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 166ms/step - accuracy: 0.8372 - loss: 0.5402 - val_accuracy: 0.6880 - val_loss: 1.0341
Epoch 7/20
[1m98/98[

<keras.src.callbacks.history.History at 0x17a773e00>

In [None]:
def sample_next_note(probs):
    """Sample an index from a vector of (possibly unnormalized) probabilities.

    Args:
        probs: 1-D sequence of non-negative weights, one per candidate index.

    Returns:
        A plain Python ``int``. The index is drawn at random in proportion to
        ``probs``. If the weights sum to zero or less, or to a non-finite
        value, the index of the largest entry is returned instead.

    The input is never modified.
    """
    probabilities = np.asarray(probs, dtype=float)
    prob_sum = probabilities.sum()
    if prob_sum <= 0 or not np.isfinite(prob_sum):
        # Cannot normalize; fall back to the deterministic best guess.
        return int(np.argmax(probabilities))
    # Out-of-place division: np.asarray returns the caller's own array when it
    # is already float64, so `/=` would silently rescale the caller's data.
    probabilities = probabilities / prob_sum
    # int(...) keeps the return type identical to the fallback branch.
    return int(np.random.choice(len(probabilities), p=probabilities))

In [None]:
def generate_chorale(mode, seed_chords, length):
    """Extend a seed with sampled notes and return the full chorale.

    Args:
        mode: The trained model used for prediction. (The parameter name is
            kept as-is for compatibility with existing callers.) It must expose
            ``predict`` returning per-position token probabilities.
        seed_chords: Sequence of chords (each a sequence of note values, 0
            meaning silence) used as the starting context.
        length: Number of chords to generate after the seed.

    Returns:
        A 2-D integer array with one chord per row, containing the seed chords
        followed by the ``length`` generated chords, in original note values.

    Raises:
        ValueError: If ``seed_chords`` is empty.
    """
    seed = np.array(seed_chords, dtype=int)
    if seed.size == 0:
        raise ValueError("seed_chords must contain at least one chord")
    # Notes per chord comes from the seed; 4 is the fallback for a flat seed.
    n_voices = seed.shape[1] if seed.ndim == 2 else 4

    # Encode exactly like make_xy: re-base onto tokens and clip to the vocabulary.
    token_sequence = np.where(seed == 0, 0, seed - min_note + 1)
    token_sequence = np.clip(token_sequence, 0, max_note - min_note + 1)
    token_sequence = token_sequence.reshape(1, -1)

    for _ in range(length * n_voices):
        # Use the model that was passed in (the original ignored the argument
        # and read the global `model`); verbose=0 avoids one progress bar per token.
        next_token_probabilities = mode.predict(token_sequence, verbose=0)[0, -1]
        next_token = sample_next_note(next_token_probabilities)
        token_sequence = np.concatenate([token_sequence, [[next_token]]], axis=1)

    # Decode tokens back to note values; token 0 stays 0.
    token_sequence = np.where(token_sequence == 0, 0, token_sequence + min_note - 1)
    return token_sequence.reshape(-1, n_voices)

In [None]:
# Listen to the complete test chorale that the generation seed is taken from.
seed_chords = test_data[3]

chorale = seed_chords
s = stream.Stream()
for row in chorale:
    # Zero entries are dropped before the chord is built.
    pitches = [n for n in row if n]
    # The keyword is `quarterLength`; the original misspelled it as `quaterLength`.
    s.append(chord.Chord(pitches, quarterLength=1))
s.show('midi')

In [None]:
# Prompt the model with the first 8 chords of a test chorale and ask for 56 more.
seed_chords = test_data[3][:8]
new_chorale = generate_chorale(model, seed_chords, length=56)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23

In [None]:
# Show the generated chorale: one chord per row, beginning with the seed chords.
print(new_chorale)

[[73 68 61 53]
 [73 68 61 53]
 [73 68 61 53]
 [73 68 61 53]
 [69 66 61 54]
 [69 66 61 54]
 [69 66 61 54]
 [69 66 61 54]
 [68 64 59 56]
 [68 64 59 56]
 [68 64 59 56]
 [68 64 59 56]
 [66 64 61 57]
 [66 64 61 57]
 [66 62 61 57]
 [66 62 61 59]
 [64 62 59 59]
 [66 62 59 59]
 [66 62 59 59]
 [66 62 59 59]
 [66 62 59 59]
 [66 62 59 59]
 [66 62 59 59]
 [66 62 59 59]
 [66 62 59 59]
 [68 62 59 59]
 [68 62 59 59]
 [69 66 61 57]
 [69 66 61 57]
 [69 66 61 57]
 [69 66 61 57]
 [71 61 61 56]
 [71 61 61 56]
 [71 63 57 56]
 [71 62 57 56]
 [71 59 56 49]
 [71 59 56 49]
 [71 61 57 49]
 [71 61 57 49]
 [69 66 57 50]
 [69 66 57 50]
 [69 64 57 49]
 [69 64 57 49]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]
 [71 62 59 47]]


In [None]:
# Let's generate an exclusive song: play the seed plus the sampled continuation.
chorale = new_chorale.tolist()
s = stream.Stream()
for row in chorale:
    # Zero entries are dropped before the chord is built.
    pitches = [n for n in row if n]
    # The keyword is `quarterLength`; the original misspelled it as `quaterLength`.
    s.append(chord.Chord(pitches, quarterLength=1))
s.show('midi')