In [2]:
import pandas as pd
import numpy as np

In [3]:
import os

train_files = sorted([os.path.join('Data','train',f) for f in os.listdir(os.path.join('Data','train')) if f.endswith('.csv')])

test_files = sorted([os.path.join('Data','test',f) for f in os.listdir(os.path.join('Data','test')) if f.endswith('.csv')])

valid_files = sorted([os.path.join('Data','valid',f) for f in os.listdir(os.path.join('Data','valid')) if f.endswith('.csv')])

In [4]:
train_data = [pd.read_csv(f).values.tolist() for f in train_files]

test_data = [pd.read_csv(f).values.tolist() for f in test_files]

valid_data = [pd.read_csv(f).values.tolist() for f in valid_files]

In [5]:
from music21 import stream,chord

chorale = train_data[20]

s = stream.Stream()
for row in chorale:
    s.append(chord.Chord([n for n in row if n],quarterLength=1))

s.show('midi')

### Preprocessing

In [6]:
min_note,max_note =36,81
window_size,window_offset,batch_size = 32,16,32

def make_xy(chorales):
    windows = [c[i:i + window_size+1] for c in chorales for i in range(0,len(c) - window_size,window_offset)]

    data = np.array(windows,dtype=int)
    data = np.where(data == 0,0,data-min_note + 1)

    flat = data.reshape(data.shape[0],-1)

    return flat[:, :-1],flat[:, 1:]

x_train,y_train = make_xy(train_data)
x_test,y_test = make_xy(test_data)
x_valid,y_valid = make_xy(valid_data)


### Training the Model

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D,Dense,Embedding,LSTM,Dropout,BatchNormalization
from tensorflow.keras.optimizers import Nadam

In [8]:
model = Sequential()

model.add(Embedding(input_dim=47,output_dim=5,input_shape=[None]))
model.add(Conv1D(32,kernel_size=2,padding='causal',activation='relu'))
model.add(BatchNormalization())
model.add(Conv1D(48,kernel_size=2,padding='causal',activation='relu',dilation_rate=2))
model.add(BatchNormalization())
model.add(Conv1D(64,kernel_size=2,padding='causal',activation='relu',dilation_rate=4))
model.add(BatchNormalization())
model.add(Conv1D(96,kernel_size=2,padding='causal',activation='relu',dilation_rate=8))
model.add(BatchNormalization())
model.add(Conv1D(128,kernel_size=2,padding='causal',activation='relu',dilation_rate=16))
model.add(BatchNormalization())
model.add(Dropout(0.05))
model.add(LSTM(256,return_sequences=True))
model.add(Dense(47,activation='softmax'))

model.summary()

  super().__init__(**kwargs)


In [9]:
optimizer = Nadam(learning_rate=1e-3)
model.compile(loss='sparse_categorical_crossentropy',optimizer=optimizer,metrics=['accuracy'])
model.fit(x_train,y_train,epochs=20,validation_data=(x_valid,y_valid),batch_size=batch_size)

Epoch 1/20
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 180ms/step - accuracy: 0.5335 - loss: 1.8055 - val_accuracy: 0.0233 - val_loss: 4.0191
Epoch 2/20
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 163ms/step - accuracy: 0.7714 - loss: 0.8554 - val_accuracy: 0.0277 - val_loss: 5.5791
Epoch 3/20
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 139ms/step - accuracy: 0.7980 - loss: 0.7160 - val_accuracy: 0.0384 - val_loss: 5.2194
Epoch 4/20
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 132ms/step - accuracy: 0.8122 - loss: 0.6468 - val_accuracy: 0.0736 - val_loss: 4.0877
Epoch 5/20
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 133ms/step - accuracy: 0.8242 - loss: 0.5962 - val_accuracy: 0.2506 - val_loss: 2.5513
Epoch 6/20
[1m98/98[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 138ms/step - accuracy: 0.8320 - loss: 0.5596 - val_accuracy: 0.5081 - val_loss: 1.5720
Epoch 7/20
[1m98/98[

<keras.src.callbacks.history.History at 0x1bf4dee1250>

### Generating Music

In [10]:
def sample_next_note(probs):
    probabilities = np.asarray(probs,dtype=float)

    prob_sum = probabilities.sum()

    if prob_sum <= 0 or not np.isfinite(prob_sum):
        return int(np.argmax(probabilities))

    probabilities /= prob_sum

    return np.random.choice(len(probabilities), p=probabilities)



In [17]:
def generate_chorale(model,seed_chrods,length):
    token_sequence = np.array(seed_chrods,dtype=int)
    token_sequence = np.where(token_sequence == 0,0,token_sequence - min_note + 1)
    token_sequence = token_sequence.reshape(1,-1)

    for _ in range(length * 4):
        next_token_probabilities = model.predict(token_sequence)[0,-1]
        next_token = sample_next_note(next_token_probabilities)
        token_sequence = np.concatenate(([token_sequence,[[next_token]]]),axis=1)

    token_sequence = np.where(token_sequence == 0,0,token_sequence + min_note - 1)
    return token_sequence.reshape(-1,4)

In [18]:
seed_chords = test_data[2][:8]

chorale = seed_chords
s=stream.Stream()
for row in chorale:
    s.append(chord.Chord([n for n in row if n],quarterLength=1))

s.show('midi')

In [19]:
seed_chords = test_data[2][8:]
new_chorale = generate_chorale(model,seed_chords,56)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 393ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6

In [20]:
chorale = new_chorale.tolist()
s=stream.Stream()
for row in chorale:
    s.append(chord.Chord([n for n in row if n],quarterLength=1))

s.show('midi')