References:
https://www.youtube.com/watch?v=CbTU92pbDKw
https://arxiv.org/pdf/2203.12105

In [None]:
!pip install miditok miditoolkit muspy

In [None]:
from miditok import REMI, TokenizerConfig
from miditoolkit import MidiFile
from pathlib import Path
import numpy as np

# --- Tokenizer config ---
# REMI tokenizer with chord, rest, tempo and time-signature tokens enabled.
config = TokenizerConfig(
    use_chords=True,
    use_rests=True,
    use_tempos=True,
    use_time_signatures=True,
    # MidiTok >= 3.0 spells this parameter `num_tempos`; the older `nb_tempos`
    # keyword is accepted as an unknown extra argument and has no effect there.
    num_tempos=32,
    tempo_range=(40, 250),
    # NOTE(review): the previous `chord_types='all'` argument was removed
    # because it is not a documented TokenizerConfig parameter (the chord
    # vocabulary is chosen through `chord_maps`) -- confirm against the
    # installed MidiTok version.
)

tokenizer = REMI(config)

# --- Tokenize all MIDI files directly in memory ---
# Every file's token ids are appended to one flat list, in file-name order.
midi_folder = Path("/kaggle/input/classical-piano-midi")
all_token_ids = []

# glob() yields files in filesystem order; sorting makes the concatenated
# token stream (and anything split from it by position) reproducible.
for midi_path in sorted(midi_folder.glob("*.mid")):
    try:
        # Pass the path and let the tokenizer load the file itself;
        # pre-loading with miditoolkit is the deprecated route in MidiTok 3.x.
        tokens = tokenizer(midi_path)  # may return list or single

        sequences = tokens if isinstance(tokens, list) else [tokens]
        # Gather the whole file first so that a failure part-way through
        # does not leave a truncated file in the corpus.
        file_ids = [token_id for seq in sequences for token_id in seq.ids]
    except Exception as e:
        # Best effort: report the unreadable file and keep going.
        print(f"Error with {midi_path.name}: {e}")
        continue

    all_token_ids.extend(file_ids)

if not all_token_ids:
    print(f"Warning: no tokens were collected from {midi_folder}")



In [None]:
seq_len = 64  # Sequence length

# Each example is a window of `seq_len` consecutive token ids; its label is
# the id that immediately follows the window.
token_array = np.asarray(all_token_ids)
if token_array.size <= seq_len:
    raise ValueError(
        f"Need more than {seq_len} tokens to build training windows, "
        f"got {token_array.size}"
    )

# sliding_window_view produces len - seq_len + 1 windows without copying the
# data; the last window has no following token, so it is dropped.
# Note: X is a read-only view onto token_array, not an independent copy.
X = np.lib.stride_tricks.sliding_window_view(token_array, seq_len)[:-1]
y = token_array[seq_len:]

print(f"X shape: {X.shape}, y shape: {y.shape}")

vocab_size = tokenizer.vocab_size

In [None]:
from sklearn.model_selection import train_test_split
# Keep the windows in their existing order (no shuffling) and reserve the
# last 20% of them for validation.
(X_train, X_val, y_train, y_val) = train_test_split(
    *(X, y),
    shuffle=False,
    test_size=0.2,
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf


# --- Define model ---
# model = Sequential([
#     layers.Input(shape=(3, 2)),
#     layers.Bidirectional(layers.LSTM(128, return_sequences=True)),
#     layers.Bidirectional(layers.LSTM(64)),
#     layers.BatchNormalization(),
#     layers.Dense(128, activation='relu'),
#     layers.Dropout(0.3),
#     layers.Dense(64, activation='relu'),
#     layers.Dense(2)
# ])

# Next-token classifier: token ids -> embedding -> three stacked LSTMs ->
# dense head -> softmax over the tokenizer vocabulary.
model = Sequential([
    # An explicit Input layer fixes the window length; it replaces the
    # Embedding `input_length` argument, which Keras 3 deprecates.
    layers.Input(shape=(seq_len,), dtype="int32"),
    layers.Embedding(input_dim=vocab_size, output_dim=256),
    layers.LSTM(512, return_sequences=True),
    layers.LSTM(512, return_sequences=True),
    layers.LSTM(512),  # only the final hidden state feeds the dense head
    layers.Dropout(0.3),
    layers.BatchNormalization(),
    # NOTE(review): the two Dense layers below have no activation between
    # them (only Dropout), and ReLU is applied after the second Dropout.
    # The layer order is kept exactly as it was -- confirm it is intended.
    layers.Dense(256),
    layers.Dropout(0.3),
    layers.Dense(128),
    layers.Dropout(0.3),
    layers.Activation(activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(vocab_size, activation='softmax'),
])

# Hyperparameters are named once so the optimizer, the training loop and the
# TensorBoard run directory cannot drift apart.
learning_rate = 0.001
epochs = 100

# Labels are integer token ids, hence the sparse cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(learning_rate=learning_rate),
    metrics=['accuracy']
)

# --- Early stopping ---
# Left disabled; add `early_stop` to the callbacks list below to enable it.
# early_stop = EarlyStopping(
#     monitor='val_loss',
#     patience=10,
#     restore_best_weights=True
# )

# The run directory is derived from the settings actually used
# ("logs/fit/0.001-100epoch"); the earlier hard-coded "0.0001-300epoch"
# label matched neither the learning rate nor the epoch count.
log_dir = f"logs/fit/{learning_rate}-{epochs}epoch"
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# --- Train ---
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=epochs,
    # callbacks=[early_stop]
    callbacks=[tensorboard_callback]
)


In [None]:
import random
import numpy as np
import muspy

# 1. Generate tokens
# Prime the model with a randomly chosen window from X.
seed = X[random.randint(0, len(X) - 1)].tolist()
generated = seed.copy()

# Take the window length from the seed instead of repeating a literal, so
# it always matches the width of the windows stored in X.
seq_len = len(seed)
num_tokens_to_generate = 200

for _ in range(num_tokens_to_generate):
    # Feed the most recent `seq_len` ids as a batch of one.
    input_seq = np.array(generated[-seq_len:]).reshape(1, seq_len)
    preds = model.predict(input_seq, verbose=0)[0]
    # Greedy decoding: take the most probable id. Cast to a plain int so
    # `generated` stays a homogeneous list of Python ints for the tokenizer.
    next_token = int(np.argmax(preds))
    generated.append(next_token)

# Decode token sequence into ScoreTick
# (the ids are wrapped in a list, i.e. passed as a single sequence)
score_tick = tokenizer.decode([generated])

# Save MIDI file
score_tick.dump_midi("generated_music.mid")