In [14]:
import glob
import os
import pickle
import random

import music21
import numpy as np
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.layers import LSTM, Dense, Dropout, Embedding, Bidirectional, BatchNormalization
from keras.layers import GRU
from keras.models import Sequential
from keras.utils import to_categorical
from music21 import stream, note, chord, instrument


In [27]:
# Number of consecutive note tokens in each training window; the token that
# follows the window is the prediction target.
SEQUENCE_LENGTH = 80
# Maximum number of training epochs passed to model.fit (the early-stopping
# callback may end training sooner).
EPOCHS = 50
# Mini-batch size passed to model.fit.
BATCH_SIZE = 64
# Number of tokens to generate when sampling from the trained model.
GENERATED_LENGTH = 300
# Glob pattern selecting the MIDI files used as training data.
MIDI_PATH = "midi_files/*.mid"
# Destination path of the generated MIDI file.
OUTPUT_FILE = "local_files/generated.mid"

In [18]:
# -----------------------------
# STEP 2: Data Preprocessing
# -----------------------------
def parse_midi_files(midi_path):
    """Extract note and chord tokens from every MIDI file matching a glob pattern.

    Single notes are encoded as their pitch string (``str(element.pitch)``).
    Chords are encoded as the dot-joined integers of their normal order,
    e.g. ``"0.4.7"``.

    Args:
        midi_path: Glob pattern (as accepted by ``glob.glob``) selecting the
            MIDI files to read.

    Returns:
        A flat list of string tokens, concatenated over all matched files in
        sorted filename order. Empty when the pattern matches no file.
    """
    notes = []
    # glob order depends on the filesystem; sorting keeps the token sequence
    # (and everything derived from it) reproducible between runs.
    for file in sorted(glob.glob(midi_path)):
        midi = music21.converter.parse(file)

        # Prefer the first instrument part. Partitioning is best-effort: when it
        # fails, returns nothing, or yields no parts, fall back to the flattened
        # score below. Catch Exception (not a bare except) so Ctrl-C still works.
        try:
            partitioned = music21.instrument.partitionByInstrument(midi)
            if partitioned is not None:
                notes_to_parse = partitioned.parts[0].recurse()
            else:
                notes_to_parse = None
        except Exception:
            notes_to_parse = None

        if notes_to_parse is None:
            # `.flat` is deprecated in newer music21 releases in favour of
            # `.flatten()`; keep `.flat` only for versions that lack it.
            flattened = midi.flatten() if hasattr(midi, "flatten") else midi.flat
            notes_to_parse = flattened.notes

        for element in notes_to_parse:
            if isinstance(element, music21.note.Note):
                notes.append(str(element.pitch))
            elif isinstance(element, music21.chord.Chord):
                notes.append('.'.join(str(n) for n in element.normalOrder))
    return notes

notes = parse_midi_files(MIDI_PATH)
if not notes:
    # Fail fast with a clear message: with no tokens, every later step
    # (vocabulary, training windows, model) would be built on empty data.
    raise ValueError(
        f"No notes were extracted from {MIDI_PATH!r}; "
        "check that the pattern matches at least one readable MIDI file."
    )
print(f"Total notes extracted: {len(notes)}")

Total notes extracted: 4993


In [20]:
# Persist the extracted note tokens so they can be reloaded later
# without re-parsing the MIDI files.
with open('midi_files/input_notes.pkl', 'wb') as notes_file:
    pickle.dump(notes, notes_file)

In [21]:
# Vocabulary: every distinct token gets an integer id equal to its position
# in the sorted token list; int_to_note is the inverse lookup.
unique_notes = sorted(set(notes))
note_to_int = dict(zip(unique_notes, range(len(unique_notes))))
int_to_note = dict(enumerate(unique_notes))


In [22]:
# Sliding-window training pairs: each window of SEQUENCE_LENGTH encoded tokens
# is an input, and the token immediately after the window is its target.
encoded_notes = [note_to_int[token] for token in notes]
window_starts = range(len(notes) - SEQUENCE_LENGTH)

network_input = [
    encoded_notes[start:start + SEQUENCE_LENGTH] for start in window_starts
]
network_output = [encoded_notes[start + SEQUENCE_LENGTH] for start in window_starts]

X = np.array(network_input)
# One-hot targets over the whole vocabulary, matching the softmax output layer.
y = to_categorical(network_output, num_classes=len(unique_notes))

print("Shape of Input:", X.shape)
print("Shape of Output:", y.shape)

Shape of Input: (4913, 80)
Shape of Output: (4913, 153)


In [23]:

# -----------------------------
# STEP 3: Advanced Model
# -----------------------------
# Next-token classifier: embedded note ids -> two bidirectional GRU layers
# (each followed by dropout and batch normalisation) -> a final GRU that
# returns only its last state -> dense head with a softmax over the vocabulary.
model = Sequential([
    # The sequence length is taken from the data at fit time. The
    # `input_length` argument is deprecated in Keras 3, so it is not passed.
    Embedding(len(unique_notes), 100),  # Embeddings for notes
    Bidirectional(GRU(256, return_sequences=True)),
    Dropout(0.3),
    BatchNormalization(),
    Bidirectional(GRU(256, return_sequences=True)),
    Dropout(0.3),
    BatchNormalization(),
    GRU(256),
    Dense(256, activation="relu"),
    Dropout(0.3),
    Dense(len(unique_notes), activation="softmax")
])

# Targets are one-hot vectors, hence categorical cross-entropy.
model.compile(loss="categorical_crossentropy", optimizer="adam")

# -----------------------------
# STEP 4: Training with Callbacks
# -----------------------------
# NOTE(review): fit() below receives no validation data, so all three callbacks
# track the *training* loss. Consider holding out a validation split if
# overfitting is a concern.

# Keep on disk only the model from the epoch with the lowest training loss so far.
checkpoint = ModelCheckpoint(
    "best_model.h5",
    monitor="loss",
    save_best_only=True,
    mode="min"
)

# Stop after 10 epochs without improvement and roll back to the best weights.
early_stop = EarlyStopping(
    monitor="loss",
    patience=10,
    restore_best_weights=True
)

# Halve the learning rate after 5 epochs without improvement.
reduce_lr = ReduceLROnPlateau(
    monitor="loss",
    factor=0.5,
    patience=5,
    verbose=1
)

history = model.fit(
    X, y,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[checkpoint, early_stop, reduce_lr]
)



Epoch 1/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 4.1421



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m137s[0m 2s/step - loss: 4.1367 - learning_rate: 0.0010
Epoch 2/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 3.0044



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 1s/step - loss: 3.0037 - learning_rate: 0.0010
Epoch 3/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 2.5568



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 2s/step - loss: 2.5573 - learning_rate: 0.0010
Epoch 4/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 2.3145



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 1s/step - loss: 2.3145 - learning_rate: 0.0010
Epoch 5/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 2.0906



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 1s/step - loss: 2.0907 - learning_rate: 0.0010
Epoch 6/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 1.8389



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 2s/step - loss: 1.8392 - learning_rate: 0.0010
Epoch 7/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 1.6386



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 1s/step - loss: 1.6392 - learning_rate: 0.0010
Epoch 8/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 1.4592



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 1s/step - loss: 1.4600 - learning_rate: 0.0010
Epoch 9/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 1.3120



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 1s/step - loss: 1.3125 - learning_rate: 0.0010
Epoch 10/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 1.1411



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 1s/step - loss: 1.1421 - learning_rate: 0.0010
Epoch 11/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 1.0334



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 1s/step - loss: 1.0341 - learning_rate: 0.0010
Epoch 12/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.9246



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 1s/step - loss: 0.9254 - learning_rate: 0.0010
Epoch 13/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.8315



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 1s/step - loss: 0.8322 - learning_rate: 0.0010
Epoch 14/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.7442



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 1s/step - loss: 0.7448 - learning_rate: 0.0010
Epoch 15/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.6561



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 1s/step - loss: 0.6571 - learning_rate: 0.0010
Epoch 16/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.5948



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 1s/step - loss: 0.5953 - learning_rate: 0.0010
Epoch 17/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.5393



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 1s/step - loss: 0.5396 - learning_rate: 0.0010
Epoch 18/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.4824



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 1s/step - loss: 0.4830 - learning_rate: 0.0010
Epoch 19/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.4620



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 1s/step - loss: 0.4625 - learning_rate: 0.0010
Epoch 20/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.4372



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 1s/step - loss: 0.4377 - learning_rate: 0.0010
Epoch 21/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.3869



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 1s/step - loss: 0.3874 - learning_rate: 0.0010
Epoch 22/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.3950



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 1s/step - loss: 0.3952 - learning_rate: 0.0010
Epoch 23/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.3706



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 1s/step - loss: 0.3709 - learning_rate: 0.0010
Epoch 24/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.3304



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 1s/step - loss: 0.3307 - learning_rate: 0.0010
Epoch 25/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.3106



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m96s[0m 1s/step - loss: 0.3108 - learning_rate: 0.0010
Epoch 26/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 1s/step - loss: 0.3039 - learning_rate: 0.0010
Epoch 27/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.2924



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 1s/step - loss: 0.2927 - learning_rate: 0.0010
Epoch 28/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.2752



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 1s/step - loss: 0.2754 - learning_rate: 0.0010
Epoch 29/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.2517



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 1s/step - loss: 0.2521 - learning_rate: 0.0010
Epoch 30/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.2407



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 1s/step - loss: 0.2411 - learning_rate: 0.0010
Epoch 31/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 2s/step - loss: 0.2543 - learning_rate: 0.0010
Epoch 32/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.2351



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 1s/step - loss: 0.2354 - learning_rate: 0.0010
Epoch 33/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 1s/step - loss: 0.2488 - learning_rate: 0.0010
Epoch 34/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.2240



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m106s[0m 1s/step - loss: 0.2243 - learning_rate: 0.0010
Epoch 35/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.2010



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 1s/step - loss: 0.2015 - learning_rate: 0.0010
Epoch 36/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.2011



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 1s/step - loss: 0.2015 - learning_rate: 0.0010
Epoch 37/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 2s/step - loss: 0.2179 - learning_rate: 0.0010
Epoch 38/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 0.2160



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 2s/step - loss: 0.2163 - learning_rate: 0.0010
Epoch 39/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 1s/step - loss: 0.2268 - learning_rate: 0.0010
Epoch 40/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.1860



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 1s/step - loss: 0.1864 - learning_rate: 0.0010
Epoch 41/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 2s/step - loss: 0.2049 - learning_rate: 0.0010
Epoch 42/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 1s/step - loss: 0.1985 - learning_rate: 0.0010
Epoch 43/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 1s/step - loss: 0.1862 - learning_rate: 0.0010
Epoch 44/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 1s/step - loss: 0.1872 - learning_rate: 0.0010
Epoch 45/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.1998
Epoch 45: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 1s/step - loss: 0.2000 - learning_rate: 0.0010
Epoch 46/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - loss: 0.1459



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m115s[0m 1s/step - loss: 0.1460 - learning_rate: 5.0000e-04
Epoch 47/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 0.0965



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 2s/step - loss: 0.0967 - learning_rate: 5.0000e-04
Epoch 48/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 0.0961



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 2s/step - loss: 0.0961 - learning_rate: 5.0000e-04
Epoch 49/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 0.0837



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m124s[0m 2s/step - loss: 0.0838 - learning_rate: 5.0000e-04
Epoch 50/50
[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2s/step - loss: 0.0874



[1m77/77[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m121s[0m 2s/step - loss: 0.0875 - learning_rate: 5.0000e-04


In [25]:
# Save the trained model. Create the target directory first so that a missing
# folder cannot make the save fail after a long training run.
os.makedirs('local_files', exist_ok=True)
model.save('local_files/music_model.keras')

In [26]:
# -----------------------------
# STEP 5: Advanced Music Generation
# -----------------------------
def sample_with_temperature(preds, temperature=1.0):
    """Sample an index from a probability array with temperature scaling.

    The probabilities are re-weighted as ``softmax(log(p) / temperature)`` and
    one index is drawn from the resulting distribution.

    Args:
        preds: 1-D array-like of non-negative scores (typically a softmax
            output).
        temperature: Sampling temperature. Values below 1 sharpen the
            distribution, values above 1 flatten it. A value ``<= 0`` selects
            the most likely index deterministically (greedy decoding) instead
            of dividing by zero.

    Returns:
        The sampled index (a numpy integer).
    """
    preds = np.asarray(preds).astype("float64")

    if temperature <= 0:
        # Limit of temperature -> 0: all probability mass on the arg-max.
        return np.argmax(preds)

    # 1e-8 guards against log(0) for classes with zero probability.
    scaled = np.log(preds + 1e-8) / temperature
    # Shift so the largest exponent is exactly 0. Without this, a small
    # temperature makes every exp() underflow to 0 and the normalisation
    # becomes 0/0 = NaN, which np.random.multinomial rejects.
    scaled -= np.max(scaled)
    weights = np.exp(scaled)
    probabilities = weights / np.sum(weights)

    draw = np.random.multinomial(1, probabilities, 1)
    return np.argmax(draw)

def generate_notes(model, seed, length=200, temperature=1.0):
    """Generate note tokens autoregressively from a trained model.

    Starting from ``seed``, the model predicts a distribution over the next
    token, one index is sampled with ``sample_with_temperature``, and the
    context window slides forward by one position.

    Relies on the notebook-level ``int_to_note`` mapping to turn sampled
    indices back into token strings.

    Args:
        model: Trained model whose ``predict`` returns one probability vector
            per input row.
        seed: Sequence of integer token ids used as the initial context. Any
            iterable is accepted (list, tuple, numpy row); it is copied, so
            the caller's object is never modified.
        length: Number of tokens to generate.
        temperature: Sampling temperature forwarded to
            ``sample_with_temperature``.

    Returns:
        List of ``length`` generated token strings.
    """
    generated = []
    # list() both copies the seed and accepts non-list sequences such as X[i].
    pattern = list(seed)

    for _ in range(length):
        prediction_input = np.array(pattern).reshape(1, -1)
        prediction = model.predict(prediction_input, verbose=0)[0]
        index = int(sample_with_temperature(prediction, temperature))
        generated.append(int_to_note[index])
        # Slide the context window: drop the oldest token, append the new one.
        pattern = pattern[1:] + [index]
    return generated

# Pick a random training window as the seed. np.random.randint excludes its
# upper bound, so len(network_input) makes every window (including the last)
# eligible and also works when there is only a single window.
start_index = np.random.randint(0, len(network_input))
seed = network_input[start_index]
generated_notes = generate_notes(model, seed, length=GENERATED_LENGTH, temperature=0.8)


In [29]:
# -----------------------------
# STEP 6: Convert back to MIDI
# -----------------------------
def _note_from_pitch_class(token):
    """Build an octave-4 Note from a pitch-class token such as ``"7"``.

    Raises whatever ``int()`` or music21 raise when the token is not a usable
    pitch class; the caller chooses the fallback.
    """
    # Convert pitch class to note name (C=0, C#=1, ..., B=11)
    note_name = music21.pitch.Pitch(int(token)).name
    return note.Note(note_name + '4')  # default octave 4


offset = 0
output_notes = []

for pattern in generated_notes:
    if '.' in pattern:  # chord: dot-separated pitch classes
        chord_notes = []
        for token in pattern.split('.'):
            try:
                chord_note = _note_from_pitch_class(token)
            except Exception:
                chord_note = note.Note('C4')  # fallback to C4 if conversion fails
            chord_note.storedInstrument = instrument.Piano()
            chord_notes.append(chord_note)
        new_element = chord.Chord(chord_notes)
    else:  # single note
        try:
            # A bare integer token is a one-note chord stored as a pitch class.
            new_element = _note_from_pitch_class(pattern)
        except Exception:
            # If not an integer, try as note name directly
            try:
                new_element = note.Note(pattern)
            except Exception:
                new_element = note.Note('C4')
        new_element.storedInstrument = instrument.Piano()
    new_element.offset = offset
    output_notes.append(new_element)
    # Fixed spacing between successive events (same units as music21 offsets).
    offset += 0.5

midi_stream = stream.Stream(output_notes)

# Make sure the destination folder exists before writing the file.
output_dir = os.path.dirname(OUTPUT_FILE)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
midi_stream.write("midi", fp=OUTPUT_FILE)

print(f"✅ Generated music saved to {OUTPUT_FILE}")

✅ Generated music saved to local_files/generated.mid
