### Imports and Installations Required for Generating Music

In [32]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import random
from collections import defaultdict
from numpy.random import choice
from symusic import Score
from miditok import REMI, TokenizerConfig
from midiutil import MIDIFile
import mido
from midi_utils import midi_to_note_duration_sequence, note_duration_sequence_to_midi
import glob

### Helper methods for sequence extraction:
 - `midi_to_note_duration_sequence()`: Extract (pitch, duration) tuples from a MIDI file.
 - `note_sequence_to_midi()`: Create a MIDI file from a sequence of notes, assigning a fixed duration (note_length) to all notes.
 - `note_duration_sequence_to_midi()` : Generate a MIDI file with custom durations for each note, allowing variable-length notes.

In [42]:
def midi_to_note_duration_sequence(filename):
    """Extract ``(pitch, duration)`` pairs from a MIDI file.

    Walks every message of the file in playback order while accumulating the
    per-message ``time`` into an absolute clock. A note starts at a
    ``note_on`` with non-zero velocity and ends at the next ``note_off`` (or
    ``note_on`` with velocity 0) for the same channel and pitch.

    Args:
        filename: Path of the MIDI file, handed to ``mido.MidiFile``.

    Returns:
        A list of ``(pitch, duration)`` tuples ordered by note end, where
        ``duration`` is the elapsed clock time multiplied by 480 and
        truncated to ``int``. End events with no matching start are ignored;
        notes that never end are dropped.
    """
    mid = mido.MidiFile(filename)
    notes = []
    abs_time = 0
    # Open notes are keyed by (channel, pitch): keying by pitch alone lets the
    # same pitch on a second channel overwrite the first start time and lose a note.
    note_on_times = {}
    for msg in mid:
        abs_time += msg.time
        if msg.type not in ('note_on', 'note_off'):
            # Meta/control messages only advance the clock.
            continue
        key = (msg.channel, msg.note)
        if msg.type == 'note_on' and msg.velocity > 0:
            note_on_times[key] = abs_time
        elif key in note_on_times:
            # note_off, or note_on with velocity 0 (the running-status way to end a note)
            duration = abs_time - note_on_times.pop(key)
            notes.append((msg.note, int(duration * 480)))  # scale duration for MIDI ticks
    return notes

def note_sequence_to_midi(note_sequence, filename, velocity=64, tempo=500000, note_length=120):
    """Write a single-track MIDI file in which every note has the same length.

    Args:
        note_sequence: Iterable of MIDI pitch numbers, in playing order.
        filename: Destination path passed to ``MidiFile.save``.
        velocity: Velocity used for every ``note_on``/``note_off`` message.
        tempo: Value of the ``set_tempo`` meta message placed first in the track.
        note_length: ``time`` value written on every ``note_off`` message.
    """
    # The tempo event comes first, followed by one on/off pair per pitch.
    events = [mido.MetaMessage('set_tempo', tempo=tempo)]
    for pitch in note_sequence:
        events.append(mido.Message('note_on', note=pitch, velocity=velocity, time=0))
        events.append(mido.Message('note_off', note=pitch, velocity=velocity, time=note_length))

    midi_file = mido.MidiFile()
    midi_file.tracks.append(mido.MidiTrack(events))
    midi_file.save(filename)

def note_duration_sequence_to_midi(note_duration_sequence, filename, velocity=64, tempo=500000):
    """Write a single-track MIDI file where each note carries its own length.

    Args:
        note_duration_sequence: Iterable of ``(pitch, duration)`` pairs, in
            playing order.
        filename: Destination path passed to ``MidiFile.save``.
        velocity: Velocity used for every ``note_on``/``note_off`` message.
        tempo: Value of the ``set_tempo`` meta message placed first in the track.
    """
    track = mido.MidiTrack()
    track.append(mido.MetaMessage('set_tempo', tempo=tempo))
    for pitch, ticks in note_duration_sequence:
        # note_on is written with time=0; the note's duration is the time on its note_off.
        track.extend((
            mido.Message('note_on', note=pitch, velocity=velocity, time=0),
            mido.Message('note_off', note=pitch, velocity=velocity, time=ticks),
        ))

    out_file = mido.MidiFile()
    out_file.tracks.append(track)
    out_file.save(filename)

### 1. Parameter Setup: constraints for quantization and model input/output
 - `seq_length`: Context window for the LSTM.

 - `possible_durations`: Quantizes note durations to these values.

 - `min_pitch`/`max_pitch`: Restricts pitches to the piano range.

In [40]:
# Number of previous tokens the model sees when predicting the next one.
seq_length = 20
# Allowed note durations; raw durations are snapped to the nearest of these.
possible_durations = [120, 240, 360, 480]
# Inclusive MIDI pitch range covered by the token vocabulary.
min_pitch = 21
max_pitch = 108

# Random seed can be changed to get different results
# default is 42, from CSE_153R Homework 3(Spring 2025)
SEED = 42
random.seed(SEED)
# Seeding only Python's `random` leaves weight initialisation and batch
# shuffling unseeded; this also seeds NumPy and the Keras backend so a
# Restart & Run All reproduces the same training run.
keras.utils.set_random_seed(SEED)

### 2. Encoding/Decoding Logic: maps (pitch, duration) pairs to/from a single integer for model compatibility
 - `encode_pair()`: Combines pitch and duration into a unique index.

 - `decode_pair()`: Reverses the encoding to recover original values.

In [41]:
def encode_pair(note, duration):
    """Map a ``(pitch, duration)`` pair to a single token index.

    The index is ``(note - min_pitch) * len(possible_durations)`` plus the
    position of ``duration`` in ``possible_durations``.

    Args:
        note: MIDI pitch; must lie in ``[min_pitch, max_pitch]``.
        duration: Must be one of the values in ``possible_durations``
            (quantize before calling).

    Returns:
        An integer in ``[0, (max_pitch - min_pitch + 1) * len(possible_durations))``.

    Raises:
        ValueError: If ``note`` is outside the pitch range, or ``duration`` is
            not in ``possible_durations``.
    """
    # Without this check an out-of-range pitch silently produces a negative
    # or too-large index that no longer fits the model's vocabulary.
    if not min_pitch <= note <= max_pitch:
        raise ValueError(
            f"pitch {note} is outside the supported range [{min_pitch}, {max_pitch}]"
        )
    return (note - min_pitch) * len(possible_durations) + possible_durations.index(duration)

def decode_pair(idx):
    """Turn a token index back into its ``(pitch, duration)`` pair.

    Args:
        idx: Token index as produced by the pair encoding.

    Returns:
        ``(pitch, duration)`` where the quotient of ``idx`` by the number of
        durations selects the pitch offset from ``min_pitch`` and the
        remainder selects the entry of ``possible_durations``.
    """
    pitch_offset, duration_slot = divmod(idx, len(possible_durations))
    return min_pitch + pitch_offset, possible_durations[duration_slot]

### 3. Load the music data, extract the sequences, and encode them
 - Quantizes raw durations to the nearest value in `possible_durations`.

 - Encodes all notes into a flat list of integers for training.

In [44]:
# first is popular pop songs I enjoy
# sorted(): glob returns matches in an unspecified order, which would change
# the training data order from run to run / machine to machine.
midi_files = sorted(glob.glob('data/*.mid'))
# same midi file data we trained on for Assignment 1 Task 1
#midi_files = sorted(glob.glob('Assignment1(Task1_midis)/*.midi'))
if not midi_files:
    # Fail here rather than with an obscure shape error at training time.
    raise FileNotFoundError("No MIDI files matched 'data/*.mid'")

sequences = [midi_to_note_duration_sequence(f) for f in midi_files]
# Snap every raw duration to the nearest allowed value, and skip notes whose
# pitch falls outside [min_pitch, max_pitch] since they have no token in the vocabulary.
encoded = [
    encode_pair(note, min(possible_durations, key=lambda x: abs(x - duration)))
    for seq in sequences
    for note, duration in seq
    if min_pitch <= note <= max_pitch
]

# Quick sanity check: number of files loaded and number of tokens produced.
len(midi_files), len(encoded)

### 4. Training Sequence Preparation
 - Create input-output pairs using a sliding window of `seq_length (20)`.
 - Example (using a window of 3 instead of 20 for brevity): if `encoded` = `[0, 1, 2, 3, 4]`, then `X = [[0,1,2], [1,2,3]]`, `y = [3, 4]`.

In [46]:
# Slide a window of `seq_length` tokens over the token stream: each window is
# one input row and the token immediately after it is that row's target.
# NOTE: `encoded` is one flat list over all files, so some windows span the
# boundary between two consecutive files.
window_starts = range(len(encoded) - seq_length)
X = np.array([encoded[start:start + seq_length] for start in window_starts])
y = np.array([encoded[start + seq_length] for start in window_starts])

### 5. Model Architecture
 - Embedding: Converts encoded indices to dense vectors (dimensionality reduction).

 - LSTM: Learns temporal patterns in sequences.

 - Dense + Softmax: Predicts the probability distribution over possible pitch-duration pairs.

In [50]:
# One token per (pitch, duration) combination.
vocab_size = (max_pitch - min_pitch + 1) * len(possible_durations)
model = keras.Sequential([
    # Declare the input shape explicitly; the `input_length` argument of
    # Embedding is deprecated in Keras 3 and only triggers a warning there.
    keras.Input(shape=(seq_length,)),
    # Token index -> 64-dimensional dense vector.
    layers.Embedding(input_dim=vocab_size, output_dim=64),
    # Only the final LSTM state is kept (one vector per input window).
    layers.LSTM(128, return_sequences=False),
    # Probability distribution over the next token.
    layers.Dense(vocab_size, activation='softmax')
])

### 6. Training Configuration
 - Uses `sparse_categorical_crossentropy` since labels are integers (not one-hot).

 - Trains for 20 epochs with Adam optimizer.

In [51]:
# Targets in `y` are integer token indices (not one-hot), hence the sparse loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
# Keep the returned History: it holds the per-epoch loss for later inspection,
# and assigning it stops the bare History repr from being shown as cell output.
history = model.fit(X, y, epochs=20, batch_size=64)

Epoch 1/20
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 13ms/step - loss: 4.1903
Epoch 2/20
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - loss: 2.9087
Epoch 3/20
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - loss: 2.3554
Epoch 4/20
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - loss: 2.0626
Epoch 5/20
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - loss: 1.8522
Epoch 6/20
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - loss: 1.6766
Epoch 7/20
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - loss: 1.5174
Epoch 8/20
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - loss: 1.3693
Epoch 9/20
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - loss: 1.2369
Epoch 10/20
[1m280/280[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms

<keras.src.callbacks.history.History at 0x1c9e2e3c1a0>

### 7. Music Generation Loop
 - Autoregressive generation: Uses the last `seq_length` notes to predict the next note.

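 - Greedy decoding (`argmax`) always picks the single most probable token at each step; nothing is sampled, so the output is fully determined by the seed window and the trained model.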

In [52]:
# Start from a randomly chosen training window and extend it token by token.
seed_idx = random.randint(0, len(X) - 1)
seed = X[seed_idx]
generated = list(seed)

# The seed holds seq_length tokens; keep predicting until 256 more are added.
target_length = seq_length + 256
while len(generated) < target_length:
    # The model input is always the most recent seq_length tokens, as a batch of one.
    input_seq = np.array(generated[-seq_length:]).reshape(1, seq_length)
    probabilities = model.predict(input_seq, verbose=0)
    # Greedy choice: index of the highest-probability token.
    pred = np.argmax(probabilities)
    generated.append(pred)


### 8. Decode and Convert into MIDI file

In [53]:
# Turn every token (seed window included) back into a (pitch, duration) pair,
# then write the pairs out as a MIDI file.
decoded_sequence = list(map(decode_pair, generated))
note_duration_sequence_to_midi(decoded_sequence, 'lstm_generated.mid')
print("Generated sequence saved as lstm_generated.mid")

Generated sequence saved as lstm_generated.mid
