In [10]:
import os
import glob
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pretty_midi
from tqdm import tqdm

# Device setup
device = torch.device('cuda')

In [11]:
class MidiDataset(Dataset):
    def __init__(self, midi_dir, seq_len=200, step=0.1):
        self.paths = glob.glob(os.path.join(midi_dir, '*.midi'))
        self.seq_len = seq_len
        self.step = step  # seconds per timestep

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        pm = pretty_midi.PrettyMIDI(self.paths[idx])
        pr = pm.get_piano_roll(fs=1/self.step)
        pr = (pr > 0).astype(float)
        pr = pr[:, :self.seq_len]
        if pr.shape[1] < self.seq_len:
            pad = np.zeros((128, self.seq_len - pr.shape[1]))
            pr = np.concatenate([pr, pad], axis=1)
        # Split melody/harmony proxies:
        melody = pr[::2].T   # (T, 64)
        harmony = pr[1::2].T # (T, 64)
        return torch.from_numpy(melody), torch.from_numpy(harmony)

# Instantiate dataset & loader
seq_len = 200
midi_trimmed_dir = r"C:\Users\sugia\Desktop\UCSD\CSE 153\A2\maestro-v3.0.0\midi\trimmed_20s"
dataset = MidiDataset(midi_trimmed_dir, seq_len=seq_len)
# Ensure directory contains MIDI files
assert len(dataset) > 0, f"No MIDI files found in {midi_trimmed_dir}. Please check the directory path."
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)

class HarmonizerLSTM(nn.Module):
    def __init__(self, input_dim=64, hidden_dim=128, output_dim=64):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out)
        return self.activation(out)

model = HarmonizerLSTM().to(device)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

In [12]:
num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for mel, harm in tqdm(dataloader, desc=f"Epoch {epoch+1}"):
        mel = mel.float().to(device)
        harm = harm.float().to(device)
        pred = model(mel)
        loss = criterion(pred, harm)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} avg loss: {total_loss/len(dataloader):.4f}")

Epoch 1: 100%|██████████| 80/80 [00:04<00:00, 18.96it/s]


Epoch 1 avg loss: 0.2035


Epoch 2: 100%|██████████| 80/80 [00:03<00:00, 20.14it/s]


Epoch 2 avg loss: 0.0698


Epoch 3: 100%|██████████| 80/80 [00:04<00:00, 19.47it/s]


Epoch 3 avg loss: 0.0682


Epoch 4: 100%|██████████| 80/80 [00:04<00:00, 19.87it/s]


Epoch 4 avg loss: 0.0671


Epoch 5: 100%|██████████| 80/80 [00:04<00:00, 18.41it/s]


Epoch 5 avg loss: 0.0663


Epoch 6: 100%|██████████| 80/80 [00:03<00:00, 20.36it/s]


Epoch 6 avg loss: 0.0657


Epoch 7: 100%|██████████| 80/80 [00:04<00:00, 18.17it/s]


Epoch 7 avg loss: 0.0653


Epoch 8: 100%|██████████| 80/80 [00:04<00:00, 17.38it/s]


Epoch 8 avg loss: 0.0649


Epoch 9: 100%|██████████| 80/80 [00:04<00:00, 18.61it/s]


Epoch 9 avg loss: 0.0644


Epoch 10: 100%|██████████| 80/80 [00:04<00:00, 18.10it/s]


Epoch 10 avg loss: 0.0642


Epoch 11: 100%|██████████| 80/80 [00:04<00:00, 18.16it/s]


Epoch 11 avg loss: 0.0638


Epoch 12: 100%|██████████| 80/80 [00:04<00:00, 16.38it/s]


Epoch 12 avg loss: 0.0635


Epoch 13: 100%|██████████| 80/80 [00:04<00:00, 19.17it/s]


Epoch 13 avg loss: 0.0633


Epoch 14: 100%|██████████| 80/80 [00:03<00:00, 20.20it/s]


Epoch 14 avg loss: 0.0631


Epoch 15: 100%|██████████| 80/80 [00:04<00:00, 19.40it/s]


Epoch 15 avg loss: 0.0629


Epoch 16: 100%|██████████| 80/80 [00:04<00:00, 18.99it/s]


Epoch 16 avg loss: 0.0628


Epoch 17: 100%|██████████| 80/80 [00:04<00:00, 18.22it/s]


Epoch 17 avg loss: 0.0626


Epoch 18: 100%|██████████| 80/80 [00:04<00:00, 18.81it/s]


Epoch 18 avg loss: 0.0625


Epoch 19: 100%|██████████| 80/80 [00:04<00:00, 18.87it/s]


Epoch 19 avg loss: 0.0623


Epoch 20: 100%|██████████| 80/80 [00:06<00:00, 12.28it/s]

Epoch 20 avg loss: 0.0623





In [18]:
# Generation & MIDI Export (corrected):
idx = 10
input_path = dataset.paths[idx]
print(f"Generating harmonization for: {input_path}")

# Load melody-only piano-roll
melody, _ = dataset[idx]              # shape: (T, 64)
melody = melody.unsqueeze(0).float().to(device)  # shape: (1, T, 64)

# Generate harmony
model.eval()
with torch.no_grad():
    raw = model(melody)[0].cpu().numpy()       # raw shape: (T, 64)
    pred_harm = raw.T                          # shape: (64, T)

# Reconstruct full piano-roll by interleaving melody & predicted harmony
full_roll = np.zeros((128, seq_len))
# Squeeze out batch dim before transpose: melody.squeeze(0) -> (T,64)
full_roll[::2] = melody.squeeze(0).cpu().numpy().T    # shape: (64, T)
full_roll[1::2] = (pred_harm > 0.5).astype(int)       # shape: (64, T)

# Convert piano-roll to PrettyMIDI
pm_out = pretty_midi.PrettyMIDI()
inst = pretty_midi.Instrument(program=0)
for pitch in range(128):
    frames = full_roll[pitch]
    # detect on/off transitions
    on_idxs = np.where((frames[:-1] == 0) & (frames[1:] == 1))[0] + 1
    off_idxs = np.where((frames[:-1] == 1) & (frames[1:] == 0))[0] + 1
    for on, off in zip(on_idxs, off_idxs):
        inst.notes.append(
            pretty_midi.Note(
                velocity=100,
                pitch=pitch,
                start=on * dataset.step,
                end=off * dataset.step
            )
        )
pm_out.instruments.append(inst)
pm_out.write('harmonized_output_NEW.mid')

print("Harmonization complete. Saved to 'harmonized_output_NEW.mid'.")

Generating harmonization for: C:\Users\sugia\Desktop\UCSD\CSE 153\A2\maestro-v3.0.0\midi\trimmed_20s\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_03_R1_2014_wav--4.midi
Harmonization complete. Saved to 'harmonized_output_NEW.mid'.


In [15]:

import os
import numpy as np
import torch
import pretty_midi

# 6.1 Point this at any .mid/.midi file
input_path = r"C:\Users\sugia\Desktop\UCSD\CSE 153\A2\Twinkle Twinkle Little Star (MIDI Version).midi"
print(f"Harmonizing: {input_path}")

# 6.2 Load and get the full piano-roll
orig_pm = pretty_midi.PrettyMIDI(input_path)
fs      = 1.0 / dataset.step            # frames per second
pr_full = orig_pm.get_piano_roll(fs=fs) # shape (128, T_full)

T_full = pr_full.shape[1]
print(f"Original length: {T_full*dataset.step:.1f}s ({T_full} frames)")

# 6.3 Extract melody features (even rows → 64 dims)
mel = pr_full[::2, :].T.astype(np.float32)          # (T_full, 64)
mel_tensor = torch.from_numpy(mel).unsqueeze(0).to(device)  # (1, T_full, 64)

# 6.4 Predict harmony
model.eval()
with torch.no_grad():
    out      = model(mel_tensor)              # (1, T_full, 64)
    pred_h   = out[0].cpu().numpy().T         # (64, T_full)
    pred_bin = (pred_h > 0.5).astype(int)     # (64, T_full)

# 6.5 Reconstruct a full 128×T_full piano-roll
full_roll      = np.zeros((128, T_full), dtype=int)
full_roll[::2] = mel.T                       # melody back in
full_roll[1::2]= pred_bin                    # predicted harmony

# 6.6 Convert back to MIDI & save
pm_out = pretty_midi.PrettyMIDI()
inst   = pretty_midi.Instrument(program=0, name="Harmonized")
step   = dataset.step

for pitch in range(128):
    frames = full_roll[pitch]
    # pad to detect on/off
    padded = np.concatenate([[0], frames, [0]])
    on_ix  = np.where((padded[:-1]==0)&(padded[1:]==1))[0]
    off_ix = np.where((padded[:-1]==1)&(padded[1:]==0))[0]
    for on, off in zip(on_ix, off_ix):
        inst.notes.append(pretty_midi.Note(
            velocity=100,
            pitch=pitch,
            start=on  * step,
            end  =off * step
        ))

pm_out.instruments.append(inst)
out_file = os.path.splitext(os.path.basename(input_path))[0] + "_harmonized.mid"
pm_out.write(out_file)
print(f"Saved → {out_file} (≈ {T_full*step:.1f}s)")


Harmonizing: C:\Users\sugia\Desktop\UCSD\CSE 153\A2\Twinkle Twinkle Little Star (MIDI Version).midi
Original length: 53.0s (530 frames)
Saved → Twinkle Twinkle Little Star (MIDI Version)_harmonized.mid (≈ 53.0s)


In [None]:
# More Complex 

In [27]:
import os
import glob
import numpy as np
import pretty_midi
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Bidirectional, LSTM, TimeDistributed, Dense
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [25]:
# =====================
# Configuration
# =====================
MIDI_PATH = r"C:\Users\sugia\Desktop\UCSD\CSE 153\A2\maestro-v3.0.0\midi\trimmed_20s"
FRAME_RATE = 50        # lower frame rate for fewer time steps
SEQ_LENGTH = 100       # time steps per training sequence
N_NOTES = 128          # MIDI note range
BATCH_SIZE = 64        # larger batch for faster GPU utilization
EPOCHS = 10            # fewer epochs
MODEL_CHECKPOINT = "bilstm_harmony_best.h5"

# =====================
# Data Preprocessing
# =====================
def midi_to_piano_roll(pm: pretty_midi.PrettyMIDI, fs: int = FRAME_RATE) -> np.ndarray:
    """
    Convert a PrettyMIDI object into a piano-roll representation.
    Returns an array of shape (N_NOTES, T) where T = duration * fs.
    """
    return pm.get_piano_roll(fs=fs)


def load_data(midi_folder: str):
    """
    Load all MIDI files, extract melody and harmony tracks, convert to sequences.
    Assumes instrument 0 is melody; all others summed as harmony.
    Returns X (melody) and Y (harmony) as numpy arrays with shape (num_examples, SEQ_LENGTH, N_NOTES).
    """
    X_list, Y_list = [], []
    for path in glob.glob(os.path.join(midi_folder, "*.mid")) + glob.glob(os.path.join(midi_folder, "*.midi")):
        pm = pretty_midi.PrettyMIDI(path)
        # Melody = first instrument
        melody_roll = midi_to_piano_roll(pm.instruments[0])
        # Harmony = sum of all other instruments; if none, zeros
        if len(pm.instruments) > 1:
            harmony_roll = np.zeros_like(melody_roll)
            for inst in pm.instruments[1:]:
                harmony_roll += inst.get_piano_roll(fs=FRAME_RATE)
        else:
            harmony_roll = np.zeros_like(melody_roll)

        # Binarize
        melody_bin = (melody_roll > 0).astype(np.float32)
        harmony_bin = (harmony_roll > 0).astype(np.float32)

        T = melody_bin.shape[1]
        # Split into fixed-length segments
        for start in range(0, T - SEQ_LENGTH, SEQ_LENGTH):
            end = start + SEQ_LENGTH
            X_list.append(melody_bin[:, start:end].T)
            Y_list.append(harmony_bin[:, start:end].T)

    X = np.array(X_list)
    Y = np.array(Y_list)
    return X, Y

# Load and split dataset
data_X, data_Y = load_data(MIDI_PATH)
X_train, X_val, Y_train, Y_val = train_test_split(data_X, data_Y, test_size=0.1, random_state=42)

In [28]:
# =====================
# Model Definition (Smaller)
# =====================
model = Sequential([
    Bidirectional(LSTM(128, return_sequences=True), input_shape=(SEQ_LENGTH, N_NOTES)),
    Bidirectional(LSTM(128, return_sequences=True)),
    TimeDistributed(Dense(N_NOTES, activation='sigmoid'))
])
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# =====================
# Training with Early Stopping
# =====================
checkpoint_cb = ModelCheckpoint(MODEL_CHECKPOINT, save_best_only=True, monitor='val_loss')
early_cb = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

model.fit(
    X_train, Y_train,
    validation_data=(X_val, Y_val),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    callbacks=[checkpoint_cb, early_cb]
)
model.save("bilstm_harmony_final.h5")

Epoch 1/10
[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 330ms/step - accuracy: 0.0014 - loss: 0.1446



[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 342ms/step - accuracy: 0.0014 - loss: 0.1440 - val_accuracy: 0.0000e+00 - val_loss: 4.2603e-04
Epoch 2/10
[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 394ms/step - accuracy: 0.0000e+00 - loss: 3.4713e-04



[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 400ms/step - accuracy: 0.0000e+00 - loss: 3.4676e-04 - val_accuracy: 0.0000e+00 - val_loss: 1.9382e-04
Epoch 3/10
[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 523ms/step - accuracy: 0.0000e+00 - loss: 1.6875e-04



[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m87s[0m 535ms/step - accuracy: 0.0000e+00 - loss: 1.6863e-04 - val_accuracy: 0.0000e+00 - val_loss: 1.1295e-04
Epoch 4/10
[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 420ms/step - accuracy: 0.0000e+00 - loss: 1.0170e-04



[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 427ms/step - accuracy: 0.0000e+00 - loss: 1.0164e-04 - val_accuracy: 0.0000e+00 - val_loss: 7.4776e-05
Epoch 5/10
[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 367ms/step - accuracy: 0.0000e+00 - loss: 6.8696e-05



[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 373ms/step - accuracy: 0.0000e+00 - loss: 6.8662e-05 - val_accuracy: 0.0000e+00 - val_loss: 5.3468e-05
Epoch 6/10
[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 389ms/step - accuracy: 0.0000e+00 - loss: 4.9779e-05



[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 395ms/step - accuracy: 0.0000e+00 - loss: 4.9758e-05 - val_accuracy: 0.0000e+00 - val_loss: 4.0250e-05
Epoch 7/10
[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 376ms/step - accuracy: 0.0000e+00 - loss: 3.7824e-05



[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m62s[0m 383ms/step - accuracy: 0.0000e+00 - loss: 3.7810e-05 - val_accuracy: 0.0000e+00 - val_loss: 3.1422e-05
Epoch 8/10
[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 396ms/step - accuracy: 0.0000e+00 - loss: 2.9734e-05



[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 403ms/step - accuracy: 0.0000e+00 - loss: 2.9725e-05 - val_accuracy: 0.0000e+00 - val_loss: 2.5207e-05
Epoch 9/10
[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 421ms/step - accuracy: 0.0000e+00 - loss: 2.3982e-05



[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 434ms/step - accuracy: 0.0000e+00 - loss: 2.3975e-05 - val_accuracy: 0.0000e+00 - val_loss: 2.0649e-05
Epoch 10/10
[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 515ms/step - accuracy: 0.0000e+00 - loss: 1.9729e-05



[1m162/162[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 529ms/step - accuracy: 0.0000e+00 - loss: 1.9724e-05 - val_accuracy: 0.0000e+00 - val_loss: 1.7200e-05




In [29]:
def generate_harmony(input_midi: str, output_midi: str, model_path: str = MODEL_CHECKPOINT):
    gen_model = load_model(model_path)
    pm = pretty_midi.PrettyMIDI(input_midi)
    melody_roll = midi_to_piano_roll(pm.instruments[0])
    melody_bin = (melody_roll > 0).astype(np.float32)
    T = melody_bin.shape[1]
    pad = (SEQ_LENGTH - (T % SEQ_LENGTH)) % SEQ_LENGTH
    if pad > 0:
        melody_bin = np.pad(melody_bin, ((0,0),(0,pad)), mode='constant')

    segments = [melody_bin[:, i:i+SEQ_LENGTH].T for i in range(0, melody_bin.shape[1], SEQ_LENGTH)]
    preds = gen_model.predict(np.array(segments), batch_size=1)
    harmony_bin = np.vstack(preds).T[:, :T]

    harmony_pm = pretty_midi.PrettyMIDI()
    piano_inst = pretty_midi.Instrument(program=0)
    times = np.arange(harmony_bin.shape[1]) / FRAME_RATE
    for note in range(N_NOTES):
        active = False
        for idx, t in enumerate(times):
            if harmony_bin[note, idx] > 0.5 and not active:
                active = True; start_t = t
            elif active and (harmony_bin[note, idx] <= 0.5 or idx == len(times)-1):
                piano_inst.notes.append(pretty_midi.Note(100, note, start_t, t))
                active = False
    harmony_pm.instruments.extend([piano_inst] + pm.instruments)
    harmony_pm.write(output_midi)
    print(f"Harmony generated and saved to {output_midi}")

In [32]:
# input_file = r"C:\Users\sugia\Desktop\UCSD\CSE 153\A2\maestro-v3.0.0\midi\trimmed_20s\MIDI-UNPROCESSED_01-03_R1_2014_MID--AUDIO_03_R1_2014_wav--4.midi"
# output_file = r"C:\Users\sugia\Desktop\UCSD\CSE 153\A2\sample_harmonization.midi"

input_file = r"C:\Users\sugia\Desktop\UCSD\CSE 153\A2\maestro-v3.0.0\midi\trimmed_20s\MIDI-Unprocessed_01_R1_2008_01-04_ORIG_MID--AUDIO_01_R1_2008_wav--1.midi"
output_file = r"C:\Users\sugia\Desktop\UCSD\CSE 153\A2\sample_harmonization2.midi"
generate_harmony(input_file, output_file)





[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step
Harmony generated and saved to C:\Users\sugia\Desktop\UCSD\CSE 153\A2\sample_harmonization2.midi
