Symbolic Unconditional Music Generator
=====

In [1]:
#Important Imports can add more as needed
#Easier Importing you can type "pip install -r imports.txt" in the terminal
#these are imports from workbook3
import glob
import random
from typing import List
from collections import defaultdict

import numpy as np
from numpy.random import choice

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from symusic import Score
from miditok import REMI, TokenizerConfig

#additional imports for the LSTM model
from miditok import TokSequence
from midi_utils import midi_to_note_duration_sequence, note_sequence_to_midi
#from tensorflow.keras.models import Sequential
#from tensorflow.keras.layers import LSTM, Dense, Dropout

## 1. Load MIDI Files

In [44]:
#random seed can be changed to get different results, default its 42
#from CSE_153R Homework 3(Spring 2025)
random.seed(42)

#Loading the music data
#first is popular pop songs I enjoy
#midi_files = glob('data/*.mid')
#same midi file data We have trained on for Assigment 1 Task 1
#midi_files = glob('Assignment1(Task1_midis)/*.midi')
#Lofi music data I got from kaggle
midi_files = glob.glob('Lo-Fi/*.mid')
#print(midi_files)
len(midi_files)
    

93

### We are focusing on only Grand Piano, which is a very commonly used instrument for Lofti music. Our Whole training data uses only Grand Piano, which is program 0 in MIDI files.

In [47]:
def extract_instrument_stats(midi_files):
    program_counts = defaultdict(int)
    
    for file_path in midi_files:
        score = Score(file_path)
        for track in score.tracks:
            # Get program number (0-127)
            program = track.program
            # Count notes in this track
            note_count = len(track.notes)
            program_counts[program] += note_count
    
    return program_counts

instrument_stats = extract_instrument_stats(midi_files)

# Sort if training data contains more than 1 programs, in this case no.
top_programs = sorted(instrument_stats.items(), key=lambda x: x[1], reverse=True)[:4]

print("\n instrument programs used by training data:")
for i, (program, count) in enumerate(top_programs, 1):
    print(f"{i}. Program {program}: {count} notes")


 instrument programs used by training data:
1. Program 0: 4060 notes


# 2. Training A Tokenizer With REMI Method
 - Configures MIDI tokenizer with simplified settings：
 
     - Ignore how hard notes are pressed

     - Don't treat chords as single items

     - Ignore instrument types

 - Initializes REMI tokenizer

 - Trains tokenizer on the MIDI files with 1000-token vocabulary

In [36]:
note_config = TokenizerConfig(num_velocities=1, use_chords=False, use_programs=False)
note_tokenizer = REMI(note_config)
note_tokenizer.train(vocab_size=1000, files_paths=midi_files)

### Example of tokenization on the first MIDI file

In [37]:
# e.g.:
midi = Score(midi_files[0])
tokens = note_tokenizer(midi)[0].tokens
tokens[:10]

['Bar_None',
 'Position_0',
 'Pitch_47',
 'Velocity_127',
 'Duration_0.3.8',
 'Pitch_54',
 'Velocity_127',
 'Duration_0.3.8',
 'Pitch_62',
 'Velocity_127']

# 3. Extracting The Notes
 - Define helper function that extracts pitch numbers from tokenized MIDI

 - Aggregate all notes from all MIDI files into single list

In [22]:
def note_extraction(midi_file):
    score = Score(midi_file)
    tokens = note_tokenizer(score)[0].tokens
    pitches = []
    #only grabs the number if the starting word is "Pitch"
    for token in tokens:
        if token.startswith('Pitch'):
            # Extract the note number from the token
            note_number = int(token.split('_')[1])
            pitches.append(note_number)
    return pitches

#Getting the list of notes as Corpus
#create an Array of Notes from each midi file
notes = []
total_notes = 0
for file in midi_files:
    Corpus = note_extraction(file)
    notes += Corpus
    total_notes += sum(Corpus)
print("Each midifile notes: ", notes)
print("length of MifiFile notes: ", len(notes))
print("Total notes in the dataset: ", total_notes)

Each midifile notes:  [47, 54, 62, 69, 73, 47, 59, 62, 66, 69, 52, 59, 62, 67, 71, 74, 50, 61, 66, 69, 76, 47, 54, 62, 69, 73, 47, 59, 62, 66, 69, 52, 59, 62, 67, 71, 74, 50, 61, 66, 69, 76, 47, 54, 62, 69, 73, 45, 69, 72, 64, 83, 86, 64, 67, 76, 52, 64, 67, 71, 86, 78, 66, 76, 71, 79, 45, 69, 72, 64, 83, 86, 64, 67, 76, 52, 64, 67, 71, 86, 78, 66, 76, 79, 43, 62, 67, 71, 78, 71, 67, 62, 53, 65, 69, 72, 76, 74, 79, 67, 43, 62, 67, 71, 78, 71, 67, 62, 53, 65, 69, 72, 76, 74, 79, 67, 41, 60, 65, 69, 76, 50, 62, 65, 69, 72, 52, 64, 67, 71, 79, 84, 71, 46, 65, 70, 73, 75, 53, 65, 68, 72, 53, 65, 68, 72, 77, 46, 65, 70, 73, 77, 82, 46, 65, 70, 73, 75, 53, 65, 68, 72, 53, 65, 68, 72, 77, 46, 65, 70, 73, 77, 82, 36, 60, 64, 55, 69, 38, 62, 65, 57, 67, 40, 64, 57, 59, 74, 43, 55, 59, 62, 64, 69, 45, 64, 69, 72, 79, 83, 64, 67, 48, 64, 67, 72, 86, 60, 69, 47, 66, 71, 74, 81, 41, 60, 65, 69, 76, 79, 50, 62, 65, 69, 72, 52, 64, 67, 71, 74, 41, 60, 65, 69, 76, 41, 60, 65, 69, 76, 79, 50, 62, 65, 6

In [39]:
def extract_instrument_stats(midi_files):
    program_counts = defaultdict(int)
    
    for file_path in midi_files:
        score = Score(file_path)
        for track in score.tracks:
            # Get program number (0-127)
            program = track.program
            # Count notes in this track
            note_count = len(track.notes)
            program_counts[program] += note_count
    
    return program_counts

instrument_stats = extract_instrument_stats(midi_files)

# Get top 4 programs by note count
top_programs = sorted(instrument_stats.items(), key=lambda x: x[1], reverse=True)[:4]

print("\nTop 4 instrument programs by note count:")
for i, (program, count) in enumerate(top_programs, 1):
    print(f"{i}. Program {program}: {count} notes")


Top 4 instrument programs by note count:
1. Program 0: 4060 notes


# 4. Building the LSTM Model

### Preparation:
 - Set training sequence

 - Prepare input-target pairs for LSTM training
 
 - Convert data to PyTorch tensors

In [23]:
seq_length = 32  # Number of notes in each input sequence
step = 1

# Create input/output pairs
inputs = []
targets = []
for i in range(0, len(notes) - seq_length, step):
    inputs.append(notes[i:i+seq_length])
    targets.append(notes[i+seq_length])

inputs = torch.tensor(inputs, dtype=torch.long)
targets = torch.tensor(targets, dtype=torch.long)

### Define LSTM Model
 - Define architectures
 
 - Use only last LSTM output for prediction

In [24]:
class MusicLSTM(nn.Module):
    def __init__(self, vocab_size, embedding_dim=64, hidden_dim=128, num_layers=2):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, vocab_size)

    def forward(self, x, hidden=None):
        x = self.embedding(x)
        out, hidden = self.lstm(x, hidden)
        out = self.fc(out[:, -1, :])  # Use only the last output for prediction
        return out, hidden

# 5. Setting Up The Training

### Initiallize Model
 - vocabulary size = maximum MIDI pitch

 - use GPU if available

 - use Adam optimizer with 0.01 learning rate
 
 - Use cross-entropy loss

In [25]:
vocab_size = max(notes) + 1  # Or len(set(notes))
model = MusicLSTM(vocab_size)
#default use GPU unless not avaliable
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

### Training Loop

In [26]:
num_epochs = 50
batch_size = 64

for epoch in range(num_epochs):
    permutation = torch.randperm(inputs.size(0))
    epoch_loss = 0
    for i in range(0, inputs.size(0), batch_size):
        idx = permutation[i:i+batch_size]
        batch_x = inputs[idx].to(device)
        batch_y = targets[idx].to(device)

        optimizer.zero_grad()
        outputs, _ = model(batch_x)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

Epoch 1/50, Loss: 232.4985
Epoch 2/50, Loss: 185.0279
Epoch 3/50, Loss: 151.3165
Epoch 4/50, Loss: 123.8603
Epoch 5/50, Loss: 97.6598
Epoch 6/50, Loss: 78.1883
Epoch 7/50, Loss: 58.6773
Epoch 8/50, Loss: 45.2282
Epoch 9/50, Loss: 36.0122
Epoch 10/50, Loss: 28.1711
Epoch 11/50, Loss: 20.9218
Epoch 12/50, Loss: 14.8224
Epoch 13/50, Loss: 10.6167
Epoch 14/50, Loss: 8.7292
Epoch 15/50, Loss: 7.2623
Epoch 16/50, Loss: 5.4634
Epoch 17/50, Loss: 4.6665
Epoch 18/50, Loss: 3.5587
Epoch 19/50, Loss: 2.9230
Epoch 20/50, Loss: 2.6567
Epoch 21/50, Loss: 2.3352
Epoch 22/50, Loss: 2.2434
Epoch 23/50, Loss: 2.0533
Epoch 24/50, Loss: 1.9502
Epoch 25/50, Loss: 1.8854
Epoch 26/50, Loss: 2.0841
Epoch 27/50, Loss: 2.4174
Epoch 28/50, Loss: 5.6466
Epoch 29/50, Loss: 67.8503
Epoch 30/50, Loss: 137.5515
Epoch 31/50, Loss: 105.3452
Epoch 32/50, Loss: 72.1600
Epoch 33/50, Loss: 48.0394
Epoch 34/50, Loss: 32.2514
Epoch 35/50, Loss: 22.6662
Epoch 36/50, Loss: 14.0857
Epoch 37/50, Loss: 9.4736
Epoch 38/50, Loss: 6

# 6. Generate Music

### MIDI Export Helper Functions

 - `note_sequence_to_midi()` : creates a single-instrument midi

 - `multi_instrument_sequence_to_midi()` : creates a multi-instrument midi by creating seperate tracks for each instrument

 - Lo-Fi specific settings:

     - Slower tempo (750000 μs/beat ≈ 80 BPM)

     - Longer note durations

In [27]:
#change the tempo for a slower BPM
#increase note_length for longer notes, more lofi feel
#AI suggest these programs for a Lo-Fi type of music
"""
program 0: is the Grand Piano
program 4: is the Electric Piano
program 6: is the Harpsichord
program 11: is the Vibraphone
program 24: is the Acoustic Guitar (nylon)
program 32: is the Acosutic Bass
program 48: is the String Ensemble 1
"""
def note_sequence_to_midi(note_sequence, filename, velocity=64, tempo=750000, note_length=240, program=24):
    import mido
    mid = mido.MidiFile()
    track = mido.MidiTrack()
    mid.tracks.append(track)
    track.append(mido.MetaMessage('set_tempo', tempo=tempo))
    track.append(mido.Message('program_change', program=program, time=0))
    
    for note in note_sequence:
        track.append(mido.Message('note_on', note=note, velocity=velocity, time=0))
        track.append(mido.Message('note_off', note=note, velocity=velocity, time=note_length))
    mid.save(filename)

In [28]:
# I had AI generate a function that allowed me to create a midi file with multiple instruments
# instead of my default one that only allows one single instrument
def multi_instrument_sequence_to_midi(note_sequences, programs, filename, velocity=64, tempo=750000, note_length=240):
    """
    note_sequences: list of lists, each sublist is a sequence of notes for one instrument
    programs: list of program numbers (0-127), one for each instrument
    """
    import mido
    mid = mido.MidiFile()
    # I am guessing here is where it iterates through each program/instrument
    for channel, (notes, program) in enumerate(zip(note_sequences, programs)):
        track = mido.MidiTrack()
        mid.tracks.append(track)
        track.append(mido.MetaMessage('set_tempo', tempo=tempo))
        track.append(mido.Message('program_change', program=program, channel=channel, time=0))
        for note in notes:
            track.append(mido.Message('note_on', note=note, velocity=velocity, time=0, channel=channel))
            track.append(mido.Message('note_off', note=note, velocity=velocity, time=note_length, channel=channel))
    mid.save(filename)

### Music Generation Process:

1. Starts with seed sequence (first 32 notes)

2. Autoregressively predicts next notes

3. Uses probabilistic sampling instead of greedy algorithm

4. Generates 1024-note sequence

### Multi-instrument arrangement:

1. Splits notes into 7 parallel sequences

2. Assigns Lo-Fi appropriate instruments:

     - 4: Electric Piano

     - 5: Chorused Piano

     - 6: Harpsichord

     - 11: Vibraphone

     - 24: Acoustic Guitar

     - 32: Acoustic Bass

     - 48: Strings

3. Exports final MIDI file

In [43]:
def generate_sequence(model, seed, length=1024):
    model.eval()
    generated = list(seed)
    input_seq = torch.tensor(seed, dtype=torch.long).unsqueeze(0).to(device)
    hidden = None
    for _ in range(length):
        output, hidden = model(input_seq, hidden)
        probs = torch.softmax(output, dim=-1).detach().cpu().numpy().flatten()
        next_note = np.random.choice(np.arange(vocab_size), p=probs)
        generated.append(next_note)
        input_seq = torch.cat([input_seq[:, 1:], torch.tensor([[next_note]], device=device)], dim=1)
    return generated

seed = notes[:seq_length]
generated_notes = generate_sequence(model, seed, length = 1024)

#change this to match the amount of instrument you want to use
num_instruments = 7 
length = len(generated_notes) // num_instruments

# Truncate to a multiple of num_instruments to avoid uneven lists
trimmed_length = length * num_instruments
trimmed_notes = generated_notes[:trimmed_length]

# Group notes by time step: each sublist is [note1, note2, note3] for that tick
ticks = [trimmed_notes[i*num_instruments:(i+1)*num_instruments] for i in range(length)]

# Transpose: now each instrument gets its own list of notes
note_sequences = list(map(list, zip(*ticks)))

## Generate Multi-instrument piece

# Save generated notes as a MIDI file using your midi_utils
output_midi_path = "Lo-Fi_Generated_Music_Multi_Instrument.mid"
multi_instrument_sequence_to_midi(note_sequences,[4, 5, 6, 11, 24, 32, 48], output_midi_path)
print(f"Generated Multi-instrument MIDI saved as {output_midi_path}")

## Generate Single instrument piece, using Acoustic Guitar
output_midi_path = "Lo-Fi_Generated_Music_Single_Instrument.mid"
note_sequence_to_midi(generated_notes, output_midi_path, program=0)
print(f"Generated Single-instrument MIDI saved as {output_midi_path}")

Generated Multi-instrument MIDI saved as Lo-Fi_Generated_Music_Multi_Instrument.mid
Generated Single-instrument MIDI saved as Lo-Fi_Generated_Music_Single_Instrument.mid
