In [1]:
import os
import torch
import torchaudio
from torch.utils.data import Dataset, DataLoader, random_split, TensorDataset
import torch.nn as nn
import torch.nn.functional as F
from torchaudio.transforms import MelSpectrogram, AmplitudeToDB
from tqdm import tqdm
import librosa
import numpy as np
import miditoolkit
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, average_precision_score, accuracy_score
import random
import pretty_midi

from symusic import Score
from miditok import REMI, TokenizerConfig
from midiutil import MIDIFile
from glob import glob
# Note: ChatGPT was used to help generate some of the functions in this notebook.

  import pkg_resources


In [2]:
# Collect the MIDI corpus. glob() gives no ordering guarantee (the order
# depends on the file system), so the paths are sorted to keep every later
# step reproducible across machines and kernel restarts.
midi_files = sorted(glob('nes_midis/*'))
print(len(midi_files))

# REMI tokenizer configured with num_velocities=1
# (presumably a single velocity bin -- confirm against the miditok docs).
config = TokenizerConfig(num_velocities=1)
tokenizer = REMI(config)
# Train the tokenizer on the corpus with a target vocabulary size of 2000.
tokenizer.train(vocab_size=2000, files_paths=midi_files)


2000


In [3]:
# Report the CUDA environment so the saved outputs record the hardware used.
print("CUDA available:", torch.cuda.is_available())
print("Device name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU found")
print("Torch CUDA version:", torch.version.cuda)

# instruments: instrument name -> number of tracks using it across the corpus.
# bad_files:   paths that could not be loaded/inspected with pretty_midi.
instruments = {}
bad_files = []

for file in midi_files:
    try:
        midi = pretty_midi.PrettyMIDI(file)
        # Resolve every name before touching the counts, so a file that fails
        # half-way through does not leave partial counts behind.
        track_names = [
            pretty_midi.program_to_instrument_name(instrument.program)
            for instrument in midi.instruments
        ]
    except Exception:
        # Best effort: set the file aside instead of aborting the whole scan.
        bad_files.append(file)
        continue
    for name in track_names:
        instruments[name] = instruments.get(name, 0) + 1

# (name, count) pairs, most frequent first.
sorted_instruments = sorted(instruments.items(), key=lambda x: x[1], reverse=True)

# Drop the unreadable files; a set makes each membership test O(1).
bad_file_set = set(bad_files)
midi_files = [file for file in midi_files if file not in bad_file_set]


CUDA available: True
Device name: NVIDIA GeForce GTX 1660 SUPER
Torch CUDA version: 11.8




In [None]:
# Keep only the names of the first 20 entries of sorted_instruments
# (the most frequent instruments) as the condensed instrument set.
useful_instruments = {
    instrument_name for instrument_name, _count in sorted_instruments[:20]
}

# Extracts the pitch sequence of a MIDI file, skipping drum tracks and
# (optionally) every instrument that is not in an allow-list.
def extract_note_sequence(midi_path, allowed_instruments=None):
    """Return the pitches of all non-drum notes in a MIDI file.

    Pitches are concatenated instrument by instrument, in the order
    pretty_midi lists the instruments and their notes; tracks are not merged
    by onset time.

    Args:
        midi_path: Path of the MIDI file, passed to ``pretty_midi.PrettyMIDI``.
        allowed_instruments: Optional collection of instrument names (as
            returned by ``pretty_midi.program_to_instrument_name``), e.g. the
            notebook's ``useful_instruments`` set. When given, only
            instruments whose name is in the collection contribute notes.
            ``None`` (the default) keeps every non-drum instrument.

    Returns:
        list: The ``pitch`` value of every selected note.

    Raises:
        Whatever ``pretty_midi.PrettyMIDI`` raises for an unreadable file.
    """
    midi = pretty_midi.PrettyMIDI(midi_path)
    pitches = []
    for instrument in midi.instruments:
        if instrument.is_drum:
            continue
        if allowed_instruments is not None:
            name = pretty_midi.program_to_instrument_name(instrument.program)
            if name not in allowed_instruments:
                continue
        pitches.extend(note.pitch for note in instrument.notes)
    return pitches

# Example usage: extract_note_sequence(midi_files[1])

In [None]:
from torch.utils.data import Dataset

class MIDIDataset(Dataset):
    """Sliding-window next-pitch dataset built from the MIDI files in a directory.

    Each item is a pair ``(x, y)`` of 1-D ``torch.long`` tensors of length
    ``seq_len``; ``y`` is ``x`` shifted one note to the right within the same
    file. Windows are taken with stride 1 and never span two files.

    Only one encoded tensor per file is stored; the windows are sliced out of
    it on access instead of being materialised up front, so memory grows with
    the number of notes rather than with ``notes * seq_len``.

    Args:
        midi_dir: Directory scanned (non-recursively) for files ending in
            ``.mid`` or ``.midi``.
        vocab: Iterable of pitch values; the position of a pitch in ``vocab``
            is its class index. Notes whose pitch is not in ``vocab`` are
            dropped before windowing.
        seq_len: Number of notes per window.

    Attributes:
        sequences: One encoded 1-D tensor per file that produced windows.
        windows: ``(sequence index, start offset)`` for every item.
        skipped_files: Paths for which ``extract_note_sequence`` raised.
    """

    def __init__(self, midi_dir, vocab, seq_len=128):
        self.vocab = vocab
        self.seq_len = seq_len
        self.pitch2idx = {p: i for i, p in enumerate(vocab)}
        self.sequences = []
        self.windows = []
        self.skipped_files = []

        # os.listdir order is arbitrary; sort so item indices are reproducible.
        for file in sorted(os.listdir(midi_dir)):
            if not file.endswith(('.mid', '.midi')):
                continue
            path = os.path.join(midi_dir, file)
            try:
                notes = extract_note_sequence(path)
            except Exception:
                # One unreadable file must not abort the whole dataset build;
                # remember it so the caller can inspect what was left out.
                self.skipped_files.append(path)
                continue

            encoded = [self.pitch2idx[n] for n in notes if n in self.pitch2idx]
            # A window starting at i needs notes i .. i + seq_len (inclusive)
            # so that the shifted target exists.
            num_windows = len(encoded) - seq_len
            if num_windows <= 0:
                continue

            seq_id = len(self.sequences)
            self.sequences.append(torch.tensor(encoded, dtype=torch.long))
            self.windows.extend((seq_id, start) for start in range(num_windows))

    @property
    def data(self):
        """All ``(x, y)`` pairs as a list (kept for backward compatibility).

        Builds the full list on every access; prefer indexing the dataset.
        """
        return [self[i] for i in range(len(self))]

    def __len__(self):
        return len(self.windows)

    def __getitem__(self, idx):
        seq_id, start = self.windows[idx]
        sequence = self.sequences[seq_id]
        stop = start + self.seq_len
        return sequence[start:stop], sequence[start + 1:stop + 1]


In [None]:
# Pitch vocabulary: every MIDI note number (0-127), so no note is dropped
# when the dataset maps pitches to class indices.
vocab = list(range(128))

# Same directory the corpus was globbed from at the top of the notebook.
dataset = MIDIDataset(midi_dir="nes_midis", vocab=vocab)

# num_workers=0: MIDIDataset is defined inside the notebook, so worker
# processes started with the "spawn" method (the Windows/macOS default)
# cannot import it and the loader fails or hangs. Items are in-memory tensors,
# so loading in the main process is cheap anyway.
# pin_memory is only useful (and only warning-free) when CUDA is present.
loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    num_workers=0,
    pin_memory=torch.cuda.is_available(),
)


In [7]:
class PitchLSTM(nn.Module):
    """Embedding -> multi-layer LSTM -> linear head over the pitch vocabulary.

    Args:
        vocab_size: Number of distinct token indices (embedding rows and
            output logits).
        embed_dim: Size of each token embedding.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, vocab_size, embed_dim=128, hidden_dim=256, num_layers=2):
        super().__init__()
        self.embed = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embed_dim)
        self.lstm = nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_dim, out_features=vocab_size)

    def forward(self, x, hidden=None):
        """Score the next token from the final time step of the sequence.

        Args:
            x: Integer index tensor, batch-first (the LSTM is built with
                ``batch_first=True``).
            hidden: Optional LSTM state to continue from; ``None`` lets the
                LSTM start from its default state.

        Returns:
            tuple: ``(logits, hidden)`` where ``logits`` comes from applying
            the linear head to the LSTM output at the last time step, and
            ``hidden`` is the state returned by the LSTM.
        """
        embedded = self.embed(x)
        sequence_out, hidden = self.lstm(embedded, hidden)
        # Only the last time step feeds the prediction head.
        final_step = sequence_out[:, -1, :]
        logits = self.fc(final_step)
        return logits, hidden


In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Size the embedding/output layers from the vocabulary the dataset was built with.
model = PitchLSTM(vocab_size=len(dataset.vocab))
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

model.train()
for epoch in range(5):
    epoch_loss = 0.0
    num_batches = 0

    for batch_x, batch_y in loader:
        batch_x = batch_x.to(device)
        # PitchLSTM returns logits for the final time step only, shape
        # (batch, vocab). The matching target is therefore the last element of
        # each shifted window, shape (batch,); passing the whole
        # (batch, seq_len) window makes CrossEntropyLoss raise.
        batch_y = batch_y[:, -1].to(device)

        optimizer.zero_grad()
        out, _ = model(batch_x)
        loss = criterion(out, batch_y)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        num_batches += 1

    # Report the mean loss over the epoch rather than the last batch's loss;
    # max(..., 1) keeps an empty loader from raising.
    mean_loss = epoch_loss / max(num_batches, 1)
    print(f"Epoch {epoch} | Loss: {mean_loss:.4f}")





KeyboardInterrupt: 