In [36]:
import os
from pathlib import Path
from copy import deepcopy

import torch
if torch.cuda.is_available():
    device = torch.device("cuda")
    print('Using GPU via CUDA:', torch.cuda.get_device_name(0))
elif torch.backends.mps.is_available():
    device = torch.device("mps")
    print('Using GPU via MPS (Apple Silicon)')
else:
    device = torch.device("cpu")
    print('Using CPU')

# Use device like this:
# model.to(device)


from mido import MidiFile
from symusic import Score

from miditok import REMI, TokenizerConfig, TokSequence
from miditok.pytorch_data import DatasetMIDI, DataCollator
from miditok.utils import split_files_for_training

import miditoolkit
from miditoolkit import MidiFile
import json

import pretty_midi

from torch.utils.data import DataLoader

import torch
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
from torch import nn
from torch.utils.data import Dataset, DataLoader
import json
from pathlib import Path
from typing import List

# Load miditok tokenizer
from miditok import REMI, TokenizerConfig, TokSequence
from miditoolkit import MidiFile, Instrument, Note






Using GPU via MPS (Apple Silicon)


In [38]:
# Using miditoolkit
path = os.path.join('best_model/symbolic_conditioned.mid')
midi_obj = miditoolkit.midi.parser.MidiFile(path)
midi_obj

ticks per beat: 480
max tick: 3208
tempo changes: 1
time sig: 1
key sig: 0
markers: 0
lyrics: False
instruments: 2

In [41]:
midi_obj.tempo_changes

[TempoChange(tempo=120.0, time=0)]

In [39]:
midi_obj.instruments[1].notes

[Note(velocity=93, pitch=59, start=66, end=82),
 Note(velocity=93, pitch=55, start=128, end=144),
 Note(velocity=93, pitch=55, start=164, end=180),
 Note(velocity=103, pitch=52, start=174, end=190),
 Note(velocity=89, pitch=64, start=174, end=190),
 Note(velocity=79, pitch=52, start=216, end=232),
 Note(velocity=89, pitch=43, start=236, end=252),
 Note(velocity=79, pitch=48, start=290, end=306),
 Note(velocity=79, pitch=49, start=300, end=316),
 Note(velocity=79, pitch=55, start=300, end=316),
 Note(velocity=79, pitch=58, start=308, end=324),
 Note(velocity=79, pitch=48, start=320, end=336),
 Note(velocity=79, pitch=48, start=350, end=366),
 Note(velocity=79, pitch=54, start=372, end=388),
 Note(velocity=79, pitch=48, start=372, end=388),
 Note(velocity=79, pitch=55, start=404, end=420),
 Note(velocity=79, pitch=62, start=404, end=420),
 Note(velocity=79, pitch=64, start=462, end=478),
 Note(velocity=79, pitch=51, start=462, end=478),
 Note(velocity=79, pitch=46, start=504, end=520),
 

In [30]:

tokenizer = REMI.from_pretrained("tokenizer.json")
# Invert the vocab dictionary: id → token
id_to_token = {v: k for k, v in tokenizer.vocab.items()}

# Load token IDs
import json
with open("tokenized_json/right_hand/acsrnade.json") as f:
    token_ids = json.load(f)

# Map first few token IDs to strings
token_strs = [id_to_token[token_id] for token_id in token_ids[:20]]

print(token_strs)



['Bar_None', 'TimeSig_4/4', 'Position_0', 'Tempo_121.29', 'Program_0', 'Pitch_55', 'Velocity_121', 'Duration_0.3.4', 'Program_0', 'Pitch_67', 'Velocity_121', 'Duration_0.3.4', 'Rest_0.3.8', 'Position_19', 'Program_0', 'Pitch_57', 'Velocity_123', 'Duration_0.1.4', 'Program_0', 'Pitch_69']


In [31]:
id_to_token

{0: 'PAD_None',
 1: 'BOS_None',
 2: 'EOS_None',
 3: 'MASK_None',
 4: 'Bar_None',
 5: 'Pitch_21',
 6: 'Pitch_22',
 7: 'Pitch_23',
 8: 'Pitch_24',
 9: 'Pitch_25',
 10: 'Pitch_26',
 11: 'Pitch_27',
 12: 'Pitch_28',
 13: 'Pitch_29',
 14: 'Pitch_30',
 15: 'Pitch_31',
 16: 'Pitch_32',
 17: 'Pitch_33',
 18: 'Pitch_34',
 19: 'Pitch_35',
 20: 'Pitch_36',
 21: 'Pitch_37',
 22: 'Pitch_38',
 23: 'Pitch_39',
 24: 'Pitch_40',
 25: 'Pitch_41',
 26: 'Pitch_42',
 27: 'Pitch_43',
 28: 'Pitch_44',
 29: 'Pitch_45',
 30: 'Pitch_46',
 31: 'Pitch_47',
 32: 'Pitch_48',
 33: 'Pitch_49',
 34: 'Pitch_50',
 35: 'Pitch_51',
 36: 'Pitch_52',
 37: 'Pitch_53',
 38: 'Pitch_54',
 39: 'Pitch_55',
 40: 'Pitch_56',
 41: 'Pitch_57',
 42: 'Pitch_58',
 43: 'Pitch_59',
 44: 'Pitch_60',
 45: 'Pitch_61',
 46: 'Pitch_62',
 47: 'Pitch_63',
 48: 'Pitch_64',
 49: 'Pitch_65',
 50: 'Pitch_66',
 51: 'Pitch_67',
 52: 'Pitch_68',
 53: 'Pitch_69',
 54: 'Pitch_70',
 55: 'Pitch_71',
 56: 'Pitch_72',
 57: 'Pitch_73',
 58: 'Pitch_74',
 59: '

In [32]:
# For trained tokenizer with BPE/multi-vocab setup
id_to_token = {v: k for k, v in tokenizer._vocab_base.items()}  # try this first

# Fallback if that doesn't work
if not id_to_token:
    print('doesnt work')
    id_to_token = {v: k for k, v in tokenizer.vocab_bpe.items()}


In [33]:
len(id_to_token.keys())

898