In [11]:
from GPTmuseVAE import GPTmuseVAE

import os
import torch
from GPTmuseVAE import GPTmuseVAE
from miditok.pytorch_data import DatasetTok
from miditok import REMI
from torchtoolkit.data import create_subsets
from pathlib import Path
from utils import *
import pygame
from pygame import mixer

In [12]:
# Model hyperparameters (forwarded to the GPTmuseVAE constructor further down).
# NOTE(review): presumably these must match the values the checkpoint was
# trained with, otherwise loading its state dict will not line up — confirm.
n_embd = 64
n_head = 8
n_layer = 4
z_dim = 16        # size of the last axis of the sampled latents
block_size = 254  # what is the maximum context length for predictions?
dropout = 0.2

# Reproducibility: the notebook draws a random subset and samples from the
# model, so seed PyTorch's global RNG once, before any of that happens.
seed = 1337
torch.manual_seed(seed)

# Runtime configuration: use the GPU when one is available.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [13]:
# Tokenizer: REMI built from the saved parameter file; its length is used as
# the vocabulary size.
# NOTE(review): the config file is named "..._bpe.conf" while the token
# directory is named "..._no_bpe" — confirm these are meant to be paired.
tokenizer_config = Path('midi_dataset_tokenizer_bpe.conf')
tokenizer = REMI(params=tokenizer_config)
vocab_size = len(tokenizer)

# Every pre-tokenized JSON file below the token directory, at any depth.
# NOTE(review): glob order is filesystem-dependent, so the hard-coded dataset
# indices used later in this notebook may select different songs on another
# machine — consider sorting the paths.
tokens_root = Path('midi_dataset_tokens_no_bpe')
tokens_paths = list(tokens_root.glob("**/*.json"))

# Both length bounds are pinned to block_size so that target and prediction
# match the block size; labels come from get_artist_label.
dataset = DatasetTok(
    tokens_paths,
    min_seq_len=block_size,
    max_seq_len=block_size,
    one_token_stream=False,
    func_to_get_labels=get_artist_label,
)

Loading data: midi_dataset_tokens_no_bpe/midi_metal/Slayer:   0%|          | 0/511 [00:00<?, ?it/s]

Loading data: midi_dataset_tokens_no_bpe/midi_metal/Slayer: 100%|██████████| 511/511 [00:01<00:00, 447.60it/s]


In [14]:
# Instantiate the model from the hyperparameters defined earlier.
model_kwargs = dict(
    vocab_size=len(tokenizer),
    n_embd=n_embd,
    n_head=n_head,
    n_layer=n_layer,
    z_dim=z_dim,
    block_size=block_size,
    dropout=dropout,
)
model = GPTmuseVAE(**model_kwargs)

# Move the model to the selected device; `m` is kept as a second handle
# (for a torch nn.Module, .to() returns the module itself).
m = model.to(device)

# Report the parameter count in millions.
n_params = sum(p.numel() for p in m.parameters())
print(n_params / 1e6, 'M parameters')



0.481936 M parameters


In [15]:
# Restore the trained weights.
# map_location remaps the stored tensors onto the device chosen above, so a
# checkpoint saved on a GPU still loads on a CPU-only machine.
# NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
loaded_state_dict = torch.load('checkpoints/checkpoint_6500.pt', map_location=device)

# This notebook only runs inference: switch to eval mode so dropout is
# disabled during generation and latent sampling.
model.eval()

# Kept as the last expression so the cell still displays the key-matching report.
model.load_state_dict(loaded_state_dict['model_state_dict'])

<All keys matched successfully>

In [16]:
# Use one dataset sample as the generation prompt and add a leading batch axis
# (the shape printed below is (1, block_size)).
prompt_tokens = dataset[2000]['input_ids']
gen_seed = prompt_tokens[None, :]
print(gen_seed.shape)

torch.Size([1, 254])


In [17]:
# Continue the prompt by 128 tokens without any latent steering.
generated_sequence = model.generate(gen_seed, max_new_tokens=128)

# The first element holds the prompt plus the new tokens with the batch axis
# still present (see the printed shape), so `out` is a one-element nested list.
first_item = generated_sequence[0]
print(first_item.shape)
out = first_item.cpu().numpy().tolist()
print(len(out))

# Decode the token ids back to MIDI and write the file next to the notebook.
gen_midi = tokenizer.tokens_to_midi(out)
gen_midi.dump('musicGPT.mid')

torch.Size([1, 382])
1


In [18]:
# Play the exported MIDI file through pygame's music player.
pygame.mixer.init()
pygame.mixer.music.load("musicGPT.mid")
pygame.mixer.music.play()

In [19]:
# Stop the MIDI playback started in the previous cell.
pygame.mixer.music.stop()

# Latent-space (z) manipulation

In [20]:
# Split off a subset of the dataset using a 0.1 ratio and keep only the second
# returned subset; the first one is discarded.
# NOTE(review): presumably the first subset is the remaining 90% — confirm
# against torchtoolkit's create_subsets.
_ , small_data = create_subsets(dataset, [0.1])

In [21]:
# Collect the subset into a single tensor plus its per-sample labels.
# NOTE(review): at this point `z` is 2-D, (n_samples, block_size) per the
# shape shown below — presumably still token ids rather than latents, despite
# the name; confirm against process_dataset_for_z.
z, labels = process_dataset_for_z(small_data)

z.shape

torch.Size([2834, 254])

In [22]:
# Map the collected batch to latent samples, overwriting `z` in place.
# Guard on the rank so that re-running this cell does not feed the
# already-sampled 3-D latents back into the model: the input is 2-D
# (n_samples, block_size) and the result is 3-D (n_samples, block_size, z_dim).
# no_grad: this is inference only, so skip building an autograd graph for the
# whole subset.
if z.dim() == 2:
    with torch.no_grad():
        z = model.sample_latent(z)

In [24]:
# Sanity check: the latents now carry a trailing axis of size z_dim.
z.shape

torch.Size([2834, 254, 16])

In [25]:
# Build a mapping from label name to a latent "pointer" using the sampled
# latents and their labels.
# NOTE(review): presumably one direction/mean per artist — confirm in
# calculate_feature_pointers.
pointer_dict = calculate_feature_pointers(z,labels)

In [26]:
# Show which labels have a pointer available.
pointer_dict.keys()

dict_keys(['Carcass', 'Bach', 'Dvorak', 'Brahms', 'Megadeth', 'Beethoven', 'Ravel', 'Mozart', 'Black_sabath', 'Judas Priest', 'Slayer', 'Schubert', 'Ozzy Osbourne', 'midi_pop_songs', 'Pantera', 'Cambini', 'Faure', 'Type O Negative', 'Children Of Bodom', 'Sepultura', 'Haydn'])

In [86]:
# Configuration for latent-steered generation.
song_flag = 450                   # dataset index of the seed sample
input_block_size = block_size     # prompt length handed to generate()
max_new_tokens = 256              # number of tokens to generate
magnitude = 1                     # NOTE(review): presumably scales how strongly the pointer is applied — confirm in GPTmuseVAE.generate
pointer = pointer_dict['Slayer']  # latent pointer to steer with

# Show which artist the seed sample itself is labelled with.
print(
    decode_artist_label(
        dataset[song_flag]['labels'],
        get_artist_label.artist_id_mapping,
    )
)

Slayer


In [88]:
# Seed sample with a leading batch axis: shape (1, seq_len).
gen_seed = dataset[song_flag]['input_ids'].unsqueeze(0)
print(gen_seed.shape)

# Truncate the prompt along the sequence axis. The previous
# `gen_seed[:input_block_size]` sliced the batch axis (size 1), so
# input_block_size never shortened the prompt.
generated_sequence = model.generate(
    gen_seed[:, :input_block_size],
    max_new_tokens=max_new_tokens,
    latent_vector=pointer,
    magnitude=magnitude,
)

# Decode the first returned element back to MIDI and write it to disk.
out = generated_sequence[0].cpu().numpy().tolist()
gen_midi = tokenizer.tokens_to_midi(out)
gen_midi.dump('musicGPT_latent.mid')

torch.Size([1, 254])


In [89]:
# Play the latent-steered MIDI file through pygame's music player.
pygame.mixer.init()
pygame.mixer.music.load("musicGPT_latent.mid")
pygame.mixer.music.play()

In [None]:
# Stop the MIDI playback started in the previous cell.
pygame.mixer.music.stop()