Import libraries:

In [1]:
import torch
import numpy as np
import os
from torch.utils.data import Subset
from torch.utils.data import random_split
from torch_geometric.loader import DataLoader
from torch_geometric.data import Batch
import random
from matplotlib import pyplot as plt
from utils import set_seed

from data import MIDIDataset, graph_from_tensor, graph_from_tensor_torch
from model import VAE
from utils import plot_struct, dense_from_sparse, dense_from_sparse_torch, muspy_from_dense, muspy_from_dense_torch
from utils import plot_pianoroll, midi_from_muspy
from train import VAETrainer

Set global seed:

In [2]:
seed = 42
set_seed(seed)

Load model:

In [3]:
models_dir = 'models/'
model = 'LMD2'
gpu = True
device_idx = 3

checkpoint = torch.load(os.path.join(models_dir, model, 'checkpoint'), map_location='cpu')
state_dict = checkpoint['model_state_dict']
params = torch.load(os.path.join(models_dir, model, 'params'), map_location='cpu')

In [4]:
checkpoint.keys()

dict_keys(['epoch', 'batch', 'tot_batches', 'betas', 'min_val_loss', 'print_every', 'save_every', 'eval_every', 'lrs', 'tr_losses', 'tr_accuracies', 'val_losses', 'val_accuracies', 'model_state_dict', 'optimizer_state_dict'])

In [None]:
def print_decoder_keys(state_dict):
    i = 0

    for k in state_dict.keys():
        if 'decoder.' in k:
            print(k)
            i += 1

    print(i)

In [None]:
from collections import OrderedDict

model_dir = './models/2barsGNNDEFrefactoring'
model_name = 'prova'

renaming_dict = {
    "decoder.lin_divide.": "decoder.lin_decoder.",
    "decoder.bn_ld.": "decoder.batch_norm.",
    "decoder.bars_decoder_attr.": "decoder.c_decoder.bars_decoder.",
    "decoder.bars_decoder_struct.": "decoder.s_decoder.bars_decoder.",
    "decoder.cnn_decoder.": "decoder.s_decoder.cnn_decoder.",
    "decoder.graph_decoder.": "decoder.c_decoder.graph_decoder.",
    "decoder.chord_decoder.": "decoder.c_decoder.chord_decoder.",
    "decoder.drums_pitch_emb.": "decoder.c_decoder.drums_pitch_emb.",
    "decoder.notes_pitch_emb.": "decoder.c_decoder.non_drums_pitch_emb.",
    "decoder.dur_emb.": "decoder.c_decoder.dur_emb."
}

new_state_dict = OrderedDict()

for old_key, value in state_dict.items():
    new_key = old_key
    for old_sub_str, new_sub_str in renaming_dict.items():
        new_key = new_key.replace(old_sub_str, new_sub_str)  # Replace the old substrings with the new ones
    new_state_dict[new_key] = value

print_decoder_keys(new_state_dict)

 
checkpoint['model_state_dict'] = new_state_dict
#print_decoder_keys(checkpoint['model_state_dict'])
#state_dict = checkpoint['model_state_dict']

path = os.path.join(model_dir, model_name)
torch.save(checkpoint, path)

In [5]:
torch.cuda.set_device(device_idx)

device = torch.device("cuda") if gpu else torch.device("cpu")
print("Device:", device)
print("Device idx:", torch.cuda.current_device())

Device: cuda
Device idx: 3


In [6]:
params

{'training': {'batch_size': 256,
  'num_workers': 4,
  'ds_len': 6813946,
  'tr_len': 4769762,
  'vl_len': 681394,
  'ts_len': 1362790},
 'model': {'dropout': 0,
  'batch_norm': True,
  'gnn_n_layers': 8,
  'actsnn_n_layers': 2,
  'd': 512,
  'rnn_n_layers': 1,
  'k_isgn': 3,
  'd_token': 230,
  'd_token_pitches': 131,
  'd_token_dur': 99,
  'n_bars': 2,
  'n_relations': 6,
  'n_tracks': 4,
  'resolution': 8,
  'max_simu_notes': 16},
 'scheduler': {'peak_lr': 0.0001,
  'final_lr_scale': 0.01,
  'warmup_steps': 8000,
  'decay_steps': 800000},
 'optimizer': {'betas': (0.9, 0.98), 'eps': 1e-09, 'lr': 5e-06},
 'beta_annealing': {'beta_update': True,
  'anneal_start': 40000,
  'beta_max': 0.01,
  'step_size': 0.001,
  'anneal_end': 500000}}

In [7]:
vae = VAE(**params['model'], device=device).to(device)
vae.load_state_dict(state_dict)
vae.eval()

VAE(
  (encoder): Encoder(
    (dropout_layer): Dropout(p=0, inplace=False)
    (notes_pitch_emb): Linear(in_features=131, out_features=256, bias=True)
    (drums_pitch_emb): Linear(in_features=131, out_features=256, bias=True)
    (dur_emb): Linear(in_features=99, out_features=256, bias=True)
    (bn_npe): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn_dpe): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (bn_de): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (chord_encoder): Linear(in_features=7680, out_features=512, bias=True)
    (graph_encoder): GCN(
      (layers): ModuleList(
        (0): RGCNConv(512, 512, num_relations=6)
        (1): RGCNConv(512, 512, num_relations=6)
        (2): RGCNConv(512, 512, num_relations=6)
        (3): RGCNConv(512, 512, num_relations=6)
        (4): RGCNConv(512, 512, num_relations=6)
        (5): RGCNConv(512, 512, num_relations=6

## Generation
1. Sample z from normal.
2. Create and sample structure.
3. Create graph from sampled structure.
4. Generate content.
5. Put structure and content together and create muspy music.

In [None]:
def generate_music(vae, z):
    
    # Get structure and content logits
    with torch.cuda.amp.autocast():
        _, c_logits, s_tensor = vae.decoder(z)
    
    # Build (n_batches x n_bars x n_tracks x n_timesteps x Sigma x d_token)
    # multitrack pianoroll tensor containing logits for each activation and
    # hard silences elsewhere
    mtp = dense_from_sparse_torch(c_logits, s_tensor)
    
    # Collapse bars dimension
    mtp = mtp.permute(0, 2, 1, 3, 4, 5)
    size = (mtp.shape[0], mtp.shape[1], -1, mtp.shape[4], mtp.shape[5])
    mtp = mtp.reshape(*size)
    
    return mtp, s_tensor

Sample $\boldsymbol{z}$ from normal:

In [None]:
bs = params['training']['batch_size']
d_model = params['model']['d']
shape = (bs, d_model)

z_norm = torch.normal(
    torch.zeros(shape, device=device),
    torch.ones(shape, device=device)
)

In [None]:
# 2 bars: 24 26 31 200 202 204
# 16 bars: 4
same = False
idx = 26

if same:
    z = z_norm[idx].repeat(params['training']['batch_size'], 1)
else:
    z = z_norm

In [None]:
import matplotlib as mpl

start_folder_idx = 0
start_z_idx = 0
n_songs = 10
extend = True
root_dir = "tmpmusic/gen"
track_data = [('Drums', -1), ('Bass', 34), ('Guitar', 1), ('Strings', 83)]

n_bars = params['model']['n_bars']
root = os.path.join(root_dir, str(n_bars))

# Generate music with the model
print("Generating bars...")
music, s = generate_music(vae, z)

# Iterate over the generated n-bar sequences
for i in range(start_z_idx, start_z_idx + n_songs):
    
    # Create the directory if it does not exist
    save_dir = os.path.join(root, str(start_folder_idx + i))
    os.makedirs(save_dir, exist_ok=True)

    print("Generating midi sequence " + str(i+1) + "...")
    
    # Generate muspy song from dense representation
    muspy_song = muspy_from_dense_torch(music[i], track_data, params['model']['resolution'])

    # Generate and save midi data from muspy song
    midi_from_muspy(muspy_song, save_dir, name='music')
    
    # Plot the pianoroll associated to the sequence
    preset = 'full'
    with mpl.rc_context({'lines.linewidth': 4, 'axes.linewidth': 4, 'font.size': 34}):
        plot_pianoroll(muspy_song, save_dir, name='pianoroll',
                       figsize=(20, 10), fformat='png',
                       preset=preset)
        s_curr = s[i]
        s_curr = s_curr.permute(1, 0, 2)
        s_curr = s_curr.reshape(s_curr.shape[0], -1)
        with mpl.rc_context({'lines.linewidth': 1, 'axes.linewidth': 1, 'font.size': 14}):
            plot_struct(s_curr.cpu(), name='structure', save_dir=save_dir, figsize=(12, 3))

    if extend:
        # Generate extended sequence
        print("Generating extended (looped) midi sequence " + str(i+1) + "...")
        extended = music[i].repeat(1, 4, 1, 1)
        extended = muspy_from_dense_torch(extended, track_data, params['model']['resolution'])
        midi_from_muspy(extended, save_dir, name='extended')

print("Finished.")

In [None]:
def generate_music(vae, z, S=0.5, T=0.001):
    
    # Get structure and sample
    s = vae.decoder.forward_struct(z)
    s = torch.sigmoid(s)
    
    # Hard threshold instead of sampling gives more pleasant results
    s[s >= S] = 1
    s[s < S] = 0

    s = s.detach().cpu().numpy()
    
    graphs = [0 for _ in range(params['training']['batch_size'])]

    # Create graph structures for each input in the batch
    for i in range(s.shape[0]):
        graphs[i] = graph_from_tensor(s[i])

    # Create batch from graphs
    graphs = Batch.from_data_list(graphs, exclude_keys=['batch'])
    graphs = graphs.to(device)
    
    # Get content from z and structure
    with torch.cuda.amp.autocast():
        c = vae.decoder.forward_content(z, graphs)

    c = c.detach().cpu().numpy()
    
    # Compute dense representation (pianoroll with silences)
    dense = dense_from_sparse(c, s)
    
    # Collapse bars dimension
    dense = np.transpose(dense, (0, 2, 1, 3, 4, 5))

    size = (
        dense.shape[0],
        dense.shape[1],
        -1,
        dense.shape[4],
        dense.shape[5]
    )

    dense = dense.reshape(size)
    
    return dense, s

In [None]:
import muspy

def plot_pianoroll(music, save_dir=None, name=None, figsize=(10, 10),
                   fformat="png", xticklabel='on', preset='full', **kwargs):

    fig, axs_ = plt.subplots(4, sharex=True, figsize=figsize)
    fig.subplots_adjust(hspace=0)
    axs = axs_.tolist()
    muspy.show_pianoroll(music=music, yticklabel='off',
                         xticklabel=xticklabel, grid_axis='off',
                         axs=axs, preset=preset)
    
    if save_dir:
        plt.savefig(os.path.join(save_dir, name+"."+fformat), format=fformat, dpi=200)

In [None]:
import matplotlib as mpl

start_folder_idx = 0
start_z_idx = 0
n_songs = 10
extend = True
root_dir = "tmpmusic/gen"
track_data = [('Drums', -1), ('Bass', 34), ('Guitar', 1), ('Strings', 83)]

n_bars = params['model']['n_bars']
root = os.path.join(root_dir, str(n_bars))

# Generate music with the model
print("Generating bars...")
music, s = generate_music(vae, z, S=0.5, T=0.001)

# Iterate over the generated n-bar sequences
for i in range(start_z_idx, start_z_idx + n_songs):
    
    # Create the directory if it does not exist
    save_dir = os.path.join(root, str(start_folder_idx + i))
    os.makedirs(save_dir, exist_ok=True)

    print("Generating midi sequence " + str(i+1) + "...")
    
    # Generate muspy song from dense representation
    muspy_song = muspy_from_dense(music[i], track_data, params['model']['resolution'])

    # Generate and save midi data from muspy song
    midi_from_muspy(muspy_song, save_dir, name='music')
    
    # Plot the pianoroll associated to the sequence
    preset = 'full'
    with mpl.rc_context({'lines.linewidth': 4, 'axes.linewidth': 4, 'font.size': 34}):
        plot_pianoroll(muspy_song, save_dir, name='pianoroll',
                       figsize=(20, 10), fformat='png',
                       preset=preset)
    
    # Plot the structure tensor
    if n_bars == 2:
        s_curr = s[i]
        s_curr = np.transpose(s_curr, (1, 0, 2))
        s_curr = s_curr.reshape(s_curr.shape[0], -1)
        with mpl.rc_context({'lines.linewidth': 1, 'axes.linewidth': 1, 'font.size': 14}):
            plot_struct(s_curr, name='structure', save_dir=save_dir, figsize=(12, 3))

    if extend:
        # Generate extended sequence
        print("Generating extended (looped) midi sequence " + str(i+1) + "...")
        extended = np.tile(music[i], (1, 4, 1, 1))
        extended = muspy_from_dense(extended, track_data, params['model']['resolution'])
        midi_from_muspy(extended, save_dir, name='extended')

print("Finished.")

In [None]:
def generate():
    # make sure struct tensor is not zero! or maybe we can let internal methods fix that
    pass

def save():
    # make sure struct tensor is not zero! or maybe we can let internal methods fix that
    pass

In [8]:
def generate_music(vae, z, s_cond=None, s_tensor_cond=None):
    
    # Get structure and content logits
    with torch.cuda.amp.autocast():
        _, c_logits, s_tensor_out = vae.decoder(z, s_cond)
    
    s_tensor = s_tensor_cond if s_tensor_cond != None else s_tensor_out
    
    # Build (n_batches x n_bars x n_tracks x n_timesteps x Sigma x d_token)
    # multitrack pianoroll tensor containing logits for each activation and
    # hard silences elsewhere
    mtp = dense_from_sparse_torch(c_logits, s_tensor)
    
    # Collapse bars dimension
    mtp = mtp.permute(0, 2, 1, 3, 4, 5)
    size = (mtp.shape[0], mtp.shape[1], -1, mtp.shape[4], mtp.shape[5])
    mtp = mtp.reshape(*size)
    
    return mtp, s_tensor

In [9]:
import matplotlib as mpl


def save(mtp, dir, s_tensor=None, track_data=None):

    start_folder_idx = 0
    start_z_idx = 0
    n_songs = 10
    extend = True
    track_data = ([('Drums', -1), ('Bass', 34), ('Guitar', 1), ('Strings', 83)]
                  if track_data == None else track_data)

    n_bars = params['model']['n_bars']

    # Iterate over the generated n-bar sequences
    for i in range(start_z_idx, start_z_idx + n_songs):
        
        # Create the directory if it does not exist
        save_dir = os.path.join(dir, str(start_folder_idx + i))
        os.makedirs(save_dir, exist_ok=True)

        print("Saving midi sequence " + str(i+1) + "...")
        
        # Generate muspy song from multitrack pianoroll, then midi from muspy
        # and save
        muspy_song = muspy_from_dense_torch(mtp[i], track_data, 
                                            params['model']['resolution'])
        midi_from_muspy(muspy_song, save_dir, name='music')
        
        # Plot the pianoroll associated to the sequence
        preset = 'full'
        with mpl.rc_context({'lines.linewidth': 4, 
                             'axes.linewidth': 4, 'font.size': 34}):
            plot_pianoroll(muspy_song, save_dir, name='pianoroll',
                           figsize=(20, 10), fformat='png', preset=preset)
        
        # Plot structure_tensor if present
        if s_tensor != None:
            s_curr = s_tensor[i]
            s_curr = s_curr.permute(1, 0, 2)
            s_curr = s_curr.reshape(s_curr.shape[0], -1)
            with mpl.rc_context({'lines.linewidth': 1, 
                                 'axes.linewidth': 1, 'font.size': 14}):
                plot_struct(s_curr.cpu(), name='structure', 
                            save_dir=save_dir, figsize=(12, 3))

        if extend:
            # Generate extended sequence
            print("Saving extended (looped) midi sequence " + str(i+1) + "...")
            extended = mtp[i].repeat(1, 4, 1, 1)
            extended = muspy_from_dense_torch(extended, track_data, 
                                              params['model']['resolution'])
            midi_from_muspy(extended, save_dir, name='extended')
        
        print()

    print("Finished.")

In [10]:
def generate_z(bs, d_model):
    bs = params['training']['batch_size']
    d_model = params['model']['d']
    shape = (bs, d_model)

    z_norm = torch.normal(
        torch.zeros(shape, device=device),
        torch.ones(shape, device=device)
    )
    
    return z_norm
    

In [11]:
bs = params['training']['batch_size']
d_model = params['model']['d']
n_bars = 2
n_tracks = 4
n_timesteps = 32
n_sequences = 10
dir = 'tmpmusic/'

track_data = [('Drums', -1), ('Bass', 34), ('Guitar', 1), ('Strings', 83)]

# Test structure tensor
s_tensor = torch.zeros(n_bars, n_tracks, n_timesteps)
s_tensor[:, :, ::2] = 1
s_tensor = s_tensor.bool()
s_tensor = s_tensor.unsqueeze(0).repeat(bs, 1, 1, 1)
s_tensor.to(device)

z = generate_z(bs, d_model)
s = vae.decoder._structure_from_binary(s_tensor)
s.to(device)
mtp, s_tensor = generate_music(vae, z, s, s_tensor)
save(mtp, dir, s_tensor, track_data)

RuntimeError: CUDA out of memory. Tried to allocate 64.00 MiB (GPU 3; 15.75 GiB total capacity; 14.32 GiB already allocated; 9.56 MiB free; 14.52 GiB reserved in total by PyTorch)