In [4]:
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pickle
import gc
from collections import Counter
from torch.utils.data import Dataset
import torch.optim as optim 




import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset


from sklearn.preprocessing import StandardScaler

from Preprocessing import *
#from ExtractGenre import *
from CNN_ExtractGenre import *
from PolyphonicPreprocessing import *
import Util as Util

import DatasetLoader as DL
import Model as M

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
class Generator(nn.Module):
    """Conditional bar generator (DCGAN / MidiNet-style).

    Two fully connected layers followed by four transposed convolutions turn a
    noise vector into one bar. Every stage is conditioned on

    * a 1-D condition vector (``cond_1d``), re-injected before each layer, and
    * feature maps that a small convolutional "conditioner" extracts from the
      previous bar (``prev_bar``), injected at the matching resolution.

    Submodule names and the layout of each ``nn.Sequential`` are part of the
    checkpoint format (state-dict keys such as ``ff1.0.weight``), so they must
    not be renamed.

    Args:
        input_size: length of the noise vector ``z``.
        cond_1d_size: length of the 1-D condition vector.
        instrument_size: number of instrument channels in a bar (input of the
            conditioner and output of the generator).
        n_hlayers: number of feature maps in the transposed-convolution stack.
        n_2dhlayers: number of feature maps produced by each conditioner layer.
            Defaults to 16, the value that used to be hard-coded.
    """

    def __init__(self, input_size, cond_1d_size, instrument_size=1, n_hlayers=128, n_2dhlayers=16):
        super().__init__()

        self.input_size = input_size
        self.instrument_size = instrument_size
        self.cond1d_dim = cond_1d_size
        self.n_hlayers = n_hlayers
        self.n_2dhlayers = n_2dhlayers

        # Channels entering every transposed convolution: generator features
        # + tiled 1-D condition + conditioner features of the previous bar.
        cat_channels = n_hlayers + cond_1d_size + n_2dhlayers

        # Generator layers.
        # As in the DCGAN paper: ReLU and batch norm everywhere in the
        # generator except for the last layer.
        self.ff1 = nn.Sequential(
            nn.Linear(input_size + cond_1d_size, 1024),              # [batch, 1024]
            nn.BatchNorm1d(1024),
            nn.ReLU()
            )
        self.ff2 = nn.Sequential(
            nn.Linear(1024 + cond_1d_size, n_hlayers * 2),           # [batch, n_hlayers*2]
            nn.BatchNorm1d(n_hlayers * 2),
            nn.ReLU()
            )
        # forward() reshapes to [batch, n_hlayers, 1, 2] and concatenates the
        # conditions -> [batch, cat_channels, 1, 2]
        self.cnn1 = self._upsample_block(cat_channels, n_hlayers)    # [batch, n_hlayers, 1, 4]
        self.cnn2 = self._upsample_block(cat_channels, n_hlayers)    # [batch, n_hlayers, 1, 8]
        self.cnn3 = self._upsample_block(cat_channels, n_hlayers)    # [batch, n_hlayers, 1, 16]
        self.cnn4 = nn.Sequential(
            # Expands the pitch axis from 1 to 128 rows.
            nn.ConvTranspose2d(cat_channels, instrument_size, kernel_size=(128, 1), stride=(2, 1), bias=False, padding=0),  # [batch, instrument_size, 128, 16]
            # Sigmoid because the target matrices carry no velocity, i.e. only
            # values in (0, 1): a smooth version of a step function.
            nn.Sigmoid()
            )

        # Conditioner layers.
        # As in the MidiNet model, the conditioner uses leaky ReLU.
        # Note: the original paper uses leak=0.2, PyTorch's default is 0.01.
        self.h0_prev = self._conditioner_block(instrument_size, n_2dhlayers, kernel_size=(128, 1), stride=(2, 1))  # [batch, n_2dhlayers, 1, 16]
        self.h1_prev = self._conditioner_block(n_2dhlayers, n_2dhlayers, kernel_size=(1, 2), stride=(2, 2))        # [batch, n_2dhlayers, 1, 8]
        self.h2_prev = self._conditioner_block(n_2dhlayers, n_2dhlayers, kernel_size=(1, 2), stride=(2, 2))        # [batch, n_2dhlayers, 1, 4]
        self.h3_prev = self._conditioner_block(n_2dhlayers, n_2dhlayers, kernel_size=(1, 2), stride=(2, 2))        # [batch, n_2dhlayers, 1, 2]

    @staticmethod
    def _upsample_block(in_channels, out_channels):
        """Transposed conv that doubles the time axis, then batch norm + ReLU."""
        return nn.Sequential(
            nn.ConvTranspose2d(in_channels, out_channels, kernel_size=(1, 2), stride=(2, 2), bias=False, padding=0),
            nn.BatchNorm2d(out_channels),
            nn.ReLU()
            )

    @staticmethod
    def _conditioner_block(in_channels, out_channels, kernel_size, stride):
        """Strided conv + batch norm + leaky ReLU used to encode the previous bar."""
        return nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, stride=stride),
            nn.BatchNorm2d(out_channels),
            nn.LeakyReLU()
            )

    def forward(self, z, prev_bar, cond_1d, batch_size):
        """Generate one bar.

        Args:
            z: noise, ``[batch, input_size]``.
            prev_bar: previous bar, ``[batch, instrument_size, 128, 16]``
                (the layer sizes above are laid out for this shape).
            cond_1d: 1-D condition, ``[batch, cond_1d_size]``.
            batch_size: number of samples in the batch; used for the reshape
                between the dense and the convolutional part.

        Returns:
            Tensor ``[batch, instrument_size, 128, 16]`` with values in (0, 1).

        Note:
            ``conv_cond_concat`` and ``conv_prev_concat`` are project helpers
            defined outside this class; judging by the layer sizes they append
            the 1-D condition / the conditioner maps on the channel axis.
        """
        # 2-D condition: previous bar encoded at every generator resolution.
        cond0 = self.h0_prev(prev_bar)          # [batch, n_2dhlayers, 1, 16]
        cond1 = self.h1_prev(cond0)             # [batch, n_2dhlayers, 1, 8]
        cond2 = self.h2_prev(cond1)             # [batch, n_2dhlayers, 1, 4]
        cond3 = self.h3_prev(cond2)             # [batch, n_2dhlayers, 1, 2]

        # `latent` instead of `input`, which would shadow the builtin.
        latent = torch.cat((z, cond_1d), dim=1)     # [batch, input_size+cond_1d_size]

        h0 = self.ff1(latent)                       # [batch, 1024]
        h0 = torch.cat((h0, cond_1d), dim=1)        # [batch, 1024+cond_1d_size]

        h1 = self.ff2(h0)                           # [batch, n_hlayers*2]
        # Use the configured width: a hard-coded 128 only works for the default.
        h1 = h1.reshape(batch_size, self.n_hlayers, 1, 2)   # [batch, n_hlayers, 1, 2]
        h1 = conv_cond_concat(h1, cond_1d)          # [batch, n_hlayers+cond_1d_size, 1, 2]
        h1 = conv_prev_concat(h1, cond3)            # [batch, n_hlayers+cond_1d_size+n_2dhlayers, 1, 2]

        h2 = self.cnn1(h1)                          # [batch, n_hlayers, 1, 4]
        h2 = conv_cond_concat(h2, cond_1d)
        h2 = conv_prev_concat(h2, cond2)            # [batch, n_hlayers+cond_1d_size+n_2dhlayers, 1, 4]

        h3 = self.cnn2(h2)                          # [batch, n_hlayers, 1, 8]
        h3 = conv_cond_concat(h3, cond_1d)
        h3 = conv_prev_concat(h3, cond1)            # [batch, n_hlayers+cond_1d_size+n_2dhlayers, 1, 8]

        h4 = self.cnn3(h3)                          # [batch, n_hlayers, 1, 16]
        h4 = conv_cond_concat(h4, cond_1d)
        h4 = conv_prev_concat(h4, cond0)            # [batch, n_hlayers+cond_1d_size+n_2dhlayers, 1, 16]

        out = self.cnn4(h4)                         # [batch, instrument_size, 128, 16]

        return out

In [12]:
# Select the compute device once; later cells read the global `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('GPU available' if device.type == "cuda" else 'GPU not available')


def LoadModel(Cond1D_Size, instrumentSize, Which):
    """Build a Generator and load trained weights from ``ModelParameters/``.

    Args:
        Cond1D_Size: length of the 1-D condition vector (``cond_1d_size``).
        instrumentSize: number of instrument channels (``instrument_size``).
        Which: index of the checkpoint to load:
            0 -> ``generator_parameters.torch``,
            1 -> ``Tradgenerator_parameters.torch``.

    Returns:
        The Generator, moved to the global ``device``, with the checkpoint
        weights loaded. The module's train/eval mode is not changed here.

    Raises:
        IndexError: if ``Which`` does not index the checkpoint table.
        FileNotFoundError: if the checkpoint file does not exist.
    """
    # Local import: the notebook's import cell never imports `os` explicitly,
    # so relying on it leaking out of a star import is fragile.
    import os

    Path = os.path.realpath('ModelParameters')

    parameter_files = [
        'generator_parameters.torch',
        'Tradgenerator_parameters.torch',
    ]
    checkpoint = os.path.join(Path, parameter_files[Which])

    generator = Generator(input_size=256, cond_1d_size=Cond1D_Size, instrument_size=instrumentSize, n_hlayers=256, n_2dhlayers=16)
    # Kept so the torch RNG stream matches earlier runs; every value set here
    # is overwritten by load_state_dict below.
    generator.apply(M.weights_init)
    generator.to(device)

    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # from a trusted source.
    generator_state_dict = torch.load(checkpoint, map_location=torch.device('cpu'))
    # load_state_dict copies the values into the parameters already on `device`.
    generator.load_state_dict(generator_state_dict)

    # The optimizer state is deliberately not loaded: this function returns the
    # generator only, so an Adam instance restored here was discarded right away
    # and made loading fail whenever the optimizer file was missing.
    return generator

                                                    #Monophonic or polyphonic
# `Which` indexes the checkpoint table inside LoadModel:
# 0 -> generator_parameters.torch, 1 -> Tradgenerator_parameters.torch.
# NOTE(review): presumably 0 is the monophonic and 1 the polyphonic model — confirm.
# Cond1D_Size=5 matches the condition built later by NormCond: one tempo value
# plus one entry per program, with four programs per sample in the printed output.
generator = LoadModel(Cond1D_Size=5, instrumentSize=4, Which=1)

GPU not available


In [13]:
# Build the polyphonic dataset with the project's PolyphonicPreProcessing helper.
# Later cells index the result as PolyDataset[genre][i]['Bars' | 'Program' | 'Tempo'][0].
# NOTE(review): nDir presumably limits how many dataset directories are processed
# (the progress bar reports 50 items) — confirm against PolyphonicPreprocessing.
PolyDataset = PolyphonicPreProcessing(nDir = 50)

Preprocessing: 100%|██████████| 50/50 [00:09<00:00,  5.04it/s]


In [15]:
TEMPO_MIN, TEMPO_MAX = 60, 200      # reference tempo range used for scaling
PROGRAM_MIN, PROGRAM_MAX = 1, 128   # reference MIDI program range used for scaling


def NormCond(tempo, programs):
    """Build the 1-D condition: scaled tempo followed by the scaled programs.

    Each value is min-max scaled against the module-level ranges, so values
    inside the range land in [0, 1]. Nothing is clamped: inputs outside the
    range map outside [0, 1].

    Args:
        tempo: tempo value, scaled with TEMPO_MIN / TEMPO_MAX.
        programs: iterable of program numbers, scaled with
            PROGRAM_MIN / PROGRAM_MAX.

    Returns:
        A list ``[tempo_scaled, program_0_scaled, program_1_scaled, ...]``.
    """
    tempo_span = TEMPO_MAX - TEMPO_MIN
    program_span = PROGRAM_MAX - PROGRAM_MIN

    condition = [(tempo - TEMPO_MIN) / tempo_span]
    for program in programs:
        condition.append((program - PROGRAM_MIN) / program_span)

    return condition

In [22]:
# Generate a short polyphonic piece bar by bar and write it to MIDI.
generator.eval()    # inference mode: batch norm uses its running statistics

N_GENERATED_BARS = 8
NOISE_SIZE = 256        # must equal the generator's input_size (see LoadModel)
NOTE_THRESHOLD = 0.77   # sigmoid outputs above this value count as "note on"

Genre = 'rock'
bar = np.random.randint(0, 100)

prev_bar = PolyDataset[Genre][bar]['Bars'][0].to_dense().float().to(device)
InstrumentCode = PolyDataset[Genre][bar]['Program'][0]
Tempo = PolyDataset[Genre][bar]['Tempo'][0]

# Create the condition on the same device as the noise and the previous bar;
# leaving it on the CPU raises a device-mismatch error when a GPU is used.
cond_1d = torch.tensor([NormCond(Tempo, InstrumentCode)], dtype=torch.float32, device=device)

# Keep every bar as a NumPy array so np.concatenate never receives a
# (possibly CUDA) torch tensor mixed with arrays.
Bars = [prev_bar.cpu().numpy()]

# If polyphonic only 1 unsqueeze (adds the batch axis); a monophonic bar
# would need a second unsqueeze for the channel axis.
prev_bar = prev_bar.unsqueeze(0)#.unsqueeze(0)

for i in range(N_GENERATED_BARS):
    noise = torch.rand([1, NOISE_SIZE], device=device)

    with torch.no_grad():
        generated_bar = generator(noise, prev_bar, cond_1d, 1)

    binary_bar = (generated_bar > NOTE_THRESHOLD).float()   # still a tensor
    Bars.append(binary_bar.squeeze(0).cpu().numpy())        # only now for MIDI
    print(generated_bar.mean().item(), generated_bar.std().item())

    # Alternate the conditioning source: after even steps feed the generated
    # bar back in, after odd steps re-anchor on a bar taken from the dataset.
    if i % 2 == 0:
        prev_bar = binary_bar.detach()
    else:
        prev_bar = PolyDataset[Genre][bar + i]['Bars'][0].to_dense().float().to(device).unsqueeze(0)

# Monophonic variant: ConcBars = np.concatenate(Bars, axis = 1)
PolyConcBars = np.concatenate(Bars, axis=2)     # join the bars along the last axis

PolyBarsToMIDI(PolyConcBars, title='Polytest', Instrument=InstrumentCode)

print(InstrumentCode)


0.03683403506875038 0.1705053746700287
0.056664980947971344 0.21078908443450928
0.0657743364572525 0.22884739935398102
0.08248093724250793 0.2581101357936859
0.05227778106927872 0.20397546887397766
0.03434033691883087 0.1685989797115326
0.04756946116685867 0.19547688961029053
0.03672681748867035 0.17458079755306244
[25, 25, 57, 89]
