In [3]:
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn #neural network module
import torch.optim as optim #optimizer
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

import time as time
from tqdm import tqdm

import pickle
import gc
import os





In [4]:
def conv_prev_concat(x, y):
    """Concatenate a conditioning tensor onto a feature map along the channel axis.

    Args:
        x: feature map, indexed as [batch, n_features, a, b].
        y: conditioning tensor, indexed as [batch, n_cond, a, b].

    Returns:
        ``torch.cat((x, y), dim=1)``, i.e. a tensor [batch, n_features + n_cond, a, b].

    Raises:
        ValueError: if either argument is ``None`` or if the trailing
            (spatial) dimensions of ``x`` and ``y`` differ.
    """
    if x is None or y is None:
        raise ValueError("x or prev_bar is None")

    x_shapes = x.shape  #[batch,n_features,a,b]
    y_shapes = y.shape  #[batch,n_cond,a,b]

    # Fail loudly instead of printing and returning None: a None result would
    # only surface later as an unrelated error inside the next layer.
    if x_shapes[2:] != y_shapes[2:]:
        raise ValueError(
            "spatial dimensions differ: x has {} but y has {}".format(
                tuple(x_shapes[2:]), tuple(y_shapes[2:])
            )
        )

    return torch.cat((x, y), dim=1)                                    #[batch,n_features+n_cond,a,b]


In [5]:
class Generator(nn.Module):
    """Conditional generator producing one bar from noise and the previous bar.

    The noise vector passes through two fully connected layers, is reshaped to
    a ``[batch, n_hlayers, 1, 2]`` feature map and is then upsampled by four
    transposed convolutions. Before each transposed convolution the feature
    map is concatenated on the channel axis with a 16-channel encoding of
    ``prev_bar`` produced by the conditioner stack ``h0_prev`` .. ``h3_prev``.

    Shape comments assume ``prev_bar`` is ``[batch, instrument_size, 128, 16]``.

    Args:
        input_size: length of the noise vector ``z``.
        instrument_size: number of channels of the input/output bars.
        n_hlayers: number of feature maps in the hidden convolutional layers.
    """

    def __init__(self, input_size, instrument_size=1, n_hlayers=128):
        super().__init__()

        self.input_size = input_size
        self.instrument_size = instrument_size
        # Kept so that forward() can reshape to the configured width instead
        # of a hard-coded 128.
        self.n_hlayers = n_hlayers

        #generator layers
        #as said in the DCGAN paper: always ReLU activation in the generator, except for the last layer
        #as said in the DCGAN paper: always batchnorm in the generator, except for the last layer
        self.ff1 = nn.Sequential(
            nn.Linear(input_size, 1024),                                                               #[batch,1024]
            nn.BatchNorm1d(1024),
            nn.ReLU()
            )
        self.ff2 = nn.Sequential(
            nn.Linear(1024,n_hlayers*2),                                                               #[batch,n_hlayers*2]
            nn.BatchNorm1d(n_hlayers*2),
            nn.ReLU()
            )
        #reshape to [batch,n_hlayers,1,2]
        #+condition 2d [batch,n_hlayers+16,1,2]
        self.cnn1 = nn.Sequential(
            nn.ConvTranspose2d(n_hlayers+16, n_hlayers, kernel_size=(1,2), stride=(2,2), bias=False, padding=0),           #[batch,n_hlayers,1,4]
            nn.BatchNorm2d(n_hlayers),
            nn.ReLU()
            )
        #+condition 2d [batch,n_hlayers+16,1,4]
        self.cnn2 = nn.Sequential(
            nn.ConvTranspose2d(n_hlayers+16, n_hlayers, kernel_size=(1,2), stride=(2,2), bias=False, padding=0),           #[batch,n_hlayers,1,8]
            nn.BatchNorm2d(n_hlayers),
            nn.ReLU()
            )
        #+condition 2d [batch,n_hlayers+16,1,8]
        self.cnn3 = nn.Sequential(
            nn.ConvTranspose2d(n_hlayers+16, n_hlayers, kernel_size=(1,2), stride=(2,2), bias=False, padding=0),           #[batch,n_hlayers,1,16]
            nn.BatchNorm2d(n_hlayers),
            nn.ReLU()
            )
        #+condition 2d [batch,n_hlayers+16,1,16]
        self.cnn4 = nn.Sequential(
            nn.ConvTranspose2d(n_hlayers+16, instrument_size, kernel_size=(128,1), stride=(2,1), bias=False, padding=0),       #[batch,instrument_size,128,16]
            nn.Sigmoid()
            #Sigmoid function because we want to generate the music matrices without velocity, i.e. values in (0,1);
            #the sigmoid acts as a smooth version of a step function
            )
        #conditioner layers
        #as in the MidiNet model we use the LeakyReLU activation function for the conditioner
        self.h0_prev = nn.Sequential(
            nn.Conv2d(in_channels=instrument_size, out_channels=16, kernel_size=(128,1), stride=(2,1)),                  #[batch,16,1,16]
            nn.BatchNorm2d(16),
            nn.LeakyReLU(0.2)          #note: in the original paper leak=0.2, default leak=0.01
            )
        self.h1_prev = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(1,2), stride=(2,2)),                                  #[batch,16,1,8]
            nn.BatchNorm2d(16),
            nn.LeakyReLU(0.2)
            )
        self.h2_prev = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(1,2), stride=(2,2)),                                  #[batch,16,1,4]
            nn.BatchNorm2d(16),
            nn.LeakyReLU(0.2)
            )
        self.h3_prev = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(1,2), stride=(2,2)),                                  #[batch,16,1,2]
            nn.BatchNorm2d(16),
            nn.LeakyReLU(0.2)
            )

    def forward(self, z, prev_bar, batch_size=None):
        """Generate a bar conditioned on ``prev_bar``.

        Args:
            z: noise tensor ``[batch, input_size]``.
            prev_bar: previous bar used as 2D condition.
            batch_size: number of samples in ``z``; when omitted it is read
                from ``z.shape[0]``.

        Returns:
            Tensor ``[batch, instrument_size, 128, 16]`` with sigmoid outputs.
        """
        if batch_size is None:
            batch_size = z.shape[0]

        #2d condition
        cond0 = self.h0_prev(prev_bar)          #[batch,16,1,16]
        cond1 = self.h1_prev(cond0)             #[batch,16,1,8]
        cond2 = self.h2_prev(cond1)             #[batch,16,1,4]
        cond3 = self.h3_prev(cond2)             #[batch,16,1,2]

        h0 = self.ff1(z)                        #[batch,1024]

        h1 = self.ff2(h0)                       #[batch,n_hlayers*2]
        # Use the configured width: a literal 128 breaks any n_hlayers != 128.
        h1 = h1.reshape(batch_size, self.n_hlayers, 1, 2)  #[batch,n_hlayers,1,2]
        h1 = conv_prev_concat(h1,cond3)         #[batch,n_hlayers+16,1,2]

        h2 = self.cnn1(h1)                      #[batch,n_hlayers,1,4]
        h2 = conv_prev_concat(h2,cond2)         #[batch,n_hlayers+16,1,4]

        h3 = self.cnn2(h2)                      #[batch,n_hlayers,1,8]
        h3 = conv_prev_concat(h3,cond1)         #[batch,n_hlayers+16,1,8]

        h4 = self.cnn3(h3)                      #[batch,n_hlayers,1,16]
        h4 = conv_prev_concat(h4,cond0)         #[batch,n_hlayers+16,1,16]

        out = self.cnn4(h4)                     #[batch,instrument_size,128,16]

        return out

In [6]:
def weights_init(m):
    """Re-initialise one module in place; intended for ``Module.apply``.

    The decision is made on the module's class name:
      * names containing ``Conv`` or ``Linear`` get Xavier-uniform weights;
      * names containing ``BatchNorm`` get weights drawn from N(1.0, 0.2)
        and a zero bias.
    Modules matching none of these are left untouched.
    """
    layer_kind = type(m).__name__

    if 'Conv' in layer_kind:
        nn.init.xavier_uniform_(m.weight.data)

    # The Linear/BatchNorm check is independent of the Conv check above.
    if 'Linear' in layer_kind:
        nn.init.xavier_uniform_(m.weight.data)
    elif 'BatchNorm' in layer_kind:
        nn.init.normal_(m.weight.data, mean=1.0, std=0.2)
        nn.init.constant_(m.bias.data, 0)

In [7]:
# Select the compute device once: CUDA when PyTorch reports one, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('GPU available' if device.type == "cuda" else 'GPU not available')


def LoadModel(instrumentSize, Which):
    """Build a ``Generator`` and restore its weights from ``ModelParameters``.

    Args:
        instrumentSize: forwarded to ``Generator(instrument_size=...)``.
        Which: index into the checkpoint list below
            (0 -> 'generator_no1dnoise.torch', 1 -> 'generator_no1d2.torch').

    Returns:
        The generator, moved to the module-level ``device`` with the
        checkpoint weights loaded. The train/eval mode is not changed here.

    Raises:
        IndexError: if ``Which`` is outside the checkpoint list.
        FileNotFoundError: if the selected checkpoint file does not exist.
    """
    # Resolved relative to the current working directory.
    Path = os.path.realpath('ModelParameters')

    # Each entry is [generator checkpoint, optimizer checkpoint]. Only the
    # generator checkpoint is needed: this function returns the model alone,
    # so the optimizer state is no longer loaded just to be discarded.
    files = [
        ['generator_no1dnoise.torch', 'gen_opt_state_no1dnoise.torch'],
        ['generator_no1d2.torch', 'gen_opt_state_no1d2.torch']
    ]

    generator = Generator(input_size=100, instrument_size=instrumentSize, n_hlayers=128)
    generator.to(device)

    # NOTE: torch.load unpickles the file; only load checkpoints you trust
    # (recent PyTorch versions offer weights_only=True for plain state dicts).
    # map_location keeps the loaded tensors on CPU; load_state_dict then
    # copies them into the parameters already placed on `device`.
    generator_state_dict = torch.load(os.path.join(Path, files[Which][0]), map_location=torch.device('cpu'))

    # Strict loading (the default) overwrites every parameter and buffer, so
    # no explicit weight initialisation is required beforehand.
    generator.load_state_dict(generator_state_dict)

    return generator

                                                    #Monophonic or polyphonic
# Build the generator and restore the first checkpoint (index 0) of LoadModel's file list.
generator = LoadModel(instrumentSize=1, Which=0)

GPU not available


In [8]:
# NOTE(review): the star import hides which names come from the local
# Preprocessing module (PreProcessing here; presumably MonoBarsToMIDI used
# later as well - confirm), and the name `Dataset` below shadows
# torch.utils.data.Dataset imported in the first cell.
from Preprocessing import *
# Run the preprocessing with nDir=50; the result is indexed later as
# Dataset[instrument][i]['Bars' | 'Program' | 'Tempo'].
Dataset = PreProcessing(nDir = 50)

Preprocessing: 100%|██████████| 50/50 [00:11<00:00,  4.43it/s]


In [13]:
# Generate a short sequence: start from one dataset bar and repeatedly feed
# the thresholded output back in as the condition for the next bar.
N_GENERATED_BARS = 8      # bars produced after the seed bar
NOISE_DIM = 100           # must match Generator(input_size=100) in LoadModel
NOISE_SCALE = 0.2         # uniform noise is drawn in [0, NOISE_SCALE)
NOTE_THRESHOLD = 0.7      # sigmoid outputs above this count as "note on"

generator.eval()

Instrument = 'Piano'
# NOTE(review): assumes Dataset[Instrument] has at least 100 entries - confirm.
bar = np.random.randint(0, 100)

prev_bar = Dataset[Instrument][bar]['Bars'][0].to_dense().float().to(device)
InstrumentCode = Dataset[Instrument][bar]['Program']
Tempo = Dataset[Instrument][bar]['Tempo'][0]

# Store every bar as a CPU NumPy array. Keeping the seed bar as a tensor on
# `device` makes np.concatenate fail when the device is CUDA.
Bars = [prev_bar.cpu().numpy()]
#If polyphonic only 1 unsqueeze
prev_bar = prev_bar.unsqueeze(0).unsqueeze(0)


for i in range(N_GENERATED_BARS):
    noise = torch.rand([1, NOISE_DIM], device=device) * NOISE_SCALE

    with torch.no_grad():
        generated_bar = generator(noise, prev_bar, 1)

    binary_bar = (generated_bar > NOTE_THRESHOLD).float()  # still a tensor
    Bars.append(binary_bar.squeeze(0).squeeze(0).cpu().numpy())  # only now for MIDI
    print(generated_bar.mean().item(), generated_bar.std().item())
    prev_bar = binary_bar.detach()


# Join seed + generated bars along axis 1.
PolyConcBars = np.concatenate(Bars, axis=1)

MonoBarsToMIDI(PolyConcBars, title='No1DCond', Instrument=InstrumentCode)

print(InstrumentCode)


0.07682120054960251 0.19310049712657928
0.07805368304252625 0.18510711193084717
0.0753619521856308 0.17833036184310913
0.0736963301897049 0.1861092746257782
0.07616700232028961 0.19217918813228607
0.07618725299835205 0.19694870710372925
0.07230650633573532 0.192580446600914
0.0678374245762825 0.18772608041763306
2


In [10]:
# Count the active (value 1) cells of the 4th generated bar; Bars[0] is the seed bar.
np.sum(Bars[4])

98.0