# Connect to Google Drive and Git



In [None]:
# Mount Google Drive inside the Colab runtime (prompts for an authorization code).
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
%cd "drive/My Drive/Audio_Style_Transfer"

/content/drive/My Drive/Audio_Style_Transfer


# Install and import

In [None]:
!pip install soundfile



In [None]:
#external imports
import copy
import pickle
import random

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
#our libraries
import dataloader

import classifier_model
import discriminator_model
import generator_model

# Data loading

In [None]:
# Build the two datasets; both receive the same features CSV as second argument.
FEATURES_CSV = 'Data/features_30_sec.csv'

dataset = dataloader.MusicDataset('Data/train', FEATURES_CSV)  # later split into train/validation
test_dataset = dataloader.MusicDataset('Data/test', FEATURES_CSV)


In [None]:
#check - dataset functionalities
# len() and indexing go through the same __len__/__getitem__ hooks as the
# explicit dunder calls, but are the idiomatic spelling.
print(len(dataset))
vect,mean,sd,label = dataset[1]
print(vect.shape)
print(label)

print('train and valid dataset:')
# Show only a short preview: the full list holds one (path, class name) tuple
# per audio file and floods the cell output.
print(dataset.path_label[:5])

240
torch.Size([1, 220, 1280])
0
train and valid dataset:
[('Data/train/classical/classical.00001.wav', 'classical'), ('Data/train/classical/classical.00002.wav', 'classical'), ('Data/train/classical/classical.00003.wav', 'classical'), ('Data/train/classical/classical.00004.wav', 'classical'), ('Data/train/classical/classical.00006.wav', 'classical'), ('Data/train/classical/classical.00007.wav', 'classical'), ('Data/train/classical/classical.00008.wav', 'classical'), ('Data/train/classical/classical.00009.wav', 'classical'), ('Data/train/classical/classical.00011.wav', 'classical'), ('Data/train/classical/classical.00012.wav', 'classical'), ('Data/train/classical/classical.00013.wav', 'classical'), ('Data/train/classical/classical.00014.wav', 'classical'), ('Data/train/classical/classical.00016.wav', 'classical'), ('Data/train/classical/classical.00017.wav', 'classical'), ('Data/train/classical/classical.00018.wav', 'classical'), ('Data/train/classical/classical.00019.wav', 'classical'

In [None]:
# Look up what integer label 1 maps to; the returned value is displayed as the cell output.
dataset.intToClass(1)

'pop'

# Classifier training

In [None]:
# Fresh classifier, sized by the number of classes the dataset reports.
num_genres = len(dataset.classes)
classifier = classifier_model.Classifier(num_genres)

In [None]:
#loading datasets using dataloader

batch_size = 10
num_workers = 0

#random sampling for train and valid datasets
random.seed(0)  # rng seed, set to 0 so the train/validation split is reproducible
dataset_indices = list(range(len(dataset)))
random.shuffle(dataset_indices)

#split train dataset: the first 80% of the shuffled indices train, the rest validate
split_point = int(len(dataset_indices)*0.8)
train_split_indices = dataset_indices[:split_point]  # get the training split indices
valid_split_indices = dataset_indices[split_point:]  # get the validation split indices

train_subset_sampler = torch.utils.data.SubsetRandomSampler(train_split_indices)
valid_subset_sampler = torch.utils.data.SubsetRandomSampler(valid_split_indices)

#load all three datasets
# drop_last stays on for training only. On the validation and test loaders it
# would silently discard the final partial batch, so the reported accuracies
# would be computed on a subset of the samples.
train_dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, sampler=train_subset_sampler, num_workers=num_workers, drop_last=True)
valid_dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, sampler=valid_subset_sampler, num_workers=num_workers, drop_last=False)
test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, num_workers=num_workers, drop_last=False)


In [None]:
# Hyper-parameters for the classifier run.
learning_rate = 0.001
num_epochs = 20

loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr = learning_rate)

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

classifier = classifier.to(device)

In [None]:
# Train the classifier for num_epochs, validating after every epoch and keeping
# a copy of the weights that achieved the highest validation accuracy.
max_acc = 0 #remembering the best accuracy
# Start from the current weights so best_model is always defined, even if the
# validation accuracy never rises above 0.
best_model = copy.deepcopy(classifier.state_dict())

for epoch in range(num_epochs):

    classifier.train()

    for i, (spect,mean,sigma,label) in enumerate(train_dataloader):
        spect = spect.to(device)
        label = label.to(device)

        optimizer.zero_grad()
        output = classifier(spect)
        loss = loss_func(output, label)
        loss.backward()
        optimizer.step()

        if (i+1) % (10) == 0:
            print(f'epoch: {epoch}   iter: {i+1}   batch_loss: {loss}')

    classifier.eval()

    correct = 0
    total = 0

    with torch.no_grad():
        for (spect,mean,sigma,label) in valid_dataloader:
            spect = spect.to(device)
            label = label.to(device)

            output = classifier(spect)

            # The arg-max of the raw logits is the predicted class; a
            # (log-)softmax would not change which entry is largest.
            predicted = torch.argmax(output, dim=1)

            total += label.size(0)
            correct += (predicted == label).sum().item()

    validation_accuracy = 100 * float(correct)/total
    print(f'epoch: {epoch}   validation accuracy: {validation_accuracy}%' )

    if (validation_accuracy > max_acc):
        #saving the best model (avoiding overfitting)
        best_model = copy.deepcopy(classifier.state_dict())
        max_acc = validation_accuracy


        
        

epoch: 0   iter: 10   batch_loss: 0.9815198183059692
epoch: 0   validation accuracy: 50.0%
epoch: 1   iter: 10   batch_loss: 0.8625684976577759
epoch: 1   validation accuracy: 75.0%


In [None]:
# Persist the weights that were captured at the best validation accuracy.
CLASSIFIER_CHECKPOINT = 'classif_model.pt'
torch.save(best_model, CLASSIFIER_CHECKPOINT)

# Classifier test

In [None]:
# Evaluate the classifier on the held-out test set.

# Test the checkpoint that was saved as the best model, not whatever weights the
# last training epoch left behind.
classifier.load_state_dict(torch.load('classif_model.pt', map_location=device))
classifier.eval()

# Fresh counters: without the reset, the counts of the last validation pass
# would be mixed into the test accuracy.
correct = 0
total = 0

with torch.no_grad():
    for (spect,mean,sigma,label) in test_dataloader:
        spect = spect.to(device)
        label = label.to(device)

        output = classifier(spect)

        # The arg-max of the raw logits is the predicted class.
        predicted = torch.argmax(output, dim=1)

        total += label.size(0)
        correct += (predicted == label).sum().item()

test_accuracy = 100 * float(correct)/total
print(f'Test accuracy: {test_accuracy}%' )

# Main GAN training

In [None]:
# Build the two GAN networks, both sized by the dataset's class count.
num_classes = len(dataset.classes)
discriminator = discriminator_model.Discriminator(num_classes)
generator = generator_model.Generator(num_classes = num_classes)

# To resume from saved weights, uncomment:
#discriminator.load_state_dict(torch.load("diskriminator_3klase.pt"))
#generator.load_state_dict(torch.load("generator_3klase.pt"))

# Pick the device; the CUDA cache is cleared first when a GPU is present.
use_cuda = torch.cuda.is_available()
if use_cuda:
    torch.cuda.empty_cache()
device = torch.device('cuda' if use_cuda else 'cpu')

discriminator = discriminator.to(device)
generator = generator.to(device)

In [None]:
#LOAD GAN DATASET - whole previous train dataset
# NOTE: this rebinds batch_size and train_dataloader from the classifier section.

batch_size = 5
num_workers = 0
# shuffle=True: the dataset listing starts with a run of files from a single
# genre, so it appears to be grouped by class. Reading it in order would feed
# the networks batches that contain only one genre.
train_dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True)


In [None]:
#TRAINING PARAMETERS

num_epochs = 100
n_disc = 5 #the training loop runs a generator step only when the batch index satisfies i % n_disc == 0
disc_headstart = 10 #generator steps are taken only once epoch >= disc_headstart; before that only the discriminator trains

#optimizer
d_learning_rate = 0.0003
g_learning_rate = 0.0003
# Each optimizer gets its own learning rate (they were previously swapped).
g_optimizer = torch.optim.Adam(generator.parameters(),lr=g_learning_rate)
d_optimizer = torch.optim.Adam(discriminator.parameters(),lr=d_learning_rate)
# factor = 1/3.16227766 ~ 1/sqrt(10): two consecutive reductions lower the learning rate tenfold
g_optim_sch = torch.optim.lr_scheduler.ReduceLROnPlateau(g_optimizer, mode='min', factor=1.0/3.16227766, patience=5,verbose=True, threshold=0.0001, threshold_mode='rel',cooldown=0, min_lr=0, eps=1e-08)
d_optim_sch = torch.optim.lr_scheduler.ReduceLROnPlateau(d_optimizer, mode='min', factor=1.0/3.16227766, patience=5,verbose=True, threshold=0.0001, threshold_mode='rel',cooldown=0, min_lr=0, eps=1e-08)

#loss function parameters
lambda_class_d = 8  #"importance" of good classification - discriminator
lambda_class = 8    #good classification - generator
lambda_cycle = 2    #self-inverse - generator
lambda_ident = 0    #identity preservation - generator (0 switches this term off)


In [None]:
# GAN training loop.
#  - every batch: one discriminator step (real/fake scores + genre classification)
#  - every n_disc-th batch, once epoch >= disc_headstart: one generator step
#    (adversarial + target-class + cycle + identity losses)
#  - every (2*n_disc)-th batch after the headstart: log losses, plot a sample,
#    and step both learning-rate schedulers
#  - every epoch with epoch % 5 == 1: pickle one generated sample with its mean/sd
for epoch in range(num_epochs):

    # The sample dump at the end of an epoch puts the generator into eval mode,
    # so both networks are switched back to training mode here.
    discriminator = discriminator.train()
    generator = generator.train()

    for i, (spect,mean,sd,label) in enumerate(train_dataloader):

        #label conversions - onehot/int
        label_goal = np.random.randint(0,len(dataset.classes),batch_size) #label goal as random int for every sample in the batch

        # The comprehension index must not be called `i`: reusing the batch index
        # here left it stuck at batch_size-1, so the `i % n_disc == 0` checks
        # below never fired and the generator was never trained.
        label_goal_onehot = torch.stack([dataset.intToOnehot(label_goal[k]) for k in range(batch_size)])
        label_onehot = torch.stack([dataset.intToOnehot(label[k]) for k in range(batch_size)])

        #long torch tensor
        label_goal = torch.as_tensor(label_goal, dtype=torch.long)
        label = label.long()

        #device
        label = label.to(device)
        label_goal = label_goal.to(device)
        label_onehot = label_onehot.to(device)
        label_goal_onehot = label_goal_onehot.to(device)
        spect = spect.to(device)

        #TRAIN DISCRIMINATOR
        genre,fake = discriminator(spect)
        loss_real = - torch.mean(fake)

        loss_class = F.cross_entropy(genre, label)

        generator_out = generator(spect,label_goal_onehot)
        # detach: the discriminator step must not back-propagate into the generator
        genre, fake = discriminator(generator_out.detach())
        loss_fake = torch.mean(fake)

        #TODO: add gradient penalty

        loss_disc = loss_real + loss_fake + lambda_class_d * loss_class

        d_optimizer.zero_grad()
        loss_disc.backward()
        d_optimizer.step()

        #TRAIN GENERATOR
        if (i % n_disc == 0 and epoch >= disc_headstart):

            generator_out = generator(spect,label_goal_onehot)
            genre,fake = discriminator(generator_out)
            loss_adv = - torch.mean(fake) #adversary loss

            # No .detach() on the classifier output: detaching cut the graph, so
            # the class loss contributed no gradient to the generator. The
            # classifier itself is not updated because its parameters are not
            # part of g_optimizer.
            genre = classifier(generator_out)
            loss_class = F.cross_entropy(genre, label_goal) #wrong class

            self_inverse = generator(generator_out,label_onehot)
            loss_cycle = torch.mean(torch.abs(self_inverse - spect))

            self_out = generator(spect,label_onehot)
            loss_ident = torch.mean(torch.abs(self_out - spect))

            loss_gen = loss_adv + lambda_class*loss_class + lambda_cycle * loss_cycle + lambda_ident * loss_ident

            g_optimizer.zero_grad()
            loss_gen.backward()
            g_optimizer.step()

        # i % (n_disc*2) == 0 implies i % n_disc == 0, so loss_gen and
        # generator_out come from the generator step of this very batch.
        if(i% (n_disc*2) == 0 and epoch>=disc_headstart):
            print("loss_disc=",loss_disc.item())
            print("loss_gen = ",loss_gen.item())

            # De-normalise the first generated sample for display. Separate names
            # keep the batch tensors `mean` and `sd` intact for the dump below
            # (the original referenced an undefined `var` and overwrote `mean`).
            sample_img = generator_out[0].detach().cpu().numpy().reshape(generator_out.size(2),generator_out.size(3))
            sample_mean = mean[0].cpu().numpy()
            sample_sd = sd[0].cpu().numpy()
            plt.imshow(sample_sd*sample_img+sample_mean)
            plt.show()

            g_optim_sch.step(loss_gen.item())
            d_optim_sch.step(loss_disc.item())

    print('Epoch: ',epoch)

    if(epoch%5==1):
        # Dump one generated sample (source label re-applied to the last batch)
        # together with the statistics needed to de-normalise it.
        generator = generator.eval()
        with torch.no_grad():
            y = generator(spect,label_onehot)
        y = y[0].cpu().numpy().reshape(y.size(2),y.size(3))
        dump_mean = mean[0].cpu().numpy()
        dump_sd = sd[0].cpu().numpy()
        #dataloader.writeAudio(y,22050,dump_mean,dump_sd,'epoch_gan1'+str(epoch))

        with open('pickle//audio_lambda8820_'+str(epoch)+'.pickle', 'wb') as f:
            pickle.dump(y, f)
        with open('pickle//mean_lambda8820_'+str(epoch)+'.pickle', 'wb') as f:
            pickle.dump(dump_mean, f)
        with open('pickle//sigma_lambda8820_'+str(epoch)+'.pickle','wb') as f:
            pickle.dump(dump_sd,f)

Epoch:  0
Epoch:  1
Epoch:  2
Epoch:  3
Epoch:  4
Epoch:  5
Epoch:  6
Epoch:  7
Epoch:  8
Epoch:  9
Epoch:  10
Epoch:  11
Epoch:  12
Epoch:  13
Epoch:  14
Epoch:  15
Epoch:  16
Epoch:  17
Epoch:  18
Epoch:  19
Epoch:  20
Epoch:  21
Epoch:  22
Epoch:  23
Epoch:  24
Epoch:  25
Epoch:  26
Epoch:  27
Epoch:  28
Epoch:  29
Epoch:  30
Epoch:  31
Epoch:  32
Epoch:  33
Epoch:  34
Epoch:  35
Epoch:  36
Epoch:  37
Epoch:  38
Epoch:  39
Epoch:  40
Epoch:  41
Epoch:  42
Epoch:  43
Epoch:  44
Epoch:  45
Epoch:  46
Epoch:  47
Epoch:  48
Epoch:  49
Epoch:  50
Epoch:  51
Epoch:  52
Epoch:  53
Epoch:  54
Epoch:  55
Epoch:  56
Epoch:  57
Epoch:  58
Epoch:  59
Epoch:  60
Epoch:  61
Epoch:  62
Epoch:  63
Epoch:  64
Epoch:  65
Epoch:  66
Epoch:  67
Epoch:  68
Epoch:  69
Epoch:  70
Epoch:  71
Epoch:  72
Epoch:  73
Epoch:  74
Epoch:  75
Epoch:  76
Epoch:  77
Epoch:  78
Epoch:  79
Epoch:  80
Epoch:  81
Epoch:  82
Epoch:  83
Epoch:  84
Epoch:  85
Epoch:  86
Epoch:  87
Epoch:  88
Epoch:  89
Epoch:  90
Epoch:  9