# 0: Libraries and Functionalities

In [0]:
#import all the needed libraries and initialize them

import pandas as pd
import numpy as np
import math
import librosa as lr
import librosa.display
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

#playing audio
import IPython.display as ipd

In [0]:
# Notebook-wide accumulators (used like globals by other cells); both start out empty
audio_files, file_ids = [], []

In [0]:
class StopWatch:
    """Wall-clock timer that starts counting the moment it is created."""

    def __init__(self):
        # reference point for every later call to give()
        self.start_time = time.time()

    def give(self):
        """Return the time elapsed since creation, formatted as '[HH:MM:SS]'.

        The elapsed time is rounded to whole seconds; hours are not capped at
        two digits, so very long runs simply print a wider hour field.
        """
        elapsed = round(time.time() - self.start_time)
        hours, remainder = divmod(elapsed, 3600)
        minutes, seconds = divmod(remainder, 60)

        return f'[{hours:02d}:{minutes:02d}:{seconds:02d}]'


In [4]:
# Mount Google Drive inside the Colab VM so the project folder becomes reachable
# under /content/drive. This only works when the notebook runs on Google Colab and
# it prompts interactively for an authorization code.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [5]:
# Switch the working directory to the project folder on the mounted Drive; the
# relative paths used later (e.g. "data/feature_data.npy") are resolved from here.
# The path is relative, so this cell must be run from /content and only once per session.
%cd 'drive/My Drive/Music + AI Project/01 - [NN1] LSTM Generation'
#%cd 'Music + AI Project/01 - [NN1] LSTM Generation'
#%ls

/content/drive/My Drive/Music + AI Project/01 - [NN1] LSTM Generation


In [0]:
# 44100 Hz reduced by a factor of 500 and truncated to a whole number (= 88)
global_sr = 44100 // 500

# 3: The Neural Network (Chunk Output)

## 3.1: Reading features

In [7]:
# Load the pre-computed feature matrix from disk (path is relative to the project folder)
all_feature_matrix = np.load("data/feature_data.npy")
print("Feature Reading Completed!")
print("Shape of the features: ", np.shape(all_feature_matrix))

Feature Reading Completed!
Shape of the features:  (265, 880)


## 3.2: Setting NN Variables and Define Model

**Variables**:

Variable name | Description | Can I change this?
--- | --- | ---
sr | sampling rate at which the song was read; it also determines the network layer sizes | Yes (Keep consistent with input)
batch_num | how many batches should a chunk be converted into? | Yes
input_size | calculates the input layer nodes for the network | No
hidden_size | calculates the hidden layer nodes for the network | Yes (Only the factor)
output_size | calculates the output layer nodes for the network | Yes (Depends on network check)
num_layers | setting default layers to 1 for now | No
dropout_per | fraction of activations that are randomly dropped (zeroed) while training | Yes
learning_rate | the rate at which the network learns outputs | Yes (exponent form)

In [0]:
# Hyper-parameters for the neural network (the table above says which ones are tunable)
# NOTE(review): the saved output of the feature-loading cell reports 880 samples per
# chunk, which does not match sr = 11025 / input_size = 2205 -- confirm the feature
# file was generated at this sampling rate before training.
sr = 44100 // 4                       # sampling rate the song was read at
batch_num = 5                         # number of equal slices each chunk is cut into
input_size = sr // batch_num          # one slice feeds the input layer
hidden_size = int(input_size * 2.4)   # hidden width is a multiple of the input width
output_size = input_size              # the network emits a slice the same size as its input
num_layers = 1
dropout_per = 0.7
learning_rate = 1e-3

# run on the first CUDA GPU when one is available, otherwise fall back to the CPU
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

In [0]:
# defining the neural network class
class BasicRNN(nn.Module):
    """Single-step LSTM predictor: one slice of features in, one slice out.

    The recurrent state is kept on the instance (``self.hidden``) and is fed
    back into the LSTM on every call, so consecutive calls form one sequence
    until ``reset_hidden()`` is called.

    Args:
        n_input: size of the input slice (defaults to the notebook's ``input_size``).
        n_hidden: LSTM hidden width (defaults to ``hidden_size``).
        n_output: size of the produced slice (defaults to ``output_size``).
        n_layers: number of stacked LSTM layers (defaults to ``num_layers``).
        dropout: dropout probability before the output layer (defaults to ``dropout_per``).
    """

    # initializing the network
    # declaring all the needed layers
    def __init__(self, n_input=None, n_hidden=None, n_output=None, n_layers=None, dropout=None):
        super(BasicRNN, self).__init__()

        # fall back to the notebook-level configuration for anything not given explicitly
        self.input_size = input_size if n_input is None else n_input
        self.hidden_size = hidden_size if n_hidden is None else n_hidden
        self.output_size = output_size if n_output is None else n_output
        self.num_layers = num_layers if n_layers is None else n_layers
        self.dropout_per = dropout_per if dropout is None else dropout

        # an lstm layer for input to hidden layers
        self.rnn = nn.LSTM(self.input_size, self.hidden_size, self.num_layers)
        # hidden to output
        self.out = nn.Linear(self.hidden_size, self.output_size)
        # a dropout layer between the hidden and output layer
        self.drop = nn.Dropout(p=self.dropout_per)

        # making the hidden state and setting it to zero
        self.reset_hidden()

    def reset_hidden(self):
        """Zero the (h, c) state; call this before starting a new, unrelated sequence."""
        # create the state next to the weights so it follows the module to the GPU
        reference = next(self.parameters())
        shape = (self.num_layers, 1, self.hidden_size)
        self.hidden = (
            torch.zeros(shape, device=reference.device, dtype=reference.dtype),
            torch.zeros(shape, device=reference.device, dtype=reference.dtype),
        )

    # setting the network layers in order
    def forward(self, seq):
        """Run one time step and return the prediction with shape (1, output_size)."""
        # the state may have been created before the module was moved to another device
        state = tuple(h.to(seq.device) for h in self.hidden)

        # the view adds the (sequence, batch) dimensions the LSTM expects;
        # passing `state` is what makes consecutive calls recurrent
        out, state = self.rnn(seq.view(1, 1, -1), state)

        # outside of training nothing back-propagates through the state, so cut the
        # graph there to keep memory flat during validation and generation
        self.hidden = state if self.training else tuple(h.detach() for h in state)

        out = self.drop(out)
        out = self.out(out.view(1, -1))

        return out

## 3.3: Validate the Model

In [0]:
def validate_network(showError = False):
    """Evaluate the global ``net`` on the held-out chunks and return their losses.

    The test chunks are the rows ``total_train .. total_train + total_test - 1``
    of ``all_feature_matrix``. For every chunk each slice is fed to the network
    and compared with the slice that follows it; the per-slice losses are summed.

    Args:
        showError: when True, plot the per-chunk loss as a line graph.

    Returns:
        list of float: the summed loss of every test chunk, in row order.

    Note:
        The network is left in evaluation mode; callers that continue training
        must call ``net.train()`` afterwards.
    """
    loss_by_batch = []

    # setting the network to evaluation mode
    net.eval()

    # no gradients are needed for validation; this avoids building autograd graphs
    with torch.no_grad():

        # iterate through the testing chunks
        for batch in range(total_train, total_train + total_test):

            # every chunk is an independent sequence, so start from a clean state
            net.reset_hidden()
            chunk = all_feature_matrix[batch]
            loss = 0.0

            # iterate over the slices: slice j is the input, slice j+1 the expected output
            for j in range(batch_num - 1):
                input_matrix = torch.as_tensor(chunk[input_size*j:input_size*(j+1)], dtype=torch.float32, device=device)
                val_output = torch.as_tensor(chunk[input_size*(j+1):input_size*(j+2)], dtype=torch.float32, device=device)

                # get the network output
                nn_output = net(input_matrix)

                # keep the target as float: casting to long would truncate real-valued
                # features towards zero.
                # NOTE(review): PoissonNLLLoss assumes non-negative targets -- confirm
                # the features satisfy that, otherwise consider a regression loss.
                loss += loss_function(nn_output, val_output.view(1, -1)).item()

            # store a plain Python float so the list can be plotted regardless of device
            loss_by_batch.append(loss)

    # plot the graph of the batch loss as a line graph
    if showError:
        plt.plot(loss_by_batch)
        plt.ylabel('Loss by batch')
        plt.show()

    return loss_by_batch

## 3.4: Generate some Music

In [0]:
def generate_sample_song(song_length_seconds, song_name = "test_output.wav", showSignal = False):
    """Generate audio by repeatedly feeding the global ``net`` its own output.

    Args:
        song_length_seconds: length of the generated song in seconds.
        song_name: path of the WAV file to write.
        showSignal: when True, plot the generated signal as a line graph.

    Note:
        The network is left in evaluation mode; callers that continue training
        must call ``net.train()`` afterwards. Samples beyond the last full slice
        (when the length is not a multiple of ``input_size``) stay at zero.
    """
    # initial seed: which chunk (row) and which slice inside it to start from
    batch_seed = 225
    sample_seed = 2
    seed_start = input_size * sample_seed
    seed_stop = input_size * (sample_seed + 1)

    # the number of samples and loop iterations follow from the requested length
    seconds = song_length_seconds
    song_samples = int(seconds * sr)
    total_iterations = int(song_samples / input_size)

    # get the features for the seed
    input_seed = torch.FloatTensor(all_feature_matrix[batch_seed][seed_start:seed_stop]).to(device)

    # make a zero variable to write the song into
    song = np.zeros(song_samples)

    # set the network to evaluation mode and start from a clean recurrent state
    net.eval()
    net.reset_hidden()

    # generation never back-propagates; without this the autograd graph would grow
    # with every iteration because each output becomes the next input
    with torch.no_grad():
        for i in range(total_iterations):

            # periodically restart from a randomly chosen chunk for variety;
            # the index must range over the chunks (rows), not over the slices per chunk
            if i % 100 == 0:
                batch_seed = random.randrange(all_feature_matrix.shape[0])
                input_seed = torch.FloatTensor(all_feature_matrix[batch_seed][seed_start:seed_stop]).to(device)

            # get the output from the network
            nn_output = net(input_seed)

            # make the output the next input
            input_seed = nn_output

            # add the current output (shape (1, input_size)) to the song as a flat slice
            song[i*input_size:(i+1)*input_size] = nn_output.detach().cpu().numpy().reshape(-1)

    # save the song as a test output
    # NOTE(review): librosa.output was removed in librosa 0.8 -- on newer installs this
    # call needs replacing (e.g. with soundfile.write); confirm the installed version.
    lr.output.write_wav(song_name, song, sr)

    # plot the signal plot of the song as a line graph
    if showSignal:
        plt.plot(song)
        plt.ylabel('Song Sound')
        plt.show()

## 3.5: Train the Model

In [13]:
# set the number of epochs, the training percentage of the dataset and
# how often (in epochs) the network is validated and a sample song is generated
epochs = 200
training_per = 0.85
test_network = 5

# this calculates the total number of chunks to be used for training and testing
total_train = int(all_feature_matrix.shape[0] * training_per)
total_test = all_feature_matrix.shape[0] - total_train

# makes a list of all chunk numbers which will be used to train the network
shuffle_list = list(range(total_train))

# make the network and put it on the selected device
net = BasicRNN().float().to(device)

# define an optimizer and loss function
# this can be changed as per the model
# NOTE(review): PoissonNLLLoss assumes non-negative targets -- confirm the features
# satisfy that, otherwise a regression loss such as nn.MSELoss may be more suitable.
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
loss_function = nn.PoissonNLLLoss()

# making a stopwatch to count time
watch = StopWatch()

# loop for all the epochs
for epoch in range(epochs):

    # shuffle the chunks so the network does not get used to their order
    random.shuffle(shuffle_list)
    # reset the epoch loss (kept as a plain float so no autograd graph is retained)
    epoch_loss = 0.0

    # run for all the chunks
    for i, batch in enumerate(shuffle_list):

        # every chunk is an independent sequence: clear the recurrent state and the gradients
        net.reset_hidden()
        optimizer.zero_grad()
        loss = 0

        # slice j is the input and slice j+1 the expected output
        for j in range(batch_num-1):

            # make the input and validation output tensors
            input_matrix = torch.FloatTensor(all_feature_matrix[batch][input_size*j:input_size*(j+1)]).to(device)
            val_output = torch.FloatTensor(all_feature_matrix[batch][input_size*(j+1):input_size*(j+2)]).to(device)

            # get the network output
            nn_output = net(input_matrix)

            # keep the target as float: casting to long would truncate real-valued
            # features towards zero
            loss += loss_function(nn_output, val_output.view(1,-1))

        # back propagate through the network with the accumulated error and optimizer
        loss.backward()
        optimizer.step()

        # count each chunk's loss exactly once, after its last slice
        epoch_loss += loss.item()

        # print the state of the network after a few chunks are worked on
        if (i+1) % 50 == 0:
            print(f'{watch.give()} Epoch {epoch + 1}; Batch {i + 1}; Loss {round(float(loss),6)}')

    # a print to mark the end of an epoch and its loss
    print(f'{watch.give()} Epoch {epoch + 1} completed! Total Loss: {round(float(epoch_loss),6)}')

    # after a few epochs validate the network and also generate a song sample
    if (epoch+1) % test_network == 0:
        validate_network(True)
        generate_sample_song(20, f'bp_wav_output/test_e{epoch+1}.wav',True)
        # both helpers leave the network in evaluation mode
        net.train()

KeyboardInterrupt: ignored