# 0: Libraries and Functionalities

In [1]:
#import all the needed libraries and initialize them

import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

#playing audio
import IPython.display as ipd

In [2]:
class StopWatch():
    """Tiny elapsed-time helper that reports time since creation as ``[HH:MM:SS]``."""

    def __init__(self):
        # Wall-clock reference point; every reading is measured from here.
        self.start_time = time.time()

    def give(self):
        """Return the whole seconds elapsed since construction, formatted as ``[HH:MM:SS]``.

        Each field is zero-padded to at least two digits; the hour field grows
        beyond two digits for long runs.
        """
        elapsed = round(time.time() - self.start_time)
        hours, leftover = divmod(elapsed, 3600)
        minutes, seconds = divmod(leftover, 60)

        fields = [str(part).zfill(2) for part in (hours, minutes, seconds)]
        return '[' + ':'.join(fields) + ']'


In [3]:
# Mount Google Drive inside the Colab runtime so the project files are reachable under /content/drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
%cd 'drive/My Drive/Music + AI Project/04 - [NN2] RRN for MIDI'

/content/drive/My Drive/Music + AI Project/04 - [NN2] RRN for MIDI


In [5]:
pip install mido

Collecting mido
[?25l  Downloading https://files.pythonhosted.org/packages/20/0a/81beb587b1ae832ea6a1901dc7c6faa380e8dd154e0a862f0a9f3d2afab9/mido-1.2.9-py2.py3-none-any.whl (52kB)
[K     |██████▎                         | 10kB 16.5MB/s eta 0:00:01[K     |████████████▌                   | 20kB 10.7MB/s eta 0:00:01[K     |██████████████████▊             | 30kB 7.9MB/s eta 0:00:01[K     |█████████████████████████       | 40kB 7.7MB/s eta 0:00:01[K     |███████████████████████████████▏| 51kB 4.5MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 3.3MB/s 
[?25hInstalling collected packages: mido
Successfully installed mido-1.2.9


In [6]:
import mido
import numpy as np

class DataManager():
    """Converts between MIDI files and NumPy arrays of normalised note events.

    Every event is one row ``[on_off, note, velocity, time]``:

    * ``on_off``   - 1 for ``note_on``, 0 for ``note_off``
    * ``note``     - MIDI note number divided by 127
    * ``velocity`` - MIDI velocity divided by 127
    * ``time``     - MIDI delta time divided by ``_MAX_MIDI_TIME``
    """

    # Largest value a MIDI note / velocity field accepts.
    _MIDI_MAX_VALUE = 127
    # Scale factor between MIDI delta times and the normalised time column.
    _MAX_MIDI_TIME = 1000.0

    @staticmethod
    def _ensure_mid_extension(filename):
        """Return ``filename`` unchanged if it ends in ``.mid``, else with ``.mid`` appended."""
        return filename if filename.endswith('.mid') else filename + '.mid'

    @staticmethod
    def _to_midi_value(scaled):
        """Scale a normalised value back to a MIDI field and clamp it to 0..127.

        The value is multiplied by 127 and truncated with ``int``. Clamping
        keeps out-of-range values (e.g. raw network output) from being
        rejected when the MIDI message is built.
        """
        return max(0, min(DataManager._MIDI_MAX_VALUE, int(scaled * DataManager._MIDI_MAX_VALUE)))

    @staticmethod
    def _track_to_rows(track):
        """Turn the ``note_on`` / ``note_off`` messages of one track into normalised rows.

        Every other message type (``control_change``, meta messages, ...) is skipped.
        """
        rows = []
        for msg in track:
            if msg.type == "note_on":
                on_off = 1
            elif msg.type == "note_off":
                on_off = 0
            else:
                continue
            rows.append([on_off,
                         float(msg.note) / 127.0,
                         float(msg.velocity) / 127.0,
                         float(msg.time) / DataManager._MAX_MIDI_TIME])
        return rows

    def npFile2MIDI(self, in_filename, out_filename, num = 4, den = 4, clocks = 36, noted32 = 8, AutoTimed = False, AutoTime=120):
        """Load an event array from ``in_filename`` with ``np.load`` and write it as a MIDI file.

        All remaining arguments are forwarded unchanged to :meth:`np2MIDI`.
        """
        self.np2MIDI(np.load(in_filename), out_filename, num, den, clocks, noted32, AutoTimed, AutoTime)

    def np2MIDI(self, np_track, out_filename, num = 4, den = 4, clocks = 36, noted32 = 8, AutoTimed = False, AutoTime=120):
        """Write an array of normalised events as a single-track MIDI file.

        Parameters
        ----------
        np_track : iterable of rows ``[on_off, note, velocity, time]``
        out_filename : output path; ``.mid`` is appended when missing
        num, den, clocks, noted32 : values for the ``time_signature`` meta message
            (numerator, denominator, clocks_per_click, notated_32nd_notes_per_beat)
        AutoTimed : when True every message gets the fixed delta time ``AutoTime``
            instead of the time stored in the row
        AutoTime : delta time used when ``AutoTimed`` is True
        """
        mid = mido.MidiFile()
        track = mido.MidiTrack()
        mid.tracks.append(track)

        track.append(mido.MetaMessage('time_signature', numerator=num, denominator=den, clocks_per_click=clocks, notated_32nd_notes_per_beat=noted32, time=0))

        for msg in np_track:
            # round the on/off flag to the nearest integer: 1 -> note_on, anything else -> note_off
            control = 'note_on' if int(msg[0]+0.5) == 1 else 'note_off'

            if AutoTimed:
                delta_time = AutoTime
            else:
                # negative delta times are clamped to zero
                delta_time = max(0, int(msg[3]*DataManager._MAX_MIDI_TIME))

            track.append(mido.Message(control,
                                      note=DataManager._to_midi_value(msg[1]),
                                      velocity=DataManager._to_midi_value(msg[2]),
                                      time=delta_time))

        mid.save(DataManager._ensure_mid_extension(out_filename))

    def MIDIFile2np(self, in_filename, out_filname):
        """Read a MIDI file and save one normalised event array per track.

        Track ``i`` is saved with ``np.save`` under ``out_filname + str(i)``.
        Each file holds only the ``note_on`` / ``note_off`` events of its own
        track; a track without such events yields an empty array.
        """
        mid = mido.MidiFile(in_filename)

        for i, track in enumerate(mid.tracks):
            # a fresh list per track, so earlier tracks are neither repeated nor normalised twice
            rows = DataManager._track_to_rows(track)
            np.save(out_filname + str(i), np.array(rows, dtype=float))




# 4: The Neural Network (Sample Output)

## 4.1: Reading features

In [None]:
# load the MIDI feature matrix that was saved as a NumPy file
# (rows are presumably [on/off, note, velocity, time] as written by DataManager.MIDIFile2np - TODO confirm)
all_feature_matrix = np.load("midiTest.npy")
print("MIDI Reading Completed!")
print("Shape of the features: ", all_feature_matrix.shape)

MIDI Reading Completed!
Shape of the features:  (4264, 4)


## 4.2: Setting NN Variables and Define Model

In [None]:
def save_checkpoint(net, optimizer, epoch_no, loss, checkpoint_name="", store_eNum = True):
    """Save the network, optimizer and hyper-parameters under ``saved_net/``.

    Parameters
    ----------
    net, optimizer : objects whose ``state_dict()`` is stored
    epoch_no : zero-based epoch index; stored as-is, while file names use ``epoch_no + 1``
    loss : stored unchanged under the ``'loss'`` key
    checkpoint_name : base file name; when empty, ``checkpoint_e<epoch_no+1>`` is used
    store_eNum : when True (and a name is given) ``_e<epoch_no+1>`` is appended to the name
    """
    import os

    directory = "saved_net/"
    if checkpoint_name == "":
        filename = "checkpoint_e" + str(epoch_no+1)
    elif store_eNum:
        filename = checkpoint_name + "_e" + str(epoch_no+1)
    else:
        filename = checkpoint_name
    path = directory + filename + ".pt"

    # torch.save does not create missing directories, so make sure the target exists
    os.makedirs(directory, exist_ok=True)

    checkpoint = {'epoch': epoch_no,
                    'model_state_dict': net.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss,
                    'meta': update_nn_metaData()
                  }

    torch.save(checkpoint, path)

    print(f"----- Saved the network as '{path}' -----")

def load_checkpoint(net, optimizer, checkpoint_name, net_evalMode = False):
    """Restore a checkpoint written by ``save_checkpoint`` from ``saved_net/<checkpoint_name>.pt``.

    The model and optimizer state dicts are loaded into ``net`` and
    ``optimizer``, and the stored hyper-parameters are passed to
    ``apply_nn_metaData``. The network is then put into evaluation mode when
    ``net_evalMode`` is True, otherwise into training mode.

    Returns
    -------
    (net, optimizer, epoch_no, loss) where ``epoch_no`` and ``loss`` are the
    values stored in the checkpoint.
    """
    path = "saved_net/" + checkpoint_name + ".pt"

    # Map the stored tensors onto the device the network currently lives on, so
    # a checkpoint saved on a GPU can still be loaded on a CPU-only runtime.
    first_param = next(net.parameters(), None)
    target_device = first_param.device if first_param is not None else None
    checkpoint = torch.load(path, map_location=target_device)

    net.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    epoch_no = checkpoint['epoch']
    loss = checkpoint['loss']
    nn_meta = checkpoint['meta']

    apply_nn_metaData(nn_meta)

    if net_evalMode:
        net.eval()
    else:
        net.train()

    print(f"----- Loaded the network from '{path}' -----")

    return net, optimizer, epoch_no, loss

def apply_nn_metaData(nn_meta):
    """Copy the hyper-parameters stored in ``nn_meta`` into the notebook's global settings.

    ``nn_meta`` is a dict with the keys ``input_size``, ``midi_features``,
    ``feature_size``, ``hidden_size``, ``output_size``, ``num_layers``,
    ``dropout_per``, ``learning_rate`` and ``batch_size``. A missing key raises
    ``KeyError``.
    """
    # Without the `global` declarations these assignments would only create
    # locals and the stored settings would be silently discarded.
    global input_size, midi_features, feature_size, hidden_size, output_size
    global num_layers, dropout_per, learning_rate, batch_size

    input_size = nn_meta['input_size']
    midi_features = nn_meta['midi_features']
    feature_size = nn_meta['feature_size']
    hidden_size = nn_meta['hidden_size']
    output_size = nn_meta['output_size']
    num_layers = nn_meta['num_layers']
    dropout_per = nn_meta['dropout_per']
    learning_rate = nn_meta['learning_rate']
    batch_size = nn_meta['batch_size']

def update_nn_metaData():
    """Return a dict snapshot of the notebook's current network hyper-parameters.

    The values are read from the module-level settings of the same names.
    """
    return {
        'input_size': input_size,
        'midi_features': midi_features,
        'feature_size': feature_size,
        'hidden_size': hidden_size,
        'output_size': output_size,
        'num_layers': num_layers,
        'dropout_per': dropout_per,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
    }
    

**Variables**:

Variable name | Description | Can I change this?
--- | --- | ---
sr | sampling rate at which the song has been read at, and helps in setting network nodes | Yes (Keep consistent with input)
batch_num | how many batches should a chunk be converted into? | Yes
input_size | calculates the input layer nodes for the network | No
hidden_size | calculates the hidden layer nodes for the network | Yes (Only the factor)
output_size | calculates the output layer nodes for the network | Yes (Depends on network check)
num_layers | setting default layers to 1 for now | No
dropout_per | fraction of the LSTM output activations that are dropped (zeroed) during training | Yes
learning_rate | the rate at which the network learns outputs | Yes (exponent form)

In [None]:
# Setting up variables for the neural network
input_size = 20  # number of consecutive MIDI event rows fed to the network at once
midi_features = 4  # values per MIDI event row
feature_size = midi_features * input_size  # length of one flattened input window
hidden_size = int(feature_size*2.2)  # LSTM hidden units, a fixed multiple of the input length
output_size = midi_features  # the network emits one MIDI event row
num_layers = 5  # number of stacked LSTM layers
dropout_per = 0.65  # dropout probability
learning_rate = 1e-3
batch_size = 100

# run on the first CUDA GPU when one is available, otherwise on the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# defining the neural network class
class MidiRNN(nn.Module):
    """Multi-layer LSTM followed by dropout and a linear read-out.

    Layer sizes are taken from the module-level settings ``feature_size``,
    ``hidden_size``, ``output_size``, ``num_layers`` and ``dropout_per``.

    NOTE(review): ``self.hidden`` is overwritten with the LSTM state after every
    forward pass but is never handed back to the LSTM, so each call to
    ``forward`` starts from the LSTM's default initial state.
    """

    def __init__(self):
        super().__init__()

        # input window -> hidden representation
        self.rnn = nn.LSTM(input_size=feature_size, hidden_size=hidden_size, num_layers=num_layers)
        # hidden representation -> one output row
        self.out = nn.Linear(in_features=hidden_size, out_features=output_size)
        # dropout applied between the LSTM output and the read-out layer
        self.drop = nn.Dropout(p=dropout_per)

        # start with an all-zero (h, c) state
        self.reset_hidden()

    def reset_hidden(self):
        """Replace the stored ``(h, c)`` state with zero tensors of shape ``(num_layers, 1, hidden_size)``."""
        state_shape = (num_layers, 1, hidden_size)
        self.hidden = (torch.zeros(*state_shape), torch.zeros(*state_shape))

    def forward(self, seq):
        """Flatten ``seq`` into a single time step and return the read-out of the LSTM output."""
        # (seq_len=1, batch=1, features): the whole window becomes one feature vector
        single_step = seq.view(1, 1, -1)
        lstm_out, self.hidden = self.rnn(single_step)
        return self.out(self.drop(lstm_out))

## 4.3: Validate the Model

In [None]:
def validate_network(showError = False):
    """Evaluate the global ``net`` on the test portion of ``all_feature_matrix``.

    For every start index ``i`` in the test range the rows
    ``[i, i + input_size)`` are fed to the network and the output is compared,
    using the global ``loss_function``, with row ``i + input_size + 1``.
    The network is left in evaluation mode.

    Parameters
    ----------
    showError : when True, the per-sample losses are plotted as a line graph.

    NOTE(review): the target row is ``i + input_size + 1`` (one row past the end
    of the window), which mirrors the training loop - confirm that skipping
    row ``i + input_size`` is intended.
    """
    loss_by_batch = []

    # evaluation mode disables dropout
    net.eval()

    # no gradients are needed for validation; skipping them avoids building autograd graphs
    with torch.no_grad():
        for i in range(total_train,total_train + total_test - input_size - 1):

            # input window and the row the network is expected to produce
            input_matrix = torch.FloatTensor(all_feature_matrix[i:i + input_size]).to(device)
            val_output = torch.FloatTensor(np.array(all_feature_matrix[i+input_size+1])).to(device)

            nn_output = net(input_matrix)

            # same target shape as in the training loop, so the loss does not rely on broadcasting
            loss = loss_function(nn_output, val_output.view(1,1,-1))

            # store plain Python floats: tensors (possibly on the GPU) cannot be plotted directly
            loss_by_batch.append(loss.item())

    # plot the per-sample loss as a line graph
    if showError:
        plt.plot(loss_by_batch)
        plt.ylabel('Loss by batch')
        plt.show()

## 4.4: Generate some Music

In [None]:
def _roll_out_song(song_length, seed):
    """Generate ``song_length`` event rows with the global ``net``, starting from a seed window.

    The first ``input_size`` rows are copied from ``all_feature_matrix`` starting
    at row ``seed``; every following row is the network output for the
    preceding ``input_size`` rows. Raises ``ValueError`` when the song is
    shorter than the seed window or the seed window runs past the data.
    """
    if song_length < input_size:
        raise ValueError(f"song length ({song_length}) must be at least input_size ({input_size})")

    seed_rows = np.asarray(all_feature_matrix[seed:seed+input_size])
    if seed_rows.shape[0] != input_size:
        raise ValueError(f"seed {seed} does not leave {input_size} rows in the feature matrix")

    # one row per event, as wide as the feature matrix
    song = np.zeros((song_length, seed_rows.shape[1]))
    # the seed goes through float32, the precision the network works with
    song[0:input_size] = torch.FloatTensor(seed_rows).numpy()

    # evaluation mode disables dropout
    net.eval()

    # generation needs no gradients
    with torch.no_grad():
        for i in range(song_length - input_size):
            window = torch.FloatTensor(song[i:input_size + i]).to(device)
            # the network output becomes the next row of the song
            song[input_size + i] = net(window).cpu().numpy().reshape(-1)

    return song


def _save_song(song, song_name, saveNumpy, saveMIDI):
    """Write ``song`` as a NumPy file and/or an auto-timed MIDI file named after ``song_name``."""
    if saveNumpy:
        np.save(song_name,np.array(song))

    if saveMIDI:
        DM = DataManager()
        DM.np2MIDI(song, song_name,AutoTimed=True)


def generate_sample_song(song_length_seconds, song_name = "test_output.wav", showSignal = False, saveMIDI = False, saveNumpy = True, seed = 35):
    """Generate one song from the global ``net`` and save it.

    Parameters
    ----------
    song_length_seconds : number of event rows to generate (including the seed rows)
    song_name : base name of the output file(s)
    showSignal : currently unused
    saveMIDI : also write the song as a MIDI file with fixed delta times
    saveNumpy : write the song with ``np.save``
    seed : row of ``all_feature_matrix`` where the seed window starts
    """
    song = _roll_out_song(song_length_seconds, seed)
    _save_song(song, song_name, saveNumpy, saveMIDI)


def generate_sample_song_random(song_length_midi, total_midi,  song_name = "test_output.wav", showSignal = False, saveMIDI = False, saveNumpy = True, seed = 35):
    """Generate a song ``total_midi`` times and save the last one.

    Parameters are the same as for ``generate_sample_song``, with
    ``song_length_midi`` as the number of event rows and ``total_midi`` as the
    number of generation passes (at least 1, otherwise ``ValueError``).

    NOTE(review): every pass starts from the same ``seed`` and only the final
    song is saved, so the passes appear redundant - confirm whether a random
    seed and one output file per pass were intended.
    """
    if total_midi < 1:
        raise ValueError("total_midi must be at least 1")

    for _ in range(total_midi):
        song = _roll_out_song(song_length_midi, seed)

    _save_song(song, song_name, saveNumpy, saveMIDI)
        

## 4.5: Train the Model

In [None]:
# set the number of epochs and the training percentage of the dataset
epochs = 2000
training_per = 0.9
test_network = 10  # validate, generate a sample and save a checkpoint every `test_network` epochs
start_epoch = 0

# load a checkpoint
load = True
checkpoint_name = "checkpoint_e50"

# number of feature rows used for training and for testing
total_train = int(all_feature_matrix.shape[0] * training_per)
total_test = all_feature_matrix.shape[0] - total_train

# make the network and put it on the selected device
net = MidiRNN().float().to(device)

# define an optimizer and loss function
# this can be changed as per the model
optimizer = torch.optim.Adamax(net.parameters(), lr=learning_rate)
loss_function = nn.MSELoss()

# making a stopwatch to count time
watch = StopWatch()

if load:
    net, optimizer, start_epoch, loss = load_checkpoint(net,optimizer,checkpoint_name)
    start_epoch = start_epoch + 1

# derive the batch count right before training so it reflects the batch_size in effect at this point
total_batches = math.ceil(total_train/batch_size)

# loop for all the epochs
for epoch in range(start_epoch,epochs):

    # running total of the batch losses, kept as a plain float so no autograd graph is retained
    epoch_loss = 0.0

    for batch in range(total_batches):
        # reset the hidden state and clear all gradients before each batch
        net.reset_hidden()
        net.zero_grad()
        loss = 0

        # window start indices covered by this batch; `min` keeps the batch inside
        # its own slice of the training rows
        batch_start = max((batch * batch_size) - input_size,0)
        batch_end = min(((batch+1) * batch_size),total_train) - input_size

        # nothing to train on (e.g. batch_size <= input_size): skip instead of calling backward() on 0
        if batch_end <= batch_start:
            continue

        # run for all the windows in this batch
        for i in range(batch_start, batch_end):

            # make the input and validation output tensors
            input_matrix = torch.FloatTensor(all_feature_matrix[i:i + input_size]).to(device)
            val_output = torch.FloatTensor(np.array(all_feature_matrix[i+input_size+1])).to(device)

            # get the network output
            nn_output = net(input_matrix)

            # accumulate the loss between the network output and the expected row
            loss += loss_function(nn_output, val_output.view(1,1,-1))

        # back-propagate the accumulated batch loss and update the weights
        loss.backward()
        optimizer.step()

        # add this batch's loss once, after the batch is complete
        epoch_loss += float(loss)

        if (batch+1) % 5 == 0:
            # progress report for every fifth batch
            print(f'{watch.give()} Epoch {epoch + 1} Batch {batch + 1} Batch Loss: {round(float(loss),6)}')

    # report the end of the epoch and its total loss
    print(f'{watch.give()} Epoch {epoch + 1} completed! Total Loss: {round(float(epoch_loss),6)}')

    # every few epochs validate the network, generate a song sample and save a checkpoint
    if (epoch+1) % test_network == 0:
        validate_network(True)
        generate_sample_song(1000, f'outputs/midi_e{epoch+1}',saveMIDI = True)
        save_checkpoint(net,optimizer,epoch,loss)
        # validation and generation switch the network to evaluation mode
        net.train()

----- Loaded the network from 'saved_net/checkpoint_e50.pt' -----


KeyboardInterrupt: ignored

## 4.6: Run-Only Model

(Makes MIDI on the go!)

In [None]:
# set loading values as variables
sample_MIDI_size = 40
total_samples = 30
total_seeds = all_feature_matrix.shape[0]//input_size

# load a checkpoint
checkpoint_name = "checkpoint_e50"

watch = StopWatch()

net, _, _, _ = load_checkpoint(net,optimizer,checkpoint_name)

for i in range(total_samples):
    generate_sample_song(sample_MIDI_size, f'outputs_run/sample_{(i+1):02d}',saveMIDI = True, saveNumpy=False, seed=random.randint(0, total_seeds))
    print(f'{watch.give()} sample_{(i+1):02d} generated')

----- Loaded the network from 'saved_net/checkpoint_e50.pt' -----
[00:00:00] sample_01 generated
[00:00:00] sample_02 generated
[00:00:00] sample_03 generated
[00:00:00] sample_04 generated
[00:00:00] sample_05 generated
[00:00:00] sample_06 generated
[00:00:00] sample_07 generated
[00:00:00] sample_08 generated
[00:00:00] sample_09 generated
[00:00:00] sample_10 generated
[00:00:00] sample_11 generated
[00:00:00] sample_12 generated
[00:00:00] sample_13 generated
[00:00:00] sample_14 generated
[00:00:01] sample_15 generated
[00:00:01] sample_16 generated
[00:00:01] sample_17 generated
[00:00:01] sample_18 generated
[00:00:01] sample_19 generated
[00:00:01] sample_20 generated
[00:00:01] sample_21 generated
[00:00:01] sample_22 generated
[00:00:01] sample_23 generated
[00:00:01] sample_24 generated
[00:00:01] sample_25 generated
[00:00:01] sample_26 generated
[00:00:01] sample_27 generated
[00:00:01] sample_28 generated
[00:00:01] sample_29 generated
[00:00:01] sample_30 generated
