In [231]:
import torch
from torch import nn, optim
import torch.nn.functional as nnF
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
import os, glob, csv
import numpy as np
from random import randint

In [664]:
# How many time steps to input with each datapoint. (=WIDTH OF RECURRENT NETWORK)
num_of_time_steps_in_input = 50
num_of_input_chromas = 24

# Chord vocabulary is defined in this dictionary.
#Major/Minor vocab
#chord_dict = {'N':0, 'X':0, 'C:maj':1, 'C:min':2, 'Db:maj':3, 'C#:maj':3, 'Db:min':4, 'C#:min':4, 'D:maj':5, 'D:min':6,
#                      'Eb:maj':7, 'D#:maj':7, 'Eb:min':8, 'D#:min':8, 'E:maj':9, 'Fb:maj':9, 'E:min':10, 'F:maj':11, 'F:min':12,
#                      'Gb:maj':13, 'F#:maj':13, 'Gb:min':14, 'F#:min':14, 'G:maj':15, 'G:min':16,
#                      'Ab:maj':17, 'G#:maj':17, 'Ab:min':18, 'G#:min':18, 'A:maj':19, 'A:min':20,
#                      'Bb:maj':21, 'A#:maj':21, 'Bb:min':22, 'A#:min':22, 'B:maj':23, 'Cb:maj':23, 'B:min':24, 'Cb:min':24}
#chord_annotations_directory = 'Data/ChordAnnotations/McGill-GroundTruth/'
#chord_dict['Cb:maj']

#Major(7)/Minor(7) vocab.
#chord_dict = {'N':0, 'X':0, 'C:maj':1, 'C:min':2, 'Db:maj':3, 'C#:maj':3, 'Db:min':4, 'C#:min':4, 'D:maj':5, 'D:min':6,
#                      'Eb:maj':7, 'D#:maj':7, 'Eb:min':8, 'D#:min':8, 'E:maj':9, 'Fb:maj':9, 'E:min':10, 'F:maj':11, 'F:min':12,
#                      'Gb:maj':13, 'F#:maj':13, 'Gb:min':14, 'F#:min':14, 'G:maj':15, 'G:min':16,
#                      'Ab:maj':17, 'G#:maj':17, 'Ab:min':18, 'G#:min':18, 'A:maj':19, 'A:min':20,
#                      'Bb:maj':21, 'A#:maj':21, 'Bb:min':22, 'A#:min':22, 'B:maj':23, 'Cb:maj':23, 'B:min':24, 'Cb:min':24,
#                      'C:maj7':25, 'C:min7':26, 'C:7':27, 'Db:maj7':28, 'C#:maj7':28, 'Db:min7':29, 'C#:min7':29,
#                      'Db:7':30, 'C#:7':30, 'D:maj7':31, 'D:min7':32, 'D:7':33, 'Eb:maj7':34, 'D#:maj7':34,
#                      'Eb:min7':35, 'D#:min7':35, 'Eb:7':36, 'D#:7':36, 'E:maj7':37, 'Fb:maj7':37, 'E:min7':38,
#                      'E:7':39, 'F:maj7':40, 'F:min7':41, 'F:7':42, 'F#:maj7':43, 'Gb:maj7':43, 'F#:min7':44, 'Gb:min7':44,
#                      'F#:7':45, 'Gb:7':45, 'G:maj7':46, 'G:min7':47, 'G:7':48, 'Ab:maj7':49, 'G#:maj7':49,
#                      'Ab:min7':50, 'G#:min7':50, 'Ab:7':51, 'G#:7':51, 'A:maj7':52, 'A:min7':53, 'A:7':54,
#                      'Bb:maj7':55, 'A#:maj7':55, 'Bb:min7':56, 'A#:min7':56, 'Bb:7':57, 'A#:7':57, 'B:maj7':58,
#                      'Cb:maj7':59, 'B:min7':59, 'Cb:min7':59, 'B:7':60, 'Cb:7':60}
#chord_annotations_directory = 'Data/ChordAnnotations_majmin7/McGill-GroundTruth_majmin7/'

#Major/Minor with inversions vocab.
chord_dict = {'N':0, 'X':0, 'C:maj':1, 'C:min':2, 'Db:maj':3, 'C#:maj':3, 'Db:min':4, 'C#:min':4, 'D:maj':5, 'D:min':6,
              'Eb:maj':7, 'D#:maj':7, 'Eb:min':8, 'D#:min':8, 'E:maj':9, 'Fb:maj':9, 'E:min':10, 'F:maj':11, 'F:min':12,
              'Gb:maj':13, 'F#:maj':13, 'Gb:min':14, 'F#:min':14, 'G:maj':15, 'G:min':16,
              'Ab:maj':17, 'G#:maj':17, 'Ab:min':18, 'G#:min':18, 'A:maj':19, 'A:min':20,
              'Bb:maj':21, 'A#:maj':21, 'Bb:min':22, 'A#:min':22, 'B:maj':23, 'Cb:maj':23, 'B:min':24, 'Cb:min':24,
              'C:maj/3':25, 'C:min/b3':26, 'C:maj/5':27, 'C:min/5':28, 'Db:maj/3':29, 'C#:maj/3':29, 'Db:min/b3':30,
              'C#:min/b3':30, 'Db:maj/5':31, 'C#:maj/5':31, 'Db:min/5':32, 'C#:min/5':32, 'D:maj/3':33, 'D:min/b3':34,
              'D:maj/5':35, 'D:min/5':36, 'Eb:maj/3':37, 'D#:maj/3':37, 'Eb:min/b3':38, 'D#:min/b3':38,
              'Eb:maj/5':39, 'D#:maj/5':39, 'Eb:min/5':40, 'D#:min/5':40, 'E:maj/3':41, 'Fb:maj/3':41, 'E:min/b3':42,
              'E:maj/5':43, 'Fb:maj/5':43, 'E:min/5':44, 'F:maj/3':45, 'F:min/b3':46, 'F:maj/5':47, 'F:min/5':48,
              'Gb:maj/3':49, 'F#:maj/3':49, 'Gb:min/b3':50, 'F#:min/b3':50, 'Gb:maj/5':51, 'F#:maj/5':51,
              'Gb:min/5':52, 'F#:min/5':52, 'G:maj/3':53, 'G:min/b3':54, 'G:maj/5':55, 'G:min/5':56, 'Ab:maj/3':57,
              'G#:maj/3':57, 'Ab:min/b3':58, 'G#:min/b3':58, 'Ab:maj/5':59, 'G#:maj/5':59, 'Ab:min/5':60, 'G#:min/5':60,
              'A:maj/3':61, 'A:min/b3':62, 'A:maj/5':63, 'A:min/5':64, 'Bb:maj/3':65, 'A#:maj/3':65, 'Bb:min/b3':66,
              'A#:min/b3':66, 'Bb:maj/5':67, 'A#:maj/5':67, 'Bb:min/5':68, 'A#:min/5':68, 'B:maj/3':69, 'Cb:maj/3':69,
              'B:min/b3':70, 'Cb:min/b3':70, 'B:maj/5':71, 'Cb:maj/5':71, 'B:min/5':72, 'Cb:min/5':72}
chord_annotations_directory = 'Data/ChordAnnotations_majmininv/McGill-GroundTruth_majmininv/'



num_of_chord_classes = max(chord_dict.values())+1


# Create the dataset

In [385]:
def createTrainAndTestSets(train_set_size, test_set_size, csv_file_numbers, time=1):

    train_set = []
    train_samples_added = 0
    while train_samples_added < train_set_size:
        #Pick a random index in the csv_file_numbers
        rand_index = randint(0,len(csv_file_numbers)-1)
        #Add the file number at that index to the train set
        train_set += [csv_file_numbers[rand_index]]
        #Remove the file number at that index from the list of file numbers
        csv_file_numbers.pop(rand_index)
        
        train_samples_added += 1

    test_set = []
    test_samples_added = 0
    while test_samples_added < test_set_size:
        #Pick a random index in the csv_file_numbers
        rand_index = randint(0,len(csv_file_numbers)-1)
        #Add the file number at that index to the train set
        test_set += [csv_file_numbers[rand_index]]
        #Remove the file number at that index from the list of file numbers
        csv_file_numbers.pop(rand_index)
        
        test_samples_added += 1
    
    return train_set, test_set


In [528]:
class ChordDataset(Dataset):
    def __init__(self, csv_file_numbers=-1, num_of_datapoints_per_file=-1, file_to_load_from=-1):
        """Creates a dataset from a list of csv file numbers.
        Creates a variable number of data points per file. Takes datapoints in neighbouring blocks.
        There are two types of csv file:
        1) A set of chromagrams for each time step
        2) A chord symbol for each time step
        Both of these need to be accessed to find a datapoint."""
        if file_to_load_from != -1:
            print('Loading from ' + file_to_load_from)
            self.samples = np.load(file_to_load_from)
            self.length = self.samples.shape[0]
        else:
            self.num_of_datapoints_per_file = num_of_datapoints_per_file
            self.length = self.num_of_datapoints_per_file*len(csv_file_numbers)

            #Samples will be a numpy array of dimensions: [num_of_samples, num_of_time_steps, 25 (24 chromas + 1 label)]
            self.samples = np.zeros((self.length, num_of_time_steps_in_input, num_of_input_chromas+1))

            #Go through each of the csv files and add them to the np array of samples.
            for num_of_file_used, csv_file_number in enumerate(csv_file_numbers):
                for i in range(num_of_datapoints_per_file):
                    sample_number = num_of_file_used*self.num_of_datapoints_per_file + i
                    sample = np.zeros((num_of_time_steps_in_input, num_of_input_chromas+1))

                    #Work out how long this song is. (row_count)
                    csv_chroma_filename = 'Data/Chromagrams/McGill-Chromagrams/'+csv_file_number+'_bothchroma.csv'
                    with open(csv_chroma_filename, 'r') as chroma_file:
                        rdr = csv.reader(chroma_file)
                        row_count = sum(1 for row in rdr)

                    #A lot of the time the first 500 or so rows are no chord so take these points out.
                    random_starting_row = randint(500, row_count-num_of_time_steps_in_input)

                    #Add the chroma values for this sample
                    with open(csv_chroma_filename, 'r') as chroma_file:
                        rdr = csv.reader(chroma_file)

                        desiredrows=[row for idx, row in enumerate(rdr) if idx in range(random_starting_row,random_starting_row+num_of_time_steps_in_input)]
                        for idx, row in enumerate(desiredrows):
                            #row is ['','{TimeInstant}','{ChordSymbol}']
                            #Add the chroma values to the sample at the desired time index
                            sample[idx, 0:num_of_input_chromas] = row[2:]

                    #Add the chord label for this sample
                    if num_of_chord_classes == 25:
                        csv_chord_filename = chord_annotations_directory+csv_file_number+'_chords.csv'#used for T=50
                    else:
                        csv_chord_filename = chord_annotations_directory+csv_file_number+'.csv' #Used for T!=50
                    
                    with open(csv_chord_filename, 'r') as chord_file:
                        rdr = csv.reader(chord_file)
                        desiredrows=[row for idx, row in enumerate(rdr) if idx in range(random_starting_row,random_starting_row+num_of_time_steps_in_input)]
                        for idx, row in enumerate(desiredrows):
                            #row is ['{TimeInstant}\t{ChordSymbol}']
                            #Add the chord label to the sample at the desired time index
                            sample[idx, num_of_input_chromas] = chord_dict[row[0].split('\t')[1]]

                    self.samples[sample_number, :, :] = sample
    
    def __len__(self):
        return self.length
    
    def __getitem__(self, index):
        sample_ = self.samples[index,:,:]
        sample = {
            'chromas': sample_[:, 0:num_of_input_chromas], #Chromas are up to the num_of_chromas
            'labels': sample_[:, num_of_input_chromas] #Labels are the one after
        }
        return sample
    
    def saveToFile(self, fileName):
        np.save(fileName, self.samples)
        
#Create a certain number of datapoints from each file
num_of_files_in_dataset = 890
num_of_datapoints_per_file = 20
num_of_datapoints = num_of_files_in_dataset*num_of_datapoints_per_file

# The total number of files
#dataset_size = 890
num_in_train_set = 0.95*num_of_files_in_dataset//1
num_in_test_set = 0.05*num_of_files_in_dataset//1


In [529]:
csv_file_numbers = []
for f in os.scandir('Data/ChordAnnotations/McGill-GroundTruth'):
    if f.name.endswith('.csv'):
        csv_file_numbers += [f.name[0:4]]
assert(len(csv_file_numbers) == num_of_files_in_dataset)

#Each set is assigned a certain number of csv file numbers which are
#then accessed and loaded inside the ChordDataset class.
train_set, test_set = createTrainAndTestSets(num_in_train_set, num_in_test_set, csv_file_numbers, time=1)
train_set = ChordDataset(csv_file_numbers=train_set, num_of_datapoints_per_file=num_of_datapoints_per_file)
test_set = ChordDataset(csv_file_numbers=test_set, num_of_datapoints_per_file=num_of_datapoints_per_file)


# Save the dataset to file.

In [530]:
trainSetFileName = 'TrainSet_inputSize_' + str(num_of_time_steps_in_input) + '_chordVocab_' + str(num_of_chord_classes)
testSetFileName = 'TestSet_inputSize_' + str(num_of_time_steps_in_input) + '_chordVocab_' + str(num_of_chord_classes)
train_set.saveToFile(trainSetFileName)
test_set.saveToFile(testSetFileName)

# Load the dataset from a file.

In [665]:
trainSetFileName = 'TrainSet_inputSize_' + str(num_of_time_steps_in_input) + '_chordVocab_' + str(num_of_chord_classes)
testSetFileName = 'TestSet_inputSize_' + str(num_of_time_steps_in_input) + '_chordVocab_' + str(num_of_chord_classes)
train_set = ChordDataset(file_to_load_from=trainSetFileName + '.npy')
test_set = ChordDataset(file_to_load_from=testSetFileName + '.npy')

Loading from TrainSet_inputSize_50_chordVocab_73.npy
Loading from TestSet_inputSize_50_chordVocab_73.npy


In [666]:
BATCH_SIZE=64
train_loader = DataLoader(dataset=train_set,
                          batch_size=BATCH_SIZE
)
test_loader = DataLoader(dataset=test_set, batch_size=BATCH_SIZE)

# Create the Network

In [672]:
class RecurrentNet(nn.Module):
    def __init__(self):
        super(RecurrentNet, self).__init__()
        #Fully connected layer.
        #self.fc1 = nn.Linear(num_of_time_steps_in_input*num_of_input_chromas,num_of_time_steps_in_input*num_of_input_chromas)
        
        #Recurrent layer.
        #self.rnn1 = nn.RNN(input_size=24, hidden_size=25, batch_first=True)
        
        #Convolutional layer. Only convolve along the time dimension, keeping the time dimension intact.
        #self.conv1 = nn.Conv2d(1,1,(1,3), padding=(0,1), stride=1, dilation=1)
        
        #Dilated Convolutional layer.
        #self.conv2 = nn.Conv2d(1,1,(1,3), padding=(0,2), stride=1, dilation=2)
        
        #Dilated Convolutional layer.
        #self.conv3 = nn.Conv2d(1,1,(1,3), padding=(0,3), stride=1, dilation=3)
        
        #Time-distributed dense layer. Convolutional kernels with as many
        #output channels as inputs in the one input channel. And a kernel of height 24 and width 1.
        #(inChannels, outChannels, (kernelHeight, kernelWidth))
        #self.conv4 = nn.Conv2d(1,num_of_chord_classes,(num_of_input_chromas,1))
        
        #RECURRENT LAYER
        self.num_recurrent_layers=3
        self.bidirectional=False
        self.lstm1 = nn.LSTM(input_size=num_of_input_chromas, hidden_size=num_of_chord_classes, batch_first=True, num_layers=self.num_recurrent_layers, dropout=0.1, bidirectional=self.bidirectional)
        
        #Fully connected output layers
        #self.fc_out_1 = nn.Linear(num_of_time_steps_in_input*num_of_chord_classes, num_of_time_steps_in_input*num_of_chord_classes)
        #self.fc_out_2 = nn.Linear(num_of_time_steps_in_input*num_of_chord_classes, num_of_time_steps_in_input*num_of_chord_classes)
        
        
    def forward(self, x):
        #Input is of shape (BATCH_SIZE, TIME_STEPS, CHROMAS_PER_TIME_STEP)
        current_batch_size = x.shape[0]
        #print(x.shape)
        
        #Fully connected layer
        #x = torch.sigmoid(self.fc1(x.view(current_batch_size, num_of_time_steps_in_input*num_of_input_chromas)))
        
        #Convolutional layer.
        #x = nnF.relu(self.conv1(x.view(current_batch_size, 1, num_of_input_chromas, num_of_time_steps_in_input)))
        
        #Dilated convolutional layer
        #x = nnF.relu(self.conv2(x.view(current_batch_size, 1, num_of_input_chromas, num_of_time_steps_in_input)))
        
        #Dilated-er convolutional layer
        #x = nnF.relu(self.conv3(x.view(current_batch_size, 1, num_of_input_chromas, num_of_time_steps_in_input)))
        
        #Time distributed layer.
        #x = nnF.relu(self.conv4(x.view(current_batch_size,1,num_of_input_chromas,num_of_time_steps_in_input)))
        
        #Initialise hidden state with (num_layers * num_directions, batch_size, hidden_dimension)
        if self.bidirectional:
            h0 = torch.zeros(2*self.num_recurrent_layers, current_batch_size, num_of_chord_classes).double().to(device)
            c0 = torch.zeros(2*self.num_recurrent_layers, current_batch_size, num_of_chord_classes).double().to(device)
        else:
            h0 = torch.zeros(self.num_recurrent_layers, current_batch_size, num_of_chord_classes).double().to(device)
            c0 = torch.zeros(self.num_recurrent_layers, current_batch_size, num_of_chord_classes).double().to(device)

        x = x.view(current_batch_size,num_of_time_steps_in_input,num_of_input_chromas)
        x, hn = self.lstm1(x, (h0,c0))

        #Fully connected output layers
        #x = nnF.relu(self.fc_out_1(x.reshape(current_batch_size, num_of_time_steps_in_input*num_of_chord_classes))).view(current_batch_size, num_of_time_steps_in_input, num_of_chord_classes)
        #x = nnF.relu(self.fc_out_2(x.reshape(current_batch_size, num_of_time_steps_in_input*num_of_chord_classes))).view(current_batch_size, num_of_time_steps_in_input, num_of_chord_classes)
         
        #print(x.shape)
        return x


# Train the network

In [673]:
net = RecurrentNet()
net.double()

cross_entropy_loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters())

device = torch.device('cuda:3')
cpuDevice = torch.device('cpu')
net.to(device)

accuracy_stats = {
    'train': [],
    'test': []
}
loss_stats = {
    'train': [],
    'test': []
}

In [674]:
def batch_accuracy(y_pred, y_test):
    y_pred_softmax = torch.log_softmax(y_pred, dim = 2)
    #print('y_pred_softmax shape', y_pred_softmax.shape)
    _, y_pred_tags = torch.max(y_pred_softmax, dim = 2)    
    #print('y_pred_tags shape', y_pred_tags.shape)
    
    correct_pred = (y_pred_tags == y_test).float()
    #print(correct_pred)
    acc = correct_pred.sum() / correct_pred.numel()
    
    acc = torch.round(acc * 100)
    
    return acc

In [675]:
num_of_epochs = 100

print('Beginning Training ..')

prevNet = -1

for epoch in range(num_of_epochs):
    # TRAINING
    train_epoch_loss = 0
    train_epoch_acc = 0
    
    net.train()
    i=0
    for train_batch in train_loader:
        #X_train_batch, y_train_batch = X_train_batch.to(device), y_train_batch.to(device)
        optimizer.zero_grad()
        X_train_batch = train_batch['chromas'].double().to(device)
        #print(X_train_batch[0,:,:])
        y_train_batch = train_batch['labels'].long().to(device)
        #print(y_train_batch[0,:])
        current_batch_size = X_train_batch.shape[0]
        
        y_train_pred = net(X_train_batch)
        #if i==0:
        #    chordSelections=[]
        #    actualChords=[]
        #    
        #    for row in y_train_pred[0]:
        #        chordSelections += [torch.argmax(row).item()]
        #    for item in y_train_batch[0]:
        #        actualChords += [item.item()]
        #    #actualChords = list(y_train_batch[0,:].item())
        #    print(chordSelections)
        #    print(actualChords)
        #    i+=1
        
        train_loss = cross_entropy_loss(y_train_pred.reshape(current_batch_size*num_of_time_steps_in_input,num_of_chord_classes),
                                        y_train_batch.view(-1))
        train_acc = batch_accuracy(y_train_pred, y_train_batch)
        
        train_loss.backward()
        optimizer.step()
        
        train_epoch_loss += train_loss.item()
        train_epoch_acc += train_acc.item()
        
        
    # VALIDATION   
    i=0
    with torch.no_grad():
        
        test_epoch_loss = 0
        test_epoch_acc = 0
        
        net.eval()
        for test_batch in test_loader:
            #X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)
            X_test_batch = test_batch['chromas'].double().to(device)
            y_test_batch = test_batch['labels'].long().to(device)
            current_batch_size = X_test_batch.shape[0]
            
            y_test_pred = net(X_test_batch)
            
            #Print out output for a whole batch
            #if i==12:
            #    for j,batchitem in enumerate(y_test_pred):
            #        chordSelections=[]
            #        actualChords=[]
            #
            #        for row in y_test_pred[j]:
            #            chordSelections += [chord_dict_reverse[torch.argmax(row).item()]]
            #        for item in y_test_batch[j]:
            #            actualChords += [chord_dict_reverse[item.item()]]
            #        #actualChords = list(y_train_batch[0,:].item())
            #        print(chordSelections)
            #        print(actualChords)
            #i+=1
            
            test_loss = cross_entropy_loss(y_test_pred.reshape(current_batch_size*num_of_time_steps_in_input,num_of_chord_classes),
                                           y_test_batch.view(-1))
            test_acc = batch_accuracy(y_test_pred, y_test_batch)
            
            test_epoch_loss += test_loss.item()
            test_epoch_acc += test_acc.item()
            
    loss_stats['train'].append(train_epoch_loss/len(train_loader))
    loss_stats['test'].append(test_epoch_loss/len(test_loader))
    accuracy_stats['train'].append(train_epoch_acc/len(train_loader))
    accuracy_stats['test'].append(test_epoch_acc/len(test_loader))
    
    print(f'Epoch {epoch+0:03}: | Train Loss: {train_epoch_loss/len(train_loader):.5f} | Test Loss: {test_epoch_loss/len(test_loader):.5f} | Train Acc: {train_epoch_acc/len(train_loader):.3f}| Test Acc: {test_epoch_acc/len(test_loader):.3f}')
    prevNet = net


Beginning Training ..
Epoch 000: | Train Loss: 3.44359 | Test Loss: 3.31551 | Train Acc: 10.185| Test Acc: 9.714
Epoch 001: | Train Loss: 3.17169 | Test Loss: 3.10640 | Train Acc: 15.792| Test Acc: 16.357
Epoch 002: | Train Loss: 3.03376 | Test Loss: 3.01254 | Train Acc: 18.400| Test Acc: 19.214
Epoch 003: | Train Loss: 2.96838 | Test Loss: 2.97293 | Train Acc: 19.777| Test Acc: 20.286
Epoch 004: | Train Loss: 2.93754 | Test Loss: 2.95414 | Train Acc: 21.192| Test Acc: 22.357
Epoch 005: | Train Loss: 2.91677 | Test Loss: 2.93401 | Train Acc: 23.045| Test Acc: 24.429
Epoch 006: | Train Loss: 2.89774 | Test Loss: 2.91123 | Train Acc: 24.464| Test Acc: 24.571
Epoch 007: | Train Loss: 2.87784 | Test Loss: 2.89913 | Train Acc: 25.702| Test Acc: 25.000
Epoch 008: | Train Loss: 2.86150 | Test Loss: 2.88344 | Train Acc: 29.219| Test Acc: 29.000
Epoch 009: | Train Loss: 2.84631 | Test Loss: 2.86860 | Train Acc: 30.974| Test Acc: 29.571
Epoch 010: | Train Loss: 2.83690 | Test Loss: 2.86075 | Tra

KeyboardInterrupt: 

# Test the chopin file...

In [676]:
#First create a mapping from class number to chord
#MajMin Chord Vocab
#chord_dict_reverse = {0:'N', 1:'C:maj', 2:'C:min', 3:'Db:maj', 4:'Db:min', 5:'D:maj', 6:'D:min', 7:'Eb:maj',
#                      8:'Eb:min', 9:'E:maj', 10:'E:min', 11:'F:maj', 12:'F:min', 13:'Gb:maj', 14:'Gb:min', 15:'G:maj',
#                      16:'G:min', 17: 'Ab:maj', 18:'Ab:min', 19:'A:maj', 20:'A:min', 21:'Bb:maj', 22:'Bb:min',
#                      23:'B:maj', 24:'B:min'}


#MajMinInv Chord Vocab
chord_dict_reverse = {0:'N', 1:'C:maj', 2:'C:min', 3:'Db:maj', 4:'Db:min', 5:'D:maj', 6:'D:min', 7:'Eb:maj',
                     8:'Eb:min', 9:'E:maj', 10:'E:min', 11:'F:maj', 12:'F:min', 13:'Gb:maj', 14:'Gb:min', 15:'G:maj',
                     16:'G:min', 17: 'Ab:maj', 18:'Ab:min', 19:'A:maj', 20:'A:min', 21:'Bb:maj', 22:'Bb:min',
                     23:'B:maj', 24:'B:min', 25:'C:maj/3', 26:'C:min/b3', 27:'C:maj/5', 28:'C:min/5', 29:'Db:maj/3',
                     30:'Db:min/b3', 31:'Db:maj/5', 32:'Db:min/5', 33:'D:maj/3', 34:'D:min/b3', 35:'D:maj/5',
                     36:'D:min/5', 37:'Eb:maj/3', 38:'Eb:min/b3', 39:'Eb:maj/5', 40:'Eb:min/5', 41:'E:maj/3',
                     42:'E:min/b3', 43:'E:maj/5', 44:'E:min/5', 45:'F:maj/3', 46:'F:min/b3', 47:'F:maj/5', 48:'F:min/5',
                     49:'Gb:maj/3', 50:'Gb:min/b3', 51:'Gb:maj/5', 52:'Gb:min/5', 53:'G:maj/3', 54:'G:min/b3',
                     55:'G:maj/5', 56:'G:min/5', 57:'Ab:maj/3', 58:'Ab:min/b3', 59:'Ab:maj/5', 60:'Ab:min/5',
                     61:'A:maj/3', 62:'A:min/b3', 63:'A:maj/5', 64:'A:min/5', 65:'Bb:maj/3', 66:'Bb:min/b3',
                     67:'Bb:maj/5', 68:'Bb:min/5', 69:'B:maj/3', 70:'B:min/b3', 71:'B:maj/5', 72:'B:min/5'}


In [678]:
#Extract the right chromas and plug them into the network to get predictions.

ChromasPerTimeStep = 24
IdxToReadFrom = 100
#Do batch_size batches of 50 time steps. 50 time steps is about 2s of audio.
NumOfTimeStepsToRead = 50
batch_size=5

timeStampToReadFrom = 271
IdxToReadFrom = 271*44100//2048

net = prevNet

chopinChromas = np.zeros((batch_size,NumOfTimeStepsToRead, ChromasPerTimeStep))

with open('Data/ChopinChromagrams.csv', 'r') as f:
    rdr = csv.reader(f)
    batch_num=0
    for idx, row in enumerate(rdr):
        if IdxToReadFrom <= idx < IdxToReadFrom+NumOfTimeStepsToRead:

            chopinChromas[batch_num,idx-IdxToReadFrom,:] = row[1:]
            
            if idx == IdxToReadFrom+NumOfTimeStepsToRead-1:
                batch_num+=1
                IdxToReadFrom = IdxToReadFrom+NumOfTimeStepsToRead

            if batch_num==batch_size:
                break

chopinChromas = torch.Tensor(chopinChromas).double().to(device)

#Input into the net and get them back on the CPU.
chopin_predictions = net(chopinChromas).to(cpuDevice)
chopin_predictions = chopin_predictions.view(batch_size*NumOfTimeStepsToRead,num_of_chord_classes)

In [679]:
#Get output and turn it back into chords.
#chopin_predictions = (batch_num, time_step, chord_values)
secondsPerTimeStep = 2048/44100
chopin_chord_numbers = np.zeros((chopin_predictions.shape[0]))
timesAndChords = []

prevChord = -1

for time_step in range(NumOfTimeStepsToRead*batch_size):
    
    #Which chord has the highest score.
    maxIdx = torch.argmax(chopin_predictions[time_step]).item()
    
    #Add this chord number to an array to be median filtered.
    chopin_chord_numbers[time_step] = maxIdx

    #Work out the entropy in this decision
    entropy = torch.distributions.Categorical(probs=chopin_predictions[time_step]).entropy().item()
    
    #Get the score of the chord out
    chordScore = torch.max(chopin_predictions[time_step]).item()
    
    #Get the chord symbol out
    chord = chord_dict_reverse[maxIdx]
    if chord != prevChord:
        #New chord onset!
        timesAndChords += [[time_step*secondsPerTimeStep, chord]]
    
        prevChord = chord
    
    #print('Time:', time_step*secondsPerTimeStep, 'Chord:', chord, 'Score:', chordScore, 'Entropy:', entropy)
for time, chord in timesAndChords:
    print("Time: %4.1f" % (time), 'Chord:', chord)


Time:  0.0 Chord: F:maj
Time:  0.0 Chord: N
Time:  0.2 Chord: Bb:maj
Time:  2.3 Chord: N
Time:  3.0 Chord: Bb:maj
Time:  4.6 Chord: N
Time:  5.0 Chord: E:maj
Time:  5.2 Chord: N
Time:  5.9 Chord: A:maj
Time:  7.0 Chord: N
Time:  7.2 Chord: D:maj
Time:  7.6 Chord: N
Time:  8.3 Chord: C:maj
Time:  8.5 Chord: C:min
Time:  8.5 Chord: C:maj
Time:  8.6 Chord: C:min
Time:  9.1 Chord: Bb:maj
Time:  9.3 Chord: N
Time:  9.4 Chord: Bb:maj
Time:  9.8 Chord: F:maj
Time:  9.9 Chord: N
Time: 10.0 Chord: F:maj
Time: 10.1 Chord: N
Time: 10.8 Chord: Bb:maj


In [660]:
timesAndChords

[]

In [680]:
from scipy.signal import medfilt

In [681]:
#Median filter the chors and display nicely
chopin_chord_numbers_filtered = medfilt(np.squeeze(chopin_chord_numbers), kernel_size=9)
prevChord = -1
filteredTimesAndChords = []
for i, chordNumber in enumerate(chopin_chord_numbers_filtered):
    if chordNumber != prevChord:
        filteredTimesAndChords += [[i*secondsPerTimeStep, chord_dict_reverse[chordNumber]]]
    prevChord=chordNumber
    
for time, chord in filteredTimesAndChords:
    print("Time: %4.1f" % (time), 'Chord:', chord)

Time:  0.0 Chord: N
Time:  0.2 Chord: F:maj
Time:  0.2 Chord: Bb:maj
Time:  2.3 Chord: N
Time:  3.0 Chord: Bb:maj
Time:  4.6 Chord: N
Time:  5.9 Chord: A:maj
Time:  7.0 Chord: D:maj
Time:  7.6 Chord: N
Time:  8.3 Chord: C:maj
Time:  8.5 Chord: C:min
Time:  9.2 Chord: Bb:maj
Time:  9.8 Chord: F:maj
Time: 10.0 Chord: N
Time: 10.8 Chord: Bb:maj
