In [1]:
import torch
from torch import nn, optim
import torch.nn.functional as nnF
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
import os, glob, csv
import numpy as np
from random import randint

In [223]:
# Chord vocabulary is defined in this dictionary.
#chord_dict = {'N':0, 'X':0, 'C:maj':1, 'C:min':2, 'Db:maj':3, 'C#:maj':3, 'Db:min':4, 'C#:min':4, 'D:maj':5, 'D:min':6,
#                      'Eb:maj':7, 'D#:maj':7, 'Eb:min':8, 'D#:min':8, 'E:maj':9, 'Fb:maj':9, 'E:min':10, 'F:maj':11, 'F:min':12,
#                      'Gb:maj':13, 'F#:maj':13, 'Gb:min':14, 'F#:min':14, 'G:maj':15, 'G:min':16,
#                      'Ab:maj':17, 'G#:maj':17, 'Ab:min':18, 'G#:min':18, 'A:maj':19, 'A:min':20,
#                      'Bb:maj':21, 'A#:maj':21, 'Bb:min':22, 'A#:min':22, 'B:maj':23, 'Cb:maj':23, 'B:min':24}
#chord_annotations_directory = 'Data/ChordAnnotations/McGill-GroundTruth'
#chord_dict['Cb:maj']


#chord_dict = {'N':0, 'X':0, 'C:maj':1, 'C:min':2, 'Db:maj':3, 'C#:maj':3, 'Db:min':4, 'C#:min':4, 'D:maj':5, 'D:min':6,
#                      'Eb:maj':7, 'D#:maj':7, 'Eb:min':8, 'D#:min':8, 'E:maj':9, 'Fb:maj':9, 'E:min':10, 'F:maj':11, 'F:min':12,
#                      'Gb:maj':13, 'F#:maj':13, 'Gb:min':14, 'F#:min':14, 'G:maj':15, 'G:min':16,
#                      'Ab:maj':17, 'G#:maj':17, 'Ab:min':18, 'G#:min':18, 'A:maj':19, 'A:min':20,
#                      'Bb:maj':21, 'A#:maj':21, 'Bb:min':22, 'A#:min':22, 'B:maj':23, 'Cb:maj':23, 'B:min':24, 'Cb:min':24,
#                      'C:maj7':25, 'C:min7':26, 'C:7':27, 'Db:maj7':28, 'C#:maj7':28, 'Db:min7':29, 'C#:min7':29,
#                      'Db:7':30, 'C#:7':30, 'D:maj7':31, 'D:min7':32, 'D:7':33, 'Eb:maj7':34, 'D#:maj7':34,
#                      'Eb:min7':35, 'D#:min7':35, 'Eb:7':36, 'D#:7':36, 'E:maj7':37, 'Fb:maj7':37, 'E:min7':38,
#                      'E:7':39, 'F:maj7':40, 'F:min7':41, 'F:7':42, 'F#:maj7':43, 'Gb:maj7':43, 'F#:min7':44, 'Gb:min7':44,
#                      'F#:7':45, 'Gb:7':45, 'G:maj7':46, 'G:min7':47, 'G:7':48, 'Ab:maj7':49, 'G#:maj7':49,
#                      'Ab:min7':50, 'G#:min7':50, 'Ab:7':51, 'G#:7':51, 'A:maj7':52, 'A:min7':53, 'A:7':54,
#                      'Bb:maj7':55, 'A#:maj7':55, 'Bb:min7':56, 'A#:min7':56, 'Bb:7':57, 'A#:7':57, 'B:maj7':58,
#                      'Cb:maj7':59, 'B:min7':59, 'Cb:min7':59, 'B:7':60, 'Cb:7':60}
#chord_annotations_directory = 'Data/ChordAnnotations_majmin7/McGill-GroundTruth_majmin7'

chord_dict = {'N':0, 'X':0, 'C:maj':1, 'C:min':2, 'Db:maj':3, 'C#:maj':3, 'Db:min':4, 'C#:min':4, 'D:maj':5, 'D:min':6,
              'Eb:maj':7, 'D#:maj':7, 'Eb:min':8, 'D#:min':8, 'E:maj':9, 'Fb:maj':9, 'E:min':10, 'F:maj':11, 'F:min':12,
              'Gb:maj':13, 'F#:maj':13, 'Gb:min':14, 'F#:min':14, 'G:maj':15, 'G:min':16,
              'Ab:maj':17, 'G#:maj':17, 'Ab:min':18, 'G#:min':18, 'A:maj':19, 'A:min':20,
              'Bb:maj':21, 'A#:maj':21, 'Bb:min':22, 'A#:min':22, 'B:maj':23, 'Cb:maj':23, 'B:min':24,
              'C:maj/3':25, 'C:min/b3':26, 'C:maj/5':27, 'C:min/5':28, 'Db:maj/3':29, 'C#:maj/3':29, 'Db:min/b3':30,
              'C#:min/b3':30, 'Db:maj/5':31, 'C#:maj/5':31, 'Db:min/5':32, 'C#:min/5':32, 'D:maj/3':33, 'D:min/b3':34,
              'D:maj/5':35, 'D:min/5':36, 'Eb:maj/3':37, 'D#:maj/3':37, 'Eb:min/b3':38, 'D#:min/b3':38,
              'Eb:maj/5':39, 'D#:maj/5':39, 'Eb:min/5':40, 'D#:min/5':40, 'E:maj/3':41, 'Fb:maj/3':41, 'E:min/b3':42,
              'E:maj/5':43, 'Fb:maj/5':43, 'E:min/5':44, 'F:maj/3':45, 'F:min/b3':46, 'F:maj/5':47, 'F:min/5':48,
              'Gb:maj/3':49, 'F#:maj/3':49, 'Gb:min/b3':50, 'F#:min/b3':50, 'Gb:maj/5':51, 'F#:maj/5':51,
              'Gb:min/5':52, 'F#:min/5':52, 'G:maj/3':53, 'G:min/b3':54, 'G:maj/5':55, 'G:min/5':56, 'Ab:maj/3':57,
              'G#:maj/3':57, 'Ab:min/b3':58, 'G#:min/b3':58, 'Ab:maj/5':59, 'G#:maj/5':59, 'Ab:min/5':60, 'G#:min/5':60,
              'A:maj/3':61, 'A:min/b3':62, 'A:maj/5':63, 'A:min/5':64, 'Bb:maj/3':65, 'A#:maj/3':65, 'Bb:min/b3':66,
              'A#:min/b3':66, 'Bb:maj/5':67, 'A#:maj/5':67, 'Bb:min/5':68, 'A#:min/5':68, 'B:maj/3':69, 'Cb:maj/3':69,
              'B:min/b3':70, 'Cb:min/b3':70, 'B:maj/5':71, 'Cb:maj/5':71, 'B:min/5':72, 'Cb:min/5':72}
chord_annotations_directory = 'Data/ChordAnnotations_majmininv/McGill-GroundTruth_majmininv'

chord_vocab_size = max(chord_dict.values())+1


In [224]:
train_set_size = 50000
test_set_size = 10000
train_set = np.zeros((train_set_size, 25)) #train set has a certain number of points, and 25 columns for each of the 24 input chroma vals plus 1 output chord symbol
test_set = np.zeros((test_set_size, 25))

def pickRandomChordFile():
    randomFileNumber = randint(0,889)
    filesSearched=0
    for f in os.scandir(chord_annotations_directory):
        if f.name.endswith('.csv'):
            if filesSearched == randomFileNumber:
                return f
            filesSearched += 1

#Pick a random datapoint until this value has reached the train set size.
num_of_datapoints_picked = 0
while num_of_datapoints_picked < train_set_size:
    f = pickRandomChordFile()
    dataPointsSearched=0
    try:
        with open(f, 'r') as chord_file:
            rdr = csv.reader(chord_file)
            #Take it from the middle of the song
            randomDataPointNumber = randint(500, 1500)
            for row in rdr:
                if dataPointsSearched == randomDataPointNumber:
                    #row is ['{TimeInstant}\t{ChordSymbol}']
                    #print(row[0].split('\t'))
                    if row[0].split('\t')[1] == 'X':
                        break
                    train_set[num_of_datapoints_picked, 24] = chord_dict[row[0].split('\t')[1]]

                    chroma_filepath = 'Data/Chromagrams/McGill-Chromagrams/'+f.name[0:4]+'_bothchroma.csv'
                    with open(chroma_filepath, 'r') as chroma_file:
                        rdr = csv.reader(chroma_file)
                        chordRowsSearched=0
                        for row in rdr:
                            if chordRowsSearched == dataPointsSearched:
                                train_set[num_of_datapoints_picked, 0:24] = row[2:]
                                num_of_datapoints_picked += 1
                                break
                            chordRowsSearched += 1
                #print('Datapointssearched:', dataPointsSearched)
                dataPointsSearched += 1
    except:
        print('Error occurred reading file:', f.name)

num_of_datapoints_picked=0
#Do the same for the test set.
while num_of_datapoints_picked <test_set_size:
    f = pickRandomChordFile()
    dataPointsSearched=0
    try:
        with open(f, 'r') as chord_file:
            rdr = csv.reader(chord_file)
            #Take them from the middle of the song
            randomDataPointNumber = randint(500, 3000)
            for row in rdr:
                if dataPointsSearched == randomDataPointNumber:
                    if row[0].split('\t')[1] == 'X':
                        break
                    #row is ['{TimeInstant}\t{ChordSymbol}']
                    test_set[num_of_datapoints_picked, 24] = chord_dict[row[0].split('\t')[1]]

                    chroma_filepath = 'Data/Chromagrams/McGill-Chromagrams/'+f.name[0:4]+'_bothchroma.csv'
                    with open(chroma_filepath, 'r') as chroma_file:
                        rdr = csv.reader(chroma_file)
                        chordRowsSearched=0
                        for row in rdr:
                            if chordRowsSearched == dataPointsSearched:
                                test_set[num_of_datapoints_picked, 0:24] = row[2:]
                                num_of_datapoints_picked += 1
                                break
                            chordRowsSearched += 1
                dataPointsSearched += 1
    except:
        print("Error occurred reading file", f.name)

In [174]:
print(train_set.shape)
print(test_set.shape)

(50000, 25)
(10000, 25)


In [225]:
class ChordDataset(Dataset):
    def __init__(self, chordData):
        self.labels = chordData[:,24]
        self.chromas = chordData[:,0:24]
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, index):
        sample = {
            'chromas': self.chromas[index,:],
            'label': self.labels[index]
        }
        return sample

train_dataset = ChordDataset(train_set)
test_dataset = ChordDataset(test_set)

In [226]:
BATCH_SIZE=64
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE
)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

In [227]:
class Net(nn.Module):
    #Simple chord estimator. 24 chroma inputs. chord_vocab_size number of outputs.
    def __init__(self):
        super(Net, self).__init__()
        #1 time step net
        self.fc1 = nn.Linear(24, chord_vocab_size)
        #self.fc2 = nn.Linear(12, chord_vocab_size)

    def forward(self, x):
        x = nnF.relu(self.fc1(x))
        #x = nnF.relu(self.fc2(x))
        return x



In [228]:
def multi_acc(y_pred, y_test):
    y_pred_softmax = torch.log_softmax(y_pred, dim = 1)
    _, y_pred_tags = torch.max(y_pred_softmax, dim = 1)    
    
    correct_pred = (y_pred_tags == y_test).float()
    acc = correct_pred.sum() / len(correct_pred)
    
    acc = torch.round(acc) * 100
    
    return acc

In [229]:
accuracy_stats = {
    'train': [],
    "test": []
}
loss_stats = {
    'train': [],
    "test": []
}

In [241]:
net = Net()
net.double()
cpuDevice = torch.device('cpu')
device = torch.device('cuda:0')
net.to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters())

In [256]:
num_of_epochs = 100
prevNet = -1 #Used when testing the network

print('Beginning Training...')

for epoch in range(num_of_epochs):
    # TRAINING
    train_epoch_loss = 0
    train_epoch_acc = 0
    

    net.train()
    for train_batch in train_loader:
        optimizer.zero_grad()
        X_train_batch = train_batch['chromas'].double().to(device)
        y_train_batch = train_batch['label'].long().to(device)
        
        y_train_pred = net(X_train_batch)
        
        train_loss = loss_function(y_train_pred, y_train_batch)
        train_acc = multi_acc(y_train_pred, y_train_batch)
        
        train_loss.backward()
        optimizer.step()
        
        #Add the loss and accuracy for each epoch. Will need to divide by the length of the _loader afterwards.
        train_epoch_loss += train_loss.item()
        train_epoch_acc += train_acc.item()
        
    # VALIDATION    
    with torch.no_grad():
        
        test_epoch_loss = 0
        test_epoch_acc = 0
        
        net.eval()
        for test_batch in test_loader:
            #X_val_batch, y_val_batch = X_val_batch.to(device), y_val_batch.to(device)
            X_test_batch = test_batch['chromas'].double().to(device)
            y_test_batch = test_batch['label'].long().to(device)
            
            y_test_pred = net(X_test_batch)
                
                for j,row in enumerate(y_test_pred):
                    chordPred = chord_dict_reverse[torch.argmax(row).item()]
                    chordActual = chord_dict_reverse[y_test_batch[j].item()]
                    test_preds += [[chordPred, chordActual]]
                print(test_preds)
                #print(y_test_pred.shape)
                #print(y_test_batch.shape)
                        
            test_loss = loss_function(y_test_pred, y_test_batch)
            test_acc = multi_acc(y_test_pred, y_test_batch)
            
            test_epoch_loss += test_loss.item()
            test_epoch_acc += test_acc.item()
            
    loss_stats['train'].append(train_epoch_loss/len(train_loader))
    loss_stats['test'].append(test_epoch_loss/len(test_loader))
    accuracy_stats['train'].append(train_epoch_acc/len(train_loader))
    accuracy_stats['test'].append(test_epoch_acc/len(test_loader))
    prevNet = net
                              
    print(f'Epoch {epoch+0:03}: | Train Loss: {train_epoch_loss/len(train_loader):.5f} | Test Loss: {test_epoch_loss/len(test_loader):.5f} | Train Acc: {train_epoch_acc/len(train_loader):.3f}| Test Acc: {test_epoch_acc/len(test_loader):.3f}')


Beginning Training...
[['Ab:maj', 'Ab:maj'], ['A:maj', 'A:maj'], ['C:maj', 'F:maj'], ['F:maj', 'F:maj'], ['E:maj', 'B:maj'], ['D:maj', 'D:maj'], ['E:maj', 'Bb:maj/5'], ['G:maj', 'G:maj/3'], ['E:maj', 'D:min/5'], ['D:maj', 'D:maj'], ['D:maj', 'D:maj'], ['C:maj', 'C:maj'], ['E:maj', 'E:maj'], ['F:min', 'F:min'], ['G:maj', 'N'], ['D:maj', 'C:maj'], ['A:maj', 'A:maj'], ['G:maj', 'D:maj'], ['D:maj', 'D:maj'], ['F:maj', 'F:maj'], ['E:min', 'G:maj'], ['C:maj', 'A:min/b3'], ['Ab:maj', 'Bb:min'], ['G:maj', 'G:maj'], ['D:maj', 'G:min'], ['A:maj', 'Eb:maj'], ['G:maj', 'G:maj'], ['G:maj', 'G:maj'], ['G:maj', 'N'], ['E:maj', 'E:maj'], ['E:min', 'E:maj'], ['C:maj', 'G:maj'], ['Bb:maj', 'Bb:maj'], ['D:maj', 'E:maj'], ['Db:maj', 'Ab:maj'], ['Eb:maj', 'D:maj'], ['Ab:maj', 'Ab:maj'], ['Ab:maj', 'Ab:min'], ['Ab:maj', 'Ab:maj'], ['Db:maj', 'Db:maj'], ['Bb:maj', 'Bb:maj'], ['F:maj', 'F:maj'], ['Eb:maj', 'F:maj'], ['B:maj', 'B:maj'], ['G:maj', 'G:maj'], ['A:maj', 'B:min'], ['D:maj', 'D:maj'], ['C:maj', 'C:m

KeyboardInterrupt: 

# Test it on Chopin

In [243]:
net = prevNet

In [244]:
#First create a mapping from class number to chord
chord_dict_reverse = {0:'N', 1:'C:maj', 2:'C:min', 3:'Db:maj', 4:'Db:min', 5:'D:maj', 6:'D:min', 7:'Eb:maj',
                     8:'Eb:min', 9:'E:maj', 10:'E:min', 11:'F:maj', 12:'F:min', 13:'Gb:maj', 14:'Gb:min', 15:'G:maj',
                     16:'G:min', 17: 'Ab:maj', 18:'Ab:min', 19:'A:maj', 20:'A:min', 21:'Bb:maj', 22:'Bb:min',
                     23:'B:maj', 24:'B:min', 25:'C:maj/3', 26:'C:min/b3', 27:'C:maj/5', 28:'C:min/5', 29:'Db:maj/3',
                     30:'Db:min/b3', 31:'Db:maj/5', 32:'Db:min/5', 33:'D:maj/3', 34:'D:min/b3', 35:'D:maj/5',
                     36:'D:min/5', 37:'Eb:maj/3', 38:'Eb:min/b3', 39:'Eb:maj/5', 40:'Eb:min/5', 41:'E:maj/3',
                     42:'E:min/b3', 43:'E:maj/5', 44:'E:min/5', 45:'F:maj/3', 46:'F:min/b3', 47:'F:maj/5', 48:'F:min/5',
                     49:'Gb:maj/3', 50:'Gb:min/b3', 51:'Gb:maj/5', 52:'Gb:min/5', 53:'G:maj/3', 54:'G:min/b3',
                     55:'G:maj/5', 56:'G:min/5', 57:'Ab:maj/3', 58:'Ab:min/b3', 59:'Ab:maj/5', 60:'Ab:min/5',
                     61:'A:maj/3', 62:'A:min/b3', 63:'A:maj/5', 64:'A:min/5', 65:'Bb:maj/3', 66:'Bb:min/b3',
                     67:'Bb:maj/5', 68:'Bb:min/5', 69:'B:maj/3', 70:'B:min/b3', 71:'B:maj/5', 72:'B:min/5'}

In [248]:
#Get the intended chopin chroma values.

ChromasPerTimeStep = 24
NumOfTimeStepsToRead = 250
IdxToReadFrom = 271*44100//2048

chopinChromas = np.zeros((NumOfTimeStepsToRead, ChromasPerTimeStep))

with open('Data/ChopinChromagrams.csv', 'r') as f:
    rdr = csv.reader(f)
    for idx, row in enumerate(rdr):
        if  IdxToReadFrom <= idx < IdxToReadFrom + NumOfTimeStepsToRead:
            chopinChromas[idx-IdxToReadFrom,:] = row[1:]
            
        if idx >= IdxToReadFrom+NumOfTimeStepsToRead:
            break

chopinChromas = torch.Tensor(chopinChromas).double().to(device)

#Input into the net and get them back on the CPU.
chopin_predictions = net(chopinChromas).to(cpuDevice)

In [250]:
#Get output and turn it back into chords.
#chopin_predictions = (batch_num, time_step, chord_values)
secondsPerTimeStep = 2048/44100

timesAndChords = []

prevChord = -1

predictedChords = []

for time_step in range(NumOfTimeStepsToRead):
    #print(chopin_predictions[batchnum,time_step])
    chordNumber = torch.argmax(chopin_predictions[time_step]).item()
    predictedChords += [chordNumber]
    #print(maxIdx)
    entropy = torch.distributions.Categorical(probs=chopin_predictions[time_step]).entropy().item()
    val = torch.max(chopin_predictions[time_step]).item()
    chord = chord_dict_reverse[chordNumber]
    #print('Time:', time_step*secondsPerTimeStep, 'Chord:', chord, 'Val:', val, 'Entropy:', entropy)
    
    if chordNumber != prevChord:
        timesAndChords += [[time_step*secondsPerTimeStep, chord_dict_reverse[chordNumber]]]
    prevChord = chordNumber

for time, chord in timesAndChords:
    print("Time: %4.1f" % (time), 'Chord:', chord)

Time:  0.0 Chord: C:maj
Time:  0.0 Chord: F:maj
Time:  0.1 Chord: Bb:maj
Time:  0.4 Chord: F:maj
Time:  0.5 Chord: Bb:maj
Time:  0.6 Chord: F:maj
Time:  1.3 Chord: G:maj
Time:  1.3 Chord: Bb:maj
Time:  2.0 Chord: D:maj
Time:  2.1 Chord: F:maj
Time:  2.5 Chord: Bb:maj
Time:  4.4 Chord: E:maj
Time:  5.0 Chord: A:maj
Time:  7.0 Chord: D:maj
Time:  7.4 Chord: C:maj
Time:  7.4 Chord: G:min
Time:  8.0 Chord: C:maj
Time:  8.1 Chord: C:min
Time:  8.7 Chord: C:maj
Time:  8.8 Chord: Bb:maj
Time:  9.6 Chord: F:maj
Time: 10.3 Chord: Bb:maj


In [251]:
from scipy.signal import medfilt

In [252]:
#Median filter the chors and display nicely
chopin_chord_numbers_filtered = medfilt(predictedChords, kernel_size=25)
prevChord = -1
filteredTimesAndChords = []
for i, chordNumber in enumerate(chopin_chord_numbers_filtered):
    if chordNumber != prevChord:
        filteredTimesAndChords += [[i*secondsPerTimeStep, chord_dict_reverse[chordNumber]]]
    prevChord=chordNumber
    
for time, chord in filteredTimesAndChords:
    print("Time: %4.1f" % (time), 'Chord:', chord)

Time:  0.0 Chord: C:maj
Time:  0.0 Chord: F:maj
Time:  1.3 Chord: G:maj
Time:  1.3 Chord: Bb:maj
Time:  4.4 Chord: E:maj
Time:  5.0 Chord: A:maj
Time:  7.0 Chord: G:min
Time:  7.5 Chord: D:maj
Time:  7.9 Chord: C:min
Time:  8.8 Chord: Bb:maj
Time:  9.6 Chord: F:maj
Time: 10.3 Chord: Bb:maj


In [222]:
#Write the chord times to file.
import csv
with open('ChopinTimesAndChords.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    for time, chord in filteredTimesAndChords:
        writer.writerow([time, chord])
