In [None]:
# Colab only: mount Google Drive at /content/drive so the dataset folders
# referenced further down (all under /content/drive/MyDrive) can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Setting

Setup code: import the libraries used throughout the notebook.

In [None]:
import numpy as np
import os
import nltk
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm

Check that a GPU is available and select the device used for all tensors.

In [None]:
# Select the device on which every tensor in this notebook is created.
# When no GPU is attached we still print the hint, but fall back to the CPU
# so that `device` never names hardware that does not exist.
if torch.cuda.is_available():
  print('Good to go!')
  device = torch.device('cuda:0')
else:
  print('Please set GPU via Edit -> Notebook Settings.')
  device = torch.device('cpu')

Good to go!


1. create chord template.
2. chord dictionary
3. path definition
4. files declaration

In [None]:
# Never wrap printed NumPy arrays, whatever their width.
np.set_printoptions(linewidth=np.inf)

# Labels of the 24 chroma bins: the 12 pitch classes (starting at A) tagged
# '(bass)', followed by the same 12 pitch classes untagged.
# NOTE(review): presumably this mirrors the column order of bothchroma.csv - confirm.
_pitch_classes = ['A', 'Bb', 'B', 'C', 'C#', 'D', 'Eb', 'E', 'F', 'F#', 'G', 'Ab']
pcp = [name + '(bass)' for name in _pitch_classes] + _pitch_classes

# Chord label -> class index. Enharmonic spellings share one index:
# 0 is 'N', 1-12 are major triads, 13-24 are minor triads, 25 is 'X'.
chord2index = {
    'N': 0,
    'B#:maj': 1, 'C:maj': 1,
    'C#:maj': 2, 'Db:maj': 2,
    'D:maj': 3,
    'D#:maj': 4, 'Eb:maj': 4,
    'E:maj': 5, 'Fb:maj': 5,
    'E#:maj': 6, 'F:maj': 6,
    'F#:maj': 7, 'Gb:maj': 7,
    'G:maj': 8,
    'G#:maj': 9, 'Ab:maj': 9,
    'A:maj': 10,
    'A#:maj': 11, 'Bb:maj': 11,
    'B:maj': 12, 'Cb:maj': 12,
    'B#:min': 13, 'C:min': 13,
    'C#:min': 14, 'Db:min': 14,
    'D:min': 15,
    'D#:min': 16, 'Eb:min': 16,
    'E:min': 17, 'Fb:min': 17,
    'E#:min': 18, 'F:min': 18,
    'F#:min': 19, 'Gb:min': 19,
    'G:min': 20,
    'G#:min': 21, 'Ab:min': 21,
    'A:min': 22,
    'A#:min': 23, 'Bb:min': 23,
    'B:min': 24, 'Cb:min': 24,
    'X': 25,
}

# Class index -> the single spelling used when a class is displayed.
index2chord = {
    0: 'N',
    1: 'C:maj', 2: 'C#:maj', 3: 'D:maj', 4: 'Eb:maj',
    5: 'E:maj', 6: 'F:maj', 7: 'F#:maj', 8: 'G:maj',
    9: 'Ab:maj', 10: 'A:maj', 11: 'Bb:maj', 12: 'B:maj',
    13: 'C:min', 14: 'C#:min', 15: 'D:min', 16: 'Eb:min',
    17: 'E:min', 18: 'F:min', 19: 'F#:min', 20: 'G:min',
    21: 'Ab:min', 22: 'A:min', 23: 'Bb:min', 24: 'B:min',
    25: 'X',
}

# Number of distinct chord classes.
num_chords = len(index2chord)

# Root folders of the dataset on the mounted Drive. Each song has a
# sub-folder of the same name under both roots.
chroma_path = '/content/drive/MyDrive/ML Project/McGill-Billboard-2.0-chordino/'
chord_path = '/content/drive/MyDrive/ML Project/McGill-Billboard-2.0-mirex/'

# os.listdir returns entries in arbitrary order. Sort them so that the seeded
# shuffle used for the train/validation/test split starts from the same
# ordering on every run and every machine.
files = sorted(os.listdir(chroma_path))

# Seed NumPy's global RNG, which the dataset shuffle relies on.
np.random.seed(1)

# Choose the files to use and split the files (train, validation, test)

In [None]:
def get_file_infos():
  """Collect simple annotation statistics for every song in `files`.

  For each song, `<chord_path>/<song>/majmin.lab` is read line by line. Every
  line with at least three whitespace-separated fields counts as one
  annotated segment; reading stops at the first line with fewer fields.

  Returns:
    dict mapping song folder name -> (length, X_num, Major_num, Minor_num):
      length     number of segments read,
      X_num      segments whose label is 'x' (case-insensitive),
      Major_num  segments whose label contains 'maj',
      Minor_num  segments whose label contains 'min'.
    Segments with any other label (e.g. 'N') only contribute to `length`.
  """
  file_info = dict()
  for file in tqdm(files):
    length = 0
    X_num = 0
    Major_num = 0
    Minor_num = 0
    # The context manager closes the file even if parsing raises.
    with open(chord_path + file + '/majmin.lab', 'r') as f:
      for line in f:
        start_end_chord = line.split()
        # A short or blank line marks the end of the usable annotations.
        if len(start_end_chord) < 3: break
        length += 1
        chord = start_end_chord[2].lower()
        if chord == 'x':
          X_num += 1
        elif 'maj' in chord:
          Major_num += 1
        elif 'min' in chord:
          Minor_num += 1
    file_info[file] = (length, X_num, Major_num, Minor_num)
  return file_info

In [None]:
# Scan every song's majmin.lab once; the resulting per-song tuples
# (length, X_num, Major_num, Minor_num) drive the file selection below.
file_infos = get_file_infos()

100%|██████████| 890/890 [05:05<00:00,  2.92it/s]


In [None]:
# Keep only songs with fewer than 200 annotated segments and fewer than
# 10 segments labelled 'X'.
valid_files = [file for file, info in file_infos.items() if info[0] < 200 and info[1] < 10]
print(len(files))
print(len(valid_files))

# Re-seed immediately before shuffling so that re-running this cell always
# produces the same split. On a fresh top-to-bottom run nothing consumes the
# global RNG between the initial seeding and this point, so the permutation
# is unchanged.
np.random.seed(1)
np.random.shuffle(valid_files)

# 80% train / 10% validation / 10% test.
cut1 = int(0.8 * len(valid_files))
cut2 = int(0.9 * len(valid_files))
train_files = valid_files[:cut1]
validation_files = valid_files[cut1:cut2]
test_files = valid_files[cut2:]

print(len(train_files))
print(len(validation_files))
print(len(test_files))

890
588
470
59
59


# Task 1: MLP - Data Preprocessing

Build `chroma_samples` (one length-24 float32 vector per frame) and `chord_labels` (one length-26 one-hot int64 vector per frame).

In [None]:
def get_chroma_chord_data(file):
  """Load the frame-wise chroma vectors of one song with their chord labels.

  Every row of `<chroma_path>/<file>/bothchroma.csv` is one frame: field 1 is
  the frame time and fields 2.. are the chroma values. Each frame is labelled
  with the chord of the `majmin.lab` segment whose end time is the first one
  not smaller than the frame time. Frames after the last usable annotation
  line are dropped.

  Args:
    file: song folder name (present under both `chroma_path` and `chord_path`).

  Returns:
    (chroma_samples, chord_labels):
      chroma_samples  float32 tensor, one row per frame, on `device`;
      chord_labels    one-hot tensor of shape (frames, num_chords), on `device`.

  Raises:
    ValueError: if no frame could be read for the song.
    KeyError: if an annotation uses a label missing from `chord2index`.
  """
  chroma_rows = []
  chord_ids = []

  # Context managers close both files even if parsing raises.
  with open(chroma_path + file + '/bothchroma.csv', 'r') as chroma_data, \
       open(chord_path + file + '/majmin.lab', 'r') as chord_data:
    start_end_chord = chord_data.readline().split()
    end = float(start_end_chord[1])
    annotations_exhausted = False

    for chroma_line in chroma_data:
      if not chroma_line.strip(): continue  # tolerate blank lines
      time_chroma = chroma_line.split(',')
      t = float(time_chroma[1])

      # Advance until the current segment covers t. A loop (not a single
      # step) is needed so that segments shorter than the frame spacing do
      # not leave the label lagging behind the frame time.
      while t > end:
        start_end_chord = chord_data.readline().split()
        # Empty at end of file, short on a malformed line: stop either way.
        if len(start_end_chord) < 3:
          annotations_exhausted = True
          break
        end = float(start_end_chord[1])
      if annotations_exhausted: break

      # float() ignores surrounding whitespace, so the trailing newline needs
      # no manual stripping (slicing off the last character would corrupt the
      # final value of a file that does not end with a newline).
      chroma_rows.append([float(value) for value in time_chroma[2:]])
      chord_ids.append(chord2index[start_end_chord[2]])

  if not chroma_rows:
    raise ValueError('no chroma frames could be read for ' + repr(file))

  # Build each tensor once instead of creating one small device tensor per frame.
  chroma_samples = torch.tensor(chroma_rows, dtype=torch.float32, device=device)
  chord_indices = torch.tensor(chord_ids, dtype=torch.int64, device=device)
  chord_labels = F.one_hot(chord_indices, num_classes=num_chords)
  return chroma_samples, chord_labels

In [None]:
# Build the training tensors by calling get_chroma_chord_data for each song
# instead of repeating its parsing loop inline. A song that yields no frames
# raises inside the helper rather than being skipped silently.
chroma_tensors = []
chord_tensors = []

for file in tqdm(train_files):
  song_chroma, song_labels = get_chroma_chord_data(file)
  chroma_tensors.append(song_chroma)
  chord_tensors.append(song_labels)

# Concatenate the per-song tensors along the frame axis.
train_chroma_samples = torch.cat(chroma_tensors, dim=0)
train_chord_labels = torch.cat(chord_tensors, dim=0)

100%|██████████| 470/470 [04:17<00:00,  1.83it/s]


In [None]:
# Build the validation tensors by calling get_chroma_chord_data for each song
# instead of repeating its parsing loop inline. A song that yields no frames
# raises inside the helper rather than being skipped silently.
chroma_tensors = []
chord_tensors = []

for file in tqdm(validation_files):
  song_chroma, song_labels = get_chroma_chord_data(file)
  chroma_tensors.append(song_chroma)
  chord_tensors.append(song_labels)

# Concatenate the per-song tensors along the frame axis.
validation_chroma_samples = torch.cat(chroma_tensors, dim=0)
validation_chord_labels = torch.cat(chord_tensors, dim=0)

100%|██████████| 59/59 [00:33<00:00,  1.74it/s]


In [None]:
# Build the test tensors by calling get_chroma_chord_data for each song
# instead of repeating its parsing loop inline. A song that yields no frames
# raises inside the helper rather than being skipped silently.
chroma_tensors = []
chord_tensors = []

for file in tqdm(test_files):
  song_chroma, song_labels = get_chroma_chord_data(file)
  chroma_tensors.append(song_chroma)
  chord_tensors.append(song_labels)

# Concatenate the per-song tensors along the frame axis.
test_chroma_samples = torch.cat(chroma_tensors, dim=0)
test_chord_labels = torch.cat(chord_tensors, dim=0)

100%|██████████| 59/59 [00:29<00:00,  2.02it/s]


In [None]:
# Sanity check: print the shape of every split, samples first, then labels.
for split_tensor in (
    train_chroma_samples, train_chord_labels,
    validation_chroma_samples, validation_chord_labels,
    test_chroma_samples, test_chord_labels,
):
  print(split_tensor.shape)

torch.Size([1995376, 24])
torch.Size([1995376, 26])
torch.Size([257653, 24])
torch.Size([257653, 26])
torch.Size([252085, 24])
torch.Size([252085, 26])


Split training data in order to choose a certain number of samples of each chord

In [None]:
# Recover each frame's class index from its one-hot label and append it to the
# chroma vector as a trailing column.
train_label_ids = train_chord_labels.argmax(1)
train_data = torch.concat([train_chroma_samples, train_label_ids.unsqueeze(1)], dim=1)

# Copy all class indices to a Python list in one transfer. Reading them row
# by row with .item() costs one device-to-host read per frame.
train_label_list = train_label_ids.tolist()

# splitted_train_data[c] collects every row of train_data whose class is c.
splitted_train_data = [[] for _ in range(num_chords)]
for i in tqdm(range(train_data.shape[0])):
  splitted_train_data[train_label_list[i]].append(train_data[i, :])

100%|██████████| 1995376/1995376 [00:40<00:00, 49728.38it/s]


In [None]:
# Report how many training frames fell into each chord class.
for chord_id in range(num_chords):
  chord_name = '(' + index2chord[chord_id] + ')'
  sample_count = len(splitted_train_data[chord_id])
  print('{:2} {:8} chord has {:6} samples'.format(chord_id, chord_name, sample_count))

 0 (N)      chord has  91377 samples
 1 (C:maj)  chord has 185461 samples
 2 (C#:maj) chord has  70049 samples
 3 (D:maj)  chord has 163768 samples
 4 (Eb:maj) chord has  96068 samples
 5 (E:maj)  chord has 153347 samples
 6 (F:maj)  chord has 135023 samples
 7 (F#:maj) chord has  65753 samples
 8 (G:maj)  chord has 180865 samples
 9 (Ab:maj) chord has  92791 samples
10 (A:maj)  chord has 153873 samples
11 (Bb:maj) chord has  98615 samples
12 (B:maj)  chord has  82192 samples
13 (C:min)  chord has  30615 samples
14 (C#:min) chord has  30101 samples
15 (D:min)  chord has  50286 samples
16 (Eb:min) chord has  29200 samples
17 (E:min)  chord has  43541 samples
18 (F:min)  chord has  25757 samples
19 (F#:min) chord has  31811 samples
20 (G:min)  chord has  32438 samples
21 (Ab:min) chord has  10033 samples
22 (A:min)  chord has  57539 samples
23 (Bb:min) chord has  13591 samples
24 (B:min)  chord has  40313 samples
25 (X)      chord has  30969 samples


# Task 1: Viterbi - Data Preprocessing

In [None]:
def get_chord_sequences_for_files(files, delta_t=2048/44100):
  """Sample each song's chord annotation on a regular time grid.

  For every song, `<chord_path>/<song>/majmin.lab` is walked with a clock that
  starts at 0 and advances by `delta_t` per step. Each step emits the class
  index of the segment whose end time is the first one not smaller than the
  clock. A song's sequence ends when its annotation lines run out or a line
  with fewer than three fields is reached.

  Args:
    files: iterable of song folder names under `chord_path`.
    delta_t: grid spacing in the same time unit as the .lab end times. The
      default is 2048/44100.
      NOTE(review): presumably a 2048-sample hop at 44.1 kHz, matching the
      chroma frames - confirm.

  Returns:
    list with one list of chord class indices per song, in input order.

  Raises:
    ValueError: if `delta_t` is not positive (the clock would never advance).
    KeyError: if an annotation uses a label missing from `chord2index`.
  """
  if delta_t <= 0:
    raise ValueError('delta_t must be positive')

  chord_sequences = []

  for file in tqdm(files):
    song = []
    # The context manager closes the file even if parsing raises.
    with open(chord_path + file + '/majmin.lab', 'r') as chord_data:
      start_end_chord = chord_data.readline().split()
      end = float(start_end_chord[1])
      t = 0
      annotations_exhausted = False
      while True:
        t += delta_t
        # Advance until the current segment covers t. A loop (not a single
        # step) keeps the label from lagging behind the clock when a segment
        # is shorter than delta_t.
        while t > end:
          start_end_chord = chord_data.readline().split()
          # Empty at end of file, short on a malformed line: stop either way.
          if len(start_end_chord) < 3:
            annotations_exhausted = True
            break
          end = float(start_end_chord[1])
        if annotations_exhausted: break

        song.append(chord2index[start_end_chord[2]])

    chord_sequences.append(song)
  return chord_sequences

In [None]:
# Chord-index sequences of the training songs; used below to count chord bigrams.
train_chord_sequences = get_chord_sequences_for_files(train_files)

100%|██████████| 470/470 [00:00<00:00, 499.78it/s]


Compute the chord bigrams of the training sequences and their conditional frequency distribution (CFD).

In [None]:
# Collect every (previous chord, next chord) pair from the training songs.
chord_bigrams = []

for chord_sequence in tqdm(train_chord_sequences):
    # extend() appends in place; `chord_bigrams = chord_bigrams + bigrams`
    # re-copies the whole accumulated list on every song (quadratic time).
    chord_bigrams.extend(nltk.bigrams(chord_sequence))

# cfd[i][j] is the number of times chord i was directly followed by chord j.
cfd = nltk.ConditionalFreqDist(chord_bigrams)
cfd.tabulate()

100%|██████████| 470/470 [00:21<00:00, 22.28it/s]


        0      1      2      3      4      5      6      7      8      9     10     11     12     13     14     15     16     17     18     19     20     21     22     23     24     25 
 0  91192     77     38     70     37     72     60     18     72     40     55     31     20     15      7     19      6     15      9      2     10      2     22      2      9     40 
 1     88 181853      3    439     17     37    971      0    812     23     46    252    104     56      0    240      0     70     62      0     54      0    195      5     74     59 
 2     32     16  68765      6    106      3      5    324      0    464      5     14     20     17     10      0    123      0     22     38      3      8      0     43      0     24 
 3     72    368      6 160929     12    233     42      9    867      1    557     29     92      7     54     25      0    111      0     32     37      0     98      0    157     40 
 4     42     25     62     25  94907      9    105     31     11    3

Create transition matrix, A

In [None]:
# calculate transition probability matrix A
# Start from the raw bigram counts: entry (i, j) is how often chord i was
# directly followed by chord j in the training sequences.
transition_matrix = torch.zeros((num_chords, num_chords), device=device)
for i in range(num_chords):
    for j in range(num_chords):
         transition_matrix[i, j] = cfd[i][j]

# Per-row totals, shaped (num_chords, 1) so the division broadcasts over rows.
# Deliberately not named `sum`: that would shadow the builtin for every cell
# executed afterwards.
row_totals = torch.sum(transition_matrix, dim=1).view(-1, 1)
# NOTE(review): only row 0 gets +1 in its denominator, so that row does not
# sum to exactly 1. The reason is not visible here - confirm it is intended.
row_totals[0] += 1
# NOTE(review): a chord that never starts a bigram would have a zero total
# and produce NaNs in its row.
transition_matrix = torch.div(transition_matrix, row_totals)

transition_matrix.shape

torch.Size([26, 26])

# Task 2: LSTM - Data Preprocessing

In [None]:
# Chord-index sequences for each split, computed in train, validation, test order.
train_chord_sequences, validation_chord_sequences, test_chord_sequences = (
    get_chord_sequences_for_files(split_files)
    for split_files in (train_files, validation_files, test_files)
)

100%|██████████| 470/470 [00:00<00:00, 489.69it/s]
100%|██████████| 59/59 [00:00<00:00, 426.16it/s]
100%|██████████| 59/59 [00:00<00:00, 452.43it/s]


# Save Data

saving objects in data folder

In [None]:
# Assemble the checkpoint group by group, then write everything to one file.
# Key order matches the order in which the groups are merged.

# General data
general_part = {
    'file_infos' : file_infos,
    'train_files' : train_files,
    'validation_files' : validation_files,
    'test_files' : test_files,
}

# Task 1: MLP data
mlp_part = {
    'train_chroma_samples': train_chroma_samples,
    'train_chord_labels': train_chord_labels,
    'validation_chroma_samples': validation_chroma_samples,
    'validation_chord_labels': validation_chord_labels,
    'test_chroma_samples': test_chroma_samples,
    'test_chord_labels': test_chord_labels,
    'splitted_train_data': splitted_train_data,
}

# Task 1: Viterbi data
viterbi_part = {
    'transition_matrix': transition_matrix,
}

# Task 2: LSTM data
lstm_part = {
    'train_chord_sequences': train_chord_sequences,
    'validation_chord_sequences': validation_chord_sequences,
    'test_chord_sequences': test_chord_sequences,
}

torch.save(
    {**general_part, **mlp_part, **viterbi_part, **lstm_part},
    '/content/drive/MyDrive/ML Project/data/all.tar',
)

In [None]:
# Store only the file statistics and the split assignment in a separate file.
general_checkpoint = {
    # General data
    'file_infos' : file_infos,
    'train_files' : train_files,
    'validation_files' : validation_files,
    'test_files' : test_files,
}
torch.save(general_checkpoint, '/content/drive/MyDrive/ML Project/data/General_data.tar')

In [None]:
# Store only the per-split chord sequences in a separate file.
lstm_checkpoint = {
    # Task 2: LSTM data
    'train_chord_sequences': train_chord_sequences,
    'validation_chord_sequences': validation_chord_sequences,
    'test_chord_sequences': test_chord_sequences,
}
torch.save(lstm_checkpoint, '/content/drive/MyDrive/ML Project/data/LSTM_data.tar')

load saved objects

In [None]:
# Restore every saved object from the combined checkpoint.
# map_location remaps the stored tensors onto the currently selected `device`
# instead of requiring the exact device they were saved from.
checkpoint = torch.load('/content/drive/MyDrive/ML Project/data/all.tar', map_location=device)

# General data
file_infos = checkpoint['file_infos']
train_files = checkpoint['train_files']
validation_files = checkpoint['validation_files']
test_files = checkpoint['test_files']

# Task 1: MLP data
train_chroma_samples = checkpoint['train_chroma_samples']
train_chord_labels = checkpoint['train_chord_labels']
validation_chroma_samples = checkpoint['validation_chroma_samples']
validation_chord_labels = checkpoint['validation_chord_labels']
test_chroma_samples = checkpoint['test_chroma_samples']
test_chord_labels = checkpoint['test_chord_labels']
splitted_train_data = checkpoint['splitted_train_data']

# Task 1: Viterbi data
transition_matrix = checkpoint['transition_matrix']

# Task 2: LSTM data
train_chord_sequences = checkpoint['train_chord_sequences'] 
validation_chord_sequences = checkpoint['validation_chord_sequences']
test_chord_sequences = checkpoint['test_chord_sequences']