In [64]:
import os

import numpy as np
import pandas as pd

import scipy
from scipy.sparse import csc_matrix, csr_matrix, load_npz
from sklearn.preprocessing import OneHotEncoder

import matplotlib.pyplot as plt

from chord_rec_lib import delta_t #find_files, format_name

In [2]:
from chord_rec_lib import dnames

# Every directory in `dnames` is resolved relative to HEAD_DIR
# (presumably the project root, one level above this notebook).
HEAD_DIR = '..'

# Build a new dict instead of rewriting the imported one in place: Python
# caches the module, so the in-place loop prepended HEAD_DIR once more on
# every re-run of this cell ('../../csvs', ...). Re-binding keeps the cell
# idempotent.
# NOTE(review): chord_rec_lib.dnames itself is now left untouched - confirm
# that no library helper relied on the in-place prefixing.
dnames = {name: os.path.join(HEAD_DIR, path) for name, path in dnames.items()}

In [3]:
# Display the directory mapping after prefixing with HEAD_DIR.
dnames

{'BB_DS_DIR': '../base_data/McGill-Billboard',
 'BB_PARS_DS_DIR': '../base_data/billboard-2.0.1-lab/McGill-Billboard',
 'CL_DS_DIR': '../base_data/chordlab',
 'CSVS_DIR': '../csvs',
 'RAW_SONGS_DIR': '../raw_songs',
 'WAV_SONGS_DIR': '../wav_songs',
 'SPECTRS_DIR': '../spectrs',
 'SONGS_PARSED_DIR': '../songs_parsed',
 'CHORDS_DIR': '../chords',
 'X_DS_DIR': '../x_dataset'}

# Load data

In [4]:
def gen_fname(X_id):
    """Return the file name of the sample with the given id, e.g. 'X_7.npz'."""
    return 'X_{}.npz'.format(X_id)


def gen_fnames(X_ids):
    """Return the list of sample file names for an iterable of sample ids."""
    # Reuse gen_fname so the naming scheme is defined in exactly one place.
    return [gen_fname(X_id) for X_id in X_ids]


def get_sample(X_id):
    """Load sample `X_id` from the X_DS_DIR directory with scipy's load_npz."""
    return load_npz(os.path.join(dnames['X_DS_DIR'], gen_fname(X_id)))

In [84]:
# Read the X_ds table (first CSV column becomes the index) and replace every
# missing value with the 'N' marker in one chained expression.
x_ds_path = os.path.join(dnames['CSVS_DIR'], 'X_ds.csv')
X_ds = pd.read_csv(x_ds_path, index_col=0).fillna('N')
X_ds.head()

Unnamed: 0,song_id,fullname,starttime,endtime,chord_id,duration,id,tone,majmin
0,906,E:(1),119.730158,119.780948,100,0.05079,0,E,N
1,906,E:(1),119.780948,119.831738,100,0.05079,1,E,N
2,906,E:(1),119.831738,119.882528,100,0.05079,2,E,N
3,906,E:(1),119.882528,119.933318,100,0.05079,3,E,N
4,906,E:(1),119.933318,119.984108,100,0.05079,4,E,N


In [88]:
# One-hot encode the 'majmin' label. The encoder expects a 2-D column, hence
# the reshape; each encoded row is then stored as one array per table row.
majmin_column = X_ds['majmin'].values.reshape(-1, 1)
majmin_onehot = OneHotEncoder(sparse=False).fit_transform(majmin_column)
X_ds['majmin_enc'] = list(majmin_onehot)

In [89]:
# Weight each row by the number of rows sharing its 'majmin' label. These are
# the same weights the previous version obtained by temporarily re-indexing
# X_ds by 'majmin' and aligning the per-group counts on that index. Computing
# them per row avoids `groupby('majmin')` on a frame whose index is also named
# 'majmin', which only warned on old pandas ("Defaulting to column ...") and
# raises an ambiguity error on newer releases. X_ds.index is left untouched.
# NOTE(review): these weights favour the most frequent class even more; if
# class balancing was the goal they should be inverted - confirm intent.
# NOTE(review): no random_state is passed, so every run draws a new sample.
row_weights = X_ds['majmin'].map(X_ds['majmin'].value_counts()).values
sampled = X_ds.sample(n=2000, weights=row_weights)
idxs = sampled['id']
# Unweighted alternative:
#idxs = X_ds.sample(n=2000)['id']

# Load every sampled matrix and flatten it into a 1-D feature vector.
whole_ds = np.array([get_sample(i).toarray().flatten() for i in idxs])

# First 1500 samples are used for training, the remaining 500 for testing.
# Labels come straight from the sampled rows, so they stay in sample order.
X_train, y_train = whole_ds[:1500], sampled['majmin_enc'].iloc[:1500]
X_test, y_test = whole_ds[1500:], sampled['majmin_enc'].iloc[1500:]

Defaulting to column, but this will raise an ambiguity error in a future version
  


# Modeling

In [34]:
import tensorflow as tf
import keras
import keras.layers as L
from keras.optimizers import SGD

## Dense model. Just for fun

In [None]:
# NOTE: this cell held the incomplete expression `keras.utils.` (apparently a
# leftover from tab-completion). It is a SyntaxError when the notebook is run
# top to bottom, so it is kept only as this comment.

In [33]:
# `whole_ds` already holds dense, flattened samples when the notebook is run
# top to bottom, and a NumPy array has no `.toarray()`; read the feature count
# straight from the shape instead.
print(whole_ds[0].shape)

(1290,)


In [132]:
# Fully connected baseline: 1290 input features -> 1024 -> 256 -> 64 -> 3-way softmax.
model = keras.Sequential()
model.add(L.Dense(1024, activation='relu', input_dim=1290))
model.add(L.Dense(256, activation='relu'))
model.add(L.Dense(64, activation='relu'))
# Only the first layer needs an input size; the `input_dim=64` previously
# passed here was ignored by Sequential and has been dropped as misleading.
model.add(L.Dense(3, activation='softmax'))

# SGD with Nesterov momentum and a small learning-rate decay.
# NOTE(review): newer Keras releases spell `lr` as `learning_rate` - adjust
# when upgrading.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [115]:
# Stack the per-sample one-hot vectors into one (n_samples, n_classes) array.
# np.vstack infers both sizes, so the cell no longer hard-codes 1500 x 3 and
# keeps working if the sample count or the number of classes changes.
y_train = np.vstack(list(y_train))

In [128]:
# Stack the per-sample one-hot vectors into one (n_samples, n_classes) array.
# np.vstack infers both sizes, so the cell no longer hard-codes 500 x 3 and
# keeps working if the sample count or the number of classes changes.
y_test = np.vstack(list(y_test))

In [133]:
# Train the dense model: 20 passes over the training set in mini-batches of 100.
model.fit(x=X_train, y=y_train, batch_size=100, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f09d40f0748>

In [134]:
# Evaluate on the held-out samples; returns [loss, accuracy] as compiled above.
score = model.evaluate(x=X_test, y=y_test, batch_size=20)



In [135]:
# Show the evaluation result: [loss, accuracy].
score

[9.698609790802003, 0.3600000035762787]

Surprisingly, the result is not good: test accuracy is only about 36%, which is close to chance level for three classes.

## Trying a convolutional network

In [183]:
# Weight each row by the number of rows sharing its 'majmin' label. These are
# the same weights the previous version obtained by temporarily re-indexing
# X_ds by 'majmin' and aligning the per-group counts on that index. Computing
# them per row avoids `groupby('majmin')` on a frame whose index is also named
# 'majmin', which only warned on old pandas ("Defaulting to column ...") and
# raises an ambiguity error on newer releases. X_ds.index is left untouched.
# NOTE(review): these weights favour the most frequent class even more; if
# class balancing was the goal they should be inverted - confirm intent.
# NOTE(review): no random_state is passed, so every run draws a new sample.
row_weights = X_ds['majmin'].map(X_ds['majmin'].value_counts()).values
sampled = X_ds.sample(n=2000, weights=row_weights)
idxs = sampled['id']
# Unweighted alternative:
#idxs = X_ds.sample(n=2000)['id']

# Load every sampled matrix as a dense (129, 10) array. The earlier
# `todense().reshape(129, 10, 1)` produced the same shape, because np.matrix
# cannot be 3-D and silently drops the trailing length-1 axis.
whole_ds = np.array([get_sample(i).toarray().reshape(129, 10) for i in idxs])

# First 1500 samples are used for training, the remaining 500 for testing.
# Labels come straight from the sampled rows, so they stay in sample order.
X_train, y_train = whole_ds[:1500], sampled['majmin_enc'].iloc[:1500]
X_test, y_test = whole_ds[1500:], sampled['majmin_enc'].iloc[1500:]

Defaulting to column, but this will raise an ambiguity error in a future version
  


In [178]:
# Check the shape of a single training sample.
X_train[0].shape

(129, 10)

In [215]:
# 1-D convolutional classifier. Each sample is a (129, 10) array, which Conv1D
# reads as 129 steps with 10 channels. Every convolution uses a kernel of
# width 3 with 'same' padding, so only the pooling layers shorten the sequence.
model = keras.Sequential([
    # Block 1: 10 then 32 filters, halve the sequence length, light dropout.
    L.Conv1D(10, 3, padding='same', activation='relu', input_shape=(129, 10)),
    L.Conv1D(32, 3, padding='same', activation='relu'),
    L.MaxPooling1D(pool_size=2),
    L.Dropout(0.25),
    # Block 2: two 64-filter convolutions, halve the length again.
    L.Conv1D(64, 3, padding='same', activation='relu'),
    L.Conv1D(64, 3, padding='same', activation='relu'),
    L.MaxPooling1D(pool_size=2),
    L.Dropout(0.25),
    # Classifier head: flatten, one hidden layer, 3-way softmax.
    L.Flatten(),
    L.Dense(256, activation='relu'),
    L.Dropout(0.5),
    L.Dense(3, activation='softmax'),
])

# Same optimizer settings as the dense model: SGD with Nesterov momentum.
sgd = SGD(lr=0.01, momentum=0.9, decay=1e-6, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [192]:
# Reshape every training sample to (129, 10, 1), collecting them in a list.
X_train = [sample.reshape(129, 10, 1) for sample in X_train]

In [196]:
# Join the per-sample arrays along the first axis into a single array.
X_train = np.concatenate(X_train, axis=0)

In [207]:
# View the data as 1500 samples of shape (129, 10), matching the Conv1D
# model's input_shape.
X_train = np.reshape(X_train, (1500, 129, 10))

In [210]:
# Stack the per-sample one-hot vectors into one (n_samples, n_classes) array.
# np.vstack infers both sizes, so the cell no longer hard-codes 1500 x 3 and
# keeps working if the sample count or the number of classes changes.
y_train = np.vstack(list(y_train))

In [216]:
# Train the convolutional model: 10 passes over the training set, batches of 32.
model.fit(x=X_train, y=y_train, epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f0995ccb4e0>

In [217]:
# Stack the per-sample one-hot vectors into one (n_samples, n_classes) array.
# np.vstack infers both sizes, so the cell no longer hard-codes 500 x 3 and
# keeps working if the sample count or the number of classes changes.
y_test = np.vstack(list(y_test))

In [218]:
# Evaluate on the held-out samples; returns [loss, accuracy] as compiled above.
score = model.evaluate(x=X_test, y=y_test, batch_size=32)



In [219]:
# Show the evaluation result: [loss, accuracy].
score

[10.002817565917969, 0.3359999997615814]