In [13]:
import os
import scipy.io as spio
import numpy as np
from os.path import join, isdir, isfile
%matplotlib inline
import matplotlib.pyplot as plt
# In-python Chroma extraction (may not be necessary if we use MATLAB-only)
DATA_DIR='../data'

# 'file_name' 'class_name' 'class' 'eng' 'mfc' 'chroma' 't'
# 'keystrength' 'brightness' 'zerocross' 'roughness' 'inharmonic' 'hcdf'

def pad(nparrs):
    shape = tuple((max(c) for c in zip(*(x.shape for x in nparrs))))
    def to_pad(x): return tuple(((0, a - b) for a, b in zip(shape, x.shape)))
    return [np.pad(x, to_pad(x), mode='constant') for x in nparrs]

## List of all features that can be extracted ##
L = ['eng', 'chroma', 't', 'keystrength', 'brightness', 'zerocross', 'roughness', 'inharmonic', 'hcdf']

## Dictionary of feature matrices ##
D = {}

for feature in L:
    X = []
    for genre in sorted(os.listdir(DATA_DIR)):
        path = join(DATA_DIR, genre)
        if not isdir(path): continue
        def load(base, feat): return spio.loadmat(join(path, base))['DAT'][feat][0,0]
        arrs = [load(i, feature) for i in sorted(os.listdir(path))]
        X.append([genre, np.array(pad(arrs))])
        print('Read in', genre, feature)
    [classes, X] = zip(*X)
    X = np.concatenate(pad(X))
    D[feature] = X




Read in blues eng
Read in classical eng
Read in country eng
Read in disco eng
Read in hiphop eng
Read in jazz eng
Read in metal eng
Read in pop eng
Read in reggae eng
Read in rock eng
Read in blues chroma
Read in classical chroma
Read in country chroma
Read in disco chroma
Read in hiphop chroma
Read in jazz chroma
Read in metal chroma
Read in pop chroma
Read in reggae chroma
Read in rock chroma


In [17]:
print(D['eng'][3].shape)
print(D['eng'][7].shape)
print(D['chroma'][2].shape)
print(D['chroma'][5].shape)

(1, 1222)
(1, 1222)
(12, 1222)
(12, 1222)


In [57]:
import itertools

def one_hot(x, n):
    z = np.zeros([n, 10])
    z[:, x] = 1
    return z

trX = np.concatenate(pad(list(itertools.chain(x[:90] for x in X))))
teX = np.concatenate(pad(list(itertools.chain(x[90:] for x in X))))
trY = np.concatenate([one_hot(i, 90) for i in range(10)])
teY = np.concatenate([one_hot(i, 10) for i in range(10)])

p = np.random.permutation(len(trX))
trX, trY = trX[p], trY[p]

print('trX {} trY {} teX {} teY {}'.format(trX.shape, trY.shape, teX.shape, teY.shape))
# todo analyze range, draw pictures


############# The trX, trY, etc... should be by chroma ################

trX (900, 12, 1198) trY (900, 10) teX (100, 12, 1198) teY (100, 10)


In [8]:
import tensorflow as tf
import sys
if '../tools/TensorFlow-Learn/code' not in sys.path:
    sys.path.append('../tools/TensorFlow-Learn/code')
from data_set import DataSet
import regression

train = DataSet(trX, trY)
test = DataSet(teX, teY)

def batchify(t, wildcard=-1):
    t = list(t)
    t[0] = wildcard # why doesn't -1 work?
    return t

# Any batch size on flattened pixel values
x = tf.placeholder("float", shape=batchify(trX.shape, wildcard=None))
y = tf.placeholder("float", shape=batchify(trY.shape, wildcard=None))

x_image = tf.reshape(x, batchify(trX.shape) + [1])

def weight_variable(shape):
  return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
  return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
  return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_3x3(x):
  return tf.nn.max_pool(x, ksize=[1, 3, 3, 1],
                        strides=[1, 3, 3, 1], padding='SAME')
# TODO max filter instead?

W_conv1 = weight_variable([5, 50, 1, 4])
b_conv1 = bias_variable([4])
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = max_pool_3x3(h_conv1)

W_conv2 = weight_variable([4, 10, 4, 16])
b_conv2 = bias_variable([16])
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = max_pool_3x3(h_conv2)

W_conv3 = weight_variable([2, 2, 16, 128])
b_conv3 = bias_variable([128])
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
h_pool3 = max_pool_3x3(h_conv3)

def max_pool_size(orig, filt):
    if (orig // filt) * filt == orig: return orig // filt
    else: return orig // filt + 1

def layer3(x): return max_pool_size(max_pool_size(max_pool_size(x, 3), 3), 3)

l3size = 128 * layer3(trX.shape[1]) * layer3(trX.shape[2])
W_fc1 = weight_variable([l3size, 1024])
b_fc1 = bias_variable([1024])

h_pool3_flat = tf.reshape(h_pool3, [-1, l3size])
h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)

# Don't drop out when testing by setting keep_prob to 1.0
keep_prob = tf.placeholder("float")
h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

output = regression.SoftMax(h_fc1_drop, y)

train_step = tf.train.AdamOptimizer(1e-4).minimize(output.cross_entropy)

NUM_CORES = 3 # let me do stuff in the background
sess = tf.Session(config=tf.ConfigProto(inter_op_parallelism_threads=NUM_CORES,
    intra_op_parallelism_threads=NUM_CORES))

with sess.as_default():
    sess.run(tf.initialize_all_variables())

    # TODO add a saver here, this takes forever... Also, time it.
    NUM_EPOCHS = 100
    UPDATE_COARSENESS = 5
    BATCH_SIZE = 50 
    for epoch in range(1, NUM_EPOCHS + 1):
        for i, batch in enumerate(train.new_epoch(BATCH_SIZE), 1):
            tot_batches = train.size // BATCH_SIZE
            two_percent_done = i * 50 // tot_batches
            print(('\rEpoch {:02d}/{:02d} [' + two_percent_done * '-' + (50 - two_percent_done) * ' '
                   + '] {}/{}').format(epoch, NUM_EPOCHS, i, tot_batches), end='')
            sys.stdout.flush()
            train_step.run(feed_dict={x: batch[0], y: batch[1], keep_prob: 0.5})
        print('')
        if epoch == NUM_EPOCHS or UPDATE_COARSENESS and epoch % UPDATE_COARSENESS == 0:
            batch = DataSet(*next(train.new_epoch(100)))
            err = batch.multiclass_error(x, output.y, y, feed_dict={keep_prob:1.0})
            print('Epoch {}/{} 100-sample training error {}'.format(epoch, NUM_EPOCHS, err))

    print('Test error {}'.format(test.multiclass_error(x, output.y, y, feed_dict={keep_prob:1.0})))

sess.close()

11776
Epoch 01/100 [--------------------------------------------------] 18/18
Epoch 02/100 [--------------------------------------------------] 18/18
Epoch 03/100 [--------------------------------------------------] 18/18
Epoch 04/100 [--------------------------------------------------] 18/18
Epoch 05/100 [--------------------------------------------------] 18/18
Epoch 5/100 100-sample training error 0.6699999868869781
Epoch 06/100 [--------------------------------------------------] 18/18
Epoch 07/100 [--------------------------------------------------] 18/18
Epoch 08/100 [--------------------------------------------------] 18/18
Epoch 09/100 [--------------------------------------------------] 18/18
Epoch 10/100 [--------------------------------------------------] 18/18
Epoch 10/100 100-sample training error 0.5600000023841858
Epoch 11/100 [--------------------------------------------------] 18/18
Epoch 12/100 [--------------------------------------------------] 18/18
Epoch 13/100 [-