In [18]:
import os
import tensorflow as tf
from tensorflow.python.ops import rnn, rnn_cell
from tensorflow.python.ops.rnn_cell import BasicRNNCell, BasicLSTMCell, GRUCell
import pandas as pd
import numpy as np
import pickle
import math
import librosa
import matplotlib.pyplot as plt
from tensorflow.python.platform import gfile
from time import strftime, localtime, time

In [2]:
# Feature Extraction function

def get_features(tids):
    feature_list = []
    try:
        for n, tid in enumerate(tids):
            tid, features= compute_features(tid)
            feature_list.append(features)
            print("Extracted features audio track", n)
    except Exception as e:
        print('{}: {}'.format(tid, repr(e)))

    return np.array(feature_list)

def compute_features(tid):
    threshold = 1278900
    timeseries_length = 2498
    hop_length = 512
    try:
        filepath = get_audio_path('music/music_training', tid)
        x, sr = librosa.load(filepath, sr=None, mono=True, duration=29.0)  # kaiser_fast
        x = x.tolist()
        if(len(x) < threshold):
            raise ValueError('song length is shorter than threshold')
        else:
            x = x[:int(1278900)]#131000
        x = np.array(x)
        data = np.zeros((timeseries_length, 33), dtype=np.float64)
        mfcc = librosa.feature.mfcc(x, sr=sr, hop_length=hop_length, n_mfcc=13)
        spectral_center = librosa.feature.spectral_centroid(x, sr=sr, hop_length=hop_length)
        chroma = librosa.feature.chroma_stft(x, sr=sr, hop_length=hop_length)
        spectral_contrast = librosa.feature.spectral_contrast(x, sr=sr, hop_length=hop_length)
        data[:, 0:13] = mfcc.T[0:timeseries_length, :]
        data[:, 13:14] = spectral_center.T[0:timeseries_length, :]
        data[:, 14:26] = chroma.T[0:timeseries_length, :]
        data[:, 26:33] = spectral_contrast.T[0:timeseries_length, :]

    except Exception as e:
        print('{}: {}'.format(tid, repr(e)))
        return tid, 0

    return tid, data

def get_audio_path(audio_dir, track_id):
    tid_str = '{:06d}'.format(track_id)
    return os.path.join(audio_dir, tid_str[:3], tid_str + '.mp3')



In [16]:
#Extract Training Sets

metadata_path = 'dataset/track_metadata.csv'
label_column_name = 'listens'
is_train_mode = True
label_dict = {'low': 0,
 'medium': 1,
 'high': 2}

metadata_df = pd.read_csv(metadata_path)
if is_train_mode:
    metadata_df = metadata_df[metadata_df['set_split'] == 'training']
else:
    metadata_df = metadata_df[metadata_df['set_split'] == 'validation']
track_ids = np.array(metadata_df['track_id'])
tn = len(track_ids)
mix = get_features(track_ids)

########IMPORTANT############ Modify Training Sets  

split = 5
n = mix.shape[0]
h = mix.shape[1]//split
w = mix.shape[2]
mixe = np.zeros((n*split, h, w))
for i in range(split):
    mixe[i*n:(i+1)*n, :, :h] = mix[:,i*h:(i+1)*h,:]
label_array = np.zeros((metadata_df.shape[0]*split, len(label_dict)))
labels = metadata_df[label_column_name].values
for j in range(split):
    for i, label in enumerate(labels):
        label_pos = label_dict.get(label)
        label_array[j*tn + i, label_pos] = 1
ys = label_array.copy()

((3995, 499, 33), (31905, 3))

In [115]:
#Save as pkl
with open('mixe.pkl', 'wb') as mixf:
    pickle.dump(mixe, mixf, protocol=pickle.HIGHEST_PROTOCOL)
with open('ye.pkl', 'wb') as yf:
    pickle.dump(ys, yf, protocol=pickle.HIGHEST_PROTOCOL)

In [116]:
# Load Training Sets

class MacOSFile(object):
    def __init__(self, f):
        self.f = f

    def __getattr__(self, item):
        return getattr(self.f, item)

    def read(self, n):
        if n >= (1 << 31):
            buffer = bytearray(n)
            pos = 0
            while pos < n:
                size = min(n - pos, 1 << 31 - 1)
                chunk = self.f.read(size)
                buffer[pos:pos + size] = chunk
                pos += size
            return buffer
        return self.f.read(n)

with open("mixe.pkl", 'rb') as mixf:
	Xd = pickle.load(MacOSFile(mixf))
with open("ye.pkl", 'rb') as yf:
	yd = pickle.load(yf)

In [23]:
###############Extract Validation/Test Sets###############

is_train_mode = False

metadata_df = pd.read_csv(metadata_path)
if is_train_mode:
    metadata_df = metadata_df[metadata_df['set_split'] == 'training']
else:
    metadata_df = metadata_df[metadata_df['set_split'] == 'validation'] ######################## Modify here to insert test index
track_ids_val = np.array(metadata_df['track_id'])
vn = len(track_ids_val)
mix_val = get_features(track_ids_val)
########IMPORTANT############ Modify Training Sets
split = 5
n = mix_val.shape[0]
h = mix_val.shape[1]//split
w = mix_val.shape[2]
mixe_val = np.zeros((n*split, h, w))
for i in range(split):
    mixe_val[i*n:(i+1)*n, :, :h] = mix_val[:,i*h:(i+1)*h,:]
label_array_val = np.zeros((metadata_df.shape[0]*split, len(label_dict)))
labels_val = metadata_df[label_column_name].values
for j in range(split):
    for i, label in enumerate(labels_val):
        label_pos = label_dict.get(label)
        label_array_val[j*vn + i, label_pos] = 1
ys_val = label_array_val.copy()
mixe_val.shape, ys_val.shape

((3995, 499, 33), (3995, 3))

In [24]:
#Save as pkl
with open('mixe_val.pkl', 'wb') as mix_valf:
    pickle.dump(mixe_val, mix_valf, protocol=pickle.HIGHEST_PROTOCOL)
with open('ye_val.pkl', 'wb') as y_valf:
    pickle.dump(ys_val, y_valf, protocol=pickle.HIGHEST_PROTOCOL)

In [25]:
#Load Test Sets

with open('mixe_val.pkl', 'rb') as mix_valf:
	Xd_val = pickle.load(mix_valf)
with open('ye_val.pkl', 'rb') as y_valf:
	yd_val = pickle.load(y_valf)

In [19]:
# Model for training and validation

tf.reset_default_graph()

n_input = 33 * 499
n_classs = 3
image_height = 33
timesteps = 499
# rnn property
num_hidden = 250
num_layers = 2

# Placeholder and variables
# TODO : declare placeholder and variables

X = tf.placeholder(tf.float32, [None, timesteps, image_height])
y = tf.placeholder(tf.int64, [None, n_classs])
is_training = tf.placeholder(tf.bool)

# Build model
# TODO : build your model here
# Model

def model(X,y,is_training):
    w_out = tf.get_variable("W_out", shape=[num_hidden, n_classs])
    b_out = tf.get_variable("b_out", shape=[n_classs])

    x = tf.unstack(X, timesteps, 1)    
    stack_rnn = []
    for i in range(num_layers):
        cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden)
        cell = tf.nn.rnn_cell.DropoutWrapper(cell=cell, output_keep_prob=0.8)
        stack_rnn.append(cell)
    stacked_cell = tf.nn.rnn_cell.MultiRNNCell(stack_rnn, state_is_tuple=True)

    outputs, last_states = rnn.static_rnn(stacked_cell, x, dtype=tf.float32)

    logits = tf.matmul(outputs[-1], w_out) + b_out
    return logits

y_out = model(X,y,is_training)

# Loss and optimizer
# TODO : declare loss and optimizer operation

total_loss = tf.losses.softmax_cross_entropy(y,logits=y_out) 
mean_loss = tf.reduce_mean(total_loss)
optimizer = tf.train.AdamOptimizer(1e-4) 
extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(extra_update_ops):
    train_step = optimizer.minimize(mean_loss)    
correct_prediction = tf.equal(tf.argmax(y_out,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))



In [None]:
# properties
# General
# TODO : declare additional properties
# not fixed (change or add property as you like)
batch_size = 128
epoch_num = 5
print_every = 10

# fixed
metadata_path = 'dataset/track_metadata.csv'
# True if you want to train, False if you already trained your model
# TODO : IMPORTANT !!! Please change it to False when you submit your code
is_train_mode = False
validation = True
# TODO : IMPORTANT !!! Please specify the path where your best model is saved
# example : checkpoint/run-0925-0348
checkpoint_path = 'checkpoint/mix5_rnn_2_250'

# X, y, mean_loss,correct_prediction,train_step, accuracy = my_model(lr = 2e-4, rl = 1e-4,  is_training= is_train_mode)
#load data

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)			
    if is_train_mode:
        variables = [mean_loss,correct_prediction,train_step]
        train_indicies = np.arange(Xd.shape[0])
        np.random.shuffle(train_indicies)
        iter_cnt = 0
        for e in range(epoch_num):              
            correct = 0
            losses = []
            for i in range(int(math.ceil(Xd.shape[0]/batch_size))):
                start_idx = (i*batch_size)%Xd.shape[0]
                idx = train_indicies[start_idx:start_idx+batch_size]
                feed_dict = {X: Xd[idx,:],
                             y: yd[idx] }
                actual_batch_size = yd[idx].shape[0]
                loss, corr, _ = sess.run(variables,feed_dict=feed_dict)
                losses.append(loss*actual_batch_size)
                correct += np.sum(corr)
                if is_train_mode and (iter_cnt % print_every) == 0:
                    print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"\
                          .format(iter_cnt,loss,np.sum(corr)/actual_batch_size))
                iter_cnt += 1
            total_correct = correct/Xd.shape[0]
            total_loss = np.sum(losses)/Xd.shape[0]
            print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"\
              .format(total_loss,total_correct,e+1))
            plt.plot(losses)
            plt.grid(True)
            plt.title('Epoch {} Loss'.format(e+1))
            plt.xlabel('minibatch number')
            plt.ylabel('minibatch loss')
            plt.show()
        print('Training finished !')
#         output_dir = checkpoint_path + '/run-%02d%02d-%02d%02d' % tuple(localtime(time()))[1:5]
#         output_dir = checkpoint_path
#         if not gfile.Exists(output_dir):
#             gfile.MakeDirs(output_dir)
#         saver.save(sess, output_dir)
#         print('Model saved in file : %s' % output_dir)

    if validation:
        correct = 0
        losses = []
        preds = np.zeros([1,3])
        train_indicies = np.arange(Xd_val.shape[0])
        variables = [mean_loss,correct_prediction,y_out]
        for j in range(int(math.ceil(Xd_val.shape[0]/batch_size))):
            start_idx = (j*batch_size)%Xd_val.shape[0]
            idx = train_indicies[start_idx:start_idx+batch_size]
            feed_dict_val = {X: Xd_val[idx, :],
                         y: yd_val[idx] }
            actual_batch_size = yd_val[idx].shape[0]
            loss, corr, predict = sess.run(variables,feed_dict=feed_dict_val)
            preds = np.concatenate((preds, predict), axis = 0)
            losses.append(loss*actual_batch_size)
            correct += np.sum(corr)
        votes = np.zeros([Xd_val.shape[0]//split, 3])
        for n, i in enumerate(preds[1:]):
#             votes[n%(Xd_val.shape[0]//split),np.argmax(i)] += 1
            votes[n%(Xd_val.shape[0]//split),:] += i
        total_val_correct = correct/Xd_val.shape[0]
        total_val_loss = np.sum(losses)/Xd_val.shape[0]
        sum_total_val_correct = sum(((np.argmax(votes, axis = 1) == np.argmax(yd_val[:Xd_val.shape[0]//split], axis = 1))))/(Xd_val.shape[0]//split)
        print("Validation loss, Overall loss = {0:.3g}, accuracy of {1:.3g}"\
          .format(total_val_loss,sum_total_val_correct))


INFO:tensorflow:Restoring parameters from checkpoint/mix5_rnn_2_250
