# MY NOTEBOOK

## CONFIG

In [1]:
%load_ext autoreload
%autoreload

### Imports

In [2]:
import os
import fnmatch
import re
import random
import time
import sys

import librosa
import tensorflow as tf

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt 

import itertools

import progressbar
from time import sleep

from models import *

  from ._conv import register_converters as _register_converters


## Constants

In [3]:
MTT_DIR = "../MTT/"
DATA_DIRECTORY = MTT_DIR + "dataset/"
# CSV containing song details (title, artist, id, mp3_path,...)
CLIP_INFO_FINAL = "clip_info_final.csv"
# CSV containg the labels (clip_id, labels, mp3_path)
ANNOTATIONS_FINAL = "annotations_final.csv"
# CSV : what is it useful for ?
COMPARISONS_FINAL = "comparisons_final.csv"
LABELS_FILE = MTT_DIR + ANNOTATIONS_FINAL

SUB_DIRS = "0123456789abcdef"

AUDIO1_path = "../MTT/mtt_data_mp3.zip/0/american_bach_soloists-j_s__bach_\
_transcriptions_of_italian_music-02-concerto_in_a_minor_for_four_harpsichords\
_bwv_1065_ii_largo-88-117.mp3"

In [74]:
FILE_NB = 25880 #25863
FILE_LENGTH = 465984
# 465984 = 2 × 2 × 2 × 2 × 2 × 2 × 3 × 3 × 809
# useful for batches > for now divide by 9 (instead of 10)

BATCH_NB = 9
BATCH_SIZE = int(FILE_LENGTH/BATCH_NB)
SAMPLE_SIZE = 0
SAMPLE_RATE = 16000
RECEPTIVE_FIELD = 0

EPOCHS = 200

BASIC_CONFIG ={'numOutputNeurons':500}

## Loading tool functions

In [5]:
def load_labels(labels_file_name):
    pd.read_csv(labels_file_name)

#def load_1label(labels_file_name):
 #   data = pd.read_csv(labels_file_name)
  #  print(data.head())

def randomize_files(files):
    for file in files:
        file_index = random.randint(0, (len(files) - 1))
        yield files[file_index]
        
def load_generic_audio(directory, sample_rate):
    '''Generator that yields audio waveforms from the directory.'''
    files = find_files(directory)
    # format of content of files :
    # <../MTT/dataset/9/the_kokoon-berlin-07-sm_art-146-175.mp3>
    # files[i][len(DATA_DIRECTORY):] extract the part corresponding
    # to mp3_path in annotations csv :  <9/the_kokoon-berlin-07-sm_art-146-175.mp3>

 #   x = load_1label(LABELS_FILE)
  #  return 
    #print("files length: {}".format(len(files)))
   # randomized_files = randomize_files(files)
    count = 0
   # for filename in randomized_files:
    for filename in files:

        try :
            audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
            audio = audio.reshape(-1, 1)
            count +=1
            yield audio, filename, count
        except EOFError :
            print("EOFERROR : The following file could not be loaded with librosa - ", filename)



def find_files(directory, pattern='*.mp3', sample=None):
    '''Recursively finds all files matching the pattern.'''
    files = []
    for root, dirnames, filenames in os.walk(directory):
        for filename in fnmatch.filter(filenames, pattern):
            files.append(os.path.join(root, filename))
    if sample!=None :
        try:
            return files[:sample]
        except TypeError:
            print("Argument sample should be either None, or an integer :\
             the number of first n samples to take.")
    else :
        return files

## Load and clean labels for prediction

In [6]:
labels = pd.read_csv(LABELS_FILE, sep = '"\t"')

  """Entry point for launching an IPython kernel.


In [7]:
# Prepare header to put back in the end
# remove quotes and take all columns except the first one
header = list(map(lambda x : x.replace('"', ''), labels))[1:]
# add back the first column, separated in two
header = ['clip_id', 'no_voice']+header
# create dictionary
header = dict(enumerate(header))

In [8]:
# Solve format problem : two first columns are merged
# extract first column and rest
left, right = labels['"clip_id\t""no voice"'], labels.iloc[:, 1:]
# split first column in two part at separator "\t"
split = left.str.split(pat = "\t", expand=True).replace('"', '')

# put back the first column which is now two, with the rest
cleaned = pd.concat([split, right], axis=1, ignore_index=True) 
# clean by removing quotes and add back header
cleaned = cleaned.apply(lambda col : col.apply(lambda x : x.replace('"', ''))).rename(columns = header)
cleaned.head(5)

Unnamed: 0,clip_id,no_voice,singer,duet,plucking,hard rock,world,bongos,harpsichord,female singing,...,rap,metal,hip hop,quick,water,baroque,women,fiddle,english,mp3_path
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,f/american_bach_soloists-j_s__bach_solo_cantat...
1,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,f/american_bach_soloists-j_s__bach_solo_cantat...
2,10,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,f/american_bach_soloists-j_s__bach_solo_cantat...
3,11,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,f/american_bach_soloists-j_s__bach_solo_cantat...
4,12,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,f/american_bach_soloists-j_s__bach_solo_cantat...


In [9]:
## TODO : Some statistics on the MTT dataset ?
nb_labels_per_song = cleaned.iloc[:,1:-1].astype(int).sum(axis=1)
nb_song_per_label = cleaned.iloc[:,1:-1].astype(int).sum(axis=0)

In [10]:
print("Number of songs : " , cleaned.shape[0])
print("Number of labels : " , cleaned.shape[1])
print("Max number of songs tagged with the same label : ",max(nb_song_per_label))
print("Max number of labels for a single song : ",max(nb_labels_per_song))

Number of songs :  25863
Number of labels :  190
Max number of songs tagged with the same label :  4852
Max number of labels for a single song :  27


In [11]:
#plt.hist(nb_labels_per_song)#, bins = [0,20,40,60,80,100]) 
#plt.title("histogram") 
#plt.show()

In [12]:
sample_labels = cleaned[['clip_id', 'no_voice', 'singer', 'mp3_path']]
sample_labels.shape

(25863, 4)

## Load and format mp3 data 

In [13]:
iterator = load_generic_audio(DATA_DIRECTORY, None)

## Neural Network model

In [127]:
# TODO : not efficient to 
# - give the whole label dataset to the function as argument
# - look in the label dataset at each iteration to retrieve the label
#   > should concat before randomize
# - append to an array and then convert to numpy ? > check

def load_audio_label(labels, directory, sample_rate, num_songs):
    files = find_files(directory, sample=num_songs)
        # format of content of files :
        # <../MTT/dataset/9/the_kokoon-berlin-07-sm_art-146-175.mp3>
        # files[i][len(DATA_DIRECTORY):] extract the part corresponding
        # to mp3_path in annotations csv :  <9/the_kokoon-berlin-07-sm_art-146-175.mp3>

     #   x = load_1label(LABELS_FILE)
      #  return 
        #print("files length: {}".format(len(files)))
    randomized_files = randomize_files(files)
    count = 0
    audios = np.ndarray(shape=(num_songs, BATCH_NB, BATCH_SIZE, 1), dtype=float, order='F')
    tags = np.ndarray(shape=(num_songs, BATCH_NB, labels.shape[1]-2, 1), dtype=float, order='F')
    
    for filename in randomized_files:
    #    for filename in files:

        #if count > 200 :
         #   return
        
        try :
            audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
        except EOFError :
            print("EOFERROR : The following file could not be loaded with librosa - ", filename)
        #print(audio)
        audio = audio.reshape(-1, 1)
        #print(audio)
        #create batches
        audio_batch = np.ndarray(shape=(BATCH_NB, BATCH_SIZE, 1), dtype=float, order='F')
        label_batch = np.ndarray(shape=(BATCH_NB, labels.shape[1]-2, 1), dtype=float, order='F')
        for n in range(BATCH_NB) :
            audio_batch[n] = audio[n*BATCH_SIZE: (n+1)*BATCH_SIZE,:]
            label_batch[n] = labels.loc[labels['mp3_path']==filename[len(DATA_DIRECTORY):]] \
                               .values[:, 1:-1].reshape(labels.shape[1]-2,1)
        #print(len(audio_batch), len(audio_batch[0]), len(audio_batch[0][0]))
        
        #audios.append(audio_batch) 
        audios[count] = audio_batch
        tags[count] = label_batch
        
        count +=1
        if (count % 100) == 0:
            print(count)
            #print("AUDIO : ", audio)
            #print("LABELS : ", labels.loc[labels['mp3_path']==filename[len(DATA_DIRECTORY):]])
            #print("FILENAME :", filename)
         
        # TODO : create batches > here or later to rereandomize the order ?

        #print()
        
        
    return audios, tags
        #yield audio, labels, count

In [128]:
# load audios and labels > convert to numpy
# CAREFUL : the argument num_songs is important and shouldn't be too big 
# > otherwise MEMORY ISSUES !!!!!!
audios_sample, tags_sample = load_audio_label(sample_labels, DATA_DIRECTORY, None, num_songs = 3)
#audios_sample = np.asarray(audios_sample)
tags_sample = np.asarray(tags_sample)

In [129]:
print("Shape of audios list :")
print(audios_sample.shape)
print()
print(">> shape is : [num_song, num_batches, batch_size, 1]")


Shape of audios list :
(3, 9, 51776, 1)

>> shape is : [num_song, num_batches, batch_size, 1]


In [132]:
print("Shape of tags list :")
print(tags_sample.shape)
print()
print(">> shape is : [num_song, num_labels (mp3 file and id excluded), 1] ")
print("(or : [num_song, num_labels, 1, str_len, 1])")

Shape of tags list :
(3, 9, 2, 1)

>> shape is : [num_song, num_labels (mp3 file and id excluded), 1] 
(or : [num_song, num_labels, 1, str_len, 1])


In [139]:
# put all batches at the same level (remove song dimension in the array)
# ex : go from dimensions (3, 9, 51776, 1) > to (27, 51776, 1)
audios_sample_flatten = audios_sample.reshape(-1, audios_sample.shape[-2], audios_sample.shape[-1])
tags_sample_flatten = tags_sample.reshape(-1, tags_sample.shape[-2], tags_sample.shape[-1])

In [140]:
voice_no_voice = tags_sample_flatten[:,1,:]
voice_no_voice.shape

(27, 1)

In [141]:
### EXAMPLE FROM TOWARDS DATASCIENCE
    #EPOCHS = 10
    #BATCH_SIZE = 16
    ## using two numpy arrays
    #features, labels = (np.array([np.random.sample((100,2))]), 
    #                    np.array([np.random.sample((100,1))]))
    #dataset = tf.data.Dataset.from_tensor_slices((features,labels)).repeat().batch(BATCH_SIZE)
    #iter = dataset.make_one_shot_iterator()
    #x, y = iter.get_next()
    ## make a simple model
    #net = tf.layers.dense(x, 8, activation=tf.tanh) # pass the first value from iter.get_next() as input
    #net = tf.layers.dense(net, 8, activation=tf.tanh)
    #prediction = tf.layers.dense(net, 1, activation=tf.tanh)
    #loss = tf.losses.mean_squared_error(prediction, y) # pass the second value from iter.get_net() as label
    #train_op = tf.train.AdamOptimizer().minimize(loss)
    #with tf.Session() as sess:
    #    sess.run(tf.global_variables_initializer())
    #    for i in range(EPOCHS):
    #        _, loss_value = sess.run([train_op, loss])
    #        print("Iter: {}, Loss: {:.4f}".format(i, loss_value))

In [145]:
#audio,_,_ = next(iterator)

# Load the first song (entirely, without cutting into 3 pieces)
#print(audios[0])
#audio_np = np.asarray(audios_sample[0], np.float32)
audio_tf= tf.convert_to_tensor(audios_sample_flatten, np.float32)
print(audio_tf.shape)

# TODO : do I need to include the labels in the audio batch ?
net = build_model(audio_tf, is_training=True, config=BASIC_CONFIG) 
prediction = tf.layers.dense(net, 1, activation=tf.tanh) # should be the nb pf layers > activation sigmoid
loss = tf.losses.mean_squared_error(prediction, voice_no_voice) # pass the second value from iter.get_net() as label
train_op = tf.train.AdamOptimizer().minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(EPOCHS):
        _, loss_value = sess.run([train_op, loss])
        print("Iter: {}, Loss: {:.4f}".format(i, loss_value))


(27, 51776, 1)
Iter: 0, Loss: 0.3362
Iter: 1, Loss: 0.2398
Iter: 2, Loss: 0.1775
Iter: 3, Loss: 0.1845
Iter: 4, Loss: 0.2480
Iter: 5, Loss: 0.2809
Iter: 6, Loss: 0.1809
Iter: 7, Loss: 0.1597
Iter: 8, Loss: 0.2054
Iter: 9, Loss: 0.1596
Iter: 10, Loss: 0.1308
Iter: 11, Loss: 0.2189
Iter: 12, Loss: 0.2347
Iter: 13, Loss: 0.1724
Iter: 14, Loss: 0.2413
Iter: 15, Loss: 0.1937
Iter: 16, Loss: 0.1857
Iter: 17, Loss: 0.2029
Iter: 18, Loss: 0.1291
Iter: 19, Loss: 0.1768
Iter: 20, Loss: 0.1764
Iter: 21, Loss: 0.0937
Iter: 22, Loss: 0.1230
Iter: 23, Loss: 0.1274
Iter: 24, Loss: 0.1406
Iter: 25, Loss: 0.0779
Iter: 26, Loss: 0.1005
Iter: 27, Loss: 0.1233
Iter: 28, Loss: 0.0865
Iter: 29, Loss: 0.0960
Iter: 30, Loss: 0.0908
Iter: 31, Loss: 0.0841
Iter: 32, Loss: 0.0938
Iter: 33, Loss: 0.1221
Iter: 34, Loss: 0.0803
Iter: 35, Loss: 0.1015
Iter: 36, Loss: 0.0873
Iter: 37, Loss: 0.0798
Iter: 38, Loss: 0.0669
Iter: 39, Loss: 0.0929
Iter: 40, Loss: 0.0853
Iter: 41, Loss: 0.0548
Iter: 42, Loss: 0.0578
Iter: 

In [143]:
# run this cell everytime before relaunching tensorflow session
tf.reset_default_graph()

In [144]:
len(tags)



NameError: name 'tags' is not defined