In [2]:
%matplotlib inline

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn as skl
import sklearn.utils, sklearn.preprocessing, sklearn.decomposition, sklearn.svm
import librosa
import librosa.display
from IPython import display

AUDIO_PATH = 'dataset/FMA/fma_small/'
METADATA_PATH = 'dataset/FMA/fma_metadata/'

In [3]:
# A simple wrapper class for (1-channel) audio data
# data is a 1-D NumPy array containing the data
# rate is a number expressing the samples per second
# == Modified from 554X class example code ==
class Audio:
    def __init__(self, data, rate, fn):
        self.data = data
        self.rate = rate
        self.filename = fn.split("/")[-1]
    def play(self):
        return display.Audio(self.data, rate=self.rate)
    def plot_wave(self):
        librosa.display.waveplot(self.data, sr=self.rate)
    def create_spectrum(self, n_fft, hop_length):
        # n_fft = int(self.rate / 20)
        # hop_length = n_fft / 4
        D = librosa.amplitude_to_db(np.abs(librosa.stft(self.data, n_fft=n_fft)), ref=np.max)
        return D
    def create_melspectrum(self, n_fft, hop_length):
        D = librosa.power_to_db(librosa.feature.melspectrogram(self.data, sr=self.rate, n_fft=n_fft, hop_length=hop_length), ref=np.max)
        return D
    def plot_spectrum(self, D, y_axis, hop_length):
        librosa.display.specshow(D, y_axis=y_axis, x_axis='time', sr=self.rate, hop_length=hop_length)
    @classmethod
    def fromfile(cls, fn):
        return cls(*librosa.load(fn, sr=None), fn)

In [4]:
def get_tids_from_directory(audio_dir):
    """Get track IDs from the mp3s in a directory.
    Parameters
    ----------
    audio_dir : str
        Path to the directory where the audio files are stored.
    Returns
    -------
        A list of track IDs.
    """
    tids = []
    for _, dirnames, files in os.walk(audio_dir):
        if dirnames == []:
            tids.extend(int(file[:-4]) for file in files)
    return tids


def get_audio_path(audio_dir, track_id):
    """
    Return the path to the mp3 given the directory where the audio is stored
    and the track ID.
    Examples
    --------
    >>> import utils
    >>> AUDIO_DIR = os.environ.get('AUDIO_DIR')
    >>> utils.get_audio_path(AUDIO_DIR, 2)
    '../data/fma_small/000/000002.mp3'
    """
    tid_str = '{:06d}'.format(track_id)
    return os.path.join(audio_dir, tid_str[:3], tid_str + '.mp3')

In [5]:
tids = get_tids_from_directory(AUDIO_PATH)
print(len(tids))
print(get_audio_path(AUDIO_PATH, tids[0]))

8000
dataset/FMA/fma_small/135/135054.mp3


In [6]:
# Load genres and metadata
tracks = pd.read_csv(os.path.join(METADATA_PATH, "tracks.csv"), index_col=0, header=[0, 1])
keep_cols = [('set', 'split'), ('set', 'subset'), ('track', 'genre_top')]

df_all = tracks[keep_cols]
df_all = df_all[df_all[('set', 'subset')] == 'small'] # only extract FMA_small metadata
df_all['track_id'] = df_all.index
print(df_all.shape)

# Create dictionary of genres from unique genre labels
unique_genres = df_all[('track', 'genre_top')].unique()
dict_genres = { unique_genres[i] : i+1 for i in range(0, len(unique_genres)) } # i+1 because feels weird to have 0 as label
print(dict_genres)

# df_all.head(15)

(8000, 4)
{'Hip-Hop': 1, 'Pop': 2, 'Folk': 3, 'Experimental': 4, 'Rock': 5, 'International': 6, 'Electronic': 7, 'Instrumental': 8}


In [7]:
# Function to process data into spectrogram and genre labels for model
def setup_model_data(df):
    genres = []
    X_spect = np.empty((0, 640, 128))
    count = 0
    for index, row in df.iterrows():
        try:
            count += 1
            tid = int(row['track_id'])
            genre = str(row[('track', 'genre_top')])
            genres.append(dict_genres[genre])
            
            audio = Audio.fromfile(get_audio_path(AUDIO_PATH, tid))
            spect = audio.create_melspectrum(2048, 1024)
            spect = spect.T[:640, :]
            X_spect = np.append(X_spect, [spect], axis=0)
            if count % 100 == 0:
                print("Currently processing: ", count)
        except:
            print("Couldn't process: ", count)
            continue
    y_labels = np.array(genres)
    return X_spect, y_labels

In [8]:
# Split full data into train, validation, test data frames
df_train = df_all[df_all[('set', 'split')]=='training']
df_valid = df_all[df_all[('set', 'split')]=='validation']
df_test = df_all[df_all[('set', 'split')]=='test']

In [11]:
# ONLY RUN AND UNCOMMENT THIS IF YOU NEED TO PROCESS TRAIN DATA AGAIN. 

# X_test, y_test = setup_model_data(df_test)
# np.savez('test_data', X_test, y_test)
# print(X_test.shape, y_test.shape)

In [12]:
# ONLY RUN AND UNCOMMENT THIS IF YOU NEED TO PROCESS VALIDATION DATA AGAIN.

# X_valid, y_valid = setup_model_data(df_valid)
# np.savez('valid_data', X_valid, y_valid)
# print(X_valid.shape, y_valid.shape)

Currently processing:  100
Currently processing:  200
Currently processing:  300
Currently processing:  400
Currently processing:  500
Currently processing:  600
Currently processing:  700
Currently processing:  800
(800, 640, 128) (800,)


In [None]:
# ONLY RUN AND UNCOMMENT THIS IF YOU NEED TO PROCESS TRAINING DATA AGAIN.
# WARNING: TAKES A VERY LONG TIME!!!

X_train, y_train = setup_model_data(df_train)
np.savez('train_arr', X_train, y_train)
print(X_train.shape, y_train.shape)

Currently processing:  100
Currently processing:  200
Currently processing:  300
Currently processing:  400
Currently processing:  500
Currently processing:  600
Currently processing:  700
Currently processing:  800
Currently processing:  900
Currently processing:  1000
Currently processing:  1100
Currently processing:  1200
Currently processing:  1300
Currently processing:  1400
Currently processing:  1500
Currently processing:  1600
Currently processing:  1700
Currently processing:  1800
Currently processing:  1900
Currently processing:  2000
Currently processing:  2100
Currently processing:  2200
Currently processing:  2300
Currently processing:  2400
Currently processing:  2500
Currently processing:  2600
Currently processing:  2700
Currently processing:  2800
Currently processing:  2900
Currently processing:  3000
Currently processing:  3100
Currently processing:  3200
Couldn't process:  3265


In [10]:
# Batch training data because it's too big to be processed all at once

def splitDataFrameIntoSmaller(df, chunkSize = 1600): 
    listOfDf = list()
    numberChunks = len(df) // chunkSize + 1
    for i in range(numberChunks):
        listOfDf.append(df[i*chunkSize:(i+1)*chunkSize])
    return listOfDf

listDf = splitDataFrameIntoSmaller(df_train)
df1_train = listDf[0]
df2_train = listDf[1]
df3_train = listDf[2]
df4_train = listDf[3]
print(df1_train.shape, df2_train.shape, df3_train.shape, df4_train.shape)

(1600, 4) (1600, 4) (1600, 4) (1600, 4)


In [9]:
X1_train, y1_train = setup_model_data(df1_train)
np.savez('train1_arr', X1_train, y1_train)
print(X1_train.shape, y1_train.shape)

Currently processing:  100
Currently processing:  200
Currently processing:  300
Currently processing:  400
Currently processing:  500
Currently processing:  600
Currently processing:  700
Currently processing:  800
Currently processing:  900
Currently processing:  1000
Currently processing:  1100
Currently processing:  1200
Currently processing:  1300
Currently processing:  1400
Currently processing:  1500
Currently processing:  1600
(1600, 640, 128) (1600,)


In [10]:
X2_train, y2_train = setup_model_data(df2_train)
np.savez('train2_arr', X2_train, y2_train)
print(X2_train.shape, y2_train.shape)

Currently processing:  100
Currently processing:  200
Currently processing:  300
Currently processing:  400
Currently processing:  500
Currently processing:  600
Currently processing:  700
Currently processing:  800
Currently processing:  900
Currently processing:  1000
Currently processing:  1100
Currently processing:  1200
Currently processing:  1300
Currently processing:  1400
Currently processing:  1500
Currently processing:  1600
(1600, 640, 128) (1600,)


In [11]:
X4_train, y4_train = setup_model_data(df4_train)
np.savez('train4_arr', X4_train, y4_train)
print(X4_train.shape, y4_train.shape)

Currently processing:  100
Currently processing:  200
Currently processing:  300
Currently processing:  400
Currently processing:  500
Currently processing:  600
Currently processing:  700
Currently processing:  800
Couldn't process:  812
Currently processing:  900
Currently processing:  1000
Currently processing:  1100
Currently processing:  1200
Currently processing:  1300
Currently processing:  1400
Currently processing:  1500
Currently processing:  1600
(1599, 640, 128) (1600,)


In [11]:
X3_train, y3_train = setup_model_data(df3_train)
np.savez('train3_arr', X3_train, y3_train)
print(X3_train.shape, y3_train.shape)

Currently processing:  100
Currently processing:  200
Couldn't process:  296
Couldn't process:  297
Couldn't process:  298
Currently processing:  300
Couldn't process:  331
Currently processing:  400
Currently processing:  500
Currently processing:  600
Couldn't process:  698
Currently processing:  700
Currently processing:  800
Currently processing:  900
Currently processing:  1000
Currently processing:  1100
Currently processing:  1200
Currently processing:  1300
Currently processing:  1400
Currently processing:  1500
Currently processing:  1600
(1595, 640, 128) (1600,)


In [15]:
npzfile = np.load('mel_valid_data.npz')
X_valid = npzfile['arr_0']
y_valid = npzfile['arr_1']
print(X_valid.shape, y_valid.shape)

(800, 640, 128) (800,)


In [12]:
npzfile1 = np.load('train1_arr.npz')
npzfile2 = np.load('train2_arr.npz')
npzfile3 = np.load('train3_arr.npz')
npzfile4 = np.load('train4_arr.npz')
X_train1 = npzfile1['arr_0']
y_train1 = npzfile1['arr_1']
X_train2 = npzfile2['arr_0']
y_train2 = npzfile2['arr_1']
X_train3 = npzfile3['arr_0']
y_train3 = npzfile3['arr_1']
X_train4 = npzfile4['arr_0']
y_train4 = npzfile4['arr_1']

X_train = np.concatenate((X_train1, X_train2, X_train3, X_train4), axis = 0)
y_train = np.concatenate((y_train1, y_train2, y_train3, y_train4), axis = 0)
print(X_train.shape, y_train.shape)

(6394, 640, 128) (6400,)
-80.0 3.814697265625e-06 -48.55885427922595


NameError: name 'y_valid' is not defined

In [16]:
print(np.amin(X_train), np.amax(X_train), np.mean(X_train))
y_train = y_train -1
y_valid = y_valid -1
print(np.amin(y_train), np.amax(y_train), np.mean(y_train))

-80.0 3.814697265625e-06 -48.55885427922595
-1 6 2.5


In [None]:
print(np.amin(y_valid), np.amax(y_valid), np.mean(y_valid))

In [None]:
X_train_raw = librosa.core.db_to_power(X_train, ref=1.0)
print(np.amin(X_train_raw), np.amax(X_train_raw), np.mean(X_train_raw))

In [None]:
X_train_log = np.log(X_train_raw)
print(np.amin(X_train_log), np.amax(X_train_log), np.mean(X_train_log))

In [None]:
X_valid_raw = librosa.core.db_to_power(X_valid, ref=1.0)
X_valid_log = np.log(X_valid_raw)