In [None]:
import os
import numpy as np
import librosa
import librosa.display
import pandas as pd
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset, DataLoader

%matplotlib inline

In [None]:
# Root folder of the audio files. get_audio_path() looks tracks up as
# <AUDIO_DIR>/<first three digits of the padded id>/<six-digit id>.mp3.
AUDIO_DIR = 'audio_files/fma_small'

In [None]:
def get_tids_from_directory(audio_dir):
    """Collect the integer track ids of the mp3 files stored under ``audio_dir``.

    Only leaf directories (those without sub-directories) are inspected. A file
    contributes an id when its extension is ``.mp3`` and the rest of its name
    parses as an integer; anything else (notes, checksums, OS metadata files)
    is ignored instead of aborting the scan.

    Args:
        audio_dir: Root directory to walk.

    Returns:
        List of track ids as ints, in the order ``os.walk`` visits them.
    """
    tids = []
    for _, dirnames, files in os.walk(audio_dir):
        if dirnames:
            # Not a leaf directory: its files are not track files.
            continue
        for file in files:
            stem, ext = os.path.splitext(file)
            if ext.lower() != '.mp3':
                continue
            try:
                tids.append(int(stem))
            except ValueError:
                # An mp3 whose name is not a numeric track id; skip it.
                continue
    return tids

def get_audio_path(audio_dir, track_id):
    """Return the mp3 path for ``track_id`` inside ``audio_dir``.

    The id is zero-padded to six digits; its first three characters name the
    sub-folder and the full padded id names the file.
    """
    padded_id = f'{track_id:06d}'
    subfolder = padded_id[:3]
    return os.path.join(audio_dir, subfolder, f'{padded_id}.mp3')

In [None]:
# Collect the ids of all tracks found under AUDIO_DIR and report how many there are.
tids = get_tids_from_directory(AUDIO_DIR)
print(len(tids))

In [None]:
def create_spectogram(track_id):
    """Compute the dB-scaled mel spectrogram of one track.

    The audio file is located with ``get_audio_path(AUDIO_DIR, track_id)`` and
    loaded with librosa's default settings. The mel spectrogram uses a
    2048-sample FFT window and a 1024-sample hop, and is converted to decibels
    relative to its own maximum.

    Args:
        track_id: Integer id of the track to load.

    Returns:
        The transposed dB spectrogram (callers in this notebook treat the
        first axis as frames and the second as mel bands).
    """
    audio_path = get_audio_path(AUDIO_DIR, track_id)
    samples, sample_rate = librosa.load(audio_path)
    mel_power = librosa.feature.melspectrogram(
        y=samples,
        sr=sample_rate,
        n_fft=2048,
        hop_length=1024,
    )
    mel_db = librosa.power_to_db(mel_power, ref=np.max)
    return mel_db.T

In [None]:
def plot_spect(track_id):
    """Print the shape of a track's mel spectrogram and plot it.

    The display parameters must match the ones used to compute the
    spectrogram in ``create_spectogram``; otherwise the axes are mislabelled.
    The spectrogram is computed with ``hop_length=1024`` and no explicit
    ``fmax``, so the same hop length is passed here and ``fmax`` is left at
    its default instead of being forced to 8000 Hz.

    Args:
        track_id: Integer id of the track to plot.
    """
    spect = create_spectogram(track_id)
    print(spect.shape)
    plt.figure(figsize=(10, 4))
    # create_spectogram returns the transposed spectrogram; transpose it back
    # so that mel bands run along the y axis and frames along the x axis.
    librosa.display.specshow(spect.T, x_axis='time', y_axis='mel', hop_length=1024)
    plt.colorbar(format='%+2.0f dB')
    plt.show()

In [None]:
# Load the track metadata. The CSV has a two-row header, so the columns form a
# two-level (group, field) index; the first CSV column becomes the row index.
filepath = 'audio_files/tracks.csv'
tracks = pd.read_csv(filepath, index_col=0, header=[0, 1])
keep_cols = [('set', 'split'), ('set', 'subset'), ('track', 'genre_top')]

# Keep only the needed columns for the rows of the "small" subset.
# .copy() gives df_all its own data, so adding a column below does not trigger
# a SettingWithCopyWarning or depend on whether the filter returned a view.
df_all = tracks[keep_cols]
df_all = df_all[df_all[('set', 'subset')] == 'small'].copy()

# Expose the row index as a regular column so create_array() can read it per row.
df_all['track_id'] = df_all.index
df_all.head()

In [None]:
# Map each top-level genre name to a 1-based integer label (1..8).
# Later cells subtract 1 from these labels before saving / building tensors.
dict_genres = {'Electronic': 1, 'Experimental': 2, 'Folk': 3, 'Hip-Hop': 4,
               'Instrumental': 5, 'International': 6, 'Pop': 7, 'Rock': 8}

In [None]:
def create_array(df):
    """Build the spectrogram array and label vector for the tracks in ``df``.

    Each row must provide a ``'track_id'`` entry and a ``('track', 'genre_top')``
    entry whose value is a key of ``dict_genres``. Every spectrogram is cropped
    to its first 640 frames. Rows whose audio cannot be processed, whose genre
    is unknown, or whose spectrogram is not 640 x 128 after cropping are
    reported and skipped, so features and labels always stay aligned.

    Args:
        df: DataFrame of track metadata.

    Returns:
        Tuple ``(X_spect, y_arr)``: a float64 array of shape ``(n, 640, 128)``
        and an integer array with the ``n`` matching ``dict_genres`` labels,
        where ``n`` is the number of rows processed successfully.
    """
    n_frames, n_mels = 640, 128
    spects = []
    genres = []
    count = 0
    for _, row in df.iterrows():
        count += 1
        try:
            # np.ravel accepts a plain scalar as well as a one-element Series,
            # which a row with two-level column labels may return for
            # 'track_id'; int() on such a Series is deprecated in pandas.
            track_id = int(np.ravel(row['track_id'])[0])
            # Resolve the label before loading audio so an unknown genre can
            # never leave a stored spectrogram without its label.
            label = dict_genres[str(row[('track', 'genre_top')])]
            spect = create_spectogram(track_id)[:n_frames, :]
            if spect.shape != (n_frames, n_mels):
                raise ValueError('unexpected spectrogram shape {}'.format(spect.shape))
        except Exception as err:
            # Best-effort: report and skip the row. Unlike a bare ``except``,
            # this still lets KeyboardInterrupt stop a long run.
            print("Couldn't process: ", count, repr(err))
            continue
        spects.append(spect)
        genres.append(label)
        if count % 100 == 0:
            print("Currently processing: ", count)

    # Stack once at the end; appending to the array inside the loop would copy
    # everything accumulated so far on every iteration.
    if spects:
        X_spect = np.stack(spects).astype(np.float64, copy=False)
    else:
        X_spect = np.empty((0, n_frames, n_mels))
    # An explicit integer dtype keeps an empty label array from defaulting to float.
    y_arr = np.array(genres, dtype=int)
    return X_spect, y_arr

In [None]:
# Partition the metadata by the ('set', 'split') column into the three splits.
df_train, df_valid, df_test = (
    df_all[df_all[('set', 'split')] == split_name]
    for split_name in ('training', 'validation', 'test')
)

print(df_train.shape, df_valid.shape, df_test.shape)

In [None]:
# Build the spectrogram array and label vector for the test split and show their shapes.
X_test, y_test = create_array(df_test)
print(X_test.shape, y_test.shape)

In [None]:
# Convert the dB-scaled test spectrograms back to power (reference 1.0) and
# take the natural logarithm of the result.
X_test_raw = librosa.db_to_power(X_test, ref=1.0)
X_test_log = np.log(X_test_raw)

# Wrap features and labels as torch tensors; labels are shifted from the
# 1-based values of dict_genres to 0-based indices.
X_test_torch = torch.FloatTensor(X_test_log)
y_test_torch = torch.LongTensor(y_test - 1)

In [None]:
# Save the log-scaled test features and 0-based labels. The arrays are passed
# positionally, so np.savez stores them under its default keys arr_0 and arr_1.
np.savez('test_arr', X_test_log, y_test - 1)

In [None]:
# Same pipeline as for the test split: build arrays, convert dB back to power,
# then take the natural logarithm.
X_valid, y_valid = create_array(df_valid)
X_valid_raw = librosa.db_to_power(X_valid, ref=1.0)
X_valid_log = np.log(X_valid_raw)

# Save features with 0-based labels (positional arrays -> keys arr_0 and arr_1).
np.savez('valid_arr', X_valid_log, y_valid - 1)

In [None]:
def splitDataFrameIntoSmaller(df, chunkSize=1600):
    """Split ``df`` into consecutive slices of at most ``chunkSize`` rows.

    The number of chunks is the ceiling of ``len(df) / chunkSize``, so no
    trailing empty chunk is produced when the length is an exact multiple of
    ``chunkSize``. An empty input still yields a single empty chunk, so callers
    always receive at least one element.

    Args:
        df: DataFrame (or any object supporting ``len`` and slicing) to split.
        chunkSize: Maximum number of rows per chunk; must be positive.

    Returns:
        List of slices of ``df`` in their original order.
    """
    # -(-a // b) is integer ceiling division.
    numberChunks = max(1, -(-len(df) // chunkSize))
    return [df[i * chunkSize:(i + 1) * chunkSize] for i in range(numberChunks)]

In [None]:
# Process the training split chunk by chunk, collecting one feature array and
# one label array per chunk; they are concatenated in a later cell.
listDf = splitDataFrameIntoSmaller(df_train)
X_train_list = []
y_train_list = []

for i, sub_df in enumerate(listDf):
    print(f"Processing train chunk {i + 1}/{len(listDf)}")
    X_chunk, y_chunk = create_array(sub_df)
    X_train_list.append(X_chunk)
    y_train_list.append(y_chunk)

In [None]:
# Join the per-chunk arrays along the first axis into the full training set.
X_train = np.concatenate(X_train_list, axis=0)
y_train = np.concatenate(y_train_list, axis=0)

print(X_train.shape, y_train.shape)

In [None]:
# Convert the dB-scaled training spectrograms back to power (reference 1.0)
# and take the natural logarithm of the result.
X_train_raw = librosa.db_to_power(X_train, ref=1.0)
X_train_log = np.log(X_train_raw)

# Shift labels from the 1-based values of dict_genres to 0-based indices.
# NOTE: y_train is rebound from itself, so running this cell a second time
# shifts the labels again; re-run the concatenation cell first.
y_train = y_train - 1

In [None]:
def unison_shuffled_copies(a, b, seed=None):
    """Return copies of ``a`` and ``b`` shuffled with one shared permutation.

    Args:
        a: Array indexable by an integer index array.
        b: Array of the same length as ``a``.
        seed: Optional seed for a dedicated random generator. When ``None``
            (the default) NumPy's global random state is used, as before.

    Returns:
        Tuple ``(a[p], b[p])`` where ``p`` is a random permutation of
        ``range(len(a))``, so element ``i`` of both outputs comes from the
        same original position.
    """
    assert len(a) == len(b), "a and b must have the same length"
    rng = np.random if seed is None else np.random.default_rng(seed)
    p = rng.permutation(len(a))
    return a[p], b[p]


# A fixed seed makes the saved training order reproducible across runs.
# NOTE: y_train is both input and output here. Re-running this cell would
# permute the already-shuffled labels against the unshuffled X_train_log and
# misalign them; rebuild y_train from the earlier cells before re-running.
X_train, y_train = unison_shuffled_copies(X_train_log, y_train, seed=0)

In [None]:
# Persist the final arrays; positional arguments are stored under np.savez's
# default keys arr_0 and arr_1.
np.savez('shuffled_train', X_train, y_train)
# NOTE(review): in the cells visible here the validation arrays are never
# shuffled, and the same two arrays were already saved as 'valid_arr' -
# confirm whether this second file is intentional.
np.savez('shuffled_valid', X_valid_log, y_valid - 1)