In [27]:
#### https://www.kaggle.com/dikshabhati2002/let-s-classify-audio-ml-dl

import matplotlib.pyplot as plt
import librosa
import librosa.display
import IPython.display as ipd
from scipy.io import wavfile as wav
import pandas as pd
import math
import os
import numpy as np
import seaborn as sns
import json


from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier, XGBRFClassifier
import catboost as cb

from sklearn.metrics import confusion_matrix, accuracy_score, roc_auc_score, roc_curve
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
#from tensorflow.keras.callbacks import ReduceLROnPlateau,EarlyStopping,ModelCheckpoint,LearningRateScheduler
#import tensorflow.keras as keras
#from tensorflow.keras import optimizers
from tensorflow.keras import Sequential
#from tensorflow.keras.layers import *

SymbolAlreadyExposedError: Symbol arg_max is already exposed as ().

In [None]:
# Audio and feature-extraction settings shared by the cells below.
sample_rate = 22050   # samples per second; the Nyquist limit is sample_rate / 2 = 11025 Hz
sr = sample_rate      # short alias for the same rate
duration = 29         # seconds of each track that are used
samples_per_track = duration * sr

# Names mirror the librosa parameters of the same name.
n_fft = 2048
hop_length = 512
n_mels = 128
n_mfcc = 40

# File that the extracted MFCC data is written to.
json_path = r"data.json"

In [None]:
# Folder that holds the audio data, one sub-folder per genre (class).
filepath = '/Users/stephen/Flatiron/Phase4/MIR/Data/genres_original'
# os.listdir already returns a list, so it can be printed directly.
print(os.listdir(filepath + '/'))

In [None]:
# Read features_3_sec.csv into a DataFrame and preview its first rows.
# NOTE(review): absolute, user-specific path -- this cell only runs where that
# exact file location exists.
df = pd.read_csv('/Users/stephen/Flatiron/Phase4/MIR/Data/features_3_sec.csv')
df.head()

In [None]:
# Dimensions of the DataFrame as (rows, columns).
df.shape

In [None]:
# Show the DataFrame itself as the cell output.
df

In [None]:
# Number of rows for each distinct value of the 'label' column.
df['label'].value_counts()

In [None]:
# Box plot of the 'tempo' column grouped by 'label', saved as BPM_Boxplot.png.
x = df[["label", "tempo"]]

fig, ax = plt.subplots(figsize=(16, 8))
sns.boxplot(data=x, x="label", y="tempo", palette='husl', ax=ax)

# Title and axis labels go through the Axes object created above.
ax.set_title('BPM Boxplot for Genres', fontsize=20)
ax.set_xlabel("Genre", fontsize=15)
ax.set_ylabel("BPM", fontsize=15)

# Tick label sizes for the current (and only) axes.
plt.xticks(fontsize=14)
plt.yticks(fontsize=10)

fig.savefig("BPM_Boxplot.png")

In [None]:
def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048,
             hop_length=512, num_segments=5):
    """Extract per-segment MFCCs for every class folder and write them to JSON.

    Walks ``dataset_path`` with ``os.walk``. Every directory below the root is
    treated as one class named after the directory's base name. Each file in
    such a directory is loaded at the module-level ``sample_rate``, cut into
    ``num_segments`` equal slices (slice length derived from the module-level
    ``samples_per_track``), and the transposed MFCC matrix of each slice is
    stored together with the integer index of its class.

    Parameters
    ----------
    dataset_path : str or os.PathLike
        Root folder containing one sub-folder per class.
    json_path : str
        Output file; it is opened in write mode and therefore overwritten.
    n_mfcc, n_fft, hop_length : int
        Forwarded unchanged to ``librosa.feature.mfcc``.
    num_segments : int
        Number of equal slices each track is divided into.

    Returns
    -------
    None
        The result is written to ``json_path`` as a JSON object with the keys
        ``"mapping"`` (class names), ``"mfcc"`` (one nested list per stored
        segment) and ``"labels"`` (index into ``"mapping"`` per segment).
    """
    data = {
        "mapping": [],
        "mfcc": [],
        "labels": [],
    }
    samples_ps = int(samples_per_track / num_segments)  # ps = per segment
    expected_vects_ps = math.ceil(samples_ps / hop_length)

    # os.walk yields plain strings, so normalise the root once and compare by
    # value. An identity check would misclassify the root as a class whenever
    # the caller passes a pathlib.Path (or any equal-but-distinct string).
    root = os.fspath(dataset_path)

    for dirpath, dirnames, filenames in os.walk(root):
        # Sorting in place makes the traversal order, and with it the label
        # numbering, deterministic across runs and file systems.
        dirnames.sort()

        if dirpath == root:
            continue

        # The class name is the last path component, independent of the
        # platform's path separator.
        semantic_label = os.path.basename(dirpath)
        label_index = len(data["mapping"])
        data["mapping"].append(semantic_label)
        print(f"Processing: {semantic_label}")

        for f in sorted(filenames):
            # Hidden files (for example .DS_Store) are not audio.
            if f.startswith("."):
                continue
            # Skipped by the original author with the note
            # "librosa only read files <1Mb".
            # NOTE(review): presumably this file cannot be decoded -- confirm.
            if f == "jazz.00054.wav":
                continue

            file_path = os.path.join(dirpath, f)
            signal, sr = librosa.load(file_path, sr=sample_rate)

            for s in range(num_segments):
                start_sample = samples_ps * s
                finish_sample = start_sample + samples_ps

                # A track shorter than expected has no samples left for the
                # remaining segments.
                if start_sample >= len(signal):
                    break

                # The audio must be passed by keyword: recent librosa
                # releases reject a positional audio argument.
                mfcc = librosa.feature.mfcc(y=signal[start_sample:finish_sample],
                                            sr=sr,
                                            n_fft=n_fft,
                                            n_mfcc=n_mfcc,
                                            hop_length=hop_length).T

                # Keep only segments with the expected number of frames so
                # that every stored matrix has the same length.
                if len(mfcc) == expected_vects_ps:
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(label_index)
                    print(f"{file_path}, segment: {s+1}")

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
     

In [None]:
# Extract MFCCs for every track under `filepath`, 10 segments per track, and
# write the result to `json_path`.
# NOTE(review): n_mfcc, n_fft and hop_length are not passed, so the function
# defaults apply (n_mfcc=13) rather than the n_mfcc = 40 set in the config
# cell -- confirm which value is intended.
save_mfcc(filepath,json_path,num_segments=10)
#clear_output()

In [None]:
# loading in the data 
def load_data(filepath):
    """Load a JSON file and return its "mfcc" and "labels" entries as arrays.

    Parameters
    ----------
    filepath : str
        Path of a JSON file holding an object with "mfcc" and "labels" keys.

    Returns
    -------
    tuple of numpy.ndarray
        ``(inputs, targets)`` built from the "mfcc" and "labels" lists.
    """
    with open(filepath, 'r') as handle:
        payload = json.load(handle)

    # Nested lists become NumPy arrays in a single conversion each.
    return np.array(payload['mfcc']), np.array(payload['labels'])

In [None]:
# Load the MFCC matrices and integer labels from the file named by
# `json_path`, the same path that is handed to save_mfcc. Reusing that
# variable (instead of a second, absolute, user-specific copy of the path)
# keeps the writer and the reader pointing at one file.
inputs, targets = load_data(json_path)
inputs.shape

In [None]:
# Hold out 30% of the segments for testing.
# A fixed random_state makes the split, and every metric computed from it,
# reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    inputs, targets, test_size=0.3, random_state=42
)
print(X_train.shape, y_train.shape)

In [None]:
# Fully connected classifier: each 2-D input (inputs.shape[1] x inputs.shape[2])
# is flattened and passed through four ReLU layers with L2 regularisation and
# dropout on the first three.
model = Sequential()

# Layer classes are reached through the imported `layers` module; the bare
# names Flatten / Dense / Dropout are not imported anywhere in this notebook
# and would raise NameError on a fresh kernel.
model.add(layers.Flatten(input_shape=(inputs.shape[1], inputs.shape[2])))
model.add(layers.Dense(512, activation='relu', kernel_regularizer=keras.regularizers.l2(0.001)))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(256, activation='relu', kernel_regularizer=keras.regularizers.l2(0.003)))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01)))
model.add(layers.Dropout(0.3))
model.add(layers.Dense(32, activation='relu'))
# Softmax output with 10 units.
# NOTE(review): presumably one unit per genre class -- confirm against the labels.
model.add(layers.Dense(10, activation='softmax'))
model.summary()


In [None]:
# Adam optimiser with a learning rate of 1e-4. It is reached through the
# imported `keras` module because no bare `optimizers` name is imported in
# this notebook (that would raise NameError on a fresh kernel).
adam = keras.optimizers.Adam(learning_rate=1e-4)

In [None]:
# Integer class labels are used directly, hence the sparse cross-entropy loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=adam,
)

# Train for 40 epochs in batches of 32; the per-epoch metrics are kept in `hist`.
# NOTE(review): the held-out split is also used as validation data here.
hist = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=40,
    validation_data=(X_test, y_test),
)



In [None]:
def plot_history(hist):
    """Plot training and validation accuracy and loss in two stacked panels.

    Parameters
    ----------
    hist : object with a ``history`` mapping
        ``hist.history`` must provide the keys "accuracy", "val_accuracy",
        "loss" and "val_loss", each a sequence with one value per epoch.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # The size is given to plt.subplots itself. A separate
    # plt.figure(figsize=...) call would only open an additional, empty
    # figure, because plt.subplots always creates its own.
    fig, axs = plt.subplots(2, figsize=(20, 15))

    # Accuracy panel
    axs[0].plot(hist.history["accuracy"], label="train accuracy")
    axs[0].plot(hist.history["val_accuracy"], label="test accuracy")
    axs[0].set_ylabel("Accuracy")
    axs[0].legend(loc="lower right")
    axs[0].set_title("Accuracy eval")

    # Loss panel
    axs[1].plot(hist.history["loss"], label="train error")
    axs[1].plot(hist.history["val_loss"], label="test error")
    axs[1].set_ylabel("Error")
    axs[1].set_xlabel("Epoch")
    axs[1].legend(loc="upper right")
    axs[1].set_title("Error eval")

    plt.show()


In [None]:
# Draw the accuracy and loss curves recorded in `hist` during training.
plot_history(hist)

In [None]:
# Loss and accuracy of the trained model on X_test / y_test.
# NOTE(review): the same arrays were passed as validation_data during fit.
test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test accuracy: {test_accuracy}")

In [None]:
# Model outputs for X_test. The final layer is a 10-unit softmax, so
# presumably one row of 10 scores per sample -- check against the shape shown.
out = model.predict(X_test)
out.shape

In [None]:
# Shape of the test inputs.
X_test.shape