In [5]:
cd ../

/Users/mattgraziano/Google Drive/Documents/Learning/IU/Fall18/engr511/project/engr511


In [6]:
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
from glob import glob
import pandas as pd
import numpy as np
import librosa
from livelossplot import PlotLossesKeras
import scipy.fftpack

In [7]:
from keras.models import Model
from keras.layers import Input, Dense, Softmax, Dropout, BatchNormalization, Activation

from keras.utils import to_categorical

from sklearn.preprocessing import LabelEncoder

import keras

In [8]:
from src.Progression_Modeling import Load, Prepare, Mel_transform, normalize, melspectrogram, trim, CQT_transform, pseudo_cqt, Chroma
import soundfile as sf
from scipy.signal import stft

## Import data

In [9]:
# recording_filenames = glob("data/recordings/*/*.wav", recursive=True)

In [10]:
# Collect every recording. The list is sorted so that the sample order (which
# the unshuffled cross-validation splits further down depend on) is the same on
# every machine instead of following filesystem enumeration order.
recording_filenames = sorted(glob("data/recordings_new/*/*/*.wav", recursive=True))

In [11]:
len(recording_filenames)

334

In [12]:
# Reference note frequencies, named by pitch and octave.
# Presumably in Hz (some are passed as fmin/fmax to the spectrogram helpers
# below) -- TODO confirm units against src.Progression_Modeling.
A0 = 27.5
A1 = 55.00
C1 = 32.70
A2 = 110.00
C2 = 65.41
E6 = 1318.51
C7 = 2093.00
C8 = 4186.01

In [13]:
def nth_largest_col_sum(arr, n=-80):
    """Return the entry at index ``n`` of the ascending-sorted column sums of ``arr``.

    With a negative ``n`` (the default) this is the ``|n|``-th largest column
    sum; ``arr`` itself is left untouched.
    """
    # Sum down each column, then order the totals from smallest to largest.
    ordered_totals = np.sort(np.sum(arr, axis=0))
    return ordered_totals[n]
    

In [14]:
def get_spectrogram(signal, sr=44100, spectype="mel"):
    """Compute a normalised spectrogram of ``signal`` with the requested transform.

    Parameters
    ----------
    signal : array-like
        Audio samples. Every transform except ``"stft"`` first passes the
        signal through ``Prepare``; ``"stft"`` works on the raw signal.
    sr : int
        Sampling rate handed to the transform.
    spectype : {"mel", "cqt", "chroma", "stft"}
        Which time-frequency representation to compute.

    Returns
    -------
    The normalised spectrogram produced by the selected branch.

    Raises
    ------
    ValueError
        If ``spectype`` is not one of the supported names. Without this check
        an unknown name fell through every branch and surfaced as an
        ``UnboundLocalError`` on the return statement.
    """
    supported = ("mel", "cqt", "chroma", "stft")
    if spectype not in supported:
        raise ValueError(
            "Unknown spectype {!r}; expected one of {}".format(spectype, supported)
        )

    if spectype == "mel":
        spectrogram = melspectrogram(Prepare(signal), sr=sr, fmin=C2, fmax=C7, n_fft=2048 * 2, hop_length=1024)
        # Fourth root compresses the dynamic range of the normalised values.
        spectrogram_norm = pow(normalize(spectrogram), .25)

    elif spectype == "cqt":
        # Lowest analysed frequency, kept local so this branch does not depend
        # on the module-level constant of the same value.
        cqt_fmin = 55.00
        spectrogram = abs(pseudo_cqt(Prepare(signal), sr, fmin=cqt_fmin, n_bins=120, bins_per_octave=24, sparsity=.95, window=('kaiser', 10)))
        spectrogram_norm = normalize(spectrogram, norm='l1', axis=0)

    elif spectype == "chroma":
        CQT = CQT_transform(Prepare(signal), sr)
        spectrogram = Chroma(CQT)
        spectrogram_norm = normalize(spectrogram, norm='l1', axis=0)

    else:  # spectype == "stft"
        # scipy.signal.stft returns (frequencies, times, Zxx); keep Zxx only.
        spectrogram = stft(signal, fs=sr, window="hann")[2]
        # NOTE(review): only the real part of the complex STFT is kept here;
        # a magnitude (abs) may have been intended -- confirm before changing.
        spectrogram_norm = np.real(normalize(spectrogram, norm='l1', axis=0))

    return spectrogram_norm

In [15]:
def get_clean_x(signal, sr=44100, nth_largest=-45, spectype="mel"):
    """Average the highest-sum spectrogram columns of ``signal`` into one vector.

    A column is kept when its sum is at least the value found at index
    ``nth_largest`` of the ascending-sorted column sums (the 45th largest by
    default); the kept columns are then averaged along axis 1. Dropping the
    low-sum columns is meant to reduce the influence of noisy data.

    NOTE: a later cell defines ``get_clean_x`` again with the same signature;
    whichever definition was executed last is the one in effect.
    """
    spec = get_spectrogram(signal, sr=sr, spectype=spectype)

    # Cut-off: the nth largest column sum.
    cutoff = nth_largest_col_sum(spec, nth_largest)

    # Boolean mask over columns whose sum reaches the cut-off.
    keep_mask = np.sum(spec, axis=0) >= cutoff

    # Mean over the surviving columns instead of over the whole spectrogram.
    return np.mean(spec[:, keep_mask], axis=1)

In [16]:
def get_clean_x(signal, sr=44100, nth_largest=-45, spectype="mel"):
    """Average the highest-sum spectrogram columns of ``signal`` into one vector.

    Parameters
    ----------
    signal : array-like
        Audio samples, forwarded to ``get_spectrogram``.
    sr : int
        Sampling rate, forwarded to ``get_spectrogram``.
    nth_largest : int
        Index into the ascending-sorted column sums that selects the cut-off
        (``-45`` means the 45th largest column sum). An index that falls
        outside the available columns is clamped to the nearest valid one, so
        a spectrogram with fewer columns than ``|nth_largest|`` keeps all of
        its columns instead of raising ``IndexError``.
    spectype : str
        Transform name, forwarded to ``get_spectrogram``.

    Returns
    -------
    numpy.ndarray
        Mean along axis 1 of the columns whose sum reaches the cut-off.
    """
    spectrogram = get_spectrogram(signal, sr=sr, spectype=spectype)

    # Column sums are needed for both the cut-off and the mask: compute once.
    col_sums = np.sum(spectrogram, axis=0)
    ranked = np.sort(col_sums)

    # Clamp the requested rank into the valid index range. This is needed for
    # short clips, which would otherwise fail on the lookup below.
    n_cols = len(ranked)
    idx = max(-n_cols, min(nth_largest, n_cols - 1))
    nth_largest_threshold = ranked[idx]

    # Remove all columns whose sum is below the threshold (drops noisy data).
    clean = spectrogram[:, col_sums >= nth_largest_threshold]

    # Average the retained columns in lieu of using the entire spectrogram.
    return np.mean(clean, axis=1)

In [17]:
def get_clean_x_avg_all(signal, sr=44100, spectype="mel"):
    """Collapse the whole spectrogram of ``signal`` into one vector.

    Unlike ``get_clean_x`` no columns are discarded first: every column of the
    spectrogram contributes to the mean taken along axis 1.
    """
    full_spec = get_spectrogram(signal, sr=sr, spectype=spectype)
    return np.mean(full_spec, axis=1)

# Modeling:

## Grid Search - K-fold cross-validation

In [18]:
# Number of cross-validation folds (the k in k-fold).
n_folds=10

In [19]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

  from numpy.core.umath_tests import inner1d


In [20]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from keras.wrappers.scikit_learn import KerasClassifier

import warnings
warnings.filterwarnings('ignore')

### Neural Network Functions

In [21]:
# Define various neural network architectures to cross validate.
# Every modN below is a fully-connected softmax classifier; they differ only in
# the number of hidden layers and in whether dropout / batch normalisation
# follows each hidden layer. The public signatures are what the grid search
# introspects, so they are kept exactly as they were.
def _build_dense_classifier(input_shape, output_shape, hidden_units, activation,
                            batch_norm=True, dropout_rate=None):
    """Build and compile a fully-connected softmax classifier.

    Parameters
    ----------
    input_shape : int
        Length of one input feature vector.
    output_shape : int
        Number of units in the softmax output layer.
    hidden_units : sequence of int
        Width of each hidden ``Dense`` layer, in order.
    activation : str
        Activation used by every hidden layer.
    batch_norm : bool
        Whether a ``BatchNormalization`` layer follows each hidden layer.
    dropout_rate : float or None
        When given, a ``Dropout`` layer with this rate is inserted after each
        hidden ``Dense`` layer (before batch normalisation).

    Returns
    -------
    A compiled Keras ``Model`` (Adam optimiser, categorical cross-entropy loss,
    accuracy metric).
    """
    in_data = Input(shape=(input_shape, ))

    hidden = in_data
    for units in hidden_units:
        hidden = Dense(units, activation=activation)(hidden)
        if dropout_rate is not None:
            hidden = Dropout(dropout_rate)(hidden)
        if batch_norm:
            hidden = BatchNormalization()(hidden)

    output = Dense(output_shape, activation="softmax")(hidden)

    model = Model(inputs=[in_data],
                  outputs=[output])

    model.compile(optimizer=keras.optimizers.Adam(),
                  loss="categorical_crossentropy",
                  metrics=['acc'])

    return model


def mod1(input_shape, output_shape, lay1_units, lay2_units, lay3_units, activation='relu'):
    """3 hidden layers, each followed by batch normalisation."""
    return _build_dense_classifier(input_shape, output_shape,
                                   [lay1_units, lay2_units, lay3_units],
                                   activation)


def mod2(input_shape, output_shape, lay1_units, lay2_units, activation='relu'):
    """2 hidden layers, each followed by batch normalisation."""
    return _build_dense_classifier(input_shape, output_shape,
                                   [lay1_units, lay2_units],
                                   activation)


def mod3(input_shape, output_shape, lay1_units, activation='relu'):
    """1 hidden layer followed by batch normalisation."""
    return _build_dense_classifier(input_shape, output_shape,
                                   [lay1_units],
                                   activation)


def mod4(input_shape, output_shape, lay1_units, lay2_units, lay3_units, activation='relu'):
    """3 hidden layers with dropout (rate 0.3) and batch normalisation.

    Used to test whether dropout adds anything over ``mod1``.
    """
    return _build_dense_classifier(input_shape, output_shape,
                                   [lay1_units, lay2_units, lay3_units],
                                   activation,
                                   dropout_rate=.3)


def mod5(input_shape, output_shape, lay1_units, lay2_units, lay3_units, activation='relu'):
    """3 hidden layers, no batch normalisation and no dropout."""
    return _build_dense_classifier(input_shape, output_shape,
                                   [lay1_units, lay2_units, lay3_units],
                                   activation,
                                   batch_norm=False)


def mod6(input_shape, output_shape, lay1_units, lay2_units, lay3_units, lay4_units, activation='relu'):
    """4 hidden layers, each followed by batch normalisation."""
    return _build_dense_classifier(input_shape, output_shape,
                                   [lay1_units, lay2_units, lay3_units, lay4_units],
                                   activation)


def mod7(input_shape, output_shape, lay1_units, lay2_units, lay3_units, lay4_units, lay5_units, activation='relu'):
    """5 hidden layers, each followed by batch normalisation."""
    return _build_dense_classifier(input_shape, output_shape,
                                   [lay1_units, lay2_units, lay3_units, lay4_units, lay5_units],
                                   activation)

### Other Models (BASELINE MODELS)

In [None]:
transforms = ['stft', 'mel', 'cqt', 'chroma']


def _run_baseline_grid_search(estimator, param_grid, x, y, transform, label):
    """Grid-search ``estimator`` with 10-fold CV and summarise the scores.

    Returns the fitted ``GridSearchCV`` object together with a DataFrame that
    has one row per parameter combination (``params``, ``mean_train_score``,
    ``mean_test_score``) tagged with the feature ``transform`` and the model
    ``label``.
    """
    # Train scores are requested explicitly because 'mean_train_score' is read
    # from cv_results_ just below.
    search = GridSearchCV(estimator, param_grid=param_grid, cv=10, n_jobs=8,
                          return_train_score=True)
    search.fit(x, y)

    # .copy() so the tagging columns are added to an independent frame.
    summary = pd.DataFrame(search.cv_results_)[['params', 'mean_train_score', 'mean_test_score']].copy()
    summary['transform'] = transform
    summary['mod'] = label
    return search, summary


results_param_df = []
for transform in tqdm(transforms):
    # DATA IMPORT
    # raw_signals / samp_rates are not used by the models; they are kept as
    # notebook globals for interactive inspection.
    raw_signals = []
    samp_rates = []
    x_train = []
    for filename in tqdm(recording_filenames):
        sound, sr = sf.read(filename)

        raw_signals.append(sound)
        samp_rates.append(sr)
        cleaned = get_clean_x_avg_all(sound, sr=sr, spectype=transform)
        x_train.append(cleaned.flatten())

    # Min-max scale every feature. A feature that is constant across all
    # recordings has zero range; dividing by 1 instead maps it to 0 rather
    # than filling the column with NaN.
    x_train = np.array(x_train)
    feature_span = np.ptp(x_train, axis=0)
    feature_span[feature_span == 0] = 1
    x_train = (x_train - x_train.min(0)) / feature_span

    # Get y categories (third path component of each filename)
    y_train = [i.split('/')[2] for i in recording_filenames]
    le_chord_structure = LabelEncoder()
    le_chord_structure.fit(y_train)
    # One-hot labels are not consumed by the sklearn models below (they take
    # the string labels); kept because later cells read y_train_int.
    y_train_int = to_categorical(le_chord_structure.transform(y_train))

    # TRAIN MODELS

    # RF
    print("RF")
    rf = RandomForestClassifier(n_estimators=500)

    # Only offer max_features values that the current transform can supply;
    # fall back to "all features" if even the smallest candidate is too large.
    features_avail = x_train.shape[1]
    max_features_iter = [10, 40, 70, 100]

    param_grid = {"max_depth": [3, 5, 10],
                  "max_features": [m for m in max_features_iter if m <= features_avail] or [features_avail],
                  "min_samples_split": [5, 10, 15],
                  "bootstrap": [True],
                  "criterion": ["gini"]}

    rf_grid_search, rf_results_df = _run_baseline_grid_search(
        rf, param_grid, x_train, y_train, transform, 'RF')
    results_param_df.append(rf_results_df)

    # LR
    print("LR")
    # liblinear is named explicitly because the grid includes the 'l1' penalty,
    # which not every solver supports.
    lr = LogisticRegression(solver='liblinear')

    param_grid = {"penalty": ['l1', 'l2'],
                  "dual": [False],
                  "fit_intercept": [True, False],
                  "multi_class": ['ovr']}

    lr_grid_search, lr_results_df = _run_baseline_grid_search(
        lr, param_grid, x_train, y_train, transform, 'LR')
    results_param_df.append(lr_results_df)

    # SVM
    print("SVM")
    svm = SVC()

    param_grid = {"C": [.5, 1, 1.5],
                  "kernel": ['rbf', 'poly', 'sigmoid'],
                  "degree": [3, 4, 5],
                  "shrinking": [True, False]}

    svm_grid_search, svm_results_df = _run_baseline_grid_search(
        svm, param_grid, x_train, y_train, transform, 'SVM')
    results_param_df.append(svm_results_df)

    # GBM
    print("GBM")
    gbm = GradientBoostingClassifier(learning_rate=.01, n_estimators=500)

    # NOTE(review): the grid overrides the constructor's learning_rate=.01 with
    # 2, 4 and 6, which are unusually large for gradient boosting -- confirm
    # these values are intended. Left unchanged to keep results comparable.
    param_grid = {"subsample": [.8, 1],
                  "learning_rate": [2, 4, 6],
                  "min_samples_leaf": [1, 3, 5],
                  "max_depth": [3, 5, 10]}

    gbm_grid_search, gbm_results_df = _run_baseline_grid_search(
        gbm, param_grid, x_train, y_train, transform, 'GBM')
    results_param_df.append(gbm_results_df)



HBox(children=(IntProgress(value=0, max=4), HTML(value='')))

HBox(children=(IntProgress(value=0, max=334), HTML(value='')))

RF
LR
SVM
GBM


HBox(children=(IntProgress(value=0, max=334), HTML(value='')))

RF
LR
SVM
GBM


HBox(children=(IntProgress(value=0, max=334), HTML(value='')))

In [None]:
# Stack the per-model, per-transform CV summaries into a single frame.
results_param_df_all = pd.concat(results_param_df)

In [None]:
# Persist the baseline-model grid-search summary (path is relative to the
# working directory set at the top of the notebook).
results_param_df_all.to_csv("data/cv_results/otherMods_gridsearch_results.csv")

## Neural Network Grid search
Standardized Input with average of all frames

In [None]:
# selected_model is an alias for one of the model-building functions defined above.
# Swap this out for any other model defined in the above defs.
selected_model = mod1
mod_name = "mod1"

# NOTE: define parameter grid in below cell

In [None]:
def nn_grid_search(selected_model, param_grid, transforms, recording_filenames, mod_name="nn"):
    """Grid-search a Keras model over several audio feature transforms.

    For every transform each recording is read, reduced to one averaged
    spectrogram vector (``get_clean_x_avg_all``), min-max scaled per feature
    and fed to a 10-fold ``GridSearchCV`` wrapped around ``selected_model``.

    Parameters
    ----------
    selected_model : callable
        Model-building function handed to ``KerasClassifier`` as ``build_fn``.
    param_grid : dict
        Hyper-parameter grid. It is copied before ``input_shape`` and
        ``output_shape`` are added, so the caller's dict is left unmodified.
    transforms : iterable of str
        Spectrogram types to evaluate (see ``get_spectrogram``).
    recording_filenames : list of str
        Paths of the audio files; the third path component is used as label.
    mod_name : str
        Value written to the ``mod_name`` column of the result.

    Returns
    -------
    pandas.DataFrame
        One row per (transform, parameter combination) with the columns
        ``params``, ``mean_train_score``, ``mean_test_score``, ``transform``,
        ``mod`` and ``mod_name``.
    """
    # Labels depend only on the file paths, so encode them once up front.
    y_train = [i.split('/')[2] for i in recording_filenames]
    le_chord_structure = LabelEncoder()
    le_chord_structure.fit(y_train)
    y_train_int = to_categorical(le_chord_structure.transform(y_train))

    results_param_df = []
    for transform in tqdm(transforms):
        # DATA IMPORT
        x_train = []
        for filename in tqdm(recording_filenames):
            sound, sr = sf.read(filename)
            cleaned = get_clean_x_avg_all(sound, sr=sr, spectype=transform)
            x_train.append(cleaned.flatten())

        # Min-max scale every feature; a constant feature (zero range) is
        # divided by 1 so it becomes 0 instead of NaN.
        x_train = np.array(x_train)
        feature_span = np.ptp(x_train, axis=0)
        feature_span[feature_span == 0] = 1
        x_train = (x_train - x_train.min(0)) / feature_span

        print("NN1")
        mod_nn = KerasClassifier(build_fn=selected_model,
                                 verbose=0)

        # Work on a copy so the caller's grid is not polluted with the
        # transform-specific input/output sizes.
        search_grid = dict(param_grid)
        search_grid['input_shape'] = [x_train.shape[1]]
        search_grid['output_shape'] = [y_train_int.shape[1]]

        # NOTE(review): with an integer cv and one-hot targets this likely
        # resolves to an unshuffled KFold; since the files are ordered by
        # label directory a shuffled splitter may be preferable -- confirm.
        # return_train_score is explicit because 'mean_train_score' is read
        # from cv_results_ below.
        mod1_nn_grid_search = GridSearchCV(estimator=mod_nn, param_grid=search_grid, cv=10, n_jobs=-1,
                                           return_train_score=True)
        print("NN1-a")
        mod1_nn_grid_search.fit(x_train, y_train_int)
        print("NN1-b")

        # Convert parameter grid and cv results to dataframe
        mod1_nn_results_df = pd.DataFrame(mod1_nn_grid_search.cv_results_)[['params', 'mean_train_score', 'mean_test_score']].copy()
        mod1_nn_results_df['transform'] = transform
        mod1_nn_results_df['mod'] = 'NN_1'
        results_param_df.append(mod1_nn_results_df)

    # Combine once, after all transforms have been evaluated.
    out = pd.concat(results_param_df)
    out['mod_name'] = mod_name

    return out

In [None]:
# mod1: three hidden layers with batch norm.
mod_transforms = ['stft', 'mel', 'cqt', 'chroma']

# Hyper-parameters to sweep; batch_size / epochs are fit-time settings.
mod_grid = {
    'lay1_units': [50, 100],
    'lay2_units': [50, 100],
    'lay3_units': [50, 100],
    'activation': ['relu', 'tanh'],
    'batch_size': [16],
    'epochs': [25, 35],
}

grid_results = nn_grid_search(mod1, mod_grid, mod_transforms, recording_filenames, mod_name="mod1")

grid_results.to_csv("data/cv_results/grid_results_nn_mod1.csv")

In [None]:
# mod2: two hidden layers with batch norm.
mod_transforms = ['stft', 'mel', 'cqt', 'chroma']

# Hyper-parameters to sweep; batch_size / epochs are fit-time settings.
mod_grid = {
    'lay1_units': [50, 100],
    'lay2_units': [50, 100],
    'activation': ['relu', 'tanh'],
    'batch_size': [16],
    'epochs': [25, 35],
}

grid_results = nn_grid_search(mod2, mod_grid, mod_transforms, recording_filenames, mod_name="mod2")

grid_results.to_csv("data/cv_results/grid_results_nn_mod2.csv")

In [None]:
# mod3: one hidden layer with batch norm.
mod_transforms = ['stft', 'mel', 'cqt', 'chroma']

# Hyper-parameters to sweep; batch_size / epochs are fit-time settings.
mod_grid = {
    'lay1_units': [20, 50, 100],
    'activation': ['relu', 'tanh'],
    'batch_size': [16],
    'epochs': [25, 35],
}

grid_results = nn_grid_search(mod3, mod_grid, mod_transforms, recording_filenames, mod_name="mod3")

grid_results.to_csv("data/cv_results/grid_results_nn_mod3.csv")

In [None]:
# 3 layers, batch norm, dropout
# mod4 takes exactly three hidden-layer sizes, so the grid only lists
# lay1_units..lay3_units (a lay4_units entry is not an argument of mod4).
mod_grid = dict(lay1_units=[50, 100],
                lay2_units=[50, 100],
                lay3_units=[50, 100],
                activation=['relu', 'tanh'],
                batch_size=[16],
                epochs=[25, 35])

mod_transforms = ['stft', 'mel', 'cqt', 'chroma']

grid_results = nn_grid_search(selected_model=mod4,
                              param_grid=mod_grid,
                              transforms=mod_transforms,
                              recording_filenames=recording_filenames,
                              mod_name="mod4")

grid_results.to_csv("data/cv_results/grid_results_nn_mod4.csv")

In [None]:
# 3 layers, (no batch norm or dropout)
# mod5 takes exactly three hidden-layer sizes, so the grid only lists
# lay1_units..lay3_units (a lay4_units entry is not an argument of mod5).
mod_grid = dict(lay1_units=[50, 100],
                lay2_units=[50, 100],
                lay3_units=[50, 100],
                activation=['relu', 'tanh'],
                batch_size=[16],
                epochs=[25, 35])

mod_transforms = ['stft', 'mel', 'cqt', 'chroma']

grid_results = nn_grid_search(selected_model=mod5,
                              param_grid=mod_grid,
                              transforms=mod_transforms,
                              recording_filenames=recording_filenames,
                              mod_name="mod5")

grid_results.to_csv("data/cv_results/grid_results_nn_mod5.csv")

In [None]:
# mod6: four hidden layers with batch norm.
mod_transforms = ['stft', 'mel', 'cqt', 'chroma']

# Hyper-parameters to sweep; batch_size / epochs are fit-time settings.
mod_grid = {
    'lay1_units': [50, 100],
    'lay2_units': [50, 100],
    'lay3_units': [50, 100],
    'lay4_units': [50, 100],
    'activation': ['relu', 'tanh'],
    'batch_size': [16],
    'epochs': [25, 35],
}

grid_results = nn_grid_search(mod6, mod_grid, mod_transforms, recording_filenames, mod_name="mod6")

grid_results.to_csv("data/cv_results/grid_results_nn_mod6.csv")

In [None]:
# 5 layers, batch norm
mod_grid = dict(lay1_units=[50, 100],
                lay2_units=[50, 100],
                lay3_units=[50, 100],
                lay4_units=[50, 100],
                lay5_units=[50, 100],
                activation=['relu', 'tanh'],
                batch_size=[16],
                epochs=[25, 35])

mod_transforms = ['stft', 'mel', 'cqt', 'chroma']

# mod_name must match the model being searched so the rows in the CSV are
# labelled correctly (this run evaluates mod7 and writes the mod7 file).
grid_results = nn_grid_search(selected_model=mod7,
                              param_grid=mod_grid,
                              transforms=mod_transforms,
                              recording_filenames=recording_filenames,
                              mod_name="mod7")

grid_results.to_csv("data/cv_results/grid_results_nn_mod7.csv")

In [None]:
# Inline grid search for the model currently bound to `selected_model`.
transforms = ['stft', 'mel', 'cqt', 'chroma']

results_param_df = []
for transform in tqdm(transforms):
    # DATA IMPORT
    # raw_signals / samp_rates are not used by the model; they are kept as
    # notebook globals for interactive inspection.
    raw_signals = []
    samp_rates = []
    x_train = []
    for filename in tqdm(recording_filenames):
        sound, sr = sf.read(filename)

        raw_signals.append(sound)
        samp_rates.append(sr)
        cleaned = get_clean_x_avg_all(sound, sr=sr, spectype=transform)
        x_train.append(cleaned.flatten())

    # Min-max scale every feature; a constant feature (zero range) is divided
    # by 1 so it becomes 0 instead of NaN.
    x_train = np.array(x_train)
    feature_span = np.ptp(x_train, axis=0)
    feature_span[feature_span == 0] = 1
    x_train = (x_train - x_train.min(0)) / feature_span

    # Get y categories (third path component of each filename), one-hot encoded
    y_train = [i.split('/')[2] for i in recording_filenames]
    le_chord_structure = LabelEncoder()
    le_chord_structure.fit(y_train)
    y_train_int = to_categorical(le_chord_structure.transform(y_train))

    # NN mod 1
    print("NN1")
    mod_nn = KerasClassifier(build_fn=selected_model,
                             verbose=0)

    # Define parameter grid for that model
    param_grid = dict(input_shape=[x_train.shape[1]],
                      output_shape=[y_train_int.shape[1]],
                      lay1_units=[50, 100],
                      lay2_units=[50, 100],
                      lay3_units=[50, 100],
                      activation=['relu', 'tanh'],
                      batch_size=[16],
                      epochs=[25, 35])

    # return_train_score is explicit because 'mean_train_score' is read from
    # cv_results_ below.
    mod1_nn_grid_search = GridSearchCV(estimator=mod_nn, param_grid=param_grid, cv=10, n_jobs=-1,
                                       return_train_score=True)
    print("NN1-a")
    mod1_nn_grid_search.fit(x_train, y_train_int)
    print("NN1-b")

    # Convert parameter grid and cv results to dataframe
    mod1_nn_results_df = pd.DataFrame(mod1_nn_grid_search.cv_results_)[['params', 'mean_train_score', 'mean_test_score']].copy()
    mod1_nn_results_df['transform'] = transform
    mod1_nn_results_df['mod'] = 'NN_1'
    results_param_df.append(mod1_nn_results_df)

In [None]:
# Stack the per-transform CV summaries and write them to disk.
# NOTE(review): the file name says "chroma_run1" -- verify it matches the
# transforms that were actually evaluated before relying on it.
results_param_df_all = pd.concat(results_param_df)
results_param_df_all.to_csv("data/cv_results/nn1_grid_output_chroma_run1.csv")

In [None]:
# run 1
# 3 layers, batch norm
# The bare `mod1` line below only evaluates the name (nothing is displayed
# because it is not the last expression of the cell); presumably a note of
# which architecture this grid was used with.
mod1
# Reads x_train / y_train_int left over from the most recent data-loading loop.
param_grid = dict(input_shape=[x_train[0].shape[0]],
                  output_shape=[y_train_int.shape[1]],
                  lay1_units=[50, 100],
                  lay2_units=[50, 100],
                  lay3_units=[50, 100],
                  activation=['relu', 'tanh'],
                  batch_size=[16],
                  epochs=[25])


In [None]:
# run 2
# The bare `mod1` line below only evaluates the name (nothing is displayed
# because it is not the last expression of the cell); presumably a note of
# which architecture this grid was used with.
mod1
# Reads x_train / y_train_int left over from the most recent data-loading loop.
param_grid = dict(input_shape=[x_train[0].shape[0]],
                  output_shape=[y_train_int.shape[1]],
                  lay1_units=[50, 100],
                  lay2_units=[50, 100],
                  lay3_units=[50, 100],
                  activation=['relu', 'tanh'],
                  batch_size=[16],
                  epochs=[25])


In [None]:
#    # Test if dropout adds anything


## 

In [None]:
# Stack whatever summaries are currently held in results_param_df and write
# them to disk.
# NOTE(review): the file name mentions "nn6" and "chroma" -- verify it matches
# the model and transforms that produced results_param_df.
results_param_df_all = pd.concat(results_param_df)
results_param_df_all.to_csv("data/nn6_grid_output_chroma_standardized_avg_all_frames.csv")