In [42]:
%lsmagic

Available line magics:
%alias  %alias_magic  %autoawait  %autocall  %automagic  %autosave  %bookmark  %cat  %cd  %clear  %colors  %conda  %config  %connect_info  %cp  %debug  %dhist  %dirs  %doctest_mode  %ed  %edit  %env  %gui  %hist  %history  %killbgscripts  %ldir  %less  %lf  %lk  %ll  %load  %load_ext  %loadpy  %logoff  %logon  %logstart  %logstate  %logstop  %ls  %lsmagic  %lx  %macro  %magic  %man  %matplotlib  %mkdir  %more  %mv  %notebook  %page  %pastebin  %pdb  %pdef  %pdoc  %pfile  %pinfo  %pinfo2  %pip  %popd  %pprint  %precision  %prun  %psearch  %psource  %pushd  %pwd  %pycat  %pylab  %qtconsole  %quickref  %recall  %rehashx  %reload_ext  %rep  %rerun  %reset  %reset_selective  %rm  %rmdir  %run  %save  %sc  %set_env  %store  %sx  %system  %tb  %time  %timeit  %unalias  %unload_ext  %who  %who_ls  %whos  %xdel  %xmode

Available cell magics:
%%!  %%HTML  %%SVG  %%bash  %%capture  %%debug  %%file  %%html  %%javascript  %%js  %%latex  %%markdown  %%perl  %%prun  %%pypy  %%

## Data Preparation

In [2]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import FunctionTransformer, RobustScaler, StandardScaler
from sklearn.pipeline import make_pipeline, FeatureUnion, Pipeline
import numpy
import pandas
import tensorflow as tf

In [82]:
features = pandas.read_csv('data/cleaned_features.csv', index_col=0)

In [83]:
y = features.loc[:,'genre_label']
X = features.loc[:, features.columns.difference(['genre_label'])]

In [84]:
features_all = features.columns.values
features_with_outliers = ['mean_beats', 'mean_spec_bw', 'mean_spec_centroid', 'mean_spec_rolloff',
                          'median_beats','median_spec_bw', 'median_spec_centroid', 
                          'median_spec_rolloff', 'std_beats', 'std_mfcc_1', 'var_spec_centroid',
                          'var_spec_rolloff'
                         ]
features_without_outliers = features.loc[:, features.columns.difference(features_with_outliers)].columns.values

In [85]:
class AddColumnNames(BaseEstimator, TransformerMixin):
    def __init__(self, columns):
        self.columns = columns

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return pd.DataFrame(data=X, columns=self.columns)

class ColumnSelector(BaseEstimator, TransformerMixin):
    def __init__(self, columns):
        self.columns = columns

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        assert isinstance(X, pandas.DataFrame)

        try:
            return X[self.columns]
        except KeyError:
            cols_error = list(set(self.columns) - set(X.columns))
            raise KeyError("The DataFrame does not include the columns: %s" % cols_error)

In [99]:
def create_preprocessing_pipeline(features_all, features_with_outliers,
                    features_without_outliers):
    return make_pipeline(
        AddColumnNames(columns=features_all),
        FeatureUnion(transformer_list=[
            ('features_with_outliers', make_pipeline(
                ColumnSelector(columns=features_with_outliers),
                FunctionTransformer(numpy.log),
                RobustScaler()
            )),
            ('features_without_outliers', make_pipeline(
                ColumnSelector(columns=features_without_outliers),
                StandardScaler()
            )),
        ])
    )

In [97]:
0 in numpy.log(X[features_with_outliers]).isna().values

  """Entry point for launching an IPython kernel.


True

In [98]:
features = create_preprocessing_pipeline(features_all, 
                                         features_with_outliers, 
                                         features_without_outliers
                                        ).fit_transform(X)
features

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [49]:
wavelet_features = features.filter(regex=(r'.+_db[458]{1}_.+'))
# wavelet_features.loc[:,'genre_label'] = features['genre_label']
wavelet_features

Unnamed: 0,mean_db4_cA4,mean_db4_cD1,mean_db4_cD2,mean_db4_cD3,mean_db4_cD4,mean_db5_cA4,mean_db5_cD1,mean_db5_cD2,mean_db5_cD3,mean_db5_cD4,...,var_db5_cD3,var_db5_cD4,var_db8_cA7,var_db8_cD1,var_db8_cD2,var_db8_cD3,var_db8_cD4,var_db8_cD5,var_db8_cD6,var_db8_cD7
0,-0.070889,2.706283e-09,-1.245192e-04,-0.000069,0.001073,-0.070887,-5.621066e-08,0.000056,0.000162,0.000837,...,0.019942,0.046657,0.274625,0.001701,0.007485,0.020088,0.046672,0.081224,0.155366,0.330626
1,-0.000740,8.956381e-06,-9.212386e-05,0.000984,0.000213,-0.000663,8.905696e-06,0.000155,-0.000132,0.000948,...,0.049648,0.035356,5.768055,0.005669,0.021515,0.049319,0.035902,0.047964,0.155877,2.908547
2,-0.011143,-1.990486e-09,-7.116798e-06,0.000019,0.000046,-0.011147,-2.517544e-08,0.000016,0.000054,-0.000327,...,0.001726,0.014415,0.197478,0.000111,0.000377,0.001819,0.014656,0.060417,0.145343,0.366973
3,-0.002008,3.508618e-05,-2.028877e-05,-0.001993,-0.002459,-0.002042,3.514088e-05,-0.000050,0.002159,-0.000566,...,0.065848,0.126208,3.323095,0.003317,0.017679,0.065951,0.127043,0.417846,0.342882,2.281345
4,-0.001342,3.828920e-05,-4.344582e-05,0.000076,-0.000319,-0.001298,3.830900e-05,0.000045,0.000180,-0.000469,...,0.016205,0.039046,4.279692,0.001725,0.004758,0.016258,0.038775,0.067460,0.173948,1.401585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3294,0.000255,-3.682523e-07,1.021450e-05,-0.000065,0.000057,0.000255,-3.695653e-07,-0.000011,-0.000012,0.000091,...,0.002403,0.006349,0.743181,0.000051,0.000517,0.002568,0.006101,0.018049,0.031775,0.095214
3295,0.000069,-3.853609e-07,3.565250e-05,-0.000009,-0.000013,0.000071,-3.862376e-07,-0.000040,0.000019,-0.000001,...,0.001577,0.005256,0.422114,0.000782,0.001361,0.001597,0.005355,0.027341,0.171636,0.263704
3296,0.000784,-1.326012e-06,6.062675e-07,-0.000024,-0.000014,0.000801,-1.327916e-06,0.000006,-0.000070,0.000009,...,0.001071,0.014495,2.386954,0.000049,0.000448,0.001113,0.015577,0.112773,0.093115,0.154832
3297,-0.000206,-3.779051e-07,2.349206e-05,-0.000132,0.000083,-0.000215,-3.830803e-07,-0.000030,0.000085,-0.000310,...,0.004083,0.014905,0.605454,0.000114,0.001510,0.004065,0.016273,0.122244,0.221618,0.446114


In [57]:
wavelet_feature_labels = wavelet_features.columns.values
timbral_rhythmic_features = features.loc[:, features.columns.difference(
    numpy.append(wavelet_feature_labels, 'genre_label'))]
timbral_rhythmic_features

Unnamed: 0,lpc_1,lpc_2,lpc_3,lpc_4,mean_beats,mean_beats_timestamp,mean_mfcc_1,mean_mfcc_10,mean_mfcc_11,mean_mfcc_12,...,var_spec_centroid,var_spec_contrast_1,var_spec_contrast_2,var_spec_contrast_3,var_spec_contrast_4,var_spec_contrast_5,var_spec_contrast_6,var_spec_contrast_7,var_spec_rolloff,var_zcr
0,1.0,-1.178271,0.647402,-0.352129,649.120690,15.072553,-80.892197,19.216515,-13.114045,11.498654,...,4.185230e+05,17.861575,21.121083,25.873245,13.038216,6.962883,7.988678,14.555389,1.564158e+06,0.002891
1,1.0,-1.086489,0.542333,-0.396561,617.465116,14.337512,-81.181618,20.319712,-2.532140,12.753151,...,9.392262e+05,29.258739,24.407069,23.032866,18.185633,12.981837,12.461155,107.433159,2.076888e+06,0.005135
2,1.0,-1.365355,0.560194,-0.172092,631.428571,14.661743,-202.881241,9.440219,-11.644846,7.070107,...,6.335591e+05,14.853086,20.595163,23.554291,21.775710,11.043003,11.478725,17.035474,3.405313e+06,0.002481
3,1.0,-1.240807,0.505005,-0.190461,619.227273,14.378429,-40.265057,16.794779,-3.511151,8.467217,...,4.953543e+05,25.631542,17.320487,26.199990,13.617381,7.566584,8.344646,142.327995,1.811020e+06,0.002283
4,1.0,-0.962111,0.201135,-0.206167,631.382979,14.660684,-140.261337,16.041454,-3.190496,11.844710,...,1.073302e+06,35.138048,34.537389,32.614328,30.967176,19.603025,10.813280,102.154238,4.354818e+06,0.006124
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3294,1.0,-1.878301,1.492194,-0.602686,1246.841270,14.475799,-285.722076,1.724312,3.334758,-1.919123,...,6.717626e+05,25.312044,16.848874,13.235958,17.875383,10.292653,7.975512,70.855880,3.494444e+06,0.001376
3295,1.0,-0.757695,0.322183,-0.531011,1269.770833,14.742011,-325.716064,-5.354380,-14.410619,-4.959855,...,9.118337e+06,36.373645,31.511738,26.118394,30.467805,48.009289,32.720426,98.557969,2.956056e+07,0.007543
3296,1.0,-1.862517,1.301521,-0.433298,1254.981818,14.570310,-264.064270,0.347791,5.412341,5.308122,...,9.370093e+05,47.880472,15.887094,19.648933,20.805391,14.361217,9.615398,48.662283,6.126479e+06,0.000358
3297,1.0,-1.882439,1.470879,-0.573606,1272.877551,14.778080,-178.481781,4.596525,6.592151,10.097037,...,1.390328e+06,32.331749,18.345547,18.593669,21.698056,14.748304,8.958216,19.165002,5.990872e+06,0.001409


## Predictive Modelling

In [None]:
?tf.keras.Sequential