# Sample duration = 5s, model = RFC, target = TENOR

In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import accuracy_score
 
from pickles_to_pandas import pickles_to_pandas


In [2]:
df = pickles_to_pandas('./data/5s/labeled/features_r02')

In [3]:
df_filtered = df[df['excl'] == '0']  # exclude records we want to exclude
df_filtered.shape

(3483, 10789)

### Remove all other labeled instruments and scale

In [4]:
df_filtered = df_filtered[df_filtered['alto'] == '0']  # exclude records we want to exclude
df_filtered = df_filtered[df_filtered['sop'] == '0']
df_filtered = df_filtered[df_filtered['bari'] == '0']
df_filtered = df_filtered[df_filtered['clrt'] == '0']
df_filtered = df_filtered[df_filtered['tora'] == '0']
df_filtered = df_filtered[df_filtered['othr'] == '0']
df_filtered = df_filtered[df_filtered['trmp'] == '0']
df_filtered = df_filtered[df_filtered['trmb'] == '0']
df_filtered = df_filtered[df_filtered['otrb'] == '0']

num_x_cols = df_filtered.shape[1] - 12 - 1  # last bit to adjust for zero indexing
data = df_filtered.iloc[:, 1:num_x_cols].to_numpy() 
# ^ These are the features. Exclude the filename (start at 1)
scaler = StandardScaler()
scaler.fit(data)
data = scaler.transform(data)

target = df_filtered[['tenr']].to_numpy().ravel()  # << This is the label

print(data.shape)
print(target.shape)

(2260, 10775)
(2260,)


In [5]:
x_train, x_test, y_train, y_test = \
     train_test_split(data, target, test_size=0.25, random_state=0)

### Test on default RFC

In [6]:
clf = RandomForestClassifier(n_estimators=300)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print(clf.score(x_test, y_test))

0.8212389380530973


In [7]:
c_matrix = confusion_matrix(y_test, y_pred)
idx = cols = [0, 1]
pd.DataFrame(c_matrix, index=idx, columns=cols)


Unnamed: 0,0,1
0,290,46
1,55,174


In [8]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.84      0.86      0.85       336
           1       0.79      0.76      0.78       229

    accuracy                           0.82       565
   macro avg       0.82      0.81      0.81       565
weighted avg       0.82      0.82      0.82       565



In [13]:
# SelectFromModel(estimator=RandomForestClassifier(bootstrap=True, 
#                 class_weight=None, criterion='gini',
#                 max_depth=None, max_features='auto', 
#                 max_leaf_nodes=None, min_impurity_split=1e-07, 
#                 min_samples_leaf=1, min_samples_split=2, 
#                 min_weight_fraction_leaf=0.0, n_estimators=10000, 
#                 n_jobs=-1, oob_score=False, random_state=0,
#                 verbose=0, warm_start=False),
#                 prefit=False, threshold=0.15)
sfm = SelectFromModel(clf, threshold=0.0002)
sfm.fit(x_train, y_train)

SelectFromModel(estimator=RandomForestClassifier(n_estimators=300),
                threshold=0.0002)

In [14]:
feature_names = df.columns[1:]
for feature_list_index in sfm.get_support(indices=True):
    print(feature_names[feature_list_index])

zeros
mfcc_0_0
mfcc_0_1
mfcc_0_6
mfcc_0_7
mfcc_0_13
mfcc_0_14
mfcc_0_15
mfcc_0_21
mfcc_0_22
mfcc_0_24
mfcc_0_25
mfcc_0_26
mfcc_0_27
mfcc_0_28
mfcc_0_32
mfcc_0_37
mfcc_0_41
mfcc_0_43
mfcc_0_45
mfcc_0_46
mfcc_0_47
mfcc_0_48
mfcc_0_49
mfcc_0_53
mfcc_0_55
mfcc_0_62
mfcc_0_63
mfcc_0_65
mfcc_0_66
mfcc_0_68
mfcc_0_69
mfcc_0_70
mfcc_0_73
mfcc_0_75
mfcc_0_82
mfcc_0_84
mfcc_0_86
mfcc_0_87
mfcc_0_88
mfcc_0_89
mfcc_0_90
mfcc_0_91
mfcc_0_92
mfcc_0_93
mfcc_0_94
mfcc_0_95
mfcc_0_97
mfcc_0_98
mfcc_0_99
mfcc_0_100
mfcc_0_101
mfcc_0_102
mfcc_0_103
mfcc_0_107
mfcc_0_109
mfcc_0_111
mfcc_0_112
mfcc_0_113
mfcc_0_114
mfcc_0_116
mfcc_0_117
mfcc_0_118
mfcc_0_119
mfcc_0_120
mfcc_0_121
mfcc_0_122
mfcc_0_123
mfcc_0_125
mfcc_0_127
mfcc_0_128
mfcc_0_133
mfcc_0_136
mfcc_0_138
mfcc_0_142
mfcc_0_145
mfcc_0_149
mfcc_0_150
mfcc_0_151
mfcc_0_152
mfcc_0_153
mfcc_0_154
mfcc_0_155
mfcc_0_157
mfcc_0_161
mfcc_0_167
mfcc_0_170
mfcc_0_171
mfcc_0_173
mfcc_0_174
mfcc_0_175
mfcc_0_176
mfcc_0_177
mfcc_0_178
mfcc_0_179
mfcc_0_180
mf

In [11]:
# Looks like zero crossings, mfcc, spectral centroids, spectral rolloff
