# Saxophone

* Sample duration = 5s
* model = SVM
* target = all saxes

Distinguish samples that contain saxophone from samples that do not. Other brass and reed instruments are excluded.

In [54]:
from IPython.display import Audio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, KernelPCA
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, \
    RandomizedSearchCV, GridSearchCV
from sklearn.ensemble import BaggingClassifier
from scipy.stats import uniform

# PCA and train/test split is done in _all_saxes
from _all_saxes import df_filtered, data, d, target, x_train, x_test, y_train, y_test, \
     NUM_LABEL_COLS, NUM_COMPONENTS, TEST_SIZE, RANDOM_STATE

In [2]:
# Show the shape of `df_filtered`, which is imported from `_all_saxes`.
df_filtered.shape

(6036, 10791)

In [24]:
# Show the NUM_COMPONENTS constant imported from `_all_saxes`
# (presumably the number of PCA components used there -- confirm).
NUM_COMPONENTS

38

## Test plain vanilla SVM

In [3]:
# Baseline: an SVC with default hyperparameters, fit on the split imported
# from `_all_saxes`. `fit` returns the estimator itself, so it can be chained.
clf = SVC().fit(x_train, y_train)
y_pred = clf.predict(x_test)
test_accuracy = clf.score(x_test, y_test)  # mean accuracy on the test split
print(test_accuracy)

0.8766556291390728


In [4]:
# Tabulate true classes (rows) against predicted classes (columns).
c_matrix = confusion_matrix(y_test, y_pred)
idx = [0, 1]
cols = idx
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,417,87
1,62,642


In [5]:
# Per-class precision, recall and F1 for the current predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.87      0.83      0.85       504
           1       0.88      0.91      0.90       704

    accuracy                           0.88      1208
   macro avg       0.88      0.87      0.87      1208
weighted avg       0.88      0.88      0.88      1208



## Determine good value for C using random search

In [7]:
# Candidate values for C are drawn from a continuous uniform distribution.
# scipy's uniform(loc, scale) covers [loc, loc + scale], so C is sampled
# from [0.1, 5.1].
model_params = {
    'C': uniform(0.1, 5.0)
}

# RandomizedSearchCV(estimator, param_distributions, *, n_iter=10,
#                    scoring=None, n_jobs=None, iid='deprecated',
#                    refit=True, cv=None, verbose=0,
#                    pre_dispatch='2*n_jobs', random_state=None,
#                    error_score=nan, return_train_score=False)
svm_model = SVC()
# Seed the search so the sampled C values -- and therefore the selected C --
# are the same on every run. Without a seed the "best" C changes each time
# the notebook is re-executed.
clf = RandomizedSearchCV(svm_model, model_params, n_iter=10,
                         cv=5, random_state=RANDOM_STATE)
model = clf.fit(x_train, y_train)
# Full hyperparameter dict of the refit best estimator; later cells read
# params['C'] from it.
params = model.best_estimator_.get_params()

In [8]:
# Display the hyperparameters of the best estimator found by the random search.
params

{'C': 4.510493445767971,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [9]:
# Refit a single SVC using the C value selected by the random search and
# evaluate it on the test split.
best_c = params['C']
clf = SVC(C=best_c).fit(x_train, y_train)
y_pred = clf.predict(x_test)
test_accuracy = clf.score(x_test, y_test)  # mean accuracy on the test split
print(test_accuracy)


0.8932119205298014


In [10]:
# Tabulate true classes (rows) against predicted classes (columns).
c_matrix = confusion_matrix(y_test, y_pred)
idx = [0, 1]
cols = idx
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,434,70
1,59,645


In [11]:
# Per-class precision, recall and F1 for the current predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.88      0.86      0.87       504
           1       0.90      0.92      0.91       704

    accuracy                           0.89      1208
   macro avg       0.89      0.89      0.89      1208
weighted avg       0.89      0.89      0.89      1208



## Test ensemble approach with bagging classifier

#### Determine good parameters to use with bagging classifier. Start with grid search.

In [13]:
# Parameter grid for the bagging ensemble: the ensemble size and the number
# of features each base SVM is trained on (NUM_COMPONENTS - 7 up to
# NUM_COMPONENTS - 2).
p = {'n_estimators': [100, 200, 300, 400, 500],
     'max_features': [NUM_COMPONENTS - 2, NUM_COMPONENTS - 3,
                      NUM_COMPONENTS - 4, NUM_COMPONENTS - 5,
                      NUM_COMPONENTS - 6, NUM_COMPONENTS - 7]}

base_estimator = SVC(C=params['C'])
# The base estimator is passed positionally on purpose: the keyword was
# renamed from `base_estimator` to `estimator` in scikit-learn 1.2 and the old
# name was removed in 1.4, but it is the first positional parameter in both.
# random_state is set so the searched ensembles are seeded the same way as
# the final BaggingClassifier fitted after this search.
estimator = BaggingClassifier(base_estimator, random_state=RANDOM_STATE)
# Question: Should we include C here, or ... ? There's no reason it should differ
# for different values of n_estimators, but what about max_features? Might different
# values for C be best for different max_features? Maybe. Dunno. Maybe we should
# search for optimal C *after* we've found the best value for max_features?

# GridSearchCV(estimator, param_grid, *, scoring=None, n_jobs=None,
#              iid='deprecated', refit=True, cv=None, verbose=0,
#              pre_dispatch='2*n_jobs', error_score=nan, return_train_score=False)
clf = GridSearchCV(estimator, p, n_jobs=-1)
search = clf.fit(x_train, y_train)
search.best_params_

# Result recorded from an earlier (unseeded) run:
# {'max_features': 33, 'n_estimators': 300}

{'max_features': 33, 'n_estimators': 300}

In [18]:
# BaggingClassifier(estimator=None, n_estimators=10, *,
#                   max_samples=1.0, max_features=1.0,
#                   bootstrap=True, bootstrap_features=False,
#                   oob_score=False, warm_start=False,
#                   n_jobs=None, random_state=None, verbose=0)
# The first parameter was called `base_estimator` before scikit-learn 1.2 and
# that keyword was removed in 1.4; passing the SVC positionally works on both.
#
# Fit the ensemble with the n_estimators / max_features chosen by the grid
# search, using the C value chosen by the random search.
clf = BaggingClassifier(SVC(C=params['C']),
                        n_estimators=search.best_params_['n_estimators'],
                        max_features=search.best_params_['max_features'],
                        random_state=RANDOM_STATE,
                        n_jobs=-1)
model = clf.fit(x_train, y_train)

In [21]:
# Predict the test split and report the ensemble's mean accuracy on it.
y_pred = clf.predict(x_test)
test_accuracy = clf.score(x_test, y_test)
print(test_accuracy)

0.8956953642384106


In [22]:
# Tabulate true classes (rows) against predicted classes (columns).
c_matrix = confusion_matrix(y_test, y_pred)
idx = [0, 1]
cols = idx
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,429,75
1,51,653


In [23]:
# Per-class precision, recall and F1 for the current predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.89      0.85      0.87       504
           1       0.90      0.93      0.91       704

    accuracy                           0.90      1208
   macro avg       0.90      0.89      0.89      1208
weighted avg       0.90      0.90      0.90      1208



## Try weakening classifiers

In [None]:
# Project `data` onto 20 principal components (down from 38).
# NOTE(review): the PCA is fit on every row of `data`, i.e. before any
# train/test split -- confirm this is intended.
pca = PCA(n_components=20).fit(data)
d2 = pca.transform(data)

In [29]:
# Re-split using the 20-component projection. This rebinds the
# x_train/x_test/y_train/y_test names imported from `_all_saxes`.
x_train, x_test, y_train, y_test = train_test_split(
    d2, target, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

In [30]:
# BaggingClassifier(estimator=None, n_estimators=10, *,
#                   max_samples=1.0, max_features=1.0,
#                   bootstrap=True, bootstrap_features=False,
#                   oob_score=False, warm_start=False,
#                   n_jobs=None, random_state=None, verbose=0)
# The first parameter was called `base_estimator` before scikit-learn 1.2 and
# that keyword was removed in 1.4; passing the SVC positionally works on both.
#
# 300 default SVCs, each trained on 16 features of the current x_train.
clf = BaggingClassifier(SVC(),
                        n_estimators=300,
                        max_features=16,
                        random_state=RANDOM_STATE,
                        n_jobs=-1)
model = clf.fit(x_train, y_train)

In [31]:
# Predict the test split and report the ensemble's mean accuracy on it.
y_pred = clf.predict(x_test)
test_accuracy = clf.score(x_test, y_test)
print(test_accuracy)

0.8642384105960265


In [32]:
# Tabulate true classes (rows) against predicted classes (columns).
c_matrix = confusion_matrix(y_test, y_pred)
idx = [0, 1]
cols = idx
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,402,102
1,62,642


In [33]:
# Per-class precision, recall and F1 for the current predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.87      0.80      0.83       504
           1       0.86      0.91      0.89       704

    accuracy                           0.86      1208
   macro avg       0.86      0.85      0.86      1208
weighted avg       0.86      0.86      0.86      1208



In [34]:
# Project `data` onto 16 principal components (down from 38).
# NOTE(review): the PCA is fit on every row of `data`, i.e. before any
# train/test split -- confirm this is intended.
pca = PCA(n_components=16).fit(data)
d16 = pca.transform(data)

In [35]:
# Re-split using the 16-component projection; rebinds the current
# x_train/x_test/y_train/y_test names.
x_train, x_test, y_train, y_test = train_test_split(
    d16, target, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

In [36]:
# BaggingClassifier(estimator=None, n_estimators=10, *,
#                   max_samples=1.0, max_features=1.0,
#                   bootstrap=True, bootstrap_features=False,
#                   oob_score=False, warm_start=False,
#                   n_jobs=None, random_state=None, verbose=0)
# The first parameter was called `base_estimator` before scikit-learn 1.2 and
# that keyword was removed in 1.4; passing the SVC positionally works on both.
#
# 300 default SVCs, each trained on 12 features of the current x_train.
clf = BaggingClassifier(SVC(),
                        n_estimators=300,
                        max_features=12,
                        random_state=RANDOM_STATE,
                        n_jobs=-1)
model = clf.fit(x_train, y_train)

In [37]:
# Predict the test split and report the ensemble's mean accuracy on it.
y_pred = clf.predict(x_test)
test_accuracy = clf.score(x_test, y_test)
print(test_accuracy)

0.8534768211920529


In [38]:
# Tabulate true classes (rows) against predicted classes (columns).
c_matrix = confusion_matrix(y_test, y_pred)
idx = [0, 1]
cols = idx
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,388,116
1,61,643


In [39]:
# Per-class precision, recall and F1 for the current predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.86      0.77      0.81       504
           1       0.85      0.91      0.88       704

    accuracy                           0.85      1208
   macro avg       0.86      0.84      0.85      1208
weighted avg       0.85      0.85      0.85      1208



**Interesting. As we reduce features, recall of positive class doesn't suffer much, but recall of negative class does. This is a little unexpected. What happens if we go the other direction and increase the number of PCA components?**

In [40]:
# Project `data` onto 48 principal components (an increase from 38).
# NOTE(review): the PCA is fit on every row of `data`, i.e. before any
# train/test split -- confirm this is intended.
pca = PCA(n_components=48).fit(data)
d48 = pca.transform(data)

In [41]:
# Re-split using the 48-component projection; rebinds the current
# x_train/x_test/y_train/y_test names.
x_train, x_test, y_train, y_test = train_test_split(
    d48, target, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

In [42]:
# BaggingClassifier(estimator=None, n_estimators=10, *,
#                   max_samples=1.0, max_features=1.0,
#                   bootstrap=True, bootstrap_features=False,
#                   oob_score=False, warm_start=False,
#                   n_jobs=None, random_state=None, verbose=0)
# The first parameter was called `base_estimator` before scikit-learn 1.2 and
# that keyword was removed in 1.4; passing the SVC positionally works on both.
#
# 300 default SVCs, each trained on 42 features of the current x_train.
clf = BaggingClassifier(SVC(),
                        n_estimators=300,
                        max_features=42,
                        random_state=RANDOM_STATE,
                        n_jobs=-1)
model = clf.fit(x_train, y_train)

In [43]:
# Predict the test split and report the ensemble's mean accuracy on it.
y_pred = clf.predict(x_test)
test_accuracy = clf.score(x_test, y_test)
print(test_accuracy)

0.875


In [44]:
# Tabulate true classes (rows) against predicted classes (columns).
c_matrix = confusion_matrix(y_test, y_pred)
idx = [0, 1]
cols = idx
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,413,91
1,60,644


In [45]:
# Per-class precision, recall and F1 for the current predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.87      0.82      0.85       504
           1       0.88      0.91      0.90       704

    accuracy                           0.88      1208
   macro avg       0.87      0.87      0.87      1208
weighted avg       0.87      0.88      0.87      1208



### Start over

In [46]:
# Keep as many principal components as needed to explain 90% of the variance,
# print how many that is, then re-split on the projected data.
# NOTE(review): the PCA is fit on all of `data` before the split, so the
# projection has seen the test rows -- consider fitting on training rows only.
pca = PCA(0.90).fit(data)
print(pca.n_components_)
d90 = pca.transform(data)

x_train, x_test, y_train, y_test = train_test_split(
    d90, target, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

In [47]:
# Default SVC on the current split; `fit` returns the estimator itself.
clf = SVC().fit(x_train, y_train)
y_pred = clf.predict(x_test)
test_accuracy = clf.score(x_test, y_test)  # mean accuracy on the test split
print(test_accuracy)

0.8600993377483444


In [48]:
# Tabulate true classes (rows) against predicted classes (columns).
c_matrix = confusion_matrix(y_test, y_pred)
idx = [0, 1]
cols = idx
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,406,98
1,71,633


In [49]:
# Per-class precision, recall and F1 for the current predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.85      0.81      0.83       504
           1       0.87      0.90      0.88       704

    accuracy                           0.86      1208
   macro avg       0.86      0.85      0.85      1208
weighted avg       0.86      0.86      0.86      1208



In [50]:
# Keep as many principal components as needed to explain 85% of the variance,
# print how many that is, then re-split on the projected data.
# NOTE(review): the PCA is fit on all of `data` before the split, so the
# projection has seen the test rows -- consider fitting on training rows only.
pca = PCA(0.85).fit(data)
print(pca.n_components_)
d85 = pca.transform(data)

x_train, x_test, y_train, y_test = train_test_split(
    d85, target, test_size=TEST_SIZE, random_state=RANDOM_STATE
)


In [51]:
# Default SVC on the current split; `fit` returns the estimator itself.
clf = SVC().fit(x_train, y_train)
y_pred = clf.predict(x_test)
test_accuracy = clf.score(x_test, y_test)  # mean accuracy on the test split
print(test_accuracy)

0.8617549668874173


In [52]:
# Tabulate true classes (rows) against predicted classes (columns).
c_matrix = confusion_matrix(y_test, y_pred)
idx = [0, 1]
cols = idx
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,410,94
1,73,631


In [53]:
# Per-class precision, recall and F1 for the current predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.85      0.81      0.83       504
           1       0.87      0.90      0.88       704

    accuracy                           0.86      1208
   macro avg       0.86      0.85      0.86      1208
weighted avg       0.86      0.86      0.86      1208



In [73]:
# Keep as many principal components as needed to explain 75% of the variance,
# print how many that is, then re-split on the projected data.
# NOTE(review): the PCA is fit on all of `data` before the split, so the
# projection has seen the test rows -- consider fitting on training rows only.
pca = PCA(0.75).fit(data)
print(pca.n_components_)
d75 = pca.transform(data)

x_train, x_test, y_train, y_test = train_test_split(
    d75, target, test_size=TEST_SIZE, random_state=RANDOM_STATE
)


In [74]:
# Default SVC on the current split; `fit` returns the estimator itself.
clf = SVC().fit(x_train, y_train)
y_pred = clf.predict(x_test)
test_accuracy = clf.score(x_test, y_test)  # mean accuracy on the test split
print(test_accuracy)

0.8634105960264901


In [75]:
# Tabulate true classes (rows) against predicted classes (columns).
c_matrix = confusion_matrix(y_test, y_pred)
idx = [0, 1]
cols = idx
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,409,95
1,70,634


In [76]:
# Per-class precision, recall and F1 for the current predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.85      0.81      0.83       504
           1       0.87      0.90      0.88       704

    accuracy                           0.86      1208
   macro avg       0.86      0.86      0.86      1208
weighted avg       0.86      0.86      0.86      1208



In [65]:
# Keep as many principal components as needed to explain 50% of the variance,
# print how many that is, then re-split on the projected data.
# NOTE(review): the PCA is fit on all of `data` before the split, so the
# projection has seen the test rows -- consider fitting on training rows only.
pca = PCA(0.50).fit(data)
print(pca.n_components_)
d50 = pca.transform(data)

x_train, x_test, y_train, y_test = train_test_split(
    d50, target, test_size=TEST_SIZE, random_state=RANDOM_STATE
)


In [66]:
# Default SVC on the current split; `fit` returns the estimator itself.
clf = SVC().fit(x_train, y_train)
y_pred = clf.predict(x_test)
test_accuracy = clf.score(x_test, y_test)  # mean accuracy on the test split
print(test_accuracy)

0.8650662251655629


In [67]:
# Tabulate true classes (rows) against predicted classes (columns).
c_matrix = confusion_matrix(y_test, y_pred)
idx = [0, 1]
cols = idx
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,407,97
1,66,638


In [68]:
# Per-class precision, recall and F1 for the current predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.86      0.81      0.83       504
           1       0.87      0.91      0.89       704

    accuracy                           0.87      1208
   macro avg       0.86      0.86      0.86      1208
weighted avg       0.86      0.87      0.86      1208



In [56]:
# RBF-kernel PCA with no explicit component count, then re-split on the
# projected data. (Original author's estimate: this will probably return
# around 6000 components.)
# NOTE(review): the projection is fit on all of `data` before the split, so
# it has seen the test rows -- consider fitting on training rows only.
kpca = KernelPCA(kernel='rbf').fit(data)
kd = kpca.transform(data)

x_train, x_test, y_train, y_test = train_test_split(
    kd, target, test_size=TEST_SIZE, random_state=RANDOM_STATE
)


In [57]:
# Default SVC on the current split; `fit` returns the estimator itself.
clf = SVC().fit(x_train, y_train)
y_pred = clf.predict(x_test)
test_accuracy = clf.score(x_test, y_test)  # mean accuracy on the test split
print(test_accuracy)

0.8509933774834437


In [58]:
# Tabulate true classes (rows) against predicted classes (columns).
c_matrix = confusion_matrix(y_test, y_pred)
idx = [0, 1]
cols = idx
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,390,114
1,66,638


In [59]:
# Per-class precision, recall and F1 for the current predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.86      0.77      0.81       504
           1       0.85      0.91      0.88       704

    accuracy                           0.85      1208
   macro avg       0.85      0.84      0.84      1208
weighted avg       0.85      0.85      0.85      1208

