# Tenor saxophone

* Sample duration = 5s
* model = SVM
* target = TENOR

Distinguish between samples with tenor saxophone from those without tenor saxophone. Other brass and reed instruments excluded.

In [1]:
from IPython.display import Audio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA 
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, \
                                    RandomizedSearchCV, GridSearchCV
from sklearn.ensemble import BaggingClassifier
from scipy.stats import uniform

from pickles_to_pandas import pickles_to_pandas

In [2]:
df = pickles_to_pandas('./data/5s/labeled/features_r02/selected')

In [3]:
NUM_LABEL_COLS = 13
NUM_COMPONENTS = 32
TEST_SIZE = 0.2
RANDOM_SEED = 0

In [4]:
df_filtered = df[df['excl'] == '0']  # exclude records we want to exclude
df_filtered = df
df_filtered.shape

(1316, 10790)

In [5]:
# exclude records we want to exclude
df_filtered = df_filtered[df_filtered['alto'] == '0'] 
df_filtered = df_filtered[df_filtered['sop'] == '0']
df_filtered = df_filtered[df_filtered['bari'] == '0']
df_filtered = df_filtered[df_filtered['clrt'] == '0']
df_filtered = df_filtered[df_filtered['tora'] == '0']
df_filtered = df_filtered[df_filtered['othr'] == '0']
df_filtered = df_filtered[df_filtered['trmp'] == '0']
df_filtered = df_filtered[df_filtered['trmb'] == '0']
df_filtered = df_filtered[df_filtered['otrb'] == '0']

num_x_cols = df_filtered.shape[1] - NUM_LABEL_COLS - 1  
# ^ last bit to adjust for zero indexing
data = df_filtered.iloc[:, 1:num_x_cols].to_numpy() 
# ^ These are the features
scaler = StandardScaler()
scaler.fit(data)
data = scaler.transform(data)

target = df_filtered[['tenr']].to_numpy().ravel()  # << This is the label

print(data.shape)
print(target.shape)

(806, 10775)
(806,)


In [6]:
pca = PCA(n_components=NUM_COMPONENTS)

In [7]:
pca.fit(data)

PCA(n_components=32)

In [8]:
# print(pca.explained_variance_ratio_)

In [9]:
# print(pca.singular_values_)

In [10]:
d = pca.transform(data)

In [11]:
# d

In [12]:
x_train, x_test, y_train, y_test = \
     train_test_split(d, target, test_size=TEST_SIZE, random_state=RANDOM_SEED)

## Test plain vanilla SVM

In [13]:
clf = SVC()
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print(clf.score(x_test, y_test))

0.845679012345679


In [14]:
c_matrix = confusion_matrix(y_test, y_pred)
idx = cols = [0, 1]
pd.DataFrame(c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,99,6
1,19,38


In [15]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.84      0.94      0.89       105
           1       0.86      0.67      0.75        57

    accuracy                           0.85       162
   macro avg       0.85      0.80      0.82       162
weighted avg       0.85      0.85      0.84       162



## Determine good value for C using random search

In [16]:
model_params = {
    'C': uniform(0.1, 4.0)
}

# RandomizedSearchCV(estimator, param_distributions, *, n_iter=10, 
#                    scoring=None, n_jobs=None, iid='deprecated', 
#                    refit=True, cv=None, verbose=0, 
#                    pre_dispatch='2*n_jobs', random_state=None, 
#                    error_score=nan, return_train_score=False)
svm_model = SVC()
clf = RandomizedSearchCV(svm_model, model_params, n_iter=10, 
                         cv=5)
model = clf.fit(x_train, y_train)
params = model.best_estimator_.get_params()

In [17]:
params

{'C': 3.6179237940301725,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [18]:
clf = SVC(C=params['C'])
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print(clf.score(x_test, y_test))


0.8950617283950617


In [19]:
c_matrix = confusion_matrix(y_test, y_pred)
idx = cols = [0, 1]
pd.DataFrame(c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,97,8
1,9,48


In [20]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.92      0.92      0.92       105
           1       0.86      0.84      0.85        57

    accuracy                           0.90       162
   macro avg       0.89      0.88      0.88       162
weighted avg       0.89      0.90      0.89       162

