# All Saxophones, Brass OK: SVM

* model = SVM
* target = ALL SAXES

Use an SVM model to classify whether or not a saxophone is playing in a sample. Samples that include brass instruments are left in the dataset.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.metrics import AUC, Recall
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import BaggingClassifier
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import precision_recall_curve, \
    roc_curve, roc_auc_score, confusion_matrix, \
    classification_report
from scipy.stats import uniform

from _common import NUM_LABEL_COLS, RANDOM_SEED
from _all_saxes_brass_ok_lmsc import TEST_SIZE, VALIDATION_SIZE, \
    BANDS, TIME_SLICES, master, lmss, data, target

Reading lms_data_0.pkl...
(857, 9230)
Reading lms_data_1.pkl...
(867, 9230)
Appending 1...
Reading lms_data_2.pkl...
(873, 9230)
Appending 2...
Reading lms_data_3.pkl...
(882, 9230)
Appending 3...
Reading lms_data_4.pkl...
(874, 9230)
Appending 4...
Reading lms_data_5.pkl...
(859, 9230)
Appending 5...
Reading lms_data_6.pkl...
(877, 9230)
Appending 6...
Reading lms_data_7.pkl...
(867, 9230)
Appending 7...
Reading lms_data_8.pkl...
(871, 9230)
Appending 8...
Making labels...
(7827, 9231)
Selecting columns...
(7827, 9216)
Trying to make numpy...
Applying scaler...
Done


In [2]:
# Number of principal components to keep; the optimal value was determined in
# mt_cc_all_saxes_brass_ok_pca_lmsc.
NUM_COMPONENTS = 24

# Seed the decomposition: with svd_solver='auto', scikit-learn may select the
# randomized solver for a matrix this large, which would otherwise make the
# projection vary slightly between runs.
pca = PCA(n_components=NUM_COMPONENTS, random_state=RANDOM_SEED)

In [3]:
# Learn the principal axes from the `data` matrix imported from _all_saxes_brass_ok_lmsc.
# NOTE(review): this fits on every sample, before the train/test split below — confirm that is intended.
pca.fit(data)

PCA(n_components=24)

In [4]:
# print(pca.explained_variance_ratio_)

In [5]:
# print(pca.singular_values_)

In [6]:
# Project every sample onto the fitted principal components; `d` is the reduced feature matrix.
d = pca.transform(data)

In [7]:
# d

In [8]:
# Hold out TEST_SIZE of the samples for evaluation, seeded for reproducibility.
# Split the PCA-reduced matrix `d`: splitting the raw `data` would leave the
# projection computed above unused and train every model below on all of the
# original feature columns.
x_train, x_test, y_train, y_test = train_test_split(
    d, target,
    test_size=TEST_SIZE,
    random_state=RANDOM_SEED,
)

## Test plain vanilla SVM

In [9]:
# Baseline: an SVC with scikit-learn's default hyperparameters.
clf = SVC().fit(x_train, y_train)
y_pred = clf.predict(x_test)

# Mean accuracy on the held-out test split.
test_accuracy = clf.score(x_test, y_test)
print(test_accuracy)

KeyboardInterrupt: 

In [None]:
# Confusion matrix for the baseline predictions: rows are true classes,
# columns are predicted classes (assumes the target is encoded as 0/1).
idx = cols = [0, 1]
c_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

In [None]:
# Per-class precision, recall and F1 for the baseline SVC.
report = classification_report(y_test, y_pred)
print(report)

## Determine good value for C using random search

In [None]:
# Sample C uniformly from [0.1, 3.1]: scipy's uniform(loc, scale) spans
# [loc, loc + scale].
model_params = {
    'C': uniform(loc=0.1, scale=3.0)
}

svm_model = SVC()
# Seed the search so the sampled candidates — and hence the selected C — are
# the same on every run. 10 candidates x 5-fold CV = 50 fits, plus the refit
# of the best candidate on the full training split.
clf = RandomizedSearchCV(svm_model, model_params, n_iter=10,
                         cv=5, random_state=RANDOM_SEED)
model = clf.fit(x_train, y_train)
# Hyperparameters of the winning estimator; later cells read params['C'].
params = model.best_estimator_.get_params()

In [None]:
# Retrain a single SVC with the C value selected by the random search.
clf = SVC(C=params['C']).fit(x_train, y_train)
y_pred = clf.predict(x_test)

# Mean accuracy on the held-out test split.
test_accuracy = clf.score(x_test, y_test)
print(test_accuracy)


In [None]:
# Confusion matrix for the tuned SVC: rows are true classes, columns are
# predicted classes (assumes the target is encoded as 0/1).
idx = cols = [0, 1]
c_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

In [None]:
# Per-class precision, recall and F1 for the tuned SVC.
report = classification_report(y_test, y_pred)
print(report)

## Test ensemble approach with bagging classifier

In [None]:
 clf = BaggingClassifier(base_estimator=SVC(C=params['C']),
                        n_estimators=400, 
                        max_features=NUM_COMPONENTS - 4,
                        random_state=42)
model = clf.fit(x_train, y_train)

In [None]:
# Evaluate the bagged ensemble on the held-out test split.
y_pred = clf.predict(x_test)

# Mean accuracy of the ensemble.
test_accuracy = clf.score(x_test, y_test)
print(test_accuracy)

In [None]:
# Confusion matrix for the bagged ensemble: rows are true classes, columns
# are predicted classes (assumes the target is encoded as 0/1).
idx = cols = [0, 1]
c_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

In [None]:
# Per-class precision, recall and F1 for the bagged ensemble.
report = classification_report(y_test, y_pred)
print(report)