# All Saxophones, Brass OK: SVM

* model = SVM
* features = LMSC
* target = ALL SAXES

Use an SVM model to classify whether or not a saxophone is playing in a sample; samples that include brass instruments are left in the dataset.

In [20]:
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.metrics import AUC, Recall
from sklearn.model_selection import train_test_split, RandomizedSearchCV, \
    GridSearchCV
from sklearn.ensemble import BaggingClassifier
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import precision_recall_curve, \
    roc_curve, roc_auc_score, confusion_matrix, \
    classification_report
from scipy.stats import uniform

from _common import NUM_LABEL_COLS, RANDOM_SEED
from _all_saxes_brass_ok_lmsc import TEST_SIZE, VALIDATION_SIZE, \
    BANDS, TIME_SLICES, master, lmss, data, target

In [18]:
# Number of principal components kept by the PCA projection and used as the
# reference feature count in the bagging experiments.
NUM_COMPONENTS = 24

In [2]:
# Optimal n_components value determined in mt_cc_all_saxes_brass_ok_pca_lmsc.
pca = PCA(n_components=NUM_COMPONENTS)

In [3]:
# Learn the principal components from the feature matrix.
# NOTE(review): this fits on the *entire* dataset, before the train/test
# split, so the projection has seen the test samples. Consider fitting on the
# training split only to avoid leakage into the reported scores.
pca.fit(data)

PCA(n_components=24)

In [4]:
# print(pca.explained_variance_ratio_)

In [5]:
# print(pca.singular_values_)

In [6]:
# Project every sample onto the NUM_COMPONENTS principal components.
d = pca.transform(data)

In [7]:
# d

In [8]:
# Split the PCA-projected samples into training and held-out test sets; the
# fixed seed keeps the partition identical between runs.
x_train, x_test, y_train, y_test = train_test_split(
    d,
    target,
    test_size=TEST_SIZE,
    random_state=RANDOM_SEED,
)

## Test plain vanilla SVM

In [9]:
# Baseline: an SVC left at scikit-learn's default hyperparameters.
# fit() returns the estimator itself, so construction and training chain.
clf = SVC().fit(x_train, y_train)
y_pred = clf.predict(x_test)
# Mean accuracy on the held-out test split.
print(clf.score(x_test, y_test))

0.801404853128991


In [10]:
# Confusion matrix for the baseline SVC: rows are true labels, columns are
# predicted labels.
cols = idx = [0, 1]
c_matrix = confusion_matrix(y_test, y_pred)
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,496,192
1,119,759


In [11]:
# Per-class precision / recall / F1 for the baseline SVC on the test split.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.81      0.72      0.76       688
           1       0.80      0.86      0.83       878

    accuracy                           0.80      1566
   macro avg       0.80      0.79      0.80      1566
weighted avg       0.80      0.80      0.80      1566



## Determine a good value for C using random search

In [12]:
# Randomised search over the SVC regularisation strength C.
# scipy's uniform(loc, scale) samples from [loc, loc + scale], so C is drawn
# from the interval [0.1, 3.1].
model_params = {
    'C': uniform(0.1, 3.0)
}

svm_model = SVC()
# Seed the search: without random_state the 10 sampled C candidates (and
# therefore the selected C and every score derived from it further down)
# change on every run, making the notebook's results irreproducible.
clf = RandomizedSearchCV(svm_model, model_params, n_iter=10,
                         cv=5, random_state=RANDOM_SEED)
model = clf.fit(x_train, y_train)
# Hyperparameters of the best estimator found; later cells read params['C'].
params = model.best_estimator_.get_params()

In [13]:
# Retrain a single SVC with the C selected by the random search and score it
# on the held-out test split.
best_c = params['C']
clf = SVC(C=best_c).fit(x_train, y_train)
y_pred = clf.predict(x_test)
print(clf.score(x_test, y_test))

0.8026819923371648


In [14]:
# Confusion matrix for the tuned SVC: rows are true labels, columns are
# predicted labels.
cols = idx = [0, 1]
c_matrix = confusion_matrix(y_test, y_pred)
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,501,187
1,122,756


In [15]:
# Per-class precision / recall / F1 for the tuned SVC on the test split.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.80      0.73      0.76       688
           1       0.80      0.86      0.83       878

    accuracy                           0.80      1566
   macro avg       0.80      0.79      0.80      1566
weighted avg       0.80      0.80      0.80      1566



## Test ensemble approach with bagging classifier

In [21]:
# param_grid = {'max_features': [NUM_COMPONENTS - 2, NUM_COMPONENTS - 3, 
#                                NUM_COMPONENTS - 4, NUM_COMPONENTS - 5,
#                                NUM_COMPONENTS - 6, NUM_COMPONENTS - 7, 
#                                NUM_COMPONENTS - 8, NUM_COMPONENTS - 9,
#                                NUM_COMPONENTS - 10, NUM_COMPONENTS - 11,
#                                NUM_COMPONENTS - 12, NUM_COMPONENTS - 13]}

# base_estimator = SVC(C=params['C'])
# estimator = BaggingClassifier(base_estimator=base_estimator, n_estimators=100)

# clf = GridSearchCV(estimator, param_grid, n_jobs=-1)
# search = clf.fit(x_train, y_train)
# search.best_params_

# max_features = 16

{'max_features': 16}

In [30]:
 clf = BaggingClassifier(base_estimator=SVC(C=params['C']),
                        n_estimators=400, 
                        max_features=16,
                        random_state=42)
model = clf.fit(x_train, y_train)

In [31]:
# Predict with the bagging ensemble and print its mean accuracy on the
# held-out test split.
y_pred = clf.predict(x_test)
print(clf.score(x_test, y_test))

0.7969348659003831


In [32]:
# Confusion matrix for the bagging ensemble: rows are true labels, columns
# are predicted labels.
cols = idx = [0, 1]
c_matrix = confusion_matrix(y_test, y_pred)
pd.DataFrame(data=c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,480,208
1,110,768


In [33]:
# Per-class precision / recall / F1 for the bagging ensemble on the test split.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.81      0.70      0.75       688
           1       0.79      0.87      0.83       878

    accuracy                           0.80      1566
   macro avg       0.80      0.79      0.79      1566
weighted avg       0.80      0.80      0.79      1566

