In [20]:
import pickle
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, \
    precision_recall_curve, roc_curve, roc_auc_score
from sklearn.model_selection import train_test_split, \
                                    RandomizedSearchCV, GridSearchCV
from sklearn.ensemble import BaggingClassifier
from scipy.stats import uniform

from _common import NUM_LABEL_COLS, RANDOM_SEED
from _all_saxes_xbrass_lmsc import TEST_SIZE, VALIDATION_SIZE, \
    BANDS, TIME_SLICES, master, lmss, data, target
# Note that records are read from pickle, selected, filtered,
# and scaled with `sklearn.preprocessing.StandardScaler` 
# in `_all_saxes_xbrass_lmsc.py`.

In [2]:
x_train, x_test, y_train, y_test = \
    train_test_split(data, target, \
        test_size=TEST_SIZE, \
        random_state=RANDOM_SEED)

# Default SVM

In [3]:
clf = SVC()
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print(clf.score(x_test, y_test))

0.8419881305637982


In [15]:
c_matrix = confusion_matrix(y_test, y_pred)
idx = cols = [0, 1]
pd.DataFrame(c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,437,124
1,79,708


In [16]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.85      0.78      0.81       561
           1       0.85      0.90      0.87       787

    accuracy                           0.85      1348
   macro avg       0.85      0.84      0.84      1348
weighted avg       0.85      0.85      0.85      1348



# Random Search → C = 2.97

In [6]:
model_params = {
    'C': uniform(0.1, 10.0)
}

# RandomizedSearchCV(estimator, param_distributions, *, n_iter=10, 
#                    scoring=None, n_jobs=None, iid='deprecated', 
#                    refit=True, cv=None, verbose=0, 
#                    pre_dispatch='2*n_jobs', random_state=None, 
#                    error_score=nan, return_train_score=False)
svm_model = SVC()
clf = RandomizedSearchCV(svm_model, model_params, n_iter=10, 
                         cv=5)
model = clf.fit(x_train, y_train)
params = model.best_estimator_.get_params()

In [17]:
 params

{'C': 2.9675384496404225,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

In [13]:
clf = SVC(C=params['C'])
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
print(clf.score(x_test, y_test))

KeyboardInterrupt: 

In [18]:
c_matrix = confusion_matrix(y_test, y_pred)
idx = cols = [0, 1]
pd.DataFrame(c_matrix, index=idx, columns=cols)

Unnamed: 0,0,1
0,437,124
1,79,708


In [19]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.85      0.78      0.81       561
           1       0.85      0.90      0.87       787

    accuracy                           0.85      1348
   macro avg       0.85      0.84      0.84      1348
weighted avg       0.85      0.85      0.85      1348



In [None]:
fn = './models/sax_xbrass_svm_bagging_C_3733_MF_23.pkl'
with open(fn, 'wb') as fh:
    pickle.dump(clf, fh)

# Evaluation of Tuned SVM

In [12]:
probas_pred = clf.predict_proba(x_test)

AttributeError: predict_proba is not available when  probability=False

In [None]:
y_true = [1 if x == 1 else 0 for x in y_test]

In [None]:
y_pred_pos = probas_pred[:, [1]]
precision, recall, threshold = precision_recall_curve(y_true, y_pred_pos)

In [14]:
fig = plt.figure(figsize=(10, 7))    
plt.plot(recall, precision)
plt.xlabel('recall')
plt.ylabel('precision')
plt.title("Precision-Recall Curve")

plt.savefig('SVM_LMS_PRC.png')

NameError: name 'recall' is not defined

<Figure size 720x504 with 0 Axes>

In [None]:
fpr, tpr, threshold = roc_curve(y_true, y_pred_pos)
auc = roc_auc_score(y_true, y_pred_pos)

In [None]:
fig = plt.figure(figsize=(10, 7))    
plt.plot(fpr, tpr, 'b')
plt.plot([0, 1], [0, 1], ':', color='0.1')
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.title("ROC Curve: AUC = {:.3f}".format(auc)) 

plt.savefig('SVM_LMS_ROC.png')