In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from skopt import BayesSearchCV
from skopt.space import Real, Integer

from rpscv import imgproc

In [2]:
X, y = imgproc.generateGrayFeatures()

Completed processing 2188 images


In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25,
                                                    stratify=y, random_state=42)

In [4]:
pipeline = Pipeline([('pca', PCA()),
                     ('clf', SVC(kernel='rbf'))])

In [5]:
opt_params = {'pca__n_components': Integer(20, 60),
              'clf__gamma': Real(.0001, .01, prior='log-uniform'),
              'clf__C': Real(1, 1000, prior='log-uniform')}

In [6]:
model = BayesSearchCV(pipeline,
                      search_spaces=opt_params,
                      n_iter=100,
                      cv=StratifiedKFold(n_splits=5),
                      scoring='f1_micro',
                      n_jobs=-1,
                      return_train_score=True)

In [7]:
%%time
model.fit(X_train, y_train)

CPU times: user 7min 47s, sys: 11min 35s, total: 19min 22s
Wall time: 29min 18s


BayesSearchCV(cv=StratifiedKFold(n_splits=5, random_state=None, shuffle=False),
       error_score='raise',
       estimator=Pipeline(memory=None,
     steps=[('pca', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)), ('clf', SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False))]),
       fit_params=None, iid=True, n_iter=100, n_jobs=-1, n_points=1,
       optimizer_kwargs=None, pre_dispatch='2*n_jobs', random_state=None,
       refit=True, return_train_score=True, scoring='f1_micro',
       search_spaces={'pca__n_components': Integer(low=20, high=60), 'clf__gamma': Real(low=0.0001, high=0.01, prior='log-uniform', transform='identity'), 'clf__C': Real(low=1, high=1000, prior='log-uniform', transform='identity')},
       verbose=0)

In [8]:
model.best_params_

{'clf__C': 1000.0,
 'clf__gamma': 0.0005471228743514208,
 'pca__n_components': 60}

In [9]:
model.best_score_

0.9841560024375381

In [10]:
model.score(X_test, y_test)

0.9744058500914077

In [11]:
y_pred = model.predict(X_test)

In [12]:
from rpscv.utils import gestureTxt

In [13]:
print(classification_report(y_test, y_pred, target_names=gestureTxt.values()))

              precision    recall  f1-score   support

        rock       0.96      0.99      0.97       182
       paper       0.97      0.96      0.96       178
    scissors       1.00      0.97      0.99       187

   micro avg       0.97      0.97      0.97       547
   macro avg       0.97      0.97      0.97       547
weighted avg       0.97      0.97      0.97       547



In [14]:
conf_matrix = pd.DataFrame(confusion_matrix(y_test, y_pred))
conf_matrix.index = pd.MultiIndex.from_tuples([('true label', label) for label in gestureTxt.values()])
conf_matrix.columns = pd.MultiIndex.from_tuples([('predicted label', label) for label in gestureTxt.values()])
conf_matrix

Unnamed: 0_level_0,Unnamed: 1_level_0,predicted label,predicted label,predicted label
Unnamed: 0_level_1,Unnamed: 1_level_1,rock,paper,scissors
true label,rock,180,2,0
true label,paper,7,171,0
true label,scissors,1,4,182
