# 学習用のnotebook

In [1]:
import os
import pickle

import numpy as np
import skimage.io
from skimage import feature
from skimage.transform import rescale
from sklearn import model_selection

%run path.py

# 画像の読み込み→縮小→HOG計算

In [2]:
# File names found in the "OK" and "NG" sub-directories of img_path
# (img_path is not defined in this notebook; presumably it comes from path.py).
OK_name_list, NG_name_list = (
    os.listdir(img_path + label_dir) for label_dir in ("OK", "NG")
)

In [17]:
# Computing HOG features for every image is slow, so the resulting matrices
# are cached as pickle files. The flag is derived from the file system so that
# a fresh checkout (no pickle files yet) recomputes the features instead of
# failing when the cached files are loaded afterwards.
do_pickle_file_exist = all(
    os.path.exists(pickle_name)
    for pickle_name in ('OK_image_data.pickle', 'NG_image_data.pickle')
)


def _compute_hog_matrix(image_dir, name_list):
    """Return the stacked HOG feature vectors of the given images.

    Each image is read from ``image_dir + name``, rescaled by a factor of
    1/8 and passed to ``feature.hog`` with ``block_norm='L1'``; the results
    are combined with ``np.vstack`` in the order of ``name_list``.

    Parameters
    ----------
    image_dir : str
        Directory prefix including the trailing separator (e.g. ``".../OK/"``).
    name_list : list of str
        File names inside ``image_dir``.

    Returns
    -------
    numpy.ndarray
        The ``np.vstack`` of the per-image HOG results.
    """
    return np.vstack([
        feature.hog(
            rescale(skimage.io.imread(image_dir + name), 1.0 / 8.0,
                    mode='constant', multichannel=True, anti_aliasing=True),
            block_norm='L1')
        for name in name_list
    ])


if not do_pickle_file_exist:
    # Author's recorded shape for the OK matrix: (187, 222345)
    OK_image_data = _compute_hog_matrix(img_path + "OK/", OK_name_list)

    # Author's recorded shape for the NG matrix: (186, 222345)
    NG_image_data = _compute_hog_matrix(img_path + "NG/", NG_name_list)

    # Persist both feature matrices so later runs can skip the extraction.
    with open('OK_image_data.pickle', 'wb') as f:
        pickle.dump(OK_image_data, f)

    with open('NG_image_data.pickle', 'wb') as f:
        pickle.dump(NG_image_data, f)

In [4]:
# Restore the cached feature matrices from disk.
# NOTE: pickle.load executes arbitrary code from the file; only load pickle
# files that were produced locally by this notebook.
with open('OK_image_data.pickle', 'rb') as ok_file, \
        open('NG_image_data.pickle', 'rb') as ng_file:
    OK_image_data = pickle.load(ok_file)
    NG_image_data = pickle.load(ng_file)

In [5]:
# Label vector for the OK rows followed by the NG rows: 1 = OK, 0 = NG.
# The row counts are taken from the loaded feature matrices rather than from
# the directory listings, so the labels always match the cached features even
# if the image folders changed after the pickle files were written.
n_ok_rows = len(OK_image_data)
n_ng_rows = len(NG_image_data)
target = np.zeros(n_ok_rows + n_ng_rows)
target[:n_ok_rows] = 1

# trainとtestを分離

In [6]:
# Stack the OK rows on top of the NG rows (the same order as `target`) and
# hold out a test split; the seed is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    np.concatenate([OK_image_data, NG_image_data], axis=0),
    target,
    random_state=42,
)

In [7]:
# Naive Bayes baseline, scored by cross-validation on the training split
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score

val = cross_val_score(estimator=GaussianNB(), X=X_train, y=y_train)
# Per-fold scores on the first line, their mean on the second
print(val, val.mean(), sep='\n')



[0.7311828  0.87096774 0.67741935]
0.7598566308243727


# 学習モデルの構築と学習、交差検証で評価

In [8]:
# Linear SVM: grid search over the regularisation parameter C
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(
    estimator=LinearSVC(),
    param_grid=dict(C=[1.0, 2.0, 4.0, 8.0]),
)
grid.fit(X_train, y_train)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [1.0, 2.0, 4.0, 8.0]}, pre_dispatch='2*n_jobs',
       refit=True, return_train_score='warn', scoring=None, verbose=0)

In [9]:
# Best cross-validated score found by the linear SVM grid search
print(grid.best_score_)

# Does the linear SVM beat the naive Bayes mean CV score?
# (It does not: the SVM scores lower than naive Bayes.)
val.mean() < grid.best_score_

0.7204301075268817


False

In [10]:
# Hypothesis: the classes are hard to separate linearly, so switch to an SVM
# with an RBF kernel and repeat the same grid search over C.
from sklearn.svm import SVC

grid2 = GridSearchCV(
    estimator=SVC(),
    param_grid=dict(C=[1.0, 2.0, 4.0, 8.0]),
)
grid2.fit(X_train, y_train)



GridSearchCV(cv='warn', error_score='raise-deprecating',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'C': [1.0, 2.0, 4.0, 8.0]}, pre_dispatch='2*n_jobs',
       refit=True, return_train_score='warn', scoring=None, verbose=0)

In [11]:
# Best cross-validated score found by the RBF-kernel SVM grid search
print(grid2.best_score_)

# Does the RBF SVM beat the naive Bayes mean CV score?
# (It does not: the result is even worse than the linear SVM.)
val.mean() < grid2.best_score_

0.5161290322580645


False

# SVMの中で性能が最高のモデルを使い、学習用データ全体で改めて学習する

In [13]:
# Take the best linear SVM found by the grid search and fit it on the whole
# training split.
# NOTE(review): the grid search above ran with refit=True, so best_estimator_
# should already be fitted on X_train; this explicit fit looks redundant —
# confirm before removing.
model = grid.best_estimator_
model.fit(X=X_train, y=y_train)

LinearSVC(C=2.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

# 検証用データおよびモデルをpickle化する

In [16]:
# Persist the held-out test data and the trained model.
for pickle_name, payload in (
    ('X_test.pickle', X_test),
    ('y_test.pickle', y_test),
    ('model.pickle', model),
):
    with open(pickle_name, 'wb') as f:
        pickle.dump(payload, f)