In [1]:
from platform import python_version
print(python_version())

3.6.5


In [2]:
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from scipy.misc import imread
from sklearn.externals import joblib
from sklearn.base import BaseEstimator, ClassifierMixin

import random as rand
import numpy as np 
import cv2
import glob
import time

import matplotlib.pyplot as plt
%matplotlib inline

from featuresourcer import FeatureSourcer

import warnings
warnings.filterwarnings('ignore')

In [18]:
class HOG_SVM(BaseEstimator, ClassifierMixin):
    def __init__(self):
        self.SVM = LinearSVC()
        self.scaler = StandardScaler()
        self.sourcer = None

    def set_params(self, **params):
        self.sourcer = FeatureSourcer(params)
        print("processing parameters", params)
        return self


    def _extract_features(self, imgs):
        features = []

        for img in imgs:
            features.append(self.sourcer.features(img))

        features = np.asarray(features)

        return features

    def fit(self, imgs, y=None):
        features = self._extract_features(imgs)

        self.scaler.fit(features)
        x = self.scaler.transform(features)

        self.SVM.fit(x, y)

        return self

    def predict(self, imgs, y=None):
        features = self._extract_features(imgs)
        x = self.scaler.transform(features)

        return self.SVM.predict(x)


In [19]:
img_size = (96, 96)

insect_imgs, non_insect_imgs = [], []
insect_paths = glob.glob('datasets/cropped/insect/**/*.png')
non_insect_paths = glob.glob('datasets/cropped/non_insect/*.png')

for path in insect_paths: insect_imgs.append(cv2.resize(cv2.imread(path), (img_size, img_size)))
for path in non_insect_paths: non_insect_imgs.append(cv2.resize(cv2.imread(path), (img_size, img_size)))

insect_imgs, non_insect_imgs = np.asarray(insect_imgs), np.asarray(non_insect_imgs)
total_insects, total_non_insects = insect_imgs.shape[0], non_insect_imgs.shape[0]

x = np.vstack((insect_imgs, non_insect_imgs))
y = np.hstack((np.ones(total_insects), np.zeros(total_non_insects)))

In [22]:
from sklearn.model_selection import GridSearchCV

tuned_params = {
  'color_model': ['hsv', 'yuv'],  # hsv, yuv
  'bounding_box_size': [96],             # 64, 96, 128
  'number_of_orientations': [6, 12],        # 6, 12
  'pixels_per_cell': [8, 16],               # 8, 16
  'cells_per_block': [1, 2, 4],                # 1, 2, 4
}
gs = GridSearchCV(HOG_SVM(), tuned_params, cv=3)

gs.fit(x, y)

processing parameters {'bounding_box_size': 96, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 12, 'pixels_per_cell'

processing parameters {'bounding_box_size': 96, 'cells_per_block': 4, 'color_model': 'hsv', 'number_of_orientations': 12, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 4, 'color_model': 'hsv', 'number_of_orientations': 12, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 4, 'color_model': 'hsv', 'number_of_orientations': 12, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 4, 'color_model': 'yuv', 'number_of_orientations': 6, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 4, 'color_model': 'yuv', 'number_of_orientations': 6, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 4, 'color_model': 'yuv', 'number_of_orientations': 6, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 4, 'color_model': 'yuv', 'number_of_orientations': 6, 'pixels_per_cel

GridSearchCV(cv=3, error_score='raise-deprecating', estimator=HOG_SVM(),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'color_model': ['hsv', 'yuv'], 'bounding_box_size': [96], 'number_of_orientations': [6, 12], 'pixels_per_cell': [8, 16], 'cells_per_block': [1, 2, 4]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [23]:
gs.cv_results_ 

{'mean_fit_time': array([ 8.98433574,  2.31515598, 15.78239163,  5.80262804,  9.40904641,
         3.30963564, 21.60641257,  7.84455522, 11.06377196,  3.81900493,
        26.44073844,  4.66056212, 13.08859881,  2.73984806, 21.29920689,
         5.0903813 , 13.5316093 ,  4.61028409, 28.153711  ,  7.85627341,
        12.94740653,  3.28331431, 22.3935918 ,  6.46602567]),
 'mean_score_time': array([ 5.34040562,  1.20608107,  9.53252602,  2.66566825,  4.44600526,
         1.32905062, 10.56473867,  3.37838443,  4.8438894 ,  2.09428795,
        13.47860853,  2.38384851,  5.75717862,  1.31680759,  9.90129805,
         2.37143111,  6.23967902,  1.6528763 , 12.66163945,  3.04795702,
         6.26885343,  1.61805081, 10.17097624,  2.67359694]),
 'mean_test_score': array([0.63953488, 0.60465116, 0.70348837, 0.62790698, 0.63953488,
        0.60465116, 0.70348837, 0.60465116, 0.73255814, 0.73837209,
        0.73837209, 0.77325581, 0.69186047, 0.72674419, 0.75581395,
        0.73837209, 0.79651163, 0

In [24]:
gs.best_score_

0.7965116279069767

In [25]:
gs.best_params_

{'bounding_box_size': 96,
 'cells_per_block': 4,
 'color_model': 'hsv',
 'number_of_orientations': 6,
 'pixels_per_cell': 8}

In [26]:
print ("Saving models...")

joblib.dump(gs.best_estimator_.SVM, 'models/svc2.pkl')
joblib.dump(gs.best_estimator_.scaler, 'models/scaler2.pkl')

print("...Done")

Saving models...
...Done


In [40]:

from binaryclassifier import BinaryClassifier

sourcer_params = {
    'bounding_box_size': 96,
    'cells_per_block': 4,
    'color_model': 'hsv',
    'number_of_orientations': 6,
    'pixels_per_cell': 8
}

cls = BinaryClassifier(gs.best_estimator_.SVM, gs.best_estimator_.scaler)
src = FeatureSourcer(sourcer_params)

img = cv2.imread("datasets/full/other/Butterfly/original.jpeg?1532741436.jpg")
features = src.features(img)
f = gs.best_estimator_.scaler.transform([features.resize()])
gs.best_estimator_.SVM.predict(f)

ValueError: operands could not be broadcast together with shapes (1,6854400) (23328,) (1,6854400) 