In [2]:
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from scipy.misc import imread
from sklearn.externals import joblib
from sklearn.base import BaseEstimator, ClassifierMixin

import random as rand
import numpy as np 
import cv2
import glob
import time

import matplotlib.pyplot as plt
%matplotlib inline

from featuresourcer import FeatureSourcer

import warnings
warnings.filterwarnings('ignore')

In [10]:
class HOG_SVM(BaseEstimator, ClassifierMixin):
    """Linear SVM trained on features extracted from image files.

    The estimator takes image file *paths* as its ``X``: every path is read
    with OpenCV, resized to a square of ``bounding_box_size`` pixels, turned
    into a feature vector by a ``FeatureSourcer``, standardised and passed to
    a ``LinearSVC``.

    The feature-extraction settings are not constructor arguments; they are
    supplied through :meth:`set_params`, which must be called before
    :meth:`fit` or :meth:`predict`.
    """

    def __init__(self):
        self.SVM = LinearSVC()
        self.scaler = StandardScaler()
        self.sourcer = None
        # Side length (pixels) images are resized to; assigned by set_params().
        self.size = None

    def set_params(self, **params):
        """Configure feature extraction from keyword parameters.

        Parameters
        ----------
        **params
            Forwarded as a single dict to ``FeatureSourcer``. Must contain
            ``"bounding_box_size"``, the square side length images are
            resized to before feature extraction.

        Returns
        -------
        self

        Raises
        ------
        KeyError
            If ``"bounding_box_size"`` is missing.
        """
        self.sourcer = FeatureSourcer(params)
        self.size = params["bounding_box_size"]
        print("processing parameters", params)
        return self

    def _check_configured(self):
        """Fail with a clear message when set_params() has not been called."""
        if self.sourcer is None or self.size is None:
            # AttributeError is kept on purpose: it is the exception type an
            # unconfigured instance raised before (missing ``self.size``).
            raise AttributeError(
                "HOG_SVM is not configured: call set_params(...) with the "
                "feature-extraction parameters before fit/predict"
            )

    def _extract_features(self, imgs):
        """Return an array holding ``self.sourcer.features(img)`` for each image."""
        return np.asarray([self.sourcer.features(img) for img in imgs])

    def _get_imgs(self, paths):
        """Read every path and resize it to ``(self.size, self.size)``.

        Raises
        ------
        IOError
            If OpenCV cannot read one of the files. ``cv2.imread`` signals
            this by returning ``None`` rather than raising, which would
            otherwise surface as an opaque error inside ``cv2.resize``.
        """
        self._check_configured()
        target = (self.size, self.size)

        imgs = []
        for path in paths:
            img = cv2.imread(path)
            if img is None:
                raise IOError("could not read image: {!r}".format(path))
            imgs.append(cv2.resize(img, target))

        return np.asarray(imgs)

    def fit(self, paths, y=None):
        """Fit the scaler and the SVM on the images found at ``paths``.

        Parameters
        ----------
        paths : iterable of str
            Image file paths, one per sample.
        y : array-like
            Class label for each path.

        Returns
        -------
        self
        """
        features = self._extract_features(self._get_imgs(paths))

        # Fit the scaler on the training features and scale them in one step.
        x = self.scaler.fit_transform(features)
        self.SVM.fit(x, y)

        return self

    def predict(self, paths, y=None):
        """Predict a class label for each image path.

        ``y`` is accepted for signature compatibility and is not used.
        """
        features = self._extract_features(self._get_imgs(paths))
        x = self.scaler.transform(features)

        return self.SVM.predict(x)


In [11]:
img_size = 96


def load_resized_images(paths, size):
    """Read each image path with OpenCV and resize it to ``size`` x ``size``.

    Raises IOError naming the offending file when OpenCV cannot read it
    (``cv2.imread`` returns ``None`` instead of raising).
    """
    imgs = []
    for path in paths:
        img = cv2.imread(path)
        if img is None:
            raise IOError("could not read image: {!r}".format(path))
        imgs.append(cv2.resize(img, (size, size)))
    return np.asarray(imgs)


# glob returns matches in arbitrary order; sorting keeps the sample order
# stable between runs, so any unshuffled train/validation split is reproducible.
# NOTE(review): without recursive=True the '**' component matches exactly one
# directory level (same as '*') -- confirm that is the intended layout.
insect_paths = sorted(glob.glob('datasets/cropped/insect/**/*.png'))
non_insect_paths = sorted(glob.glob('datasets/cropped/non_insect/*.png'))

# Fail here, with a readable message, rather than deep inside model fitting.
assert insect_paths, "no insect images found under datasets/cropped/insect/"
assert non_insect_paths, "no non-insect images found under datasets/cropped/non_insect/"

# The in-memory arrays are kept for inspection; the model input below is the
# list of paths, not these arrays.
insect_imgs = load_resized_images(insect_paths, img_size)
non_insect_imgs = load_resized_images(non_insect_paths, img_size)

# Counts come from the path lists so the labels line up with `x` by construction.
total_insects, total_non_insects = len(insect_paths), len(non_insect_paths)

# Samples are file paths; labels are 1.0 for insect and 0.0 for non-insect.
x = np.asarray(insect_paths + non_insect_paths)
y = np.hstack((np.ones(total_insects), np.zeros(total_non_insects)))

In [12]:
from sklearn.model_selection import GridSearchCV

# Candidate values for every feature-extraction setting. The search tries all
# 2 * 4 * 2 * 2 * 2 = 64 combinations, each evaluated with 3-fold CV.
tuned_params = dict(
    color_model=['hsv', 'yuv'],
    bounding_box_size=[32, 64, 96, 128],
    number_of_orientations=[6, 12],
    pixels_per_cell=[8, 16],
    cells_per_block=[1, 2],
)

gs = GridSearchCV(estimator=HOG_SVM(), param_grid=tuned_params, cv=3)

gs.fit(x, y)

processing parameters {'bounding_box_size': 32, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 32, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 32, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 32, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 32, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 32, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 32, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 12, 'pixels_per_cell'

processing parameters {'bounding_box_size': 64, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 12, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 64, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 12, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 64, 'cells_per_block': 1, 'color_model': 'hsv', 'number_of_orientations': 12, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 64, 'cells_per_block': 1, 'color_model': 'yuv', 'number_of_orientations': 6, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 64, 'cells_per_block': 1, 'color_model': 'yuv', 'number_of_orientations': 6, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 64, 'cells_per_block': 1, 'color_model': 'yuv', 'number_of_orientations': 6, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 64, 'cells_per_block': 1, 'color_model': 'yuv', 'number_of_orientations': 6, 'pixels_per_cel

processing parameters {'bounding_box_size': 96, 'cells_per_block': 1, 'color_model': 'yuv', 'number_of_orientations': 12, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 1, 'color_model': 'yuv', 'number_of_orientations': 12, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 1, 'color_model': 'yuv', 'number_of_orientations': 12, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 1, 'color_model': 'yuv', 'number_of_orientations': 12, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 1, 'color_model': 'yuv', 'number_of_orientations': 12, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 1, 'color_model': 'yuv', 'number_of_orientations': 12, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 96, 'cells_per_block': 2, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_

processing parameters {'bounding_box_size': 128, 'cells_per_block': 2, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 128, 'cells_per_block': 2, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 128, 'cells_per_block': 2, 'color_model': 'hsv', 'number_of_orientations': 6, 'pixels_per_cell': 16}
processing parameters {'bounding_box_size': 128, 'cells_per_block': 2, 'color_model': 'hsv', 'number_of_orientations': 12, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 128, 'cells_per_block': 2, 'color_model': 'hsv', 'number_of_orientations': 12, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 128, 'cells_per_block': 2, 'color_model': 'hsv', 'number_of_orientations': 12, 'pixels_per_cell': 8}
processing parameters {'bounding_box_size': 128, 'cells_per_block': 2, 'color_model': 'hsv', 'number_of_orientations': 12, 'pixels

GridSearchCV(cv=3, error_score='raise-deprecating', estimator=HOG_SVM(),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'color_model': ['hsv', 'yuv'], 'bounding_box_size': [32, 64, 96, 128], 'number_of_orientations': [6, 12], 'pixels_per_cell': [8, 16], 'cells_per_block': [1, 2]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [13]:
# Raw cross-validation results of the grid search: a dict of arrays with one
# entry per parameter combination (e.g. 'mean_fit_time', 'std_fit_time').
gs.cv_results_ 

{'mean_fit_time': array([ 2.13987263,  1.50314673,  2.7014943 ,  1.67658202,  2.01806863,
         1.99652171,  2.89707613,  1.70979834,  2.0172379 ,  1.5434339 ,
         2.89348094,  1.67179465,  2.01287285,  1.50158731,  2.64491868,
         1.66046707,  4.18594837,  2.20909866,  6.80634697,  2.79158743,
         4.21002396,  2.14894851,  6.68596164,  2.80741835,  4.59167496,
         2.14231197,  6.77821398,  2.80075995,  4.2162927 ,  2.16907128,
         6.81323067,  2.78429929,  7.873679  ,  3.23940293, 13.53921604,
         4.69590195,  7.84746798,  3.25033816, 13.57653793,  4.68576169,
         7.9235456 ,  3.2001303 , 13.80134002,  4.65455707,  8.02464851,
         3.1840814 , 13.85761134,  4.68550102, 13.0546728 ,  4.66709137,
        26.14184316,  7.42387112, 12.90227707,  4.68237925, 23.55363433,
         7.31684796, 15.71068128,  6.53712495, 27.92767008,  8.976626  ,
        13.67446534,  5.67755389, 29.01815947,  8.32577968]),
 'std_fit_time': array([0.16066482, 0.1170362

In [14]:
# Mean cross-validated score of the best parameter combination. The search was
# built with scoring=None, so the estimator's own score() method is used.
gs.best_score_

0.7906976744186046

In [15]:
# Parameter combination that achieved gs.best_score_.
gs.best_params_

{'bounding_box_size': 128,
 'cells_per_block': 2,
 'color_model': 'hsv',
 'number_of_orientations': 12,
 'pixels_per_cell': 16}

In [16]:
import os

print("Saving models...")

# joblib.dump does not create missing directories, so make sure the target
# folder exists before writing.
os.makedirs('models', exist_ok=True)

# NOTE: only the fitted SVM and scaler are written. The feature-extraction
# settings in gs.best_params_ are not stored in these files and must be
# supplied separately when the models are loaded.
joblib.dump(gs.best_estimator_.SVM, 'models/svc2.pkl')
joblib.dump(gs.best_estimator_.scaler, 'models/scaler2.pkl')

print("...Done")

Saving models...
...Done
