## Importing required packages

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [4]:
from skimage.io import imread
from skimage.transform import resize

In [30]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [36]:
import os
import pickle

## Getting our data ready

In [6]:
# Root folder of the image dataset; the loading loop reads one sub-folder per entry in `categories`.
input_dir = 'clf-data/clf-data'
# Class names. A class's position in this list is used as its integer label
# (0 for 'empty', 1 for 'not_empty').
categories = ['empty', 'not_empty']

In [7]:
# Build the feature list and the label list from the image folders.
# Every image is resized to 15x15 and flattened into a 1-D feature vector;
# its label is the index of its category in `categories`.
data = []
labels = []

for cat_idx, category in enumerate(categories):
    category_dir = os.path.join(input_dir, category)
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of `data`/`labels` identical from one run to the next.
    for file_name in sorted(os.listdir(category_dir)):
        img = imread(os.path.join(category_dir, file_name))
        img = resize(img, (15, 15))
        data.append(img.flatten())
        labels.append(cat_idx)

In [8]:
# Convert the accumulated Python lists into NumPy arrays for scikit-learn.
data, labels = np.asarray(data), np.asarray(labels)

In [9]:
# Peek at the first 10 feature vectors (one row per image).
data[:10]

array([[0.42900875, 0.43685188, 0.39371463, ..., 0.50419255, 0.50109368,
        0.47928869],
       [0.46212812, 0.46212812, 0.43075557, ..., 0.18931896, 0.19417849,
        0.16645613],
       [0.32647287, 0.31470816, 0.27957628, ..., 0.38530893, 0.38530893,
        0.35393638],
       ...,
       [0.2954764 , 0.30024033, 0.26966579, ..., 0.37068298, 0.35698376,
        0.3438886 ],
       [0.3636021 , 0.36752367, 0.34399425, ..., 0.47245331, 0.4724533 ,
        0.43344594],
       [0.64659241, 0.59946895, 0.54610501, ..., 0.31733955, 0.3248965 ,
        0.34503708]])

In [10]:
# Peek at the first 10 labels (integer indices into `categories`).
labels[:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [11]:
from sklearn.svm import SVC

In [12]:
from sklearn.model_selection import GridSearchCV

## Splitting our data into training and testing sets

In [13]:
# Hold out 20% of the samples for testing.
# random_state pins the shuffle so the split (and every score computed from it)
# is the same after a kernel restart; stratify keeps the class ratio of
# `labels` the same in the training and test sets.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    shuffle=True,
    random_state=42,
    stratify=labels,
)

In [14]:
# Peek at the first 10 training feature vectors and their labels.
x_train[:10], y_train[:10]

(array([[0.35524582, 0.35916739, 0.32795707, ..., 0.31038659, 0.36756852,
         0.35315423],
        [0.34889546, 0.34889546, 0.31752291, ..., 0.27576838, 0.35192719,
         0.34705764],
        [0.21015436, 0.20629407, 0.19715252, ..., 0.3224003 , 0.31139572,
         0.27614031],
        ...,
        [0.52157417, 0.52157417, 0.47465866, ..., 0.38389152, 0.33839018,
         0.30499161],
        [0.57496597, 0.57104441, 0.5515904 , ..., 0.38813604, 0.34421147,
         0.31535211],
        [0.58427414, 0.56114266, 0.50238298, ..., 0.64938404, 0.61759165,
         0.56433048]]),
 array([0, 0, 1, 0, 1, 0, 0, 0, 0, 1]))

In [15]:
# Peek at the first 10 test feature vectors and their labels.
x_test[:10], y_test[:10]

(array([[0.2068374 , 0.22644493, 0.20002908, ..., 0.76090288, 0.74605619,
         0.71081191],
        [0.39218669, 0.39610826, 0.36473571, ..., 0.66301517, 0.66301517,
         0.63164263],
        [0.34662352, 0.35054509, 0.32931097, ..., 0.38037053, 0.37280072,
         0.34898517],
        ...,
        [0.35115494, 0.35507337, 0.32370082, ..., 0.64558232, 0.64166006,
         0.62804928],
        [0.36439571, 0.33314219, 0.30958666, ..., 0.39368893, 0.36641322,
         0.32854571],
        [0.52300546, 0.52392488, 0.49074575, ..., 0.54657652, 0.53792828,
         0.49258181]]),
 array([1, 0, 0, 0, 1, 0, 0, 0, 1, 1]))

## Model building

In [16]:
# Base estimator: a support-vector classifier with default hyper-parameters.
model = SVC()
model

In [20]:
# Candidate hyper-parameter values for the SVC: 4 values of C x 3 values of
# gamma = 12 combinations to evaluate.
param_grid = [{
    'C': [1, 10, 100, 1000], 
    'gamma': [0.01, 0.001, 0.0001]
}]

In [18]:
# Fit the default-parameter SVC on the training split.
# NOTE(review): the grid search later in the notebook is given this same
# `model` object and is documented to clone its estimator before fitting, so
# this standalone fit looks redundant — confirm and consider removing the cell.
model.fit(x_train, y_train)

In [22]:
# Exhaustive search over every combination in `param_grid`, with `model` as
# the base estimator and GridSearchCV's default cross-validation and scoring.
grid_search = GridSearchCV(model, param_grid)

In [23]:
# Run the search on the training split only; the test split is kept aside
# for the final evaluation.
grid_search.fit(x_train, y_train)

In [24]:
# Take the estimator built with the best parameter combination found by the
# search and use it to predict labels for the held-out test set.
best_esti = grid_search.best_estimator_
y_preds = best_esti.predict(x_test)

In [28]:
# Fraction of test samples classified correctly.
# scikit-learn metrics take their arguments as (y_true, y_pred); accuracy is
# symmetric, so the value is the same, but following the documented order
# keeps this cell consistent with other metric calls.
score = accuracy_score(y_test, y_preds)
print("The final score of the model is:", score*100)

The final score of the model is: 100.0


In [31]:
# Confusion matrix on the test set. Arguments are (y_true, y_pred), so rows
# are the true classes and columns are the predicted classes; passing them
# the other way round would transpose the matrix.
confusion_matrix(y_test, y_preds)

array([[612,   0],
       [  0, 606]], dtype=int64)

In [34]:
# Mean cross-validated score of the best parameter combination (measured on
# the training split during the grid search).
grid_search.best_score_

0.999794661190965

In [35]:
# Mean accuracy of the tuned estimator on the held-out test set.
best_esti.score(x_test, y_test)

1.0

In [37]:
# Persist the tuned estimator to disk. The context manager guarantees the
# file is flushed and closed even if pickling raises.
# NOTE: only unpickle this file from a trusted location — pickle.load can
# execute arbitrary code.
with open('./model.p', 'wb') as model_file:
    pickle.dump(best_esti, model_file)