# Set Up

In [3]:
import os, cv2, time
from random import shuffle 
from tqdm import tqdm

import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score

import pickle


In [4]:
root_path = "../"
images_path = f"{root_path}/databases/intermittent"
categories = ["day", "night"]

In [6]:
Image = np.ndarray[np.uint8, np.dtype[np.uint8]]

def grayscale(image: Image) -> Image:
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

def thresholding(image: Image) -> Image:
    return cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

def preprocess(image: Image) -> Image:
    image = grayscale(image)
    image = thresholding(image)
    image = cv2.resize(image, (225, 400))
    image = cv2.normalize(image, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
    image = image.flatten()

    return image

In [7]:
data = []
label = []

for category in categories:
    images_dir = os.listdir(f"{images_path}/{category}")
    shuffle(images_dir)
    for file in tqdm(images_dir[:750]):
        img = cv2.imread(f"{images_path}/{category}/{file}")
        img = preprocess(img)

        data.append(img)
        label.append(categories.index(category))
    
data = np.array(data)
label = np.array(label)

100%|██████████| 750/750 [00:28<00:00, 25.94it/s]
100%|██████████| 750/750 [00:36<00:00, 20.33it/s]


In [5]:
X_train, X_test, y_train, y_test = train_test_split(data, label, test_size= 0.2, shuffle= True, random_state= int(time.time()))

In [9]:
clf = SVC()

param_grid = {'C': [0.1, 1, 10], 'gamma': ["scale", "auto"], 'kernel': ["rbf", "sigmoid", "poly"]}
grid_search = GridSearchCV(clf, param_grid, refit= "accuracy", scoring= ["accuracy", "f1"], cv= 3, verbose= 3)

In [10]:
grid_search.fit(X_train, y_train)
best_estimator = grid_search.best_estimator_
best_estimator

Fitting 3 folds for each of 18 candidates, totalling 54 fits
[CV 1/3] END C=0.1, gamma=scale, kernel=rbf; accuracy: (test=0.993) f1: (test=0.992) total time=  35.7s
[CV 2/3] END C=0.1, gamma=scale, kernel=rbf; accuracy: (test=0.990) f1: (test=0.990) total time=  40.9s
[CV 3/3] END C=0.1, gamma=scale, kernel=rbf; accuracy: (test=0.993) f1: (test=0.992) total time=  41.4s
[CV 1/3] END C=0.1, gamma=scale, kernel=sigmoid; accuracy: (test=0.975) f1: (test=0.974) total time=  26.7s
[CV 2/3] END C=0.1, gamma=scale, kernel=sigmoid; accuracy: (test=0.990) f1: (test=0.990) total time=  30.0s
[CV 3/3] END C=0.1, gamma=scale, kernel=sigmoid; accuracy: (test=0.980) f1: (test=0.980) total time=  26.3s
[CV 1/3] END C=0.1, gamma=scale, kernel=poly; accuracy: (test=0.995) f1: (test=0.995) total time=  19.3s
[CV 2/3] END C=0.1, gamma=scale, kernel=poly; accuracy: (test=0.990) f1: (test=0.990) total time=  18.2s
[CV 3/3] END C=0.1, gamma=scale, kernel=poly; accuracy: (test=0.988) f1: (test=0.988) total t

In [11]:
grid_search.best_params_

{'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}

In [12]:
y_hat = best_estimator.predict(X_test)
score = (accuracy_score(y_test, y_hat), f1_score(y_test, y_hat))

score

(0.9933333333333333, 0.9935897435897436)

In [27]:
model_path = f"sklearn_{grid_search.best_estimator_}.pickle"
pickle.dump(best_estimator, open(model_path, "wb"))