In [54]:
from pathlib import Path
import cv2
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

In [55]:
train_dir = "data/images/training/"
test_dir = "data/images/test/"

In [56]:
def load_dataset(img_dir):
    p = Path(img_dir)
    dirs = p.glob('*')
    
    img_list = []
    
    for dir in dirs:
        label = dir.name
        for file in dir.glob('*.jpg'):
            img = cv2.imread(file)
            
            if img is not None:
                img_list.append((img, label))
    return img_list

train_img = load_dataset(train_dir)
test_img = load_dataset(test_dir)

In [57]:
def standarized_input(image, size=(1100, 600)):
    std_img = cv2.resize(image, size)
    return std_img

def preprocess(img_list):
    std_img_list = []
    for item in img_list:
        image = item[0]
        label = item[1]
        
        std_img = standarized_input(image)
        img_label = 1 if label == 'day' else 0
        std_img_list.append([std_img, img_label])
    return std_img_list

train_std_img_list = preprocess(train_img)
test_std_img_list = preprocess(test_img)


In [58]:
train_labels = [item[1] for item in train_std_img_list]
print(f'Training label counts: {np.bincount(train_labels)}') 

Training label counts: [120 120]


In [59]:
def calc_histogram(image):
    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    
    hist_h = cv2.calcHist([hsv_image], [0], None, [16], [0, 180])
    hist_s = cv2.calcHist([hsv_image], [1], None, [16], [0, 256])
    hist_v = cv2.calcHist([hsv_image], [2], None, [16], [0, 256])
    
    hist_h = cv2.normalize(hist_h, hist_h).flatten()
    hist_s = cv2.normalize(hist_s, hist_s).flatten()
    hist_v = cv2.normalize(hist_v, hist_v).flatten()

    hist_features = np.concatenate([hist_h, hist_s, hist_v])
    return hist_features

In [60]:
def extract_histogram_features(img_list):
    features = []
    labels = []
    for item in img_list:
        hist = calc_histogram(item[0])
        features.append(hist)
        labels.append(item[1])
    return np.array(features), np.array(labels)

In [61]:
X_train, y_train = extract_histogram_features(train_std_img_list)
X_test, y_test = extract_histogram_features(test_std_img_list)

In [62]:
scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [63]:
param_grid = {
    'C': [0.001, 0.01],
    'gamma': [0.01, 0.1], # adding gamma for search the best paramater and flexibilty in RBF kernel
    'kernel': ['linear', 'rbf']
}

svc = SVC()

grid_search = GridSearchCV(svc, param_grid, cv=5, verbose=1, n_jobs=-1)
grid_search.fit(X_train, y_train)
print(f'Best parameters found: {grid_search.best_params_}')

Fitting 5 folds for each of 8 candidates, totalling 40 fits
Best parameters found: {'C': 0.01, 'gamma': 0.01, 'kernel': 'linear'}


In [64]:
best_model = grid_search.best_estimator_

y_train_pred = best_model.predict(X_train)
y_test_pred = best_model.predict(X_test)

train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

print(f'Accuracy on train data: {train_accuracy * 100:.2f}%')
print(f'Accuracy on test data: {test_accuracy * 100:.2f}%')

Accuracy on train data: 99.58%
Accuracy on test data: 95.62%
