In [9]:
import cv2
import numpy as np
from sklearn.metrics import accuracy_score
import os

In [14]:
train_dir = 'data/images/training/'
test_dir = 'data/images/test/'

In [11]:
def compute_histogram(image):
    # Convert the image to HSV
    hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    
    # Compute the histogram (16 bins for each channel)
    hist = cv2.calcHist([hsv], [0, 1, 2], None, [16, 16, 16], [0, 180, 0, 256, 0, 256])
    
    # Normalize the histogram
    cv2.normalize(hist, hist)
    
    # Flatten the histogram to create the feature vector
    return hist.flatten()

def standarized_input(image):
    # resize to w: 1100, h:600
    std_img = cv2.resize(image, (1100,600))

    return std_img

In [12]:
# Create function to load images and Labels
def load_histogram_std_img(directory):
    data = []
    labels = []
    
    for label in ['day', 'night']:
        path = os.path.join(directory, label)
        for filename in os.listdir(path):
            if filename.endswith('.jpg') or filename.endswith('.png'):
                img_path = os.path.join(path, filename)
                
                # Read and resize the image
                image = cv2.imread(img_path)
                resized_image = standarized_input(image)
                
                # Compute histogram features
                hist = compute_histogram(resized_image)
                
                # Append features and label
                data.append(hist)
                labels.append(0 if label == 'night' else 1)  # 0 for night, 1 for day
                
    return np.array(data), np.array(labels)

In [15]:
# Load Train and test data
X_train, y_train = load_histogram_std_img(train_dir)
X_test, y_test = load_histogram_std_img(test_dir)

# show the shape of the data
print(f'Training data shape: {X_train.shape}')
print(f'Test data shape: {X_test.shape}')

Training data shape: (240, 4096)
Test data shape: (160, 4096)


In [16]:
# Grid search for Hyperparameter tuning
from sklearn import svm
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV

# Create a pipeline with PCA and SVM
pipe = Pipeline(steps=[
    ('pca', PCA()),
    ('svm', svm.SVC())
])

# Define the parameter grid for GridSearch
param_grid = {
    'pca__n_components': [10, 20, 30, 50],  # Number of PCA components
    'svm__C': [0.1, 1, 10],                 # Regularization parameter for SVM
    'svm__kernel': ['linear', 'rbf']         # Kernel types
}

# Initialize GridSearchCV
grid_search = GridSearchCV(pipe, param_grid, cv=5, scoring='accuracy')

# Fit the model on training data
grid_search.fit(X_train, y_train)
print(grid_search.best_params_)
print(grid_search.best_score_)

{'pca__n_components': 50, 'svm__C': 10, 'svm__kernel': 'linear'}
0.9625


In [17]:
# Evaluate model

# Get the best model from GridSearch
best_model = grid_search.best_estimator_

# Predict on train data
train_predictions = best_model.predict(X_train)
# Predict on test data
test_predictions = best_model.predict(X_test)

# Evaluate accuracy
train_accuracy = accuracy_score(y_train, train_predictions)
print(f"Train Accuracy: {train_accuracy * 100:.4f}%")

test_accuracy = accuracy_score(y_test, test_predictions)
print(f"Test Accuracy: {test_accuracy * 100:.4f}%")

Train Accuracy: 100.0000%
Test Accuracy: 99.3750%
