# Importing all the necessary libraries

In [2]:
import cv2
import os
import numpy as np
from skimage.color import rgb2gray
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, accuracy_score

# Loading the dataset

In [3]:
def load_images_from_folder(folder):
    images = []
    labels = []
    for class_label, class_folder in enumerate(os.listdir(folder)):
        class_path = os.path.join(folder, class_folder)
        for filename in os.listdir(class_path):
            img = cv2.imread(os.path.join(class_path, filename))
            if img is not None:
                img = cv2.resize(img, (224, 224))  # Resize images
                images.append(img)
                labels.append(class_label)
    return np.array(images), np.array(labels)

data_folder = 'Binary_Data'
images, labels = load_images_from_folder(data_folder)
images = images / 255.0  # Normalize pixel values

# Feature Extraction

In [4]:
def extract_hog_features(images):
    hog_features = []
    for image in images:
        gray_image = rgb2gray(image)
        feature = hog(gray_image, pixels_per_cell=(8, 8), cells_per_block=(2, 2))
        hog_features.append(feature)
    return np.array(hog_features)

hog_features = extract_hog_features(images)

# Data Splitting

In [5]:
# Splitting the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(hog_features, labels, test_size=0., random_state=1, stratify=labels)

# Creating pipelines

In [6]:
# SVM Pipeline
svm_pipeline = Pipeline([
    ('scaler', StandardScaler()), 
    ('svm', SVC()) 
])

# KNN Pipeline
knn_pipeline = Pipeline([
    ('scaler', StandardScaler()),  
    ('knn', KNeighborsClassifier())  
])

# Random Forest Pipeline
rf_pipeline = Pipeline([
    ('scaler', StandardScaler()),  
    ('rf', RandomForestClassifier()) 
])

# Training and evaluating the models

In [7]:
pipelines = [svm_pipeline, knn_pipeline, rf_pipeline]
pipeline_names = ['SVM', 'KNN', 'Random Forest']

for name, pipeline in zip(pipeline_names, pipelines):
    print(f"Training {name} model...")
    pipeline.fit(X_train, y_train)  
    y_pred = pipeline.predict(X_test)  
    accuracy = accuracy_score(y_test, y_pred)  
    print(f"{name} Model Accuracy: {accuracy * 100:.2f}%")
    print(f"Classification Report for {name}:\n{classification_report(y_test, y_pred)}\n")

Training SVM model...
SVM Model Accuracy: 80.86%
Classification Report for SVM:
              precision    recall  f1-score   support

           0       0.80      0.82      0.81      1332
           1       0.81      0.80      0.81      1322

    accuracy                           0.81      2654
   macro avg       0.81      0.81      0.81      2654
weighted avg       0.81      0.81      0.81      2654


Training KNN model...
KNN Model Accuracy: 66.65%
Classification Report for KNN:
              precision    recall  f1-score   support

           0       0.61      0.96      0.74      1332
           1       0.91      0.37      0.52      1322

    accuracy                           0.67      2654
   macro avg       0.76      0.67      0.63      2654
weighted avg       0.76      0.67      0.63      2654


Training Random Forest model...
Random Forest Model Accuracy: 74.38%
Classification Report for Random Forest:
              precision    recall  f1-score   support

           0       

# Saving the models

In [None]:
# Saving each trained pipeline to a file
for name, pipeline in zip(pipeline_names, pipelines):
    filename = f"{name}_binary.pkl" 
    joblib.dump(pipeline, filename)  
    print(f"{name} model saved as {filename}")