# Imports

In [1]:
# imports
import os
import shutil
import cv2
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Logistic Regression
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc

# Load images and assign labels

In [7]:
# Locate the per-class image directories inside the dataset root
bike_folder, car_folder = (
    os.path.join('Car-Bike-Dataset', class_dir) for class_dir in ('Bike', 'Car')
)
# Enumerate the file names found in each class directory
bike_images = os.listdir(bike_folder)
car_images = os.listdir(car_folder)

# Load every image in a folder and attach the class label given by the folder name (Bike = 0, Car = 1)
def read_images_and_labels(path, label, size=(300, 200)):
    """Load every readable image in a folder as a resized grayscale array.

    Parameters
    ----------
    path : str
        Directory whose entries are read with ``cv2.imread``.
    label : int
        Class label attached to every image loaded from ``path``.
    size : tuple of int, optional
        Target ``(width, height)`` handed to ``cv2.resize``.
        Defaults to ``(300, 200)``.

    Returns
    -------
    images : np.ndarray
        Grayscale images stacked along the first axis, shape
        ``(n_images, height, width)``. An empty folder yields shape
        ``(0, height, width)`` so the result can still be concatenated.
    labels : np.ndarray
        ``label`` repeated once per loaded image, shape ``(n_images,)``.

    Notes
    -----
    Entries that ``cv2.imread`` cannot decode are skipped with a warning
    instead of crashing the whole load.
    """
    import warnings

    width, height = size
    images = []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split that follows) stable across machines.
    for file_name in sorted(os.listdir(path)):
        file_path = os.path.join(path, file_name)
        img = cv2.imread(file_path)
        if img is None:
            # imread signals an unreadable / non-image entry by returning None;
            # passing that on to cv2.resize would raise.
            warnings.warn(f"Skipping unreadable image: {file_path}")
            continue
        img = cv2.resize(img, (width, height))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        images.append(img)

    if images:
        image_array = np.array(images)
    else:
        # Keep the trailing dimensions so downstream concatenate/reshape still work
        image_array = np.empty((0, height, width), dtype=np.uint8)
    # One label per successfully loaded image
    label_array = np.full(len(images), label)
    return image_array, label_array

# Load both classes from the folder paths defined earlier in this cell
# (Bike -> label 0, Car -> label 1) instead of repeating the path strings
bike_images, bike_labels = read_images_and_labels(bike_folder, 0)
car_images, car_labels = read_images_and_labels(car_folder, 1)

# Stack both classes into one dataset; images and labels are joined in the
# same (car, bike) order so labels[i] always describes images[i]
images = np.concatenate((car_images, bike_images))
labels = np.concatenate((car_labels, bike_labels))
assert len(images) == len(labels), "every image must have exactly one label"


# Split data into train and test

Train and Test division

In [8]:
# Hold out 20% of the samples as the test set; the fixed seed makes the split repeatable
train_images, test_images, train_labels, test_labels = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
)

# Per-class sample counts in each split (index 0 = Bike, index 1 = Car)
print("Train labels distribution:", np.bincount(train_labels))
print("Test labels distribution:", np.bincount(test_labels))

Train labels distribution: [1622 1578]
Test labels distribution: [378 422]


Train, Validation and Test division

In [9]:
# Three-way split: 80% train, 10% validation, 10% test
# Step 1 - keep 80% for training and park the remaining 20% in a temporary pool
train_images_val, temp_images, train_labels_val, temp_labels = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
)

# Step 2 - cut the temporary pool in half: one half validation, one half test
val_images, test_images_val, val_labels, test_labels_val = train_test_split(
    temp_images,
    temp_labels,
    test_size=0.5,
    random_state=42,
)

# Per-class sample counts in each split (index 0 = Bike, index 1 = Car)
print("Train labels in val distribution:", np.bincount(train_labels_val))
print("Val labels distribution:", np.bincount(val_labels))
print("Test labels in val distribution:", np.bincount(test_labels_val))

Train labels in val distribution: [1622 1578]
Val labels distribution: [191 209]
Test labels in val distribution: [187 213]


# Reshape images to match Logistic Regression format

Train and test

In [10]:
# Logistic regression expects one feature vector per sample, so collapse each
# image's 2D pixel grid into a single row (one row per image)
train_images_flat = train_images.reshape(len(train_images), -1)
test_images_flat = test_images.reshape(len(test_images), -1)

Train, validation and test

In [11]:
# Same flattening for the three-way split: one row of pixels per image
train_images_vflat = train_images_val.reshape(len(train_images_val), -1)
val_images_flat = val_images.reshape(len(val_images), -1)
test_images_vflat = test_images_val.reshape(len(test_images_val), -1)

# Normalize data

Train and test

In [12]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks into the fit
scaler = StandardScaler()
scaler.fit(train_images_flat)
train_images_scaled = scaler.transform(train_images_flat)
test_images_scaled = scaler.transform(test_images_flat)

Train, validation and test

In [13]:
# Learn the scaling statistics from the training split only, then reuse them
# for the validation and test splits
scaler = StandardScaler()
scaler.fit(train_images_vflat)
train_images_vscaled = scaler.transform(train_images_vflat)
val_images_scaled = scaler.transform(val_images_flat)
test_images_vscaled = scaler.transform(test_images_vflat)

# PCA - reduce dimensions

Train and test

In [14]:
# Apply PCA to reduce dimensionality: keep the 50 leading principal components.
# random_state is pinned because, with the default svd_solver='auto',
# scikit-learn may select its randomized SVD solver for data of this size,
# which would make the components (and every downstream metric) vary per run.
pca = PCA(n_components=50, random_state=42)
# Fit the projection on the training split only, then reuse it for the test split
train_images_pca = pca.fit_transform(train_images_scaled)
test_images_pca = pca.transform(test_images_scaled)

Train, validation and test

In [15]:
# Apply PCA to reduce dimensionality: keep the 50 leading principal components.
# random_state is pinned because, with the default svd_solver='auto',
# scikit-learn may select its randomized SVD solver for data of this size,
# which would make the components (and every downstream metric) vary per run.
pca = PCA(n_components=50, random_state=42)
# Fit the projection on the training split only, then reuse it for validation and test
train_images_vpca = pca.fit_transform(train_images_vscaled)
val_images_pca = pca.transform(val_images_scaled)
test_images_vpca = pca.transform(test_images_vscaled)

# Train model

Train and test

In [16]:
# Fit a logistic-regression classifier on the PCA-reduced training features,
# allowing the solver up to 500 iterations
model = LogisticRegression(max_iter=500)
model.fit(X=train_images_pca, y=train_labels)

Train, validation and test

In [17]:
# Fit a second logistic-regression classifier on the training part of the
# train/validation/test split, allowing the solver up to 500 iterations
model_val = LogisticRegression(max_iter=500)
model_val.fit(X=train_images_vpca, y=train_labels_val)

# Evaluation

Train and test

In [18]:
# Predict the held-out test split and report overall accuracy as a percentage
y_pred = model.predict(test_images_pca)
accuracy = accuracy_score(y_true=test_labels, y_pred=y_pred)
print("Accuracy: {:.2f}%".format(100 * accuracy))
# Per-class precision / recall / F1 for a closer look at the errors
print(
    "Classification Report (Test):\n",
    classification_report(y_true=test_labels, y_pred=y_pred),
)

Accuracy: 74.25%
Classification Report (Test):
               precision    recall  f1-score   support

           0       0.73      0.73      0.73       378
           1       0.76      0.75      0.76       422

    accuracy                           0.74       800
   macro avg       0.74      0.74      0.74       800
weighted avg       0.74      0.74      0.74       800



Train, validation and test

In [19]:

# Validation split: predictions and overall accuracy
val_pred = model_val.predict(val_images_pca)
val_accuracy = accuracy_score(y_true=val_labels, y_pred=val_pred)
print("Validation Accuracy: {:.2f}%".format(100 * val_accuracy))

# Test split: predictions and overall accuracy
test_pred = model_val.predict(test_images_vpca)
test_accuracy = accuracy_score(y_true=test_labels_val, y_pred=test_pred)
print("Test Accuracy: {:.2f}%".format(100 * test_accuracy))

# Per-class precision / recall / F1 for both splits, followed by the
# confusion matrix of the test split
print(
    "Classification Report (Validation):\n",
    classification_report(y_true=val_labels, y_pred=val_pred),
)
print(
    "Classification Report (Test):\n",
    classification_report(y_true=test_labels_val, y_pred=test_pred),
)
print(
    "Confusion Matrix:\n",
    confusion_matrix(y_true=test_labels_val, y_pred=test_pred),
)


Validation Accuracy: 73.25%
Test Accuracy: 75.25%
Classification Report (Validation):
               precision    recall  f1-score   support

           0       0.72      0.73      0.72       191
           1       0.75      0.74      0.74       209

    accuracy                           0.73       400
   macro avg       0.73      0.73      0.73       400
weighted avg       0.73      0.73      0.73       400

Classification Report (Test):
               precision    recall  f1-score   support

           0       0.73      0.74      0.74       187
           1       0.77      0.76      0.77       213

    accuracy                           0.75       400
   macro avg       0.75      0.75      0.75       400
weighted avg       0.75      0.75      0.75       400

Confusion Matrix:
 [[139  48]
 [ 51 162]]
