In [None]:
!pip install gdown

In [None]:
!gdown --fuzzy https://drive.google.com/file/d/1VSIMAR3-2fXTEy-QdY2d0M_-aC1aXfWp/view
!unzip Classification_data.zip

In [None]:
import os
import cv2 as cv
import random
import numpy as np
import xgboost as xgb
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [None]:
# Work from inside the extracted dataset directory so that the relative "train" and
# "test" paths used later resolve. The guard makes the cell safe to re-run: without
# it a second execution raises FileNotFoundError because we are already inside.
if os.path.basename(os.getcwd()) != "Classification_data":
    os.chdir("Classification_data")
# Last expression of the cell, so the resulting working directory is displayed.
os.getcwd()

In [None]:
# Get image names and image labels in the list
def get_image_names_and_labels(split):
  """Collect image file names and their class labels for one dataset split.

  The split directory is expected to contain one sub-directory per class; the
  sub-directory name is used as the label of every file found inside it.

  Args:
    split: Path (str or Path) of the split directory, e.g. "train".

  Returns:
    A tuple (image_names_all, image_labels_all) of two equal-length lists where
    image_labels_all[i] is the class folder that contains image_names_all[i].
    Entries are ordered by class folder name, then by file name.
  """
  split_dir = Path(split)
  image_names_all = []
  image_labels_all = []

  # os.listdir returns entries in arbitrary order; sorting makes the sample
  # order reproducible across runs and machines.
  for folder in sorted(os.listdir(split_dir)):
    class_dir = split_dir / folder
    # Skip stray files and hidden entries (e.g. .DS_Store, .ipynb_checkpoints):
    # a plain file would raise NotADirectoryError below and a hidden directory
    # would silently become an extra class.
    if folder.startswith(".") or not class_dir.is_dir():
      continue
    image_names = sorted(
        name for name in os.listdir(class_dir)
        if not name.startswith(".") and (class_dir / name).is_file()
    )
    image_names_all.extend(image_names)
    image_labels_all.extend([folder] * len(image_names))
  return image_names_all, image_labels_all

def get_images(image_names_all,
               image_labels_all,
               resized_img_width,
               resized_img_height,
               split):
  """Load the listed images as grayscale and resize each one.

  Each image is read from <split>/<label>/<name>, where label and name are the
  entries at the same position of image_labels_all and image_names_all.

  Args:
    image_names_all: File names of the images.
    image_labels_all: Class folder name of each image, aligned with image_names_all.
    resized_img_width: Target width passed to cv.resize.
    resized_img_height: Target height passed to cv.resize.
    split: Path (str or Path) of the split directory, e.g. "train".

  Returns:
    A list with one resized grayscale image per input entry, in input order.

  Raises:
    ValueError: If the name and label lists differ in length.
    OSError: If an image file cannot be read or decoded.
  """
  if len(image_names_all) != len(image_labels_all):
    raise ValueError(
        f"Got {len(image_names_all)} image names but {len(image_labels_all)} labels")

  split_dir = Path(split)
  target_size = (resized_img_width, resized_img_height)  # cv.resize takes (width, height)
  images = []
  for label, name in zip(image_labels_all, image_names_all):
    image_path = split_dir / label / name
    gray = cv.imread(str(image_path), cv.IMREAD_GRAYSCALE)
    # cv.imread signals failure by returning None instead of raising; without
    # this check the error surfaces later as an opaque assertion in cv.resize.
    if gray is None:
      raise OSError(f"Could not read image: {image_path}")
    images.append(cv.resize(gray, target_size))
  return images


In [None]:
# For each split: list (file name, class folder) pairs, then load every image
# as a 224x224 grayscale array.
image_names_train, image_labels_train = get_image_names_and_labels(split="train")
images_train = get_images(
    image_names_all=image_names_train,
    image_labels_all=image_labels_train,
    resized_img_width=224,
    resized_img_height=224,
    split="train",
)
image_names_test, image_labels_test = get_image_names_and_labels(split="test")
images_test = get_images(
    image_names_all=image_names_test,
    image_labels_all=image_labels_test,
    resized_img_width=224,
    resized_img_height=224,
    split="test",
)

In [None]:
# HOG geometry. The detection window spans the whole resized image, so each image
# yields exactly one descriptor vector.
resize_shape = (224, 224)
window_size = resize_shape
cell_size = (8, 8)        # cell size in pixels
block_size = (2, 2)       # block size in cells; converted to pixels below
block_stride = cell_size  # blocks advance by one cell at a time

hog_descriptor = cv.HOGDescriptor(
    window_size,
    tuple(cells * pixels for cells, pixels in zip(block_size, cell_size)),  # (16, 16) pixels
    block_stride,
    cell_size,
    9,  # number of orientation bins
)

In [None]:
# Replace every image, in place, with its HOG descriptor. Overwriting slot by slot
# lets each raw image be released as soon as its features exist.
# NOTE: this cell is not idempotent; re-running it would feed descriptors back into
# hog_descriptor.compute. Reload the images first if it must be run again.
for image_list in (images_train, images_test):
    for position, gray_image in enumerate(image_list):
        image_list[position] = hog_descriptor.compute(gray_image)

In [None]:
# Map each class name to an integer index. The names are sorted first because
# iterating a bare set of strings follows hash order, which varies between
# interpreter sessions; without sorting, the same class could receive a different
# index on every kernel restart.
labels_set = {label: index for index, label in enumerate(sorted(set(image_labels_train)))}

In [None]:
# Swap each class name for its integer index from labels_set. A test label that
# never occurs in the training split raises KeyError here.
image_labels_train = [labels_set[label] for label in image_labels_train]
image_labels_test = [labels_set[label] for label in image_labels_test]

In [None]:
# Convert lists to numpy: feature lists become X_*, integer label lists become y_*.
X_train, y_train = np.array(images_train), np.array(image_labels_train)
X_test, y_test = np.array(images_test), np.array(image_labels_test)

In [None]:
# Sanity check: each X_* must have as many rows as its matching y_* has entries.
for array in (X_train, y_train, X_test, y_test):
    print(array.shape)

(14034, 26244)
(14034,)
(3000, 26244)
(3000,)


In [None]:
# Standardize the features. The scaler is fitted on the training data only and the
# same fitted transform is then applied to both splits, so no test-set statistics
# leak into preprocessing.
scaler = StandardScaler().fit(X_train)
X_train_standardized = scaler.transform(X_train)
X_test_standardized = scaler.transform(X_test)

In [None]:
# KNN Classifier: fit on the standardized training features, then predict the test split.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_standardized, y_train)
y_pred_knn = knn.predict(X_test_standardized)

In [None]:
# Linear Classifier: logistic regression on the standardized features.
# NOTE(review): the captured output of this cell shows the solver stopping at the
# iteration limit without converging, so the reported scores come from a
# non-converged model. Consider a higher max_iter or another solver.
linear_classifier = LogisticRegression(max_iter=1_000).fit(X_train_standardized, y_train)
y_pred_linear = linear_classifier.predict(X_test_standardized)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
def calc_metrics(y_test, y_pred):
  """Score a set of predictions against the true labels.

  Precision, recall and F1 are computed with average="weighted".

  Args:
    y_test: True labels.
    y_pred: Predicted labels, aligned with y_test.

  Returns:
    A tuple (accuracy, precision, recall, f1).
  """
  scores = [accuracy_score(y_test, y_pred)]
  for weighted_scorer in (precision_score, recall_score, f1_score):
    scores.append(weighted_scorer(y_test, y_pred, average="weighted"))
  accuracy, precision, recall, f1 = scores
  return accuracy, precision, recall, f1

In [None]:
# Metrics KNN
(accuracy_knn,
 precision_knn,
 recall_knn,
 f1_knn) = calc_metrics(y_test=y_test, y_pred=y_pred_knn)
# Metrics Linear
(accuracy_linear,
 precision_linear,
 recall_linear,
 f1_linear) = calc_metrics(y_test=y_test, y_pred=y_pred_linear)

In [None]:
# One line per model, in the order: accuracy, precision, recall, F1.
for model_scores in (
    (accuracy_knn, precision_knn, recall_knn, f1_knn),              # KNN metrics
    (accuracy_linear, precision_linear, recall_linear, f1_linear),  # Linear metrics
):
    print(*model_scores)

0.36933333333333335 0.5896920123022746 0.36933333333333335 0.25652627531588273
0.6696666666666666 0.6702342648525066 0.6696666666666666 0.6688538438297611
