In [None]:
# ECSE 415 - Assignment 3:  Classifiers, Object Recognition
## Theo Ghanem 260972584
## 1 CIFAR10 Classification using SVM and Random Forest (50 points)
### 1. Resize the train/test images to 64x64 and convert them to grayscale images. Compute HoG features with cells of 8x8 pixels, blocks of 4x4 cells, and 4 bins. This should generate a feature vector of size 1600 per image, which can be regarded as features for training classifiers.

import os
import numpy as np
import pickle
import cv2
import matplotlib.pyplot as plt
from more_itertools import locate
# from google.colab import drive

# SK-Learn
from skimage.feature import hog
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

# Google Colab alternative: uncomment the drive import above and the mount call below.
# drive.mount('/content/drive')
#
# Directory holding the CIFAR-10 python batches. Set the CIFAR10_DIR environment
# variable to use another copy of the dataset; the path below is the fallback.
# os.path.join(..., '') guarantees a trailing separator, because the loading code
# builds file names by plain concatenation (filepath + "test_batch").
filepath = os.path.join(
  os.environ.get(
    'CIFAR10_DIR',
    '/Users/theog/Documents/CodingProjects/ECSE415_Computer_Vision/Assignments/A3/A3-W24-images/cifar-10-python/cifar-10-batches-py/',
  ),
  '',
)

def unpickle(file):
  """Load and return the object stored in the pickle file at path *file*.

  The file is opened in binary mode and unpickled with ``encoding='bytes'``,
  so 8-bit strings written by Python 2 come back as ``bytes``; this is why
  the rest of this file indexes the result with keys such as ``b'data'``.

  NOTE(security): unpickling can execute arbitrary code. Only call this on
  files from a trusted source.
  """
  with open(file, 'rb') as fo:
    # Named ``batch`` rather than ``dict`` to avoid shadowing the builtin.
    batch = pickle.load(fo, encoding='bytes')
  return batch

def resize_and_convert_to_grayscale(images, size=(64, 64)):
  """Resize every image to *size* and convert it to single-channel grayscale.

  Args:
    images: iterable of 3-channel images in RGB channel order (the order
      produced by the CIFAR-10 reshape/transpose used in this file).
    size: target size handed to ``cv2.resize``; defaults to 64x64.

  Returns:
    np.ndarray holding the grayscale images in input order.
  """
  # The inputs are RGB, so the conversion code must be COLOR_RGB2GRAY.
  # COLOR_BGR2GRAY would apply the red and blue luminance weights to the
  # wrong channels and yield slightly wrong intensities.
  return np.array([
    cv2.cvtColor(cv2.resize(image, size), cv2.COLOR_RGB2GRAY)
    for image in images
  ])

# Read the first training batch and the test batch from disk.
rawdata = unpickle(filepath + "data_batch_1")
rawtestdata = unpickle(filepath + "test_batch")

# Reshape every flat row to (3, 32, 32), then move the channel axis last so
# each image ends up as a 32x32x3 uint8 array.
X_trn = rawdata[b'data'].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("uint8")
X_test = rawtestdata[b'data'].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1).astype("uint8")

# Resize both splits to 64x64 and convert them to grayscale.
trainImageData = resize_and_convert_to_grayscale(X_trn)
testImageData = resize_and_convert_to_grayscale(X_test)

#Compute HoG features with cells of 8x8 pixels, blocks of 4x4 cells, and 4 bins. This should generate a feature vector of size 1600 per image, which can be regarded as features for training classifiers.
def compute_hog_features(images, cell_size=(8, 8), block_size=(4, 4), bins=4):
  """Return the HoG descriptor of every image, stacked into one array.

  Args:
    images: iterable of images to describe.
    cell_size: pixels per HoG cell, forwarded as ``pixels_per_cell``.
    block_size: cells per normalization block, forwarded as ``cells_per_block``.
    bins: number of orientation bins, forwarded as ``orientations``.

  Returns:
    np.ndarray with one descriptor per input image, in input order.
  """
  descriptors = [
    hog(img, orientations=bins, pixels_per_cell=cell_size, cells_per_block=block_size)
    for img in images
  ]
  return np.array(descriptors)

# HoG descriptors for both splits.
trainFeatures = compute_hog_features(trainImageData)
testFeatures = compute_hog_features(testImageData)

# Print the shape of each feature matrix, training set first, one per line.
print(trainFeatures.shape, testFeatures.shape, sep="\n")
### 2. Fit a non-linear SVM classifier with default hyperparameters on the features and the class features of the training images.
# RBF-kernel SVM with default hyperparameters; fit() returns the fitted
# estimator, so construction and training are chained.
clf = svm.SVC(kernel='rbf').fit(trainFeatures, rawdata[b'labels'])
### 3. Predict labels of the test images by feeding the test features to the trained classifier and calculate classification accuracy.

# Classify the test descriptors and score them against the test-batch labels.
predictedLabels = clf.predict(testFeatures)
accuracy = accuracy_score(y_true=rawtestdata[b'labels'], y_pred=predictedLabels)
print("Classification accuracy: ", accuracy)
### 4. Tune values of hyperparameters ’gamma’ and ’C’ to observe the accuracy change and select the hyperparameters with the highest test accuracy. Display your fine-tuning process by listing all the test cases with their parameter and corresponding accuracy.

# Candidate values; GridSearchCV evaluates every (C, gamma) pair with
# cross-validation on the training data only.
param_grid = {'C': [0.1, 1, 10, 100], 'gamma': [1, 0.1, 0.01, 0.001]}
grid = GridSearchCV(svm.SVC(), param_grid, refit=True, verbose=2)
grid.fit(trainFeatures, rawdata[b'labels'])

# List every tested configuration with its mean cross-validated accuracy so
# the whole tuning process is visible, not just the winner.
for params, mean_score in zip(grid.cv_results_['params'],
                              grid.cv_results_['mean_test_score']):
  print("C={}, gamma={}: mean CV accuracy = {:.4f}".format(
    params['C'], params['gamma'], mean_score))

print(grid.best_params_)
print(grid.best_estimator_)

# refit=True retrained the best configuration on the full training set, so
# grid.score evaluates that model; report its accuracy on the test set.
print("Best estimator test accuracy: ", grid.score(testFeatures, rawtestdata[b'labels']))
### 5. Fit a Random Forest(RF) classifier (set n_estimators=10, max_depth=5, and criterion=’entropy’) on the features and the class labels of the training images.

# Small random forest (10 trees, depth <= 5, entropy splits); fit() returns the
# fitted estimator, so construction and training are chained.
rf = RandomForestClassifier(
  criterion='entropy',
  max_depth=5,
  n_estimators=10,
).fit(trainFeatures, rawdata[b'labels'])
### 6. Predict labels of the test images by feeding the test features to the trained classifier and calculate classification accuracy.

# Classify the test descriptors with the forest and score against the test-batch labels.
predictedLabelsRF = rf.predict(testFeatures)
accuracyRF = accuracy_score(y_true=rawtestdata[b'labels'], y_pred=predictedLabelsRF)
print("Classification accuracy: ", accuracyRF)
### 7. Compare the performance of SVM and RF. Experiment training both classifiers with a range of random states(different values for random_state). Evaluate the stability within the random state. List the strengths and weaknesses of each model.

# The labels are the same for every run, so look them up once outside the loop.
train_labels = rawdata[b'labels']
test_labels = rawtestdata[b'labels']

svm_accuracies = []
rf_accuracies = []
for i in range(10):
  # Train SVM classifier.
  # NOTE: scikit-learn documents SVC's random_state as ignored when
  # probability=False (the default used here), so identical SVM accuracies
  # across seeds are expected.
  clf = svm.SVC(kernel='rbf', random_state=i)
  clf.fit(trainFeatures, train_labels)
  predictedLabels = clf.predict(testFeatures)
  accuracy = accuracy_score(test_labels, predictedLabels)
  svm_accuracies.append(accuracy)

  # Train RF classifier; the seed changes the bootstrap samples and the
  # features considered at each split, so accuracy varies between runs.
  rf = RandomForestClassifier(n_estimators=10, max_depth=5, criterion='entropy', random_state=i)
  rf.fit(trainFeatures, train_labels)
  predictedLabelsRF = rf.predict(testFeatures)
  accuracyRF = accuracy_score(test_labels, predictedLabelsRF)
  rf_accuracies.append(accuracyRF)

print("SVM accuracies: ", svm_accuracies)
print("RF accuracies: ", rf_accuracies)

# Quantify stability across seeds: a smaller standard deviation means a more stable model.
print("SVM accuracy mean/std: ", np.mean(svm_accuracies), np.std(svm_accuracies))
print("RF accuracy mean/std: ", np.mean(rf_accuracies), np.std(rf_accuracies))

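# Strengths and weaknesses of each model
# SVM:
# Strengths:
# - Effective in high dimensional spaces
# - Memory efficient at prediction time (the decision function uses only the support vectors)
# - Versatile: kernels such as RBF let it handle non-linear problems
# - Stable: training is deterministic, so accuracy does not change with random_state
# Weaknesses:
# - Training scales poorly to large datasets
# - Sensitive to the choice of hyperparameters (C, gamma)
# - Can overfit when C or gamma is set too high
# RF:
# Strengths:
# - Effective in high dimensional spaces and on non-linear problems
# - Fast to train, and the trees can be built in parallel
# - Versatile
# - Averaging many trees makes it less sensitive to overfitting than a single decision tree
# Weaknesses:
# - Accuracy varies with random_state, especially with few, shallow trees (n_estimators=10, max_depth=5)
# - Memory use and prediction time grow with the number and depth of the trees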