#Image Classification using RF and SVM

In [2]:
#In this cell we handle the imports and define functions/variables that will be used
#by other parts
import cv2
import numpy as np
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt

#Finds the hogfeatures
def hogFeatures(inputImages):
  #Setup up some basic info
  img_size = (64, 64)
  cell_size = (8, 8)
  block_size = (4, 4)
  nbins = 4

  #Here we create the hog feature
  hog = cv2.HOGDescriptor(_winSize=(img_size[1] // cell_size[1] * cell_size[1],
                                  img_size[0] // cell_size[0] * cell_size[0]),
                        _blockSize=(block_size[1] * cell_size[1],
                                    block_size[0] * cell_size[0]),
                        _blockStride=(cell_size[1], cell_size[0]),
                        _cellSize=(cell_size[1], cell_size[0]),
                        _nbins=nbins)
  
  #Features will be saved in this array
  features = []

  #Pass all the resized images into the hog descriptor and save the outputs 
  #in the features array
  for i in range(len(inputImages)):
    newImage = inputImages[i].copy()
    newImage *= 255
    newImage = cv2.convertScaleAbs(newImage)
    features.append(hog.compute(newImage).reshape(1, -1))

  features = np.vstack(features)
  return features

train_images = np.load('flower_subset.npz')['train_images']
train_labels = np.load('flower_subset.npz')['train_labels']
test_images = np.load('flower_subset.npz')['test_images']
test_labels = np.load('flower_subset.npz')['test_labels']

#Resize the images
resized_train = []
resized_test = []

for i in range(len(train_images)):
  resized_train.append(cv2.resize(train_images[i], (64, 64)))

for i in range(len(test_images)):
  resized_test.append(cv2.resize(test_images[i], (64,64)))

#Pass the resized images to the HOGdescriptor function
train_hog_features = hogFeatures(resized_train)
test_hog_features = hogFeatures(resized_test)

##Support Vector Machine Method

In [None]:
#Create the SVM classifier and fit it to our data
svm_classifier = svm.SVC(gamma = 'auto', C = 1)
svm_classifier.fit(train_hog_features, train_labels)

#Now lets see our prediction accuracy
num_test_features = len(test_hog_features)
num_correct = 0;

prediction = svm_classifier.predict(test_hog_features)

for i in range(num_test_features):
  if prediction[i] == test_labels[i]:
    num_correct += 1

accuracy = num_correct / num_test_features
print('Accuracy was '+str(accuracy)+'% for C = 1. #Correct Predictions = '+str(num_correct))

#Now lets rerun the classifier with some different values for C and gamma
gamma_values = [0.001, 0.01, 0.1]

#Test for a large range of values -- depending on your hardware this might take a while
for j in gamma_values:
  for i in range(10,110, 10):
    svm_classifier = svm.SVC(gamma = j, C = i)
    svm_classifier.fit(train_hog_features, train_labels)

    num_test_features = len(test_hog_features)
    num_correct = 0;

    prediction = svm_classifier.predict(test_hog_features)

    for k in range(num_test_features):
      if prediction[k] == test_labels[k]:
        num_correct += 1

    accuracy = num_correct / num_test_features
    print('Accuracy was '+str(accuracy)+'% for C = '+str(i)+' and Gamma = '+str(j)+'. #Correct Predictions = '+str(num_correct))


Accuracy was 0.5333333333333333% for C = 1. #Correct Predictions = 48
Accuracy was 0.34444444444444444% for C = 10 and Gamma = 0.001. #Correct Predictions = 31
Accuracy was 0.43333333333333335% for C = 20 and Gamma = 0.001. #Correct Predictions = 39
Accuracy was 0.43333333333333335% for C = 30 and Gamma = 0.001. #Correct Predictions = 39
Accuracy was 0.45555555555555555% for C = 40 and Gamma = 0.001. #Correct Predictions = 41
Accuracy was 0.4444444444444444% for C = 50 and Gamma = 0.001. #Correct Predictions = 40
Accuracy was 0.43333333333333335% for C = 60 and Gamma = 0.001. #Correct Predictions = 39
Accuracy was 0.4444444444444444% for C = 70 and Gamma = 0.001. #Correct Predictions = 40
Accuracy was 0.4666666666666667% for C = 80 and Gamma = 0.001. #Correct Predictions = 42
Accuracy was 0.4666666666666667% for C = 90 and Gamma = 0.001. #Correct Predictions = 42
Accuracy was 0.4666666666666667% for C = 100 and Gamma = 0.001. #Correct Predictions = 42
Accuracy was 0.4777777777777778% f

Gamma = 0.1 and C = 100 gives us an accuracy of 53.33%

##Random Forest Method

In [None]:


#Create the classifier and fit it to our data
rf_classifier = RandomForestClassifier(n_estimators= 100, max_depth = 10, criterion= 'entropy')
rf_classifier.fit(train_hog_features, train_labels)

#View the predictions
num_correct = 0
num_test_features = len(test_hog_features)
accuracy = 0

prediction = rf_classifier.predict(test_hog_features)

for i in range(num_test_features):
  if prediction[i] == test_labels[i]:
    num_correct += 1

accuracy = num_correct / num_test_features

print('Accuracy was '+str(accuracy)+'%. #Correct Predictions = '+str(num_correct))

#Test for a large range of values -- depending on your hardware this may take a while
for j in range(5, 15, 1):
  for i in range(10, 150, 10):
    rf_classifier = RandomForestClassifier(n_estimators=i, max_depth = j, criterion='entropy')
    rf_classifier.fit(train_hog_features, train_labels)

    prediction = rf_classifier.predict(test_hog_features)
    #View the predictions
    num_correct = 0
    num_test_features = len(test_images)
    accuracy = 0

    for k in range(num_test_features):
      if prediction[k] == test_labels[k]:
        num_correct += 1

    accuracy = num_correct / num_test_features

    print('Accuracy was '+str(accuracy)+'% for n = '+str(i)+' and depth = '+str(j)+'. #Correct Predictions = '+str(num_correct))

Accuracy was 0.5555555555555556%. #Correct Predictions = 50
Accuracy was 0.35555555555555557% for n = 10 and depth = 5. #Correct Predictions = 32
Accuracy was 0.37777777777777777% for n = 20 and depth = 5. #Correct Predictions = 34
Accuracy was 0.4222222222222222% for n = 30 and depth = 5. #Correct Predictions = 38
Accuracy was 0.35555555555555557% for n = 40 and depth = 5. #Correct Predictions = 32
Accuracy was 0.36666666666666664% for n = 50 and depth = 5. #Correct Predictions = 33
Accuracy was 0.37777777777777777% for n = 60 and depth = 5. #Correct Predictions = 34
Accuracy was 0.3888888888888889% for n = 70 and depth = 5. #Correct Predictions = 35
Accuracy was 0.3333333333333333% for n = 80 and depth = 5. #Correct Predictions = 30
Accuracy was 0.3888888888888889% for n = 90 and depth = 5. #Correct Predictions = 35
Accuracy was 0.36666666666666664% for n = 100 and depth = 5. #Correct Predictions = 33
Accuracy was 0.37777777777777777% for n = 110 and depth = 5. #Correct Predictions =

N = 130 and depth = 10 gives us an accuracy of 55.55%.

##Testing the random state variable

In [7]:
#First we will test the affect on random state on the SVM classifier
randomVariables = [0,1,2,3,4,5,6,7,8,9,10]
for i in randomVariables:
  svm_classifier = svm.SVC(gamma = 0.1, C = 100, random_state=i)
  svm_classifier.fit(train_hog_features, train_labels)
  
  num_test_features = len(test_hog_features)
  num_correct = 0;

  prediction = svm_classifier.predict(test_hog_features)

  for k in range(num_test_features):
    if prediction[k] == test_labels[k]:
      num_correct += 1

  accuracy = num_correct / num_test_features
  print('Accuracy was '+str(accuracy)+'% for random state = '+str(i)+' in SVM classifier.')

#Now we will test the affect on random state on the RF classifier
for i in randomVariables:
  rf_classifier = RandomForestClassifier(n_estimators=130, max_depth = 10, criterion='entropy', random_state=i)
  rf_classifier.fit(train_hog_features, train_labels)

  prediction = rf_classifier.predict(test_hog_features)
  #View the predictions
  num_correct = 0
  num_test_features = len(test_images)
  accuracy = 0

  for k in range(num_test_features):
    if prediction[k] == test_labels[k]:
      num_correct += 1

  accuracy = num_correct / num_test_features

  print('Accuracy was '+str(accuracy)+'% random state = '+str(i)+' in RF classifier.')

Accuracy was 0.6% for random state = 0 in SVM classifier.
Accuracy was 0.6% for random state = 1 in SVM classifier.
Accuracy was 0.6% for random state = 2 in SVM classifier.
Accuracy was 0.6% for random state = 3 in SVM classifier.
Accuracy was 0.6% for random state = 4 in SVM classifier.
Accuracy was 0.6% for random state = 5 in SVM classifier.
Accuracy was 0.6% for random state = 6 in SVM classifier.
Accuracy was 0.6% for random state = 7 in SVM classifier.
Accuracy was 0.6% for random state = 8 in SVM classifier.
Accuracy was 0.6% for random state = 9 in SVM classifier.
Accuracy was 0.6% for random state = 10 in SVM classifier.
Accuracy was 0.5% random state = 0 in RF classifier.
Accuracy was 0.5111111111111111% random state = 1 in RF classifier.
Accuracy was 0.5444444444444444% random state = 2 in RF classifier.
Accuracy was 0.5% random state = 3 in RF classifier.
Accuracy was 0.5% random state = 4 in RF classifier.
Accuracy was 0.4666666666666667% random state = 5 in RF classifier

##Compare Results of SVM and RF classifiers

In our testing the RF classifier gave us a better highest accuracy at 55.55% while the SVM classifier gave us a highest accuracy of 53.33%. In testing of the random states of both classifiers it is clear that the SVM classifier is more stables to changes in the random state while the RF classifiers tends to give varied results. 