<h1 align='center'>MNIST Classification</h1>
<h1 align='center'>Using SIFT Features and an SVM Model</h1>


# Download Data From Google Drive

In [1]:
# Switch to /content so the download lands where later cells look for it
# (they reference absolute /content/... paths).
%cd '/content'
# Fetch the file behind this Google Drive id with gdown.
# NOTE(review): the saved output of this cell reports a different Drive id and
# the file "Animal Face Dataset.zip", while a later cell extracts
# /content/mnist_png.tar.gz -- confirm this id points at the MNIST archive.
!gdown https://drive.google.com/uc?id=1AGfKBtuke51vOih-3Kb_fsfSxvqWQb7D

/content
Downloading...
From: https://drive.google.com/uc?id=1uG05PyClwmZWgxYdUFztDYUB7GMZ8L-a
To: /content/Animal Face Dataset.zip
100% 127M/127M [00:01<00:00, 88.7MB/s]


In [None]:
!mkdir MNIST_Image

In [None]:
!tar -xzvf '/content/mnist_png.tar.gz'

# Importing the required libraries

In [None]:
import os
import cv2
import numpy as np
import time
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
!pip install opencv-contrib-python==4.4.0.44

Collecting opencv-contrib-python==4.4.0.44
  Downloading opencv_contrib_python-4.4.0.44-cp37-cp37m-manylinux2014_x86_64.whl (55.7 MB)
[K     |████████████████████████████████| 55.7 MB 53 kB/s 
Installing collected packages: opencv-contrib-python
  Attempting uninstall: opencv-contrib-python
    Found existing installation: opencv-contrib-python 4.1.2.30
    Uninstalling opencv-contrib-python-4.1.2.30:
      Successfully uninstalled opencv-contrib-python-4.1.2.30
Successfully installed opencv-contrib-python-4.4.0.44


In [None]:
# Display the version of the cv2 module currently loaded in this kernel.
# NOTE(review): the saved output shows '4.1.2' although 4.4.0.44 was just
# installed -- presumably cv2 was imported before the install and the runtime
# was not restarted; confirm the expected version appears after a restart.
cv2.__version__

'4.1.2'

# Preparing the dataset 

In [None]:
# Root of the extracted training images: one sub-folder per digit class (0-9).
path = '/content/mnist_png/training'

# Flat list of every training image path, grouped by digit folder.
image_path = []
for i in range(10):
  # Named class_dir rather than dir so the builtin dir() is not shadowed.
  class_dir = os.path.join(path, str(i))
  # os.listdir returns entries in arbitrary order; sorting makes the sample
  # order (and therefore any seeded split downstream) reproducible.
  for file_name in sorted(os.listdir(class_dir)):
    image_path.append(os.path.join(class_dir, file_name))

In [None]:
def main(thresh, seed=None):
  '''
  Run the SIFT -> bag-of-visual-words -> SVM experiment once and score it.

  Every file in the module-level image_path list is read as a grayscale
  image and described with SIFT. All descriptors are clustered with k-means
  into a vocabulary of k = 150 centres, each image is turned into a histogram
  counting how many of its descriptors fall nearest to each centre, and an
  SVC is trained and evaluated on a random 80/20 split of those histograms.

  Parameters
  ----------
  thresh : int
      Passed as the first positional argument of SIFT_create, which OpenCV
      documents as nfeatures (the number of best keypoints to retain).
  seed : int or None, optional
      When given, seeds OpenCV's RNG (used by k-means with random centres)
      and the train/test split so a run can be repeated. The default None
      leaves both unseeded, as before.

  Returns
  -------
  tuple
      (accuracy in percent, confusion matrix, elapsed wall-clock seconds).

  Raises
  ------
  ValueError
      If no image in image_path yields any SIFT descriptor.
  '''
  t0 = time.time()

  if seed is not None:
    cv2.setRNGSeed(seed)

  # Prefer the main-module factory (available since OpenCV 4.4) and fall back
  # to the contrib location used by older builds.
  sift_factory = getattr(cv2, 'SIFT_create', None)
  if sift_factory is None:
    sift_factory = cv2.xfeatures2d.SIFT_create
  # One detector is enough; it is reused for every image.
  sift = sift_factory(thresh)

  def CalcFeatures(img):
    '''Return the SIFT descriptor array of img, or None when there is none.'''
    _kp, des = sift.detectAndCompute(img, None)
    return des

  '''
  Each image is read and described exactly once. Images without descriptors
  are skipped. For the others we keep the descriptors, how many there are
  (to recover per-image slices after stacking) and the class label.
  '''

  features = []
  counts = []
  labels = []
  for file_path in image_path:
    img = cv2.imread(file_path, 0)
    img_des = CalcFeatures(img)
    if img_des is None:
      continue
    features.append(img_des)
    counts.append(img_des.shape[0])
    # The label is the name of the digit folder holding the file; this does
    # not depend on the length of the dataset root path.
    labels.append(int(os.path.basename(os.path.dirname(file_path))))
  if not features:
    raise ValueError('No SIFT descriptors were found in any image of image_path')
  features = np.vstack(features)

  '''
  K-Means clustering is then performed on the stacked descriptor array. The
  centres obtained after clustering form the visual vocabulary used for
  bagging of features.
  NOTE(review): the vocabulary is fitted on all images, including those that
  later end up in the held-out split.
  '''

  k = 150
  criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 0.1)
  flags = cv2.KMEANS_RANDOM_CENTERS
  _compactness, _assignments, centres = cv2.kmeans(features, k, None, criteria, 10, flags)

  '''
  The bag_of_features function assigns every descriptor of one image to its
  nearest cluster centre and counts the assignments per centre, thus forming
  a Bag of Words representation.
  '''

  def bag_of_features(descriptors, centres):
    '''Return a (1, number of centres) histogram of nearest-centre counts.'''
    # Pairwise differences via broadcasting: (n_desc, n_centres, dim).
    diff = descriptors[:, np.newaxis, :] - centres[np.newaxis, :, :]
    # The square root is monotonic, so squared distances give the same
    # nearest centre.
    nearest = (diff ** 2).sum(axis=2).argmin(axis=1)
    hist = np.bincount(nearest, minlength=centres.shape[0])
    return hist.astype(np.float64).reshape(1, -1)

  # Cut the stacked array back into per-image slices (views, no copy).
  offsets = np.cumsum(counts)[:-1]
  vec = np.vstack([bag_of_features(img_des, centres)
                   for img_des in np.split(features, offsets)])

  '''
  Splitting the data formed into train and test data and training the
  SVM Classifier.
  '''

  X_train, X_test, y_train, y_test = train_test_split(
      vec, labels, test_size=0.2, random_state=seed)
  clf = SVC()
  clf.fit(X_train, y_train)
  preds = clf.predict(X_test)
  acc = accuracy_score(y_test, preds)
  conf_mat = confusion_matrix(y_test, preds)

  t1 = time.time()

  return acc*100, conf_mat, (t1-t0)

In [None]:
# Sweep the SIFT setting over 5, 10, 15, 20 and 25; for every value run the
# whole pipeline and record its accuracy and run time for later comparison.
accuracy = []
timer = []
for i in range(5, 26, 5):
  print('\nCalculating for a threshold of {}'.format(i))
  data = main(i)
  # main returns (accuracy in percent, confusion matrix, elapsed seconds).
  acc_pct, conf_mat, elapsed = data
  accuracy.append(acc_pct)
  timer.append(elapsed)
  print('\nAccuracy = {}\nTime taken = {} sec\nConfusion matrix :\n{}'.format(acc_pct, elapsed, conf_mat))


Calculating for a threshold of 5

Accuracy = 68.97228144989339
Time taken = 731.6989665031433 sec
Confusion matrix :
[[1006    5   14    5    1   25   55   17   20   17]
 [   2 1162   12    1   18    7   14   22    0    4]
 [  68    1  684   77   50   34   48  166   66   23]
 [  30    3   92  808   33  124   31   25   32   10]
 [   9    3   41   35  845   37   26   53   45   75]
 [  70    4   34   66   37  587   76   51   29   54]
 [  82   12   28   21   20   61  620   53   37  207]
 [  29   55  114   26   55   30   55  857    9   18]
 [  52    2   47   31   54   32   75   19  781   57]
 [  47    6   24   17   80   44  138   25   79  737]]

Calculating for a threshold of 10

Accuracy = 76.52878464818762
Time taken = 736.3295772075653 sec
Confusion matrix :
[[ 988    5   19    4    0   13   38   20   21   12]
 [   1 1132    9    1    9    2    4   28    1    3]
 [  41    3  837   37   23   30   26  103   55   14]
 [  11    4   64  903   19   88   19   40   37   15]
 [   5    6   29   1