# Import libraries

In [6]:
import tensorflow as tf
import numpy as np 
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score
from keras.datasets import mnist

# Load the MNIST dataset

In [7]:
# Bind the Keras MNIST dataset module to `mnist`, then load the train and test
# splits as (images, labels) pairs.
mnist = tf.keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data() # 60000 images for training and 10000 images for testing

# Helper functions

In [8]:
def imaged_grid(img, row, col):
    """
    Split a 2-D image into non-overlapping sub-grids of ``row`` x ``col`` pixels.

    :param img: input image as a 2-D array (height x width).
    :param row: number of rows in each sub-grid; must evenly divide the image height.
    :param col: number of columns in each sub-grid; must evenly divide the image width.
    :return: array of shape (n_grids, row, col); grids are ordered left to right,
             then top to bottom.
    :raises ValueError: if ``row`` or ``col`` is not positive, or does not evenly
                        divide the corresponding image dimension.
    """
    img = np.asarray(img)
    height, width = img.shape

    if row <= 0 or col <= 0:
        raise ValueError(
            "row and col must be positive, got row={} col={}".format(row, col)
        )

    # Fail loudly instead of printing and handing back the un-split image:
    # callers iterate over the result expecting (row, col) shaped grids.
    if height % row != 0:
        raise ValueError(
            "rows not divisable: image height {} is not a multiple of {}".format(height, row)
        )
    if width % col != 0:
        raise ValueError(
            "col not divisable: image width {} is not a multiple of {}".format(width, col)
        )

    # View the image as (grid_row, row, grid_col, col), bring the two grid axes
    # together, then flatten them into a single grid index.
    blocks = img.reshape(height // row, row, width // col, col)
    return blocks.swapaxes(1, 2).reshape(-1, row, col)


In [10]:
def get_centroid(img, row, col):
    """
    Compute the intensity-weighted centroid of every sub-grid of an image.

    The image is split with ``imaged_grid(img, row, col)``; for each grid the
    centroid is the pixel-value-weighted mean of the row index and of the
    column index (both measured inside the grid, starting at 0).

    :param img: input image as a 2-D array.
    :param row: number of rows in each sub-grid.
    :param col: number of columns in each sub-grid.
    :return: 1-D float numpy array ``[Xc_0, Yc_0, Xc_1, Yc_1, ...]`` with two
             entries per grid; a grid whose pixel values sum to zero
             contributes ``(0, 0)``.
    :raises ValueError: if the image could not be split into 3-D grids.
    """
    # Work in float64 so small integer pixel dtypes (e.g. uint8) cannot wrap
    # around while the weighted sums are accumulated.
    grids = np.asarray(imaged_grid(img, row, col), dtype=np.float64)
    if grids.ndim != 3:
        raise ValueError(
            "image could not be split into {}x{} grids".format(row, col)
        )

    # Total intensity per grid, and first moments along each grid axis.
    mass = grids.sum(axis=(1, 2))
    row_moment = (grids.sum(axis=2) * np.arange(grids.shape[1])).sum(axis=1)
    col_moment = (grids.sum(axis=1) * np.arange(grids.shape[2])).sum(axis=1)

    # Avoid dividing by zero for empty grids; their centroid is defined as (0, 0).
    occupied = mass != 0
    safe_mass = np.where(occupied, mass, 1.0)

    centroids = np.zeros((grids.shape[0], 2), dtype=np.float64)
    centroids[:, 0] = np.where(occupied, row_moment / safe_mass, 0.0)
    centroids[:, 1] = np.where(occupied, col_moment / safe_mass, 0.0)

    # Interleave as Xc, Yc per grid, in grid order.
    return centroids.ravel()

# Feature extraction: grid centroids

In [None]:
# Turn each training image into its centroid feature vector using 7x7 grids.
# This walks every image in Python, so it takes a while.
train_features = [
    get_centroid(image, 7, 7)
    for image in train_images
]

In [None]:
# Stack the per-image feature vectors into one 2-D array (one row per image).
train_features = np.array(train_features)
# Trailing expression: the notebook shows the array shape as the cell output.
train_features.shape

(60000, 32)

# Classification algorithm: k-nearest neighbours (KNN)

In [None]:
def knn_classifier(train_data, test_data, k, row=7, col=7):
    """
    Classify test images with k-nearest neighbours on centroid features and
    return the accuracy.

    Labels are read from the module-level ``train_labels`` and ``test_labels``
    arrays, indexed in the same order as ``train_data`` and ``test_data``.

    :param train_data: centroid feature vectors of the training images
                       (one row per image).
    :param test_data: test images; each one is converted to centroid features
                      inside the function.
    :param k: number of nearest neighbours that vote on the prediction.
    :param row: sub-grid height used for the test features; must match the
                value used to build ``train_data`` (default 7).
    :param col: sub-grid width used for the test features; must match the
                value used to build ``train_data`` (default 7).
    :return: fraction of test images whose predicted label equals the true label.
    :raises ValueError: if ``k`` is smaller than 1 or ``test_data`` is empty.
    """
    if k < 1:
        raise ValueError("k must be at least 1, got {}".format(k))

    num_test = len(test_data)
    if num_test == 0:
        raise ValueError("test_data is empty; accuracy is undefined")

    # Convert once so every distance computation is a single vectorized call.
    train_matrix = np.asarray(train_data, dtype=np.float64)

    correct = 0
    for i in range(num_test):
        # Centroid features of the test image, on the same grid as training.
        test_feature = get_centroid(test_data[i], row, col)

        # Euclidean distance from the test feature to every training feature.
        distances = np.linalg.norm(train_matrix - test_feature, axis=1)

        # Indices of the k smallest distances.
        indices = np.argsort(distances)[:k]

        # Majority vote among the neighbours' labels.
        labels = train_labels[indices]
        prediction = np.bincount(labels).argmax()

        if prediction == test_labels[i]:
            correct += 1

    return correct / num_test

In [None]:
# Evaluate on the test images using only the single nearest neighbour.
accuracy = knn_classifier(train_features, test_images, k=1)

In [None]:
# Show the accuracy returned by knn_classifier for the run above.
print(accuracy)

0.9174
