<div class="alert alert-block alert-info">
<b>IMPORTS</b>
</div>

In [1]:
#Code to connect your google drive with google colaboratory
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


<div class="alert alert-block alert-info">
<b>Code Citation</b>
</div>

In [None]:
# Only the Bag of visual words code has been taken and modifed with.
# https://medium.com/@aybukeyalcinerr/bag-of-visual-words-bovw-db9500331b2f

In [5]:
# Packages
import numpy as np
import cv2 as cv
import os
import glob
from matplotlib import pyplot as plt
import seaborn as sns
import math
from sklearn.metrics import confusion_matrix
from prettytable import PrettyTable
import time
from scipy import ndimage
from scipy.spatial import distance
from sklearn.cluster import KMeans
from google.colab.patches import cv2_imshow
import tensorflow as tf
from tensorflow import keras
from keras.preprocessing import image
from fastai.vision import *
%matplotlib inline
from fastai.basics import *
from fastai.callback.all import *
from fastai.vision.all import *
from sklearn.metrics import precision_score, recall_score, f1_score
!pip install --upgrade fastai

In [16]:
# Defining the path
path = '/content/drive/MyDrive/Assignment/Images'

# Files and reading the data
digits_image = os.path.join(path, 'digits.png')
digit_image = cv.imread(digits_image, cv.IMREAD_GRAYSCALE)

# Printing the digit name
print("The image file name is:", os.path.basename(digits_image))
print(f"The shape of the image is {digit_image.shape}")

# Training and Test Paths
training_folder_path = os.path.join(path, "Train")
testing_folder_path = os.path.join(path, "Test")

# Create Train and Test folders if they don't exist
if os.path.isfile(training_folder_path) and os.path.isfile(training_folder_path) == False:
    os.mkdir(training_folder_path)
    os.mkdir(testing_folder_path)


The image file name is: digits.png
The shape of the image is (1000, 2000)


In [9]:
# Create Arrays of 20 by 20 patch
all_digit_numbers = np.empty((0,20,20),dtype=np.uint8)
training_digits = np.empty((0,20,20),dtype=np.uint8)
test_digits = np.empty((0,20,20),dtype=np.uint8)

# Format
training_images, testing_images = [],[]

# LOCATE DIGITS
# 10 DIGITS
for k in range(10):
  # 5 ROWS
    for r in range(5):
      # 100 NUMBERS
        for i in range(100):
          # IMAGE SLICING
            digit = digit_image[(k*5+r)*20:(k*5+r+1)*20, i*20:(i+1)*20]
            #  Adding another dimension due to the array's initial size
            digit = np.expand_dims(digit, axis=0)
            # ADD TO THE BIG ARRAY
            all_digit_numbers = np.append(all_digit_numbers, digit, axis=0) # Just in case we need to the full size
            if r < 4:
                # Appending 80%
                training_digits = np.append(training_digits, digit, axis=0)# Append 4 rows for training
            else:
                # Appending 20%
                test_digits = np.append(test_digits, digit, axis=0)  # Append the last row for testing

training_elements, testing_elements = 0,0

for i in range(10): # 0-9 digits
    counter = 1
    for j in range(4): # 4 rows
        for k in range(100):
            # row1 = 0-100 # row2 = 100-200 # row3 = 200-300# row4 = 300-400
            filename = f"digit_{i}_row{j+1}_id_{counter}.jpg"
            # 0-3999 == 4000 elements which is 80 percent for training
            image = os.path.join(training_folder_path, filename)
            # If image does not exists, then create it
            if os.path.isfile(image) == False:
                cv.imwrite(image, np.uint8(training_digits[training_elements])) # Saving the image in the Train folder 0 till 4000
            # APPEND TO TRAINING ARRAY
            training_images.append(np.uint8(training_digits[training_elements]))
            counter += 1
            training_elements += 1 # till 4000 elements

    counter = 1
    for l in range(100): # There's only 1000 elements for testing
        filename = f"digit_{i}_id_{counter}.jpg"
        testing_folder_path_1 = os.path.join(testing_folder_path, filename)
        # If image does not exists, then create it
        if os.path.isfile(testing_folder_path_1) == False:
            cv.imwrite(testing_folder_path_1, np.uint8(test_digits[testing_elements])) # Saving the image in the Test folder 0 till 2000
        # APPEND TO TESTING ARRAY
        testing_images.append(np.uint8(test_digits[testing_elements]))
        counter += 1
        testing_elements += 1

# all_digit_numbers.shape #Confirming we have (5000, 20, 20)
print(all_digit_numbers.shape)
print(training_digits.shape)
print(test_digits.shape)

# At the moment the images are 20 x 20
# Convert it to a row vector of 400
train_array = np.array(training_images).reshape(-1,400).astype(np.float32)
test_array = np.array(testing_images).reshape(-1,400).astype(np.float32)

print(f"The size of the training array is: {train_array.shape}")
print(f"The size of the testing array is: {test_array.shape}")

(5000, 20, 20)
(4000, 20, 20)
(1000, 20, 20)
The size of the training array is: (4000, 400)
The size of the testing array is: (1000, 400)


<div class="alert alert-block alert-info">
<b>Bag of Visual Words - Pre-processing</b>
</div>

In [None]:
def load_images_from_folder(folder):
    images = {}
    for filename in os.listdir(folder):
        # looping through the folders
        img = cv.imread(os.path.join(folder, filename), 0)
        if img is not None:
            # Assuming the filename is in the format: 'digit_0_row1_id_1'
            parts = filename.split('_')
            class_label = parts[1]  # The digit is in the second position in this case
            if class_label not in images:
                images[class_label] = []
            images[class_label].append(img)
        # print(images[class_label])

    # for class_label, imgs in images.items():
    #     print(f"Number of images in class {class_label}: {len(imgs)}")
    #     # Show the first image of each class
    #     cv.imshow(f"First image in class {class_label}", imgs[0])
    #     cv.waitKey(0)
    #     cv.destroyAllWindows()

    return images

# Usage
train_images = load_images_from_folder(training_folder_path)
test_images = load_images_from_folder(testing_folder_path)
# train images and test images are now dictionaries


<div class="alert alert-block alert-info">
<b>Bag of Visual Words - SIFT</b>
</div>

In [None]:
# Code citation
# The Bag of Visual words code has been taken from https://medium.com/@aybukeyalcinerr/bag-of-visual-words-bovw-db9500331b2f
def sift_features(images):
    # create descriptors usin sift
    sift_vectors = {}
    descriptor_list = []
    # locate for sift extremas
    sift = cv.xfeatures2d.SIFT_create()
    for key,value in images.items():
        features = []
        for img in value:
            # keypoint and descriptor
            kp, des = sift.detectAndCompute(img,None)
            # just in case does not find a descriptor because it does reject certain keypoints
            if des is not None:
                descriptor_list.extend(des)
                features.append(des)
        sift_vectors[key] = features
    return [descriptor_list, sift_vectors]

sifts = sift_features(train_images)
# Takes the descriptor list
descriptor_list = sifts[0]
# Takes the sift features for train data
all_bovw_feature = sifts[1]
# Takes the sift features for test data
test_bovw_feature = sift_features(test_images)[1]

print(f"The Length of descriptor_list: {len(descriptor_list)}")
print(f"The Keys in all_bovw_feature: {all_bovw_feature.keys()}")
print(f"The Length of descriptors for the first class: {len(all_bovw_feature[list(all_bovw_feature.keys())[0]])}")
print(f"The Keys in test_bovw_feature: {test_bovw_feature.keys()}")




The Length of descriptor_list: 15247
The Keys in all_bovw_feature: dict_keys(['7', '8', '9', '5', '6', '2', '3', '4', '0', '1'])
The Length of descriptors for the first class: 326
The Keys in test_bovw_feature: dict_keys(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])


<div class="alert alert-block alert-info">
<b>Bag of Visual Words - K-Means Strategy</b>
</div>

In [None]:
def kmeans(k, descriptor_list):
    # use k-means algorithm
    kmeans = KMeans(n_clusters = k, n_init=10)
    # fit the descriptor list
    kmeans.fit(descriptor_list)
    # obtain the visual words from the centers
    visual_words = kmeans.cluster_centers_
    return visual_words

# Takes the center points which is visual words
visual_words = kmeans(850, descriptor_list)
# print(visual_words)


<div class="alert alert-block alert-info">
<b>Bag of Visual Words - Histograms for both test and train images</b>
</div>

In [None]:
def find_index(feature, centres):
    min_distance = float("inf")
    index = -1
    for i in range(len(centres)):
        distance_1 = distance.euclidean(feature, centres[i])
        if distance_1 < min_distance:
            index = i
            min_distance = distance_1
    return index


def image_class(all_bovw, centers):
    # create dictionary 
    dict_feature = {}
    # for all sift features
    for key,value in all_bovw.items():
        category = []
        for img in value:
            # create histograms
            histogram = np.zeros(len(centers))
            for each_feature in img:
                ind = find_index(each_feature, centers)
                histogram[ind] += 1
            category.append(histogram)
        dict_feature[key] = category
    return dict_feature

# Creates histograms for train data
bovw_train = image_class(all_bovw_feature, visual_words)
# Creates histograms for test data
bovw_test = image_class(test_bovw_feature, visual_words)


<div class="alert alert-block alert-info">
<b>Bag of Visual Words - Predict classes of the test images with k-NN function</b>
</div>

In [None]:
# 1-NN algorithm
def knn(images, tests):
    num_test = 0
    correct_predict = 0
    class_based = {}

    for test_key, test_val in tests.items():
        class_based[test_key] = [0, 0] # [correct, all]
        for tst in test_val:
            predict_start = 0
            #print(test_key)
            minimum = 0
            key = "a" #predicted
            for train_key, train_val in images.items():
                for train in train_val:
                    if(predict_start == 0):
                        minimum = distance.euclidean(tst, train)
                        #minimum = L1_dist(tst,train)
                        key = train_key
                        predict_start += 1
                    else:
                        dist = distance.euclidean(tst, train)
                        #dist = L1_dist(tst,train)
                        if(dist < minimum):
                            minimum = dist
                            key = train_key

            if(test_key == key):
                correct_predict += 1
                class_based[test_key][0] += 1
            num_test += 1
            class_based[test_key][1] += 1
            #print(minimum)
    return [num_test, correct_predict, class_based]

# Call the knn function
results_bowl = knn(bovw_train, bovw_test)

<div class="alert alert-block alert-info">
<b>Bag of Visual Words - Accuracy</b>
</div>

In [None]:
# Calculates the average accuracy and class based accuracies.
def accuracy(results):
    avg_accuracy = (results[1] / results[0]) * 100
    print("Average accuracy: %" + str(avg_accuracy))
    print("\nClass based accuracies: \n")
    for key,value in results[2].items():
        acc = (value[0] / value[1]) * 100
        print(key + " : %" + str(acc))
accuracy(results_bowl)
# Calculates the accuracies and write the results to the console.

Average accuracy: %57.877094972067034

Class based accuracies: 

0 : %54.25531914893617
1 : %64.15094339622641
2 : %31.25
3 : %48.421052631578945
4 : %67.70833333333334
5 : %56.17977528089888
6 : %38.297872340425535
7 : %74.69879518072288
8 : %67.0103092783505
9 : %80.61224489795919
