In [18]:
import cv2
import numpy as np
import tensorflow as tf
from skimage.color import rgb2gray

# Handle to the Keras Fashion-MNIST dataset module; load_data() is called on it further down.
fashion_mnist = tf.keras.datasets.fashion_mnist
from sklearn.metrics import (accuracy_score, classification_report)


def distance(a, b):
    """Return the Euclidean (L2) distance between the points ``a`` and ``b``.

    The operands are combined with ``-`` and the norm of the difference is
    returned, so both must support array-style subtraction.
    """
    offset = a - b
    return np.linalg.norm(offset)

def extract_sift_features(image):
    """Detect SIFT keypoints in ``image`` and compute their descriptors.

    Args:
        image: the image handed unchanged to ``detectAndCompute``
            (presumably an 8-bit single-channel array -- confirm against
            the OpenCV build in use).

    Returns:
        ``(kp, des)`` exactly as returned by ``detectAndCompute``. Callers in
        this notebook treat ``des is None`` as "no descriptors found".
    """
    # SIFT lives in the main cv2 namespace since OpenCV 4.4; only older
    # opencv-contrib builds expose it under cv2.xfeatures2d. Prefer the new
    # location and fall back so the notebook runs on either.
    if hasattr(cv2, "SIFT_create"):
        sift = cv2.SIFT_create()
    else:
        sift = cv2.xfeatures2d.SIFT_create()
    kp, des = sift.detectAndCompute(image, None)
    return kp, des

def extract_features(x_train,y_train):
    """Run SIFT over every image and collect the resulting descriptors.

    Images for which no descriptors are found are dropped together with their
    labels, so the returned images, labels and per-image descriptor groups
    stay index-aligned.

    Args:
        x_train: indexable collection of images (despite the name, the
            notebook also passes the test split through this function).
        y_train: labels, indexable in parallel with ``x_train``.

    Returns:
        A 4-tuple ``(descriptor_list, all_descriptor_list, images, labels)``:
        * ``descriptor_list`` -- every descriptor of every kept image stacked
          row-wise into one array (an empty array if nothing was kept);
        * ``all_descriptor_list`` -- 1-D object array holding one descriptor
          matrix per kept image;
        * ``images`` / ``labels`` -- the kept images and their labels.
    """
    per_image_descriptors = []
    kept_images = []
    kept_labels = []
    for i in range(len(x_train)):
        image = x_train[i]
        # Only colour (3-D) inputs need converting. Already-grayscale 2-D
        # images are passed through untouched, because recent scikit-image
        # releases reject 2-D input to rgb2gray.
        # NOTE(review): rgb2gray returns a float image; confirm the SIFT
        # backend accepts that if colour inputs are ever used.
        gray = rgb2gray(image) if np.ndim(image) == 3 else image
        kp, des = extract_sift_features(gray)
        if des is None:
            continue
        kept_images.append(image)
        kept_labels.append(y_train[i])
        per_image_descriptors.append(des)

    # Stack all descriptors once instead of appending them row by row.
    if per_image_descriptors:
        descriptor_list = np.concatenate(per_image_descriptors, axis=0)
    else:
        descriptor_list = np.array([])

    # Images yield different numbers of descriptors, so the per-image groups
    # are ragged. Fill an object array slot by slot: np.array() on a ragged
    # list raises on current NumPy, and would silently build a 3-D array if
    # every image happened to have the same descriptor count.
    all_descriptor_list = np.empty(len(per_image_descriptors), dtype=object)
    for slot, des in enumerate(per_image_descriptors):
        all_descriptor_list[slot] = des

    return descriptor_list, all_descriptor_list, np.array(kept_images), np.array(kept_labels)

def find_nearest_centroid(feature, centroids):
    """Return the index of the centroid closest to ``feature``.

    Closeness is measured with ``distance``; when several centroids tie, the
    first (lowest index) wins because ``np.argmin`` is used.
    """
    gaps = []
    for centroid in centroids:
        gaps.append(distance(centroid, feature))
    return np.argmin(gaps)

def generate_histogram(des, centroids):
    """Count how many descriptors in ``des`` fall nearest to each centroid.

    Returns a float array with one bin per centroid.
    """
    counts = np.zeros(len(centroids))
    # Resolve the nearest visual word for each descriptor, then tally.
    assignments = [find_nearest_centroid(descriptor, centroids) for descriptor in des]
    for word in assignments:
        counts[word] += 1
    return np.array(counts)

def ComputeHistogram(centroids, all_descriptor_list, number_of_images):
    """Build one visual-word histogram per image.

    Looks at the first ``number_of_images`` entries of
    ``all_descriptor_list``; entries that are ``None`` are skipped, so the
    result may contain fewer rows than ``number_of_images``.
    """
    descriptor_groups = (all_descriptor_list[idx] for idx in range(number_of_images))
    histograms = [
        generate_histogram(group, centroids)
        for group in descriptor_groups
        if group is not None
    ]
    return np.array(histograms)

def my_k_mean(k,data,iter=50):
    """Cluster ``data`` with a plain k-means (Lloyd) loop.

    The first ``k`` points of ``data`` seed the centroids. Each round assigns
    every point to its nearest centroid (via ``find_nearest_centroid``) and
    moves each centroid to the mean of its members, stopping early once the
    centroids no longer change.

    Args:
        k: requested number of clusters; if ``data`` has fewer than ``k``
            points, only ``len(data)`` clusters are produced.
        data: sequence/array of points.
        iter: maximum number of assignment/update rounds.

    Returns:
        ``(centroids, clusters)`` where ``clusters`` is a list holding, per
        centroid, the list of points assigned to it in the last round that
        was executed (all lists are empty if ``iter`` is 0).
    """
    # Copy the seeds so the centroids never alias the caller's data.
    centroids = np.array(data[:k])
    # Pre-initialise so the return value is defined even when iter == 0.
    clusters = [[] for _ in range(len(centroids))]
    for _ in range(iter):
        clusters = [[] for _ in range(len(centroids))]

        for point in data:
            clusters[find_nearest_centroid(point, centroids)].append(point)

        # A cluster can end up empty (e.g. duplicate seed points). Taking
        # np.mean of an empty list yields NaN and a ragged centroid array,
        # so keep the previous centroid for such clusters instead.
        new_centroids = np.array([
            np.mean(members, axis=0) if members else centroids[idx]
            for idx, members in enumerate(clusters)
        ])

        # Converged: the update step did not move any centroid.
        if np.array_equal(centroids, new_centroids):
            break
        centroids = new_centroids
    return centroids,clusters

def MatchHistogram(histogram1, all_his):
    """Cosine similarity between each row of ``histogram1`` and ``all_his``.

    Args:
        histogram1: 2-D array, one histogram per row (``predict`` passes the
            whole bank of training histograms here).
        all_his: a single 1-D histogram to compare against every row.

    Returns:
        1-D float array with one similarity per row of ``histogram1``. Rows
        whose norm is zero (or every row, if ``all_his`` is all zeros) get a
        similarity of 0 instead of NaN, so downstream ranking stays valid.
    """
    histogram1 = np.asarray(histogram1, dtype=float)
    all_his = np.asarray(all_his, dtype=float)

    dots = np.dot(histogram1, all_his)
    norms = np.linalg.norm(histogram1, axis=1) * np.linalg.norm(all_his)

    # Divide only where the denominator is non-zero; the rest stays at 0.
    similarity = np.zeros_like(dots, dtype=float)
    np.divide(dots, norms, out=similarity, where=norms > 0)
    return similarity

def predict(test_images,train_images,train_labels,k=100):
    """Label each test histogram by majority vote among its nearest neighbours.

    For every histogram in ``test_images`` the cosine similarity to all
    training histograms is computed with ``MatchHistogram``; the ``k`` most
    similar training items vote and the most frequent label wins (the
    smallest label on a tie, as ``np.argmax`` picks the first maximum).

    Args:
        test_images: iterable of test histograms (not raw images).
        train_images: 2-D array of training histograms, one per row.
        train_labels: non-negative integer labels aligned with
            ``train_images``.
        k: number of neighbours that vote. Defaults to 100, the value that
            was previously hard-coded; it is clamped to the number of
            training histograms so small training sets no longer fail.

    Returns:
        1-D array with one predicted label per test histogram.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    train_labels = np.asarray(train_labels).astype(int)
    # argpartition rejects a kth beyond the array length, so never ask for
    # more neighbours than there are training histograms.
    neighbours = min(k, len(train_images))

    predicted_labels = []
    for test_histogram in test_images:
        similarity = MatchHistogram(train_images, test_histogram)
        # Indices of the most similar training histograms (unordered).
        nearest = np.argpartition(similarity, -neighbours)[-neighbours:]
        votes = np.bincount(train_labels[nearest])
        predicted_labels.append(np.argmax(votes))
    return np.array(predicted_labels)

def CreateVisualDictionary(descriptors, k):
    """Cluster ``descriptors`` into ``k`` visual words and return the centroids.

    The cluster membership that ``my_k_mean`` also returns is discarded.
    """
    return my_k_mean(k=k, data=descriptors)[0]


In [None]:
# Load Fashion-MNIST; load_data() yields the train and test splits, each as an (images, labels) pair.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

In [None]:
# Extract SIFT descriptors from the training images.
# NOTE: x_train / y_train are rebound here to the subset of images for which
# extract_features found descriptors, so re-running this cell operates on the
# already-filtered data rather than on the raw split.
print("Extracting features from training images")
descriptor_list , all_descriptor_list , x_train , y_train = extract_features(x_train,y_train)
# Number of training images that were kept; used when building their histograms.
number_of_images = len(x_train)

In [28]:
# Create the visual dictionary: cluster all training descriptors into k=50
# centroids ("visual words") with the hand-written k-means.
print("Creating visual dictionary")
centroids = CreateVisualDictionary(descriptor_list,k=50)

Creating visual dictionary


In [30]:
# Compute one visual-word histogram per kept training image; these rows form
# the reference set that test histograms are matched against.
print("Computing histogram for training images")
bag_of_visual_words = ComputeHistogram(centroids=centroids,all_descriptor_list=all_descriptor_list,number_of_images=number_of_images)


Computing histogram for training images


In [31]:
# Materialise the whole test split as a plain Python list of images.
# (No features are extracted in this cell despite the message; that happens
# in the next one.)
print("Extracting features from test images")
test_images = list(x_test)

Extracting features from test images


In [None]:
# Extract SIFT descriptors from the test images.
# test_images is rebound to the images for which descriptors were found and
# y_labels holds the matching ground-truth labels, so both can be shorter
# than the original test split.
test_descriptor_list , test_all_descriptor_list , test_images , y_labels = extract_features(test_images,y_test)


In [33]:
# Compute one visual-word histogram per kept test image, using the centroids
# that were built from the training descriptors.
print("Computing histogram for test images")
test_histograms = ComputeHistogram(centroids=centroids,all_descriptor_list=test_all_descriptor_list,number_of_images=len(test_images))


Computing histogram for test images


In [34]:
# Match every test histogram against the training histograms and predict a label.
# Despite its name, predicted_images holds predicted class labels, not images.
print("Matching histograms")
predicted_images = predict(test_histograms,bag_of_visual_words,y_train)


Matching histograms


In [35]:
# Accuracy by hand: count the positions where the predicted label equals the
# ground-truth label, then report it as a percentage and as a raw count.
correct = sum(
    1
    for guess, truth in zip(predicted_images, y_labels)
    if guess == truth
)
print("Accuracy: ",correct/len(predicted_images)*100)
print(f"Correct: {correct} out of {len(predicted_images)}")


Accuracy:  48.83544578052215
Correct: 4508 out of 9231


In [36]:
# Per-class precision/recall/F1 plus the overall accuracy, via scikit-learn.
# The names are listed in label order (index 0 first).
target_names = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]
print(classification_report(y_true=y_labels, y_pred=predicted_images, target_names=target_names))
print("Overall Accuracy: ", accuracy_score(y_true=y_labels, y_pred=predicted_images))


              precision    recall  f1-score   support

 T-shirt/top       0.60      0.54      0.57       944
     Trouser       0.46      0.64      0.53       628
    Pullover       0.33      0.46      0.38       954
       Dress       0.37      0.39      0.38       923
        Coat       0.37      0.33      0.35       973
      Sandal       0.52      0.62      0.56       966
       Shirt       0.29      0.07      0.11       932
     Sneaker       0.60      0.56      0.58       965
         Bag       0.50      0.56      0.53       958
  Ankle boot       0.75      0.75      0.75       988

    accuracy                           0.49      9231
   macro avg       0.48      0.49      0.47      9231
weighted avg       0.48      0.49      0.47      9231

Overall Accuracy:  0.4883544578052215
