<a href="https://colab.research.google.com/github/Hongawy/Computer_Vision_Projects/blob/main/K_MEANS_CIFAR100.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing libraries and CIFAR-100 Dataset

In [None]:
import cv2
import numpy as np
import tensorflow as tf
import keras
import matplotlib.pyplot as plt

# Load the CIFAR-100 dataset through Keras; the recorded cell output shows the
# archive being downloaded from cs.toronto.edu.
# Labels come back as column vectors: the HOG cell prints y_train.shape == (50000, 1)
# and y_test.shape == (10000, 1).
# NOTE(review): images are presumably uint8 RGB arrays of shape (N, 32, 32, 3) and the
# labels the 100 "fine" classes (Keras default label_mode) - confirm against the Keras docs.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz


# HOG Feature Extraction

In [None]:
# Create a HOG descriptor with window size = (32x32), block size = (16x16),
# block stride = (8x8), cell size = (8x8), and 9 orientation bins per histogram.
# That gives 3x3 block positions * 4 cells per block * 9 bins = 324 features per image.

hog = cv2.HOGDescriptor((32,32), (16,16), (8,8), (8,8),9)


def extract_hog_features(images):
  """Return one flattened HOG descriptor per image as a 2-D array.

  Args:
    images: iterable of colour images in RGB channel order (the order in
      which the Keras CIFAR loaders return them), each matching the
      descriptor window size.

  Returns:
    Array of shape (number of images, descriptor length).
  """
  features = []
  for image in images:
    # The images are RGB, so COLOR_RGB2GRAY applies the luminance weights to
    # the right channels; COLOR_BGR2GRAY would swap the red and blue weights.
    gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # Flatten so the result is 2-D whether OpenCV returns (n,) or (n, 1).
    features.append(np.asarray(hog.compute(gray_image)).ravel())
  return np.array(features)


# HOG features for the training set and the test set
hog_training = extract_hog_features(x_train)
hog_test = extract_hog_features(x_test)

# Print the shapes of the HOG feature matrices and of the label arrays
print("HOG Features Done!")
print(hog_training.shape)
print(hog_test.shape)
print(y_train.shape)
print(y_test.shape)

HOG Features Done!
(50000, 324)
(10000, 324)
(50000, 1)
(10000, 1)


# K-Means Algorithm

In [None]:
def kMeans_init_centroids(X, K):
    """Pick K distinct rows of X, in random order, as the starting centroids.

    The rows are selected with NumPy's global random state, so the result is
    reproducible only if the caller seeds it first. If K exceeds the number
    of rows, every row is returned (in shuffled order).

    Args:
        X: 2-D array with one sample per row.
        K: number of centroids to draw.

    Returns:
        A new array holding the selected rows of X.
    """
    num_rows = X.shape[0]
    picked_rows = np.random.permutation(num_rows)[:K]
    return X[picked_rows]

def cost_kmeans(X,Y,centroids):
    """Per-cluster, per-feature mean squared deviation from the centroid.

    Args:
        X: 2-D array of samples, one per row.
        Y: cluster label of every sample, shaped (N,) or (N, 1).
        centroids: 2-D array with one centroid per row; row k belongs to
            cluster label k.

    Returns:
        Array shaped like `centroids`. Row k holds, for each feature, the
        mean of (x - centroids[k])**2 over the samples labelled k. Rows of
        clusters without any sample are left at zero instead of raising
        ZeroDivisionError.

    Raises:
        ValueError: if Y does not provide exactly one label per row of X.
    """
    X = np.asarray(X)
    centroids = np.asarray(centroids)
    # Flatten so that both (N,) and (N, 1) label arrays index the rows of X.
    labels = np.asarray(Y).reshape(-1)
    if labels.shape[0] != X.shape[0]:
        raise ValueError("Y must contain exactly one label per row of X")

    # Keep a floating centroid dtype; promote anything else so costs are not truncated.
    if np.issubdtype(centroids.dtype, np.floating):
        cost_dtype = centroids.dtype
    else:
        cost_dtype = np.float64
    total_cost = np.zeros(centroids.shape, dtype=cost_dtype)

    for cluster in range(centroids.shape[0]):
        members = X[labels == cluster]
        if members.shape[0] == 0:
            # Empty cluster: no deviation to average, keep the zero row.
            continue
        total_cost[cluster] = np.mean((members - centroids[cluster]) ** 2, axis=0)
    return total_cost

def mean_kmeans(X,Y,centroids):
    """Recompute every centroid as the mean of the samples carrying its label.

    Args:
        X: 2-D array of samples, one per row.
        Y: cluster label of every sample, shaped (N,) or (N, 1).
        centroids: 2-D array of current centroids; row k belongs to label k.
            It is not modified.

    Returns:
        A new array shaped like `centroids`. Row k is the mean of the samples
        labelled k; a cluster without samples keeps its current centroid
        instead of raising ZeroDivisionError.

    Raises:
        ValueError: if Y does not provide exactly one label per row of X.
    """
    X = np.asarray(X)
    centroids = np.asarray(centroids)
    # Flatten so that both (N,) and (N, 1) label arrays index the rows of X.
    labels = np.asarray(Y).reshape(-1)
    if labels.shape[0] != X.shape[0]:
        raise ValueError("Y must contain exactly one label per row of X")

    # Work on a copy so the caller's array is left untouched; promote
    # non-floating centroids so the means are not truncated.
    if np.issubdtype(centroids.dtype, np.floating):
        updated = centroids.copy()
    else:
        updated = centroids.astype(np.float64)

    for cluster in range(centroids.shape[0]):
        members = X[labels == cluster]
        if members.shape[0] > 0:
            updated[cluster] = members.mean(axis=0)
    return updated

# Number of clusters (presumably one per CIFAR-100 class - confirm).
k=100
# Start from k randomly chosen HOG vectors of the training set.
centroids =kMeans_init_centroids(hog_training,k)
print(centroids)
print("\n\n")
# NOTE(review): P_cost is not used anywhere in the cells visible here.
P_cost=np.zeros_like(centroids)
# The ground-truth labels y_train are passed as the cluster assignment, so `cost` is the
# per-class squared deviation around the random centroids; `cost` is not read again in
# the cells visible here.
cost=cost_kmeans(hog_training,y_train,centroids)
# Replace centroid j with the mean HOG vector of the training images labelled j, i.e.
# the clustering below is seeded with per-class means rather than the random rows above.
centroids=mean_kmeans(hog_training,y_train,centroids)
print(centroids)

def compute_centroids(X, idx, K, previous_centroids=None):
    """Compute the mean of the samples assigned to each of the K clusters.

    Args:
        X: 2-D array of samples, one per row.
        idx: cluster index of every sample, shaped (m,) or (m, 1).
        K: number of clusters.
        previous_centroids: optional (K, n) array. When given, a cluster that
            received no samples keeps its row from this array.

    Returns:
        Float array of shape (K, n). The row of a cluster without samples is
        taken from `previous_centroids` when provided and is NaN otherwise
        (the same value as before, but without NumPy's "Mean of empty slice"
        warning).
    """
    X = np.asarray(X)
    # Flatten so a column vector of labels masks rows of X the same way as a 1-D one.
    labels = np.asarray(idx).reshape(-1)
    m, n = X.shape
    centroids = np.zeros((K, n))
    for i in range(K):
        members = X[labels == i]
        if members.shape[0] > 0:
            centroids[i] = np.mean(members, axis=0)
        elif previous_centroids is not None:
            centroids[i] = previous_centroids[i]
        else:
            # No sample and no fallback: mark the centroid as undefined.
            centroids[i] = np.nan
    return centroids


def Kmean_acc(idx,y_train):
    """Print and return the percentage of samples whose cluster index equals its label.

    The comparison is a direct equality between cluster index and label, so it
    is only meaningful when cluster j is meant to correspond to label j.

    Args:
        idx: cluster index of every sample, shaped (N,) or (N, 1).
        y_train: reference label of every sample, shaped (N,) or (N, 1).

    Returns:
        The accuracy in percent as a float (the same value that is printed).

    Raises:
        ValueError: if the inputs are empty or differ in length.
    """
    predicted = np.asarray(idx).reshape(-1)
    actual = np.asarray(y_train).reshape(-1)
    # Use the real sample count instead of a hard-coded 50000.
    total = predicted.shape[0]
    if total != actual.shape[0]:
        raise ValueError("idx and y_train must have the same number of samples")
    if total == 0:
        raise ValueError("cannot compute the accuracy of zero samples")

    correct = int((predicted == actual).sum())
    acc = (correct/total)*100
    print('Accuracy:',acc,'%')
    return acc


def find_closest_centroids(X, centroids):
    """Return, for every sample, the index of its nearest centroid.

    Distances are Euclidean. Ties go to the lowest centroid index. A centroid
    containing NaN is treated as infinitely far away, so an undefined centroid
    cannot capture every sample.

    Args:
        X: 2-D array of samples, one per row.
        centroids: 2-D array with one centroid per row.

    Returns:
        Integer array of length X.shape[0] with the nearest centroid index.
    """
    X = np.asarray(X)
    centroids = np.asarray(centroids)
    num_points = X.shape[0]
    num_centroids = centroids.shape[0]
    if num_points == 0:
        return np.zeros(0, dtype=int)

    # One vectorised pass per centroid instead of one norm call per
    # (sample, centroid) pair.
    distances = np.empty((num_centroids, num_points))
    for j in range(num_centroids):
        distances[j] = np.linalg.norm(X - centroids[j], axis=1)

    # argmin would otherwise pick the first NaN distance as the "minimum".
    distances[np.isnan(distances)] = np.inf
    return np.argmin(distances, axis=0).astype(int)

def run_kMeans(X, initial_centroids, max_iters=10):
    """Run K-Means: alternate nearest-centroid assignment and centroid update.

    Args:
        X: 2-D array of samples, one per row.
        initial_centroids: 2-D array with one starting centroid per row.
        max_iters: maximum number of assignment/update rounds.

    Returns:
        Tuple (centroids, idx): the final centroids and the cluster index of
        every sample from the last assignment step. With max_iters == 0 the
        initial centroids and an all-zero idx are returned.
    """
    m, _ = X.shape
    K = initial_centroids.shape[0]
    centroids = initial_centroids
    idx = np.zeros(m)
    for i in range(max_iters):
        print("K-Means iteration %d/%d" % (i, max_iters-1))
        idx = find_closest_centroids(X, centroids)
        previous_centroids = centroids
        centroids = compute_centroids(X, idx, K)

        # A cluster that received no sample has no defined mean; keep its
        # previous centroid so it cannot poison the next assignment step.
        assigned = np.zeros(K, dtype=bool)
        assigned[np.asarray(idx).reshape(-1)] = True
        empty = ~assigned
        if empty.any():
            centroids[empty] = previous_centroids[empty]

        # Identical centroids would reproduce the same assignment, so the
        # remaining iterations cannot change the result.
        if np.array_equal(centroids, previous_centroids):
            break
    return centroids, idx



[[0.25073656 0.03987567 0.04723272 ... 0.16149437 0.10017493 0.19596632]
 [0.13392524 0.10425232 0.11062553 ... 0.0862163  0.18236846 0.24937668]
 [0.14525978 0.14372349 0.11496767 ... 0.14026721 0.24489073 0.21032937]
 ...
 [0.0631104  0.02684585 0.0191454  ... 0.196098   0.20119944 0.25389275]
 [0.09973546 0.26037693 0.26037693 ... 0.01530824 0.04212321 0.1426693 ]
 [0.0321167  0.03031038 0.03784161 ... 0.12836827 0.15646964 0.23043613]]



[[0.11716131 0.19083531 0.2117164  ... 0.07171296 0.07004483 0.1039547 ]
 [0.11663306 0.11807032 0.12678084 ... 0.12696472 0.11465508 0.13325156]
 [0.13861856 0.13467434 0.12887655 ... 0.14372025 0.14341044 0.15463659]
 ...
 [0.15813889 0.13435902 0.1160449  ... 0.12376606 0.13545637 0.17187482]
 [0.15832122 0.14989474 0.12356924 ... 0.12893431 0.13769083 0.16813783]
 [0.11087339 0.12938452 0.13686627 ... 0.09990277 0.10099541 0.11489166]]


In [None]:
# Run a single K-Means iteration starting from the current `centroids`. With max_iters=1,
# `idx` is the nearest-centroid assignment to those starting centroids, and the returned
# centroids are recomputed from that assignment.
# NOTE: `centroids` is rebound here, so re-running this cell starts from the updated
# centroids rather than the original ones (the cell is not idempotent).
centroids, idx = run_kMeans(hog_training,centroids,max_iters=1)
print (idx)
print (y_train)
# Kmean_acc compares cluster indices with class labels directly. NOTE(review): that is
# only meaningful if centroid j was seeded from class j by the earlier mean_kmeans call,
# which assumes the cells were executed in order - confirm.
Kmean_acc(idx,y_train)

K-Means iteration 0/0
[77 29 58 ...  3 30 91]
[[19]
 [29]
 [ 0]
 ...
 [ 3]
 [ 7]
 [73]]
Accuracy: 15.826 %
