# K-MEANS ALGORITHM

## Import Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import glob
import cv2
import os
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from IPython.display import clear_output

## Dataset

In [None]:
# Collect the paths of every entry in the image database and test folders
images=glob.glob('image_database/*')
test=glob.glob('test/*')

In [None]:
# Load one representative image per category; I1..I6 stay available to later cells.
I1 = plt.imread('image_database/1.jpg')
I2 = plt.imread('image_database/11.jpg')
I3 = plt.imread('image_database/21.jpg')
I4 = plt.imread('image_database/31.jpg')
I5 = plt.imread('image_database/41.jpg')
I6 = plt.imread('image_database/51.jpg')

# Show the six samples on a 2x3 grid (subplot codes 231..236), one title each.
samples = (
    ('Bus', I1),
    ('Dinosaur', I2),
    ('Elephant', I3),
    ('Flower', I4),
    ('Horse', I5),
    ('Mountain', I6),
)
for slot, (label, picture) in enumerate(samples, start=231):
    plt.subplot(slot)
    plt.title(label)
    plt.imshow(picture)
plt.show()

## Important Functions

In [None]:
def rgb2gray(I):
    """Convert a colour image to grayscale with fixed luminance weights.

    Parameters
    ----------
    I : array-like whose last axis holds at least three channels
        (red, green, blue first). Any extra channel, such as alpha,
        is ignored.

    Returns
    -------
    numpy.ndarray
        Weighted sum 0.2989*R + 0.5870*G + 0.1140*B, with the channel
        axis removed.
    """
    # Keep only the first three channels so 4-channel (RGBA) images do not
    # break the 3-element dot product.
    rgb = np.asarray(I)[..., :3]
    return np.dot(rgb, [0.2989, 0.5870, 0.1140])

In [None]:
# Convert the first sample image to grayscale and display it with the 'gray' colormap
I_gray=rgb2gray(I1)
plt.imshow(I_gray,cmap=plt.get_cmap('gray'))
plt.show()

In [None]:
def distance(x,y):
    """Return the Euclidean (L2) distance between two arrays.

    Parameters
    ----------
    x, y : array-like of numbers with equal or broadcastable shapes.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared element-wise differences.
    """
    # Work in floating point: unsigned integer images (e.g. uint8) would
    # otherwise wrap around on subtraction and squaring.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.sqrt(np.sum(np.power(diff, 2)))

In [None]:
# Compare the grayscale versions of the first two sample images with distance()
I_gray1=rgb2gray(I1)
I_gray2=rgb2gray(I2)
print("Euclidean distance between two images : ",distance(I_gray1,I_gray2))

In [None]:
def distanceMan(x,y):
    """Return the Manhattan (L1) distance between two arrays.

    Parameters
    ----------
    x, y : array-like of numbers with equal or broadcastable shapes.

    Returns
    -------
    numpy.float64
        Sum of absolute element-wise differences. No square root is
        applied; that belongs to the Euclidean distance only.
    """
    # Work in floating point so unsigned integer inputs cannot wrap around.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.sum(np.abs(diff))

In [None]:
# Same pair of grayscale images, compared with distanceMan()
print("Manhattan distance between two images : ",distanceMan(I_gray1,I_gray2))

In [None]:
def redimension(im_ori,im):
    """Resize ``im`` to the height and width of ``im_ori``.

    Parameters
    ----------
    im_ori : array whose first two axes are (rows, columns); further axes
        such as colour channels are ignored.
    im : image array to resize.

    Returns
    -------
    The resized copy of ``im`` produced by ``cv2.resize``.
    """
    # Only the first two axes matter, so grayscale and colour references
    # both work (a plain two-value unpack fails on 3-D shapes).
    height, width = im_ori.shape[:2]
    # cv2.resize takes its target size as (width, height).
    return cv2.resize(im, (width, height))

## Color Feature

In [None]:
def meanColor(I):
    """Return the mean of each of the first three channels of ``I``.

    ``I`` is indexed as ``I[:, :, c]``; the result is a length-3 array
    holding the averages of channels 0, 1 and 2, in that order.
    """
    channel_means = [np.mean(I[:, :, channel]) for channel in range(3)]
    return np.array(channel_means)

In [None]:
# Per-channel mean of the first sample image
meanColor(I1)

## Texture Feature

In [None]:
def variance(I):
    """Return the population variance of a 2-D array.

    The squared deviations from the mean are summed and divided by the
    number of elements (rows * columns).
    """
    rows, cols = I.shape
    deviations = I - np.mean(I)
    return np.sum(deviations ** 2) / (rows * cols)

def energie(I):
    """Return the energy of a 2-D array: the sum of its squared values."""
    # The unpack only serves to reject input that is not 2-D.
    _rows, _cols = I.shape
    squared = I ** 2
    return np.sum(squared)

def entropie(I):
    """Return ``-sum(I * log2(I + 1e-10))`` over all elements of ``I``.

    The small offset keeps the logarithm finite where ``I`` is zero.
    """
    weighted_logs = I * np.log2(I + 1e-10)
    return -np.sum(weighted_logs)

def contraste(I):
    """Return the contrast measure ``sum((i - j)**2 * I[i, j])``.

    Parameters
    ----------
    I : 2-D array; ``i`` is the row index and ``j`` the column index.

    Returns
    -------
    numpy scalar
        The weighted sum over every element of ``I``.
    """
    nl, nc = I.shape
    # Broadcast a column of row indices against a row of column indices to
    # get every (i - j) at once instead of looping over pixels in Python.
    row_idx = np.arange(nl).reshape(-1, 1)
    col_idx = np.arange(nc).reshape(1, -1)
    return np.sum((row_idx - col_idx) ** 2 * I)

def homogenite(I):
    """Return the homogeneity measure ``sum(I[i, j] / (1 + |i - j|))``.

    Parameters
    ----------
    I : 2-D array; ``i`` is the row index and ``j`` the column index.

    Returns
    -------
    numpy scalar
        The weighted sum over every element of ``I``.
    """
    nl, nc = I.shape
    # Broadcast row indices against column indices to get every |i - j|
    # at once instead of looping over pixels in Python.
    row_idx = np.arange(nl).reshape(-1, 1)
    col_idx = np.arange(nc).reshape(1, -1)
    return np.sum(I / (1 + np.abs(row_idx - col_idx)))

In [None]:
def texture(I):
    """Return the five texture descriptors of a grayscale image.

    ``I`` is divided by 255 first; the result is an array holding, in
    order: variance, energie, entropie, contraste, homogenite.
    """
    scaled = I / 255
    descriptors = (variance, energie, entropie, contraste, homogenite)
    return np.array([measure(scaled) for measure in descriptors])

In [None]:
# Texture descriptors of the grayscale sample image
texture(I_gray)

## DATA PREPARATION

In [None]:
# Column-oriented container for the feature table: one list per output column
dictionary={'Names' :[],'R':[],'G':[],'B':[],'Variance':[],'Energy':[],'Entropy':[],'Contrast':[],'Homogeneity':[]}
# Category names, ordered by image number (images 1-10 are the first name, 11-20 the second, ...)
names=['Bus','Dinosaur','Elephant','Flower','Horse','Mountain']

In [None]:
def dict2dataframe(images):
    """Build the feature table for a collection of image files.

    Each file name must start with the image number (e.g. ``12.jpg``).
    Images are numbered in consecutive groups of ten per category, in the
    order given by the module-level ``names`` list.

    Parameters
    ----------
    images : iterable of str
        Paths of the image files to describe.

    Returns
    -------
    pandas.DataFrame
        One row per image with the columns Names, R, G, B, Variance,
        Energy, Entropy, Contrast and Homogeneity.

    Raises
    ------
    ValueError
        If a file name does not start with an integer, or if the number
        does not belong to any category in ``names``.
    """
    images_per_class = 10
    columns = ('Names', 'R', 'G', 'B',
               'Variance', 'Energy', 'Entropy', 'Contrast', 'Homogeneity')

    # Rows are collected locally rather than in a module-level dict, so
    # calling the function twice does not duplicate earlier rows.
    rows = []
    for path in images:
        image_number = int(os.path.basename(path).split('.')[0])
        class_index = (image_number - 1) // images_per_class
        if image_number < 1 or class_index >= len(names):
            # Fail with a clear message instead of producing columns of
            # unequal length.
            raise ValueError(
                f"image number {image_number} ({path!r}) does not belong to any category"
            )

        image = plt.imread(path)
        color = meanColor(image)
        textur = texture(rgb2gray(image))
        rows.append((names[class_index], *color[:3], *textur[:5]))

    return pd.DataFrame(rows, columns=columns)

In [None]:
#data=dict2dataframe(images)
#data.to_csv('Image_Features.csv', index=False)

In [None]:
# Load the previously exported feature table
df=pd.read_csv('Image_Features.csv')

In [None]:
# Preview the first rows of the feature table
df.head()

## K-Means Algorithm

In [None]:
class K_MEANS:
    """K-means clustering over the columns of a numeric DataFrame.

    Centroids are stored as a DataFrame with one row per feature and one
    column per cluster id. After ``start()`` has run, ``get_labels()``
    returns the assignment of every row to its nearest fitted centroid.
    """

    def __init__(self,data,k,iteration=100):
        """Store the data to cluster, the cluster count and the iteration cap.

        Parameters
        ----------
        data : pandas.DataFrame of numeric features, one row per sample.
        k : number of clusters.
        iteration : maximum number of update steps performed by ``start()``.
        """
        self.data=data
        self.k=k
        self.iteration=iteration
        # Current centroids (features x k); None until they are first drawn.
        self.centroids=None
        # Labels from the most recent assignment step made by start().
        self.labels=None

    def random_centroids(self):
        """Return k random centroids as a (features x k) DataFrame.

        Every coordinate of a centroid is sampled independently from the
        values observed in the matching data column.
        """
        centroids = []
        for _ in range(self.k):
            # .iloc[0] extracts the sampled scalar; calling float() on a
            # one-element Series is deprecated in recent pandas.
            centroid = self.data.apply(lambda col: float(col.sample().iloc[0]))
            centroids.append(centroid)
        return pd.concat(centroids, axis=1)

    def get_labels(self):
        """Return, for every data row, the id of its nearest current centroid.

        Uses Euclidean distance. If no centroids exist yet they are drawn
        once and kept, so repeated calls agree with each other and with the
        result of ``start()``.
        """
        if self.centroids is None:
            self.centroids = self.random_centroids()
        distances = self.centroids.apply(
            lambda c: np.sqrt(((self.data - c) ** 2).sum(axis=1))
        )
        return distances.idxmin(axis=1)

    def _centroids_from(self,labels):
        """Return the centroids implied by ``labels`` (features x k)."""
        # Arithmetic mean of each cluster; a geometric mean collapses to 0
        # as soon as a cluster contains a 0, which min-max scaling produces.
        means = self.data.groupby(labels).mean().T
        # A cluster that received no points keeps its previous centroid
        # instead of silently disappearing from the table.
        return means.reindex(columns=self.centroids.columns).fillna(self.centroids)

    def new_centroids(self):
        """Return updated centroids computed from the current assignment."""
        labels=self.get_labels()
        return self._centroids_from(labels)

    def plot_clusters(self,data, labels, centroids, iteration):
        """Plot the data and centroids on their first two principal components.

        Parameters
        ----------
        data : DataFrame of samples.
        labels : cluster id per sample, used as the point colour.
        centroids : (features x k) DataFrame of centroids.
        iteration : iteration number shown in the plot title.
        """
        pca = PCA(n_components=2)
        data_2d = pca.fit_transform(data)
        centroids_2d = pca.transform(centroids.T)
        # Replace the previous figure instead of stacking one per iteration.
        clear_output(wait=True)
        plt.title(f'Iteration {iteration}')
        plt.scatter(x=data_2d[:,0], y=data_2d[:,1], c=labels)
        plt.scatter(x=centroids_2d[:,0], y=centroids_2d[:,1])
        plt.show()

    def start(self):
        """Run k-means until the centroids stop changing or the cap is hit.

        Starts from fresh random centroids, then alternates assignment and
        centroid update, plotting every step. At most ``self.iteration``
        update steps are performed. The final state is left in
        ``self.centroids`` and ``self.labels``.
        """
        self.centroids = self.random_centroids()
        old_centroids = pd.DataFrame()
        iteration = 1

        while iteration <= self.iteration and not self.centroids.equals(old_centroids):
            old_centroids = self.centroids

            # Assign against the current centroids, then move the centroids.
            self.labels = self.get_labels()
            self.centroids = self._centroids_from(self.labels)
            self.plot_clusters(self.data, self.labels, self.centroids, iteration)
            iteration += 1

1. Mean Color

In [None]:
# Keep only the mean-colour columns and drop rows with missing values
data1=df[['R','G','B']].dropna()

In [None]:
# Preview the raw colour features
data1.head()

In [None]:
# Min-max scale every column to the [0, 1] range
# NOTE(review): a column whose values are all equal would divide by zero here - confirm none exists
data1 = ((data1 - data1.min()) / (data1.max() - data1.min()))

In [None]:
# Preview the scaled colour features
data1.head()

In [None]:
# Cluster the colour features into 6 groups; start() plots each iteration
color=K_MEANS(data1,6)
color.start()

2. Texture

In [None]:
# Keep only the texture columns and drop rows with missing values
data2=df[['Variance','Energy','Entropy','Contrast','Homogeneity']].dropna()

In [None]:
# Preview the raw texture features
data2.head()

In [None]:
# Min-max scale every column to the [0, 1] range
# NOTE(review): a column whose values are all equal would divide by zero here - confirm none exists
data2 = ((data2 - data2.min()) / (data2.max() - data2.min()))

In [None]:
# Preview the scaled texture features
data2.head()

In [None]:
# Cluster the texture features into 6 groups; start() plots each iteration
textur=K_MEANS(data2,6)
textur.start()

# Results

In [None]:
#add the class to each image
df['Color_Clustring']=color.get_labels()
df['Textur_Clustring']=textur.get_labels()

In [None]:
# Display the feature table together with the two cluster columns
df