In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
## https://scikit-learn.org/stable/modules/clustering.html

import numpy as np
import pandas as pd
import os
from glob import glob
from skimage import io,color
from skimage import filters
from skimage import exposure

import warnings
warnings.filterwarnings("ignore")



images_folder = r'/content/drive/MyDrive/cropped'
dirc=os.listdir(images_folder)


In [6]:
#Image Processing
def angle(dx, dy):
    return np.mod(np.arctan2(dy, dx), np.pi)

X = []
Y = []
for idx,name in enumerate(dirc):
    path=os.path.join(images_folder,name)
    for file in os.listdir(path):
        img   = io.imread(os.path.join(path,file.strip()))
        gry_img=color.rgb2gray(img)
        imge = angle(filters.sobel_h(gry_img),filters.sobel_v(gry_img))
        hist,_=exposure.histogram(imge, nbins=36)
        hist=hist/np.sum(hist)
        X.append(hist)
        Y.append(idx)
X=np.array(X)
Y=np.array(Y)

In [7]:
from sklearn.decomposition import PCA
PCA_model = PCA(2)
X= PCA_model.fit_transform(X)

In [8]:
from sklearn.cluster import KMeans,BisectingKMeans,SpectralClustering,AgglomerativeClustering,DBSCAN
from sklearn.metrics import silhouette_score,fowlkes_mallows_score
for k in ['random','k-means++']:
    km=KMeans(n_clusters=4, random_state=42, init=k).fit(X).labels_
    print(k+ ':F: ' +str(fowlkes_mallows_score(Y,km))+ " :S:"+str(silhouette_score(X,km))+'\n')
for k in ['single','complete','average','ward']:
    agg=AgglomerativeClustering(n_clusters=4,linkage=k).fit(X).labels_
    print(k+ ':F: ' +str(fowlkes_mallows_score(Y,agg))+ " :S:"+str(silhouette_score(X,agg))+'\n')
bisecting=BisectingKMeans(n_clusters=4, random_state=42, init='random').fit(X).labels_
print('bisectingkmeans'+ ':F: ' +str(fowlkes_mallows_score(Y,bisecting))+ " :S:"+str(silhouette_score(X,bisecting))+'\n')
spec=SpectralClustering(n_clusters=4, random_state=42).fit(X).labels_
print('SpectralClustering'+ ':F: ' +str(fowlkes_mallows_score(Y,spec))+ " :S:"+str(silhouette_score(X,spec))+'\n')
dbscan = DBSCAN(eps=0.03, min_samples=5).fit(X).labels_
print('DBSCAN'+ ':F: ' +str(fowlkes_mallows_score(Y,dbscan))+ " :S:"+str(silhouette_score(X,dbscan))+'\n')

random:F: 0.28840832681999923 :S:0.3783284217005333

k-means++:F: 0.28840832681999923 :S:0.3783284217005333

single:F: 0.49780673783874707 :S:0.6890183012316408

complete:F: 0.3853636857095279 :S:0.3866889309287012

average:F: 0.4859862073148134 :S:0.6791331546361979

ward:F: 0.2841054987140208 :S:0.33602009982182524

bisectingkmeans:F: 0.28254937401293506 :S:0.29517292653955085

SpectralClustering:F: 0.3393801542504846 :S:0.0025110900404269935

DBSCAN:F: 0.4985860578905427 :S:0.7966554193322181



## to get 4 clusters eps= 0.03 and min-samples = 5 are used in dbscan

## Ranking models from best to worst as per fowlkes score :
#### single,dbscan,average,complete,ward,k-means++,bisectingkmeans,spectralclustering,random kmeans.

## Ranking models from best to worst as per silhouette score :
#### single,dbscan,average,complete,ward,k-means++,bisectingkmeans,spectralclustering,random kmeans.