In [1]:
import random
import numpy as np
import keras
from keras import layers, regularizers
from tensorflow.keras.datasets import cifar10
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score,davies_bouldin_score
import cv2

In [2]:
(X_train,Y_train),(X_test,Y_test) = cifar10.load_data()
print(X_train.shape,Y_train.shape)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
(50000, 32, 32, 3) (50000, 1)


In [3]:
X_train=np.array([cv2.cvtColor(image,cv2.COLOR_BGR2GRAY) for image in X_train])
X_test=np.array([cv2.cvtColor(image,cv2.COLOR_BGR2GRAY) for image in X_test])
print(X_train.shape,X_test.shape)

(50000, 32, 32) (10000, 32, 32)


In [4]:
X_train=X_train.astype('float32')/255
X_train=X_train.reshape(-1, 1024)
X_test=X_test.astype('float32')/255
X_test=X_test.reshape(-1, 1024)
print(X_train.shape,X_test.shape)

(50000, 1024) (10000, 1024)


In [28]:
#Define initial 'k' clusters and the random centroids for those clusters
no_of_clusters=10
rand_centroids=[]
for i in range(no_of_clusters):
  rand_centroids.append(random.choices(X_test,k=1))

# Choosing the number of epochs as 100
# In every iteration 
# We find the corresponding cluster to everydata point
# We find the updated centroids by calculating the mean of the datapoints of a specific cluster
# We find the difference between the previous and current centroids
# We repeat this process for 100 epochs
centroid_difference=0
for a in range(50):
  clusters=[]
  for point in range(len(X_test)):
    distance=[]
    for key in range(no_of_clusters):
      distance.append(np.linalg.norm(np.subtract(rand_centroids[key],X_test[point])))
    minimal_point=min(distance)
    clusters.append(distance.index(minimal_point))

  updated_centroids=[]
  for i in range(no_of_clusters):
    temparr=[]
    for j in range(len(X_test)):
      if i==clusters[j]:
        temparr.append(X_test[j])
    meancalc=np.mean(temparr,axis=0)
    updated_centroids.append(meancalc)
  
  for i in range(10):
    centroid_difference=centroid_difference+np.linalg.norm(np.subtract(updated_centroids[i],rand_centroids[i]))
  print(centroid_difference)
  rand_centroids=updated_centroids

print(rand_centroids)

44.484195947647095
53.62355720996857
57.94986592233181
60.803412325680256
63.2242536470294
65.72712045907974
68.1396910995245
70.40421818196774
72.31568369269371
73.96247686818242
75.32762971892953
76.48857041075826
77.57758621498942
78.53713365644217
79.30513370223343
80.11756676808
80.96386790648103
81.70517734624445
82.43023099005222
83.13408371992409
83.6894020140171
84.17601776495576
84.6337421219796
85.10735060553998
85.52347273100168
85.91290524788201
86.33354602381587
86.75718922633678
87.1557335127145
87.50230891443789
87.84599381685257
88.14987732563168
88.41388397756964
88.68013474950567
88.94569690944627
89.23907299293205
89.48203652352095
89.72800798807293
89.95237512979656
90.13451175857335
90.25183322094381
90.36808514082804
90.46575239766389
90.54966098628938
90.65538772428408
90.75433574151248
90.8436343879439
90.92180966306478
91.00770346727222
91.12328267190605
[array([0.620215  , 0.62417763, 0.63257235, ..., 0.32135722, 0.32404596,
       0.33037263], dtype=float32)

In [29]:
SH_score=silhouette_score(X_test,clusters)
print('Silhouette_score:', SH_score)

Silhouette_score: 0.061666515


In [30]:
DB_score=davies_bouldin_score(X_test, clusters)
print('Davis_Bouldin_score:', DB_score)

Davis_Bouldin_score: 2.722823529571989


In [31]:
#This is a representation of an input image
input_image=layers.Input(shape=(1024),name='input_layer')

#Creation of 2 Dense layers as encoding layer1(with 80 neurons) and encoding layer2(with 10 neurons)
#RELU activation function is used in both the layers
encoding_layer1=layers.Dense(80,activation='relu')(input_image)
encoding_layer2=layers.Dense(10,activation='relu')(encoding_layer1)

#Creation of an encoder model with the layers created
encoder_model=keras.Model(input_image,encoding_layer2)

In [32]:
encoder_model.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_layer (InputLayer)    [(None, 1024)]            0         
                                                                 
 dense_4 (Dense)             (None, 80)                82000     
                                                                 
 dense_5 (Dense)             (None, 10)                810       
                                                                 
Total params: 82,810
Trainable params: 82,810
Non-trainable params: 0
_________________________________________________________________


In [33]:
#decoder input modelling
decoder_input=layers.Input(shape=(10),name='decoder_input_layer')

#Creation of 2 Dense layers as encoding layer1(with 80 neurons) and encoding layer2(with 1024 neurons)
#RELU activation function is used in both the layers
decoding_layer1=layers.Dense(80,activation='relu')(decoder_input)
decoding_layer2=layers.Dense(1024,activation='linear')(decoding_layer1)

#Creation of an decoder model with the layers created
decoder_model=keras.Model(decoder_input,decoding_layer2)


In [34]:
decoder_model.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 decoder_input_layer (InputL  [(None, 10)]             0         
 ayer)                                                           
                                                                 
 dense_6 (Dense)             (None, 80)                880       
                                                                 
 dense_7 (Dense)             (None, 1024)              82944     
                                                                 
Total params: 83,824
Trainable params: 83,824
Non-trainable params: 0
_________________________________________________________________


In [35]:
#With the help of encoder and decoder model we are building autoencoder model
ip_image_format=layers.Input(shape=(1024))

#Input of encoder model and decoder model and modelling the autoencoder
autoencoder_input_encoded=encoder_model(ip_image_format)
autoencoder_output_decoded=decoder_model(autoencoder_input_encoded)
automodel=keras.Model(ip_image_format,autoencoder_output_decoded)

#Compiling the built model
automodel.compile(optimizer='adam',loss='mean_squared_error')

In [36]:
automodel.fit(X_train,X_train,epochs=50,batch_size=1024,shuffle=True)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7fb278014e10>

In [37]:
image_encoded=encoder_model.predict(X_train)
image_decoded=decoder_model.predict(image_encoded)

In [38]:
Clustering=KMeans(n_clusters=10, random_state=0).fit(image_encoded)

In [39]:
clusters=Clustering.labels_
SH_score=silhouette_score(image_encoded, clusters)
print(SH_score)

0.15147403


In [40]:
DB_score=davies_bouldin_score(X_train, clusters)
print(DB_score)

2.7334847307592964
