In [22]:
from keras.applications import VGG16
import numpy as np
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from sklearn.cluster import KMeans
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
import pandas as pd
from sklearn.metrics import homogeneity_score
from sklearn.metrics import silhouette_score

Preprocessing and Feature Extraction

Using the VGG16 model without its fully connected layers.
For each image:
1) Load the PNG file and resize the image to a fixed size of 224x224 pixels
2) Convert the image data into a NumPy array
3) Apply preprocess_input to reorder the color channels (RGB to BGR) and zero-center each channel with the ImageNet means
4) Use the VGG16 model to extract features

In [23]:
# Convolutional base of VGG16 (ImageNet weights, no fully connected head),
# used here only as a fixed feature extractor.
model = VGG16(weights='imagenet', include_top=False)

# One flattened feature vector per image, in file-number order (0001.png ... 0210.png).
all_features = []
for i in range(1, 211):
    # File names carry a zero-padded, four-digit index.
    flower_num = f'{i:04d}'
    img_path = 'flower_images/' + flower_num + '.png'
    img_data = image.load_img(img_path, target_size=(224, 224))
    # Convert the PIL image to a float array explicitly, then add the batch axis
    # that the model expects in front of the image dimensions.
    img_array = np.expand_dims(image.img_to_array(img_data), axis=0)
    img_preprocessed = preprocess_input(img_array)
    # verbose=0 keeps the per-image progress bar from flooding the cell output.
    features = model.predict(img_preprocessed, verbose=0)
    flattened_features = features.flatten()
    all_features.append(flattened_features)




In [24]:
# Project the flattened VGG16 features onto their first two principal components.
# For a small n_components on a wide feature matrix scikit-learn may select the
# randomized SVD solver, so random_state is fixed to make the projection reproducible.
pca = PCA(n_components=2, random_state=42)
features_pca = pca.fit_transform(np.asarray(all_features))

In [34]:
# Cluster the 2-D PCA projection into 12 groups.
# K-Means starts from random centroids: random_state pins the result so the
# cluster ids and the scores computed from them are the same on every run, and
# n_init is spelled out because its default differs between scikit-learn versions.
kmeans = KMeans(n_clusters=12, n_init=10, random_state=42)
clustering_kmeans = kmeans.fit_predict(features_pca)
print(clustering_kmeans)

[ 3  3  2  1  3  6  8  3  6  3 10  3  1  8 11  6  6  1  1  8  2  5  8  2
 10  4  2  0  8  4  6  1 10  0  1  3 10  6  9  6  3  6  8  6  3  5  8  3
  1  6  1  5 11 10  3  4 10  8  3  1  9  3  8 11 10  0  1  3  4  7  8  3
 10  6  3  3  4  8  3  1  6  7  1 10  6 10  9  5  4  5  8  6 10  9  6 10
  8 10  3  6  6  2  7 10  6 11  6 10  6  6 10  7  2  8 10  3  1  4  3 11
 10  2  5 10  6  6 11  3  3  4  0  2  6  5  0  8  3  4  9  7 10  7  3 10
  2 11  8  5  1  3 11 11  8 10  3  3  3  0 10  3  8  3  3  8  3  1  8  6
  6 10  6  6  6  8  3 10  6  5  8  8  6 10  3  1  6  8  1  6  1 11  5  8
  3 10  3  3  1 10  4  5  3  6  8  6  5  4  3  7  1  6]


In [35]:
# Density-based clustering of the 2-D PCA projection.
# Points that do not belong to any dense region receive the label -1.
dbscan = DBSCAN(min_samples=3, eps=50)
dbscan.fit(features_pca)
clustering_dbscan = dbscan.labels_
print(clustering_dbscan)

[ 0  0 -1  1  0  0  0  0  0  0  0  0  1  2 -1  0  0  1 -1  0 -1 -1 -1 -1
 -1 -1 -1 -1 -1  3  0  1  0 -1 -1  0  0  0 -1  0  0  0  0  0  0 -1 -1  0
  1  0 -1 -1 -1  0  0  3  4  0  0 -1 -1  0  2  5  0 -1 -1  0 -1 -1  0  0
  0  0  0  0  3  0 -1 -1  0 -1 -1  0  0 -1 -1 -1 -1 -1  0  0  0 -1  0  0
  0  0  0  0  0 -1 -1 -1  0 -1 -1  0  0  0  0 -1 -1  6 -1  0  1  3  0 -1
  4 -1 -1  4  0  0 -1  0  0  3 -1 -1  0 -1 -1  6  0 -1 -1 -1  0 -1  0  0
 -1  5 -1 -1 -1  0  5 -1 -1  0  0 -1  0 -1  0  0  6  0  0  0  0  1  2  0
  0  0  0  0  0  0  0  0  0 -1 -1 -1  0  0  0 -1  0  0 -1 -1 -1 -1 -1  6
  0  0  0  0 -1  4 -1 -1  0  0  2  0 -1 -1  0 -1 -1  0]


Loading the true labels from the CSV file

In [36]:
# Read the ground-truth class of every image from the labels file.
# NOTE(review): assumes the CSV rows are in the same order as the image files
# (0001.png ... 0210.png) used for feature extraction — confirm.
data_labels = pd.read_csv('flower_images/flower_labels.csv')
true_labels = np.array(data_labels['label'].tolist())


Homogeneity and silhouette scores for K-Means

In [37]:
# Homogeneity compares the K-Means assignment with the ground-truth labels.
homogeneity = homogeneity_score(labels_true=true_labels, labels_pred=clustering_kmeans)
print("Homogeneity Score for K-Means:", homogeneity)

# Silhouette needs no ground truth: it is computed from the PCA coordinates
# and the cluster assignment alone.
silhouette = silhouette_score(X=features_pca, labels=clustering_kmeans)
print("Silhouette Score for K-Means:", silhouette)

Homogeneity Score for K-Means: 0.4738918233455418
Silhouette Score for K-Means: 0.3905867595483577


Homogeneity and silhouette scores for DBSCAN

In [38]:
# NOTE(review): DBSCAN labels noise points -1, and both scores below treat -1
# as one more cluster — confirm that this is the intended evaluation.

# Homogeneity compares the DBSCAN assignment with the ground-truth labels.
homogeneity = homogeneity_score(labels_true=true_labels, labels_pred=clustering_dbscan)
print("Homogeneity Score for DBScan:", homogeneity)

# Silhouette is computed from the PCA coordinates and the cluster assignment alone.
silhouette = silhouette_score(X=features_pca, labels=clustering_dbscan)
print("Silhouette Score for DBScan:", silhouette)

Homogeneity Score for DBScan: 0.2042061118555482
Silhouette Score for DBScan: -0.06431070147847735
