In [6]:
from keras.preprocessing.image import load_img 
from keras.preprocessing.image import img_to_array 
from keras.applications.vgg16 import preprocess_input 

In [7]:
from keras.applications.vgg16 import VGG16 
from keras.models import Model

In [8]:
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

In [9]:
import os
import numpy as np
import matplotlib.pyplot as plt
from random import randint
import pandas as pd
import pickle

In [10]:
import warnings
warnings.filterwarnings('ignore')

In [13]:
# Folder holding the flower images. The notebook switches into it so that
# later cells can open files by bare/relative name (the image names collected
# below, and 'flower_labels.csv').
path = r"D:\Downloads\flower_images\flower_images"
os.chdir(path)

# Collect the names of the PNG files in the folder. os.scandir yields entries
# in arbitrary order, so the names are sorted: the row order of the feature
# matrix built later (and hence the seeded PCA/KMeans results) is then the
# same on every run and machine. Non-file entries are skipped so that a
# directory whose name happens to end in '.png' is never passed to load_img.
with os.scandir(path) as files:
    flowers = sorted(
        entry.name
        for entry in files
        if entry.is_file() and entry.name.endswith('.png')
    )

In [14]:
# Load the first collected image, resized to 224x224, and turn it into a
# NumPy array so its shape can be inspected.
first_image = load_img(flowers[0], target_size=(224, 224))
img = np.array(first_image)
print(img.shape)
(224, 224, 3)

(224, 224, 3)


(224, 224, 3)

In [15]:
# Add a leading batch axis: (224, 224, 3) -> (1, 224, 224, 3).
reshaped_img = np.reshape(img, (1, 224, 224, 3))
print(reshaped_img.shape)
(1, 224, 224, 3)

(1, 224, 224, 3)


(1, 224, 224, 3)

In [16]:
# Apply the VGG16 input preprocessing (preprocess_input imported from
# keras.applications.vgg16) to the one-image batch.
# NOTE(review): the name `x` is reassigned further down to the PCA-transformed
# feature matrix; no visible cell reads this value before that happens.
x = preprocess_input(reshaped_img)

In [17]:
# Build the complete VGG16 network (its weights are downloaded on first use).
vgg16_full = VGG16()
# Re-wire it so the output is taken from the second-to-last layer: the final
# output layer is dropped and the resulting model is used as a feature extractor.
model = Model(inputs=vgg16_full.inputs, outputs=vgg16_full.layers[-2].output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5
553467096/553467096 ━━━━━━━━━━━━━━━━━━━━ 319s 1us/step


In [None]:
def extract_features(file, model):
    """Return the features ``model`` predicts for a single image file.

    Args:
        file: Path of the image to load (a bare name is resolved against the
            current working directory).
        model: Keras model used as the feature extractor. It is called with
            one preprocessed image batch of shape (1, 224, 224, 3).

    Returns:
        The array returned by ``model.predict`` for that one-image batch.
        Later cells reshape the collected results to 4096 values per image.
    """
    # Load the image resized to 224x224 and convert it to a NumPy array.
    img = np.array(load_img(file, target_size=(224, 224)))

    # Add the leading batch axis: (224, 224, 3) -> (1, 224, 224, 3).
    reshaped_img = img.reshape(1, 224, 224, 3)

    # Apply the VGG16-specific input preprocessing.
    imgx = preprocess_input(reshaped_img)

    # `use_multiprocessing` is intentionally not passed: it only ever applied
    # to generator/Sequence inputs, and Keras 3's Model.predict() no longer
    # accepts it (TypeError), which made every extraction fail.
    # verbose=0 avoids printing one progress bar per image.
    features = model.predict(imgx, verbose=0)
    return features
   
# Maps image file name -> feature array returned by extract_features.
data = {}
# Maps image file name -> exception raised while extracting its features.
failed = {}
# Destination of the pickled feature dictionary.
p = r"D:\Downloads\flower_features1.pkl"

try:
    for flower in flowers:
        try:
            feat = extract_features(flower, model)
            data[flower] = feat
        except Exception as exc:
            # Skip the image but make the failure visible. The previous bare
            # `except:` hid every error (and also swallowed KeyboardInterrupt,
            # so the loop could not be stopped).
            failed[flower] = exc
            print(f"Skipping {flower}: {exc!r}")
finally:
    # Always persist what was extracted: after a complete run, and also as a
    # checkpoint if the loop is interrupted. Previously the file was written
    # only when an image failed, so a fully successful run saved nothing.
    with open(p, 'wb') as fh:
        pickle.dump(data, fh)

if failed:
    print(f"Feature extraction failed for {len(failed)} of {len(flowers)} images")
 

# Image names, in the insertion order of the `data` dictionary.
filenames = np.array([name for name in data.keys()])

# Stack the per-image feature arrays (same order as `filenames`) and flatten
# them so that each image is one row of 4096 values.
feat = np.array([vector for vector in data.values()]).reshape(-1, 4096)


# Read the labels file (resolved against the current working directory) and
# derive the distinct label values from its 'label' column.
df = pd.read_csv('flower_labels.csv')
label = df.loc[:, 'label'].to_list()
unique_labels = [*set(label)]


# Fit a 100-component PCA (fixed seed) on the feature matrix, then project the
# same matrix onto those components; `x` is the reduced representation that
# the clustering cells work on.
pca = PCA(n_components=100, random_state=22).fit(feat)
x = pca.transform(feat)


# Cluster the PCA-reduced features into as many groups as there are distinct
# labels in the labels file.
# `n_jobs` is no longer passed: it was deprecated in scikit-learn 0.23 and
# removed in 1.0, where it raises a TypeError.
# `n_init` is pinned to 10 (the long-standing default) so the result does not
# depend on which scikit-learn version's default is in effect.
kmeans = KMeans(n_clusters=len(unique_labels), n_init=10, random_state=22)
kmeans.fit(x)


# Bucket the image names by the cluster id KMeans assigned to them:
# groups[cluster_id] is the list of file names in that cluster, in the order
# they appear in `filenames`.
groups = {}
for name, cluster_id in zip(filenames, kmeans.labels_):
    groups.setdefault(cluster_id, []).append(name)

        
def view_cluster(cluster):
    """Plot the images assigned to ``cluster``, showing at most 30 of them.

    Args:
        cluster: Key into the module-level ``groups`` dict (a cluster id as
            stored from ``kmeans.labels_``).

    Raises:
        KeyError: If ``cluster`` is not a key of ``groups``.
    """
    max_images = 30

    plt.figure(figsize=(25, 25))

    files = groups[cluster]

    # Large clusters are truncated to the first `max_images` files.
    if len(files) > max_images:
        print(f"Clipping cluster size from {len(files)} to {max_images}")
        # The slice end is exclusive, so [:max_images] keeps exactly that many
        # images (the previous [:29] kept 29 while the message reported 30).
        files = files[:max_images]

    for index, file in enumerate(files):
        # Subplot positions are 1-based within the 10x10 grid.
        plt.subplot(10, 10, index + 1)
        img = np.array(load_img(file))
        plt.imshow(img)
        plt.axis('off')
        
   

# Elbow search: fit KMeans on the PCA-reduced features for k = 3..49 and
# record each fit's inertia_ so the values can be plotted against k.
sse = []
list_k = list(range(3, 50))

for k in list_k:
    # `n_jobs` is no longer passed: it was deprecated in scikit-learn 0.23 and
    # removed in 1.0, where it raises a TypeError. `n_init` is pinned to 10
    # (the long-standing default) so the curve does not depend on the
    # installed scikit-learn version's default.
    km = KMeans(n_clusters=k, n_init=10, random_state=22)
    km.fit(x)

    sse.append(km.inertia_)


# Elbow curve: recorded inertia for each candidate number of clusters.
fig, ax = plt.subplots(figsize=(6, 6))
ax.plot(list_k, sse)
ax.set_xlabel(r'Number of clusters *k*')
ax.set_ylabel('Sum of squared distance')
plt.show()