In [2]:
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array

ModuleNotFoundError: No module named 'distutils'

In [None]:
def load_and_preprocess_images(folder, target_size):
    """Load the images found directly inside ``folder`` and preprocess them.

    Entries are visited in sorted name order so that the returned array and
    filename list are reproducible between runs (``os.listdir`` makes no
    ordering guarantee). Sub-directories and hidden entries (names starting
    with ``.``, such as ``.ipynb_checkpoints`` or ``.DS_Store``) are skipped
    because they are not loadable image files.

    Args:
        folder: Directory to scan. The scan is not recursive.
        target_size: Forwarded to ``load_img`` as its ``target_size`` argument.

    Returns:
        A tuple ``(images, filenames)``. ``images`` is ``np.array`` built from
        the per-file results of ``preprocess_input(img_to_array(...))`` and
        ``filenames`` holds the matching file names in the same order.
    """
    images = []
    filenames = []
    for filename in sorted(os.listdir(folder)):
        filepath = os.path.join(folder, filename)
        # Skip directories and hidden bookkeeping files instead of letting
        # load_img fail on them.
        if filename.startswith('.') or not os.path.isfile(filepath):
            continue
        img = load_img(filepath, target_size=target_size)
        img = img_to_array(img)
        img = preprocess_input(img)
        images.append(img)
        filenames.append(filename)
    return np.array(images), filenames

In [None]:
def extract_features(images):
    """Run ``images`` through a headless MobileNetV2 and return its output.

    A new MobileNetV2 is constructed on every call with ImageNet weights,
    no classification top and global average pooling; the result of its
    ``predict`` call on ``images`` is returned unchanged.

    Args:
        images: Batch of already-preprocessed images accepted by
            ``model.predict``.

    Returns:
        Whatever ``predict`` produces for the batch (one pooled feature
        vector per image — NOTE(review): exact shape not shown here).
    """
    backbone = MobileNetV2(weights='imagenet', include_top=False, pooling='avg')
    return backbone.predict(images)

In [None]:
def perform_clustering(features, cluster_range, n_init=10):
    """Pick a cluster count by average silhouette score over KMeans restarts.

    For every candidate in ``cluster_range`` KMeans is fitted ``n_init``
    times with a fresh random initialisation. The candidate with the highest
    mean silhouette score wins, and the labels returned are those of the
    best-scoring individual run for that candidate (previously the labels of
    whichever run happened to execute last were returned, even if that run
    had not been scored).

    Args:
        features: Feature matrix with one row per sample.
        cluster_range: Iterable of candidate cluster counts.
        n_init: Number of independent KMeans fits per candidate.

    Returns:
        ``(best_n_clusters, best_labels)``. Both are ``None`` when no
        candidate produced a scorable clustering.
    """
    n_samples = len(features)
    best_score = -1
    best_n_clusters = None
    best_labels = None

    for n_clusters in cluster_range:
        # KMeans cannot fit more clusters than samples, and silhouette_score
        # needs at most n_samples - 1 distinct labels, so such candidates
        # can never be scored.
        if n_clusters >= n_samples:
            print(f"Skipping {n_clusters} clusters: only {n_samples} samples available")
            continue

        run_scores = []
        top_run_score = None
        top_run_labels = None
        for _ in range(n_init):
            kmeans = KMeans(n_clusters=n_clusters, random_state=None)
            labels = kmeans.fit_predict(features)
            # silhouette_score is only defined for 2 <= n_labels <= n_samples - 1.
            if 1 < len(set(labels)) < n_samples:
                score = silhouette_score(features, labels)
                run_scores.append(score)
                # Remember the labels of the strongest run for this candidate.
                if top_run_score is None or score > top_run_score:
                    top_run_score = score
                    top_run_labels = labels

        avg_score = np.mean(run_scores) if run_scores else -1
        print(f"Testing {n_clusters} clusters: Average Silhouette Score = {avg_score}")

        if avg_score > best_score:
            best_score = avg_score
            best_n_clusters = n_clusters
            best_labels = top_run_labels

    return best_n_clusters, best_labels

In [None]:
def display_clustered_images(images, labels, filenames):
    """Show one row of side-by-side images per cluster and list its files.

    Rows are built from the label values actually present in ``labels``, so
    a missing label id no longer hides a later cluster, and the axes grid is
    requested with ``squeeze=False`` so a single cluster still yields an
    indexable array of axes.

    Args:
        images: Sequence of image arrays of equal height; each row is made
            with ``np.hstack`` and cast to ``uint8`` for display
            (assumes displayable 0-255 pixel values — TODO confirm at caller).
        labels: Cluster label for each image, aligned with ``images``.
        filenames: File name for each image, aligned with ``images``.

    Returns:
        None. Draws a matplotlib figure and prints the file names per cluster.
    """
    labels = np.asarray(labels)
    cluster_ids = sorted(set(labels.tolist()))
    n_clusters = len(cluster_ids)

    # squeeze=False always returns a 2-D array of axes, even for one row.
    fig, axs = plt.subplots(n_clusters, figsize=(15, n_clusters*3), squeeze=False)
    axs = axs[:, 0]

    for row, cluster_id in enumerate(cluster_ids):
        cluster_images = [images[j] for j in np.flatnonzero(labels == cluster_id)]
        combined_image = np.hstack(cluster_images)
        axs[row].imshow(combined_image.astype('uint8'))
        axs[row].set_title(f'Cluster {cluster_id+1}')
        axs[row].axis('off')
    plt.show()

    for cluster_id in cluster_ids:
        print(f"Cluster {cluster_id+1}: {[filename for j, filename in enumerate(filenames) if labels[j] == cluster_id]}")

In [None]:
image_directory = 'E7-images'
image_size = (224, 224)  # Size expected by MobileNetV2

# Load and preprocess images
images, filenames = load_and_preprocess_images(image_directory, image_size)

# Extract features using a CNN
features = extract_features(images)

# Cluster images based on extracted features
cluster_range = range(2, 10)  # Range of cluster sizes to test
best_n_clusters, best_labels = perform_clustering(features, cluster_range)

# Display best clustering results
print(f"Best number of clusters: {best_n_clusters}")
if best_labels is None:
    # perform_clustering returns (None, None) when no candidate could be scored.
    print("No valid clustering was found; nothing to display.")
else:
    # MobileNetV2's preprocess_input rescales pixels to [-1, 1]; map them back
    # to 0-255 so the uint8 cast in the display routine shows real images.
    display_images = np.clip((images + 1.0) * 127.5, 0, 255)
    display_clustered_images(display_images, best_labels, filenames)

  model = MobileNetV2(weights='imagenet', include_top=False, pooling='avg')


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


Exception: URL fetch failure on https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5: None -- [Errno -3] Temporary failure in name resolution