In [1]:
import os
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.cluster import KMeans

In [2]:
# --- Clustering hyper-parameters -------------------------------------------
# How many groups K-Means should split the images into.
num_clusters = 3
# Number of images pushed through the feature extractor per batch.
batch_size = 32
# (height, width) every image is resized to before feature extraction.
image_size = (224, 224)

# --- Locations on the mounted Google Drive ---------------------------------
# Folder holding the images to be clustered.
input_folder = '/content/drive/MyDrive/PF Tasks/Unsupervised/Updated folder'
# Folder that receives one `cluster_<i>` sub-directory per cluster.
output_folder = '/content/drive/MyDrive/PF Tasks/Unsupervised/Output'

In [3]:
# Function for Clustering
def cluster_images(input_folder, output_folder, num_clusters, image_size, batch_size):
    """Cluster the JPEG images of a folder by visual similarity.

    Every ``.jpg`` / ``.jpeg`` file (case-insensitive) directly inside
    ``input_folder`` is embedded with an ImageNet-pretrained VGG16 (without
    its classification head), the flattened embeddings are partitioned with
    K-Means, and each image is copied into
    ``<output_folder>/cluster_<label>/``.

    Args:
        input_folder: Directory containing the images to cluster.
        output_folder: Directory in which the ``cluster_<i>`` sub-folders
            are created (existing folders are reused, not emptied, so files
            from a previous run stay in place).
        num_clusters: Number of K-Means clusters.
        image_size: ``(height, width)`` the images are resized to.
        batch_size: Number of images per prediction batch.

    Returns:
        None. The result is the set of files copied to ``output_folder``.

    Raises:
        ValueError: If no JPEG image is found, or if fewer usable images
            than ``num_clusters`` are available.
    """
    # Sort the listing: os.listdir() returns entries in arbitrary order, and
    # the sample order influences the K-Means initialisation.
    image_files = sorted(
        name for name in os.listdir(input_folder)
        if name.lower().endswith(('.jpg', '.jpeg'))
    )
    if not image_files:
        raise ValueError(f'No .jpg/.jpeg images found in {input_folder!r}')

    df = pd.DataFrame({'filename': image_files})

    # Input normalisation only (no augmentation). The ImageNet VGG16 weights
    # expect the model's own preprocessing (RGB -> BGR, per-channel mean
    # subtraction) rather than a plain 1/255 rescale.
    datagen = ImageDataGenerator(
        preprocessing_function=tf.keras.applications.vgg16.preprocess_input
    )
    image_generator = datagen.flow_from_dataframe(
        dataframe=df,
        x_col='filename',
        y_col=None,
        directory=input_folder,
        target_size=image_size,
        batch_size=batch_size,
        class_mode=None,
        shuffle=False  # keep embeddings in the same order as the filenames
    )

    # flow_from_dataframe silently drops entries it cannot validate, so the
    # generator's own filename list (not `image_files`) is what lines up
    # one-to-one with the predicted embeddings.
    valid_files = list(image_generator.filenames)
    if len(valid_files) < num_clusters:
        raise ValueError(
            f'Need at least {num_clusters} usable images to form '
            f'{num_clusters} clusters, found {len(valid_files)}'
        )

    # Generate image embeddings using the base model
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=(*image_size, 3))
    image_embeddings = base_model.predict(image_generator, steps=len(image_generator), verbose=1)
    # Flatten each feature map into a single vector per image.
    image_embeddings = image_embeddings.reshape(image_embeddings.shape[0], -1)

    # K-MEANS clustering
    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    cluster_labels = kmeans.fit_predict(image_embeddings)

    # Create output folders if they don't exist
    os.makedirs(output_folder, exist_ok=True)
    for i in range(num_clusters):
        os.makedirs(os.path.join(output_folder, f'cluster_{i}'), exist_ok=True)

    # Copy images to corresponding subfolders based on cluster assignments
    for image_filename, cluster in zip(valid_files, cluster_labels):
        image_path = os.path.join(input_folder, image_filename)
        shutil.copy(image_path, os.path.join(output_folder, f'cluster_{cluster}', image_filename))

# Run the clustering with the settings from the configuration cell.
cluster_images(
    input_folder=input_folder,
    output_folder=output_folder,
    num_clusters=num_clusters,
    image_size=image_size,
    batch_size=batch_size,
)

Found 625 validated image filenames.


