Notebook to categorize shoes, boots, and sandals from a large pool of unlabelled images.

In [None]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from sklearn.cluster import KMeans

# Function to preprocess and extract features from images using ResNet50
def extract_features(images):
    """Extract flattened ResNet50 feature vectors for a batch of images.

    Each image is resized to 224x224, converted from OpenCV's BGR channel
    order to RGB, passed through the ResNet ImageNet preprocessing, and fed
    to a headless (``include_top=False``) ResNet50.

    Args:
        images: Sequence of 3-channel image arrays in BGR channel order, as
            returned by ``cv2.imread``.

    Returns:
        A 2-D array with one row per input image, holding that image's
        flattened convolutional feature map. An empty input yields an array
        with zero rows.

    Raises:
        ValueError: If any entry of ``images`` is ``None`` (``cv2.imread``
            returns ``None`` for files it cannot decode).
    """
    # Fail early with a readable message instead of an opaque cv2.resize error.
    missing = [idx for idx, img in enumerate(images) if img is None]
    if missing:
        raise ValueError(
            f'{len(missing)} image(s) are None (first at index {missing[0]}); '
            'cv2.imread returns None for files it cannot decode'
        )

    model = ResNet50(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
    preprocess_input = tf.keras.applications.resnet.preprocess_input

    # len() rather than truthiness so a NumPy array of images is accepted too.
    if len(images) == 0:
        feature_dim = int(np.prod(model.output_shape[1:]))
        return np.empty((0, feature_dim), dtype=np.float32)

    # preprocess_input expects RGB and performs the RGB->BGR flip itself, so
    # OpenCV's BGR images must be converted to RGB first; otherwise the
    # network sees red and blue swapped.
    processed_images = np.array([
        cv2.cvtColor(cv2.resize(img, (224, 224)), cv2.COLOR_BGR2RGB)
        for img in images
    ])
    processed_images = preprocess_input(processed_images)

    image_features = model.predict(processed_images)
    # Flatten each per-image feature map into a single row vector.
    return image_features.reshape(image_features.shape[0], -1)

# Function to move images to corresponding folders based on cluster assignments
def move_images_to_folders(images, cluster_assignments, output_folder):
    """Write each image into a per-cluster subfolder of ``output_folder``.

    Despite the name, nothing is moved: every image array is encoded and
    written to ``cluster_<id>/image_<index>.jpg``, where ``<index>`` is the
    image's position in ``images``. Folders are created as needed.

    Args:
        images: Sequence of image arrays accepted by ``cv2.imwrite``.
        cluster_assignments: One cluster label per image, aligned with
            ``images``.
        output_folder: Directory under which the ``cluster_<id>`` folders are
            created.

    Warns:
        UserWarning: If ``cv2.imwrite`` reports failure for any image.
    """
    import warnings

    cluster_assignments = np.asarray(cluster_assignments)
    failed_paths = []

    for cluster_id in np.unique(cluster_assignments):
        cluster_folder = os.path.join(output_folder, f'cluster_{cluster_id}')
        os.makedirs(cluster_folder, exist_ok=True)

        for idx in np.where(cluster_assignments == cluster_id)[0]:
            img_path = os.path.join(cluster_folder, f'image_{idx}.jpg')
            # cv2.imwrite signals failure through its return value; record
            # it rather than letting the write fail silently.
            if not cv2.imwrite(img_path, images[idx]):
                failed_paths.append(img_path)

    if failed_paths:
        warnings.warn(
            f'Failed to write {len(failed_paths)} image(s); first: {failed_paths[0]}'
        )

# Load every readable image from the data folder
data_folder = '/content/drive/MyDrive/Data-PF/shoe_boot_sandal/shoe_boot_sandal'

# os.listdir returns entries in arbitrary order; sort them so the index-based
# output names (image_<idx>.jpg) are reproducible between runs.
candidate_files = [
    os.path.join(data_folder, filename)
    for filename in sorted(os.listdir(data_folder))
]

# cv2.imread returns None instead of raising for anything it cannot decode
# (subdirectories, non-image files, corrupt images). Skip those so feature
# extraction does not crash, and keep image_files aligned with images.
image_files = []
images = []
skipped_files = []
for img_file in candidate_files:
    img = cv2.imread(img_file)
    if img is None:
        skipped_files.append(img_file)
    else:
        image_files.append(img_file)
        images.append(img)

if skipped_files:
    print(f'Skipped {len(skipped_files)} unreadable file(s), e.g. {skipped_files[0]}')

# Extract features using ResNet50
image_features = extract_features(images)

# Perform K-Means clustering (three groups: shoes, boots, sandals)
kmeans = KMeans(n_clusters=3, random_state=42)
cluster_assignments = kmeans.fit_predict(image_features)

# Write images to per-cluster folders based on cluster assignments
output_folder = '/content/drive/MyDrive/Data-PF/categorized images'
move_images_to_folders(images, cluster_assignments, output_folder)
