In [4]:
import os
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from keras._tf_keras.keras.preprocessing import image
from keras._tf_keras.keras.applications.resnet50 import ResNet50, preprocess_input
from keras._tf_keras.keras.models import Model


def extract_features(img_path, model):
    """Extracts features from an image using a CNN model."""
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = preprocess_input(img_array)
    features = model.predict(img_array)
    return features.flatten()

def cluster_images(test_images_folder: str, output_csv: str, num_clusters=5):
    """
    Clusters test images into categories based on visual similarity.
    
    Parameters:
        test_images_folder (str): Path to the folder containing test images.
        output_csv (str): Path to save the output CSV file with predictions.
        num_clusters (int): Number of clusters to form.
    """
    # Load pre-trained model (ResNet50 without top layers for feature extraction)
    base_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')
    model = Model(inputs=base_model.input, outputs=base_model.output)
    
    # Get all image filenames
    image_filenames = [f for f in os.listdir(test_images_folder) if f.lower().endswith(('png', 'jpg', 'jpeg'))]
    image_paths = [os.path.join(test_images_folder, f) for f in image_filenames]
    
    # Extract features for each image
    features = np.array([extract_features(img_path, model) for img_path in image_paths])
    
    # Apply KMeans clustering
    kmeans = KMeans(n_clusters=num_clusters, random_state=42)
    categories = kmeans.fit_predict(features)
    
    # Create a DataFrame and save to CSV
    df = pd.DataFrame({'filename': image_filenames, 'category': categories})
    df.to_csv(output_csv, index=False)
    
    print(f"Clustered classifications saved to {output_csv}")

# Example usage
cluster_images("Test", "predictions3.csv")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms