In [5]:
"""

This code processes images to classify them based on their average color into three clusters: red, green, and blue.

Loads images and converts them to RGB format.
Calculates the average color by sampling 100 random pixels from the image and computing the mean of their RGB values.
Assigns each image to the red, green, or blue cluster, whichever pure reference color is nearest to the image's average color by Euclidean distance.
Outputs a dictionary that maps each cluster to the images that belong to it.

"""

from PIL import Image
import numpy as np

# Directory that holds the input files image1.png ... image6.png.
image_folder = 'data/'

# Build the six file paths lazily, then open each file in turn and convert it
# to RGB so every pixel has exactly three channels.
image_paths = (f'{image_folder}image{i}.png' for i in range(1, 7))
images = [Image.open(path).convert('RGB') for path in image_paths]

def average_color(image, sample_size=100):
    """Estimate the average color of an image from a random sample of pixels.

    Args:
        image: A PIL image, or any array-like that ``np.array`` converts to a
            3-D array whose last axis holds the channel values.
        sample_size: Maximum number of distinct pixels to sample. When the
            image has fewer pixels than this, every pixel is used.

    Returns:
        A 1-D array holding the per-channel mean of the sampled pixels.

    Raises:
        ValueError: If the image contains no pixels.
    """
    data = np.array(image)
    # Collapse the leading axes so each row is one pixel's channel values.
    pixels = data.reshape(-1, data.shape[2])
    pixel_count = pixels.shape[0]
    if pixel_count == 0:
        raise ValueError("cannot compute the average color of an image with no pixels")
    # Sampling without replacement requires the sample to be no larger than
    # the population; clamp so small images do not raise.
    count = min(sample_size, pixel_count)
    indices = np.random.choice(pixel_count, count, replace=False)
    return pixels[indices].mean(axis=0)

# Maps each cluster name to the 1-based numbers of the images assigned to it.
clusters = {'red': [], 'green': [], 'blue': []}

# Pure reference colors, listed in tie-break order: min() keeps the first of
# several equally near entries, so blue wins over green, and green over red,
# when distances are exactly equal.
reference_colors = (
    ('blue', np.array([0, 0, 255])),
    ('green', np.array([0, 255, 0])),
    ('red', np.array([255, 0, 0])),
)

for i, img in enumerate(images):
    color = average_color(img)
    # Pick the reference whose Euclidean distance to the average color is smallest.
    nearest, _ = min(
        reference_colors,
        key=lambda reference: np.linalg.norm(color - reference[1]),
    )
    clusters[nearest].append(i + 1)

print(clusters)


{'red': [2, 5], 'green': [4, 6], 'blue': [1, 3]}
