In [5]:
# Cluster images by their mean color, estimated from a random sample of pixels

from PIL import Image
import numpy as np

# Directory prefix for the input files (image1.png ... image6.png).
image_folder = 'data/'

# Open the six numbered images and convert each one to RGB mode.
images = [
    Image.open(f'{image_folder}image{number}.png').convert('RGB')
    for number in range(1, 7)
]

def average_color(image, sample_size=100, rng=None):
    """Estimate the mean color of an image from a random sample of its pixels.

    Parameters
    ----------
    image : array-like
        Anything ``np.array`` converts to a 3-D array whose third axis holds
        the color channels (the callers in this notebook pass RGB PIL images).
    sample_size : int, default 100
        Number of distinct pixels to draw. If the image has no more than this
        many pixels, every pixel is used instead of raising.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, the global ``np.random`` state is
        used, so ``np.random.seed(...)`` keeps working as before.

    Returns
    -------
    numpy.ndarray
        1-D float array with the per-channel mean of the sampled pixels.

    Raises
    ------
    ValueError
        If ``sample_size`` is smaller than 1 or the image contains no pixels.
    """
    if sample_size < 1:
        # An empty sample would yield a NaN mean (plus a RuntimeWarning).
        raise ValueError("sample_size must be at least 1")

    data = np.array(image)
    pixels = data.reshape(-1, data.shape[2])
    n_pixels = pixels.shape[0]
    if n_pixels == 0:
        raise ValueError("image contains no pixels")

    # Sampling without replacement cannot draw more items than exist; for
    # small images the sample is simply the whole image.
    if sample_size >= n_pixels:
        return pixels.mean(axis=0)

    chooser = np.random if rng is None else rng
    indices = chooser.choice(n_pixels, sample_size, replace=False)
    return pixels[indices].mean(axis=0)

# Bucket each image (numbered from 1) under the pure primary color that is
# nearest, in Euclidean RGB distance, to its sampled mean color.
clusters = { 'red': [], 'green': [], 'blue': [] }
for number, img in enumerate(images, start=1):
    color = average_color(img)
    dist_red = np.linalg.norm(color - np.array([255, 0, 0]))
    dist_green = np.linalg.norm(color - np.array([0, 255, 0]))
    dist_blue = np.linalg.norm(color - np.array([0, 0, 255]))

    # Strict comparisons: red must beat both others outright; otherwise
    # green must beat blue outright; everything else falls to blue.
    if dist_red < dist_green and dist_red < dist_blue:
        label = 'red'
    elif dist_green < dist_blue:
        label = 'green'
    else:
        label = 'blue'
    clusters[label].append(number)

print(clusters)


{'red': [2, 5], 'green': [4, 6], 'blue': [1, 3]}


In [8]:
# Using K-Means to find centroids and automatically create clusters

from PIL import Image
import numpy as np
from sklearn.cluster import KMeans

# Directory prefix for the input files (image1.png ... image6.png).
image_folder = 'data/'

# Open the six numbered images and convert each one to RGB mode.
images = [
    Image.open(f'{image_folder}image{number}.png').convert('RGB')
    for number in range(1, 7)
]

def extract_grid_pixels(image, grid_size=10):
    """Sample pixels from an image on a regular grid.

    The row and column strides are ``rows // grid_size`` and
    ``cols // grid_size``; pixels are taken at every stride starting from
    index 0 on each axis, in row-major order. When a dimension is not an exact
    multiple of ``grid_size`` this can give more than ``grid_size`` samples
    along that axis.

    Parameters
    ----------
    image : array-like
        Anything ``np.array`` converts to a 3-D ``(rows, cols, channels)``
        array (the callers in this notebook pass RGB PIL images).
    grid_size : int, default 10
        Divisor used to derive the sampling stride along each axis.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_samples, channels)`` with the sampled pixels.

    Raises
    ------
    ValueError
        If ``grid_size`` is smaller than 1.
    """
    if grid_size < 1:
        raise ValueError("grid_size must be at least 1")

    data = np.array(image)
    rows, cols, channels = data.shape

    # An axis shorter than grid_size would give a stride of 0, which is not a
    # valid step; fall back to sampling every pixel along that axis.
    row_step = max(1, rows // grid_size)
    col_step = max(1, cols // grid_size)

    # Strided slicing visits the same (row, col) positions, in the same order,
    # as a nested range(0, rows, row_step) / range(0, cols, col_step) loop.
    return data[::row_step, ::col_step].reshape(-1, channels)

def cluster_pixels(images, n_clusters=3):
    """Fit K-Means on the grid-sampled pixels of all given images.

    Each image is sampled with ``extract_grid_pixels`` (default grid size),
    the samples are stacked into one array, and a ``KMeans`` model with
    ``n_clusters`` clusters and ``random_state=0`` is fitted to it.

    Returns the fitted ``KMeans`` estimator.
    """
    sampled_per_image = [extract_grid_pixels(img) for img in images]
    stacked_pixels = np.vstack(sampled_per_image)

    model = KMeans(n_clusters=n_clusters, random_state=0)
    return model.fit(stacked_pixels)

# Fit the clustering model on pixels sampled from every image.
kmeans = cluster_pixels(images)

print("Centroids of the clusters (RGB values):")
print(kmeans.cluster_centers_)

# For each image, find the centroid nearest (Euclidean distance) to the mean
# of its grid-sampled pixels and record that centroid's index.
image_clusters = []
for img in images:
    mean_color = extract_grid_pixels(img).mean(axis=0)
    centroid_distances = np.linalg.norm(kmeans.cluster_centers_ - mean_color, axis=1)
    image_clusters.append(np.argmin(centroid_distances))

# Report with 1-based image and cluster numbers.
print("Image to cluster mapping:")
for image_number, cluster_idx in enumerate(image_clusters, start=1):
    print(f"Image {image_number} is closest to cluster {cluster_idx + 1}")

Centroids of the clusters (RGB values):
[[ 26.5 235.   70. ]
 [194.    3.    3. ]
 [  7.5  78.5 243.5]]
Image to cluster mapping:
Image 1 is closest to cluster 3
Image 2 is closest to cluster 2
Image 3 is closest to cluster 3
Image 4 is closest to cluster 1
Image 5 is closest to cluster 2
Image 6 is closest to cluster 1
