In [1]:
import numpy as np
import matplotlib.pyplot as plt

from PIL import Image

from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.preprocessing import StandardScaler


In [2]:
# Load the source image and show it.
# IMAGE_PATH is resolved relative to the notebook's working directory;
# Image.open raises FileNotFoundError if the file is not there, and every
# later cell depends on `img`, `img_array`, `h` and `w` defined here.
IMAGE_PATH = "ladybug.png"

# Open inside a context manager so the underlying file handle is released
# deterministically; convert() returns an independent, fully loaded image.
with Image.open(IMAGE_PATH) as source_img:
    img = source_img.convert("RGB")   # ensure RGB

img_array = np.array(img)
h, w, c = img_array.shape

print("Image shape:", img_array.shape)

plt.figure(figsize=(6,6))
plt.imshow(img_array)
plt.title("Original Image")
plt.axis("off")
plt.show()


FileNotFoundError: [Errno 2] No such file or directory: 'ladybug.png'

In [10]:
# Flatten the image into one row per pixel so that each RGB triple can be
# fed to the clustering models as a 3-feature sample.
pixels = np.reshape(img_array, (-1, 3))  # (h*w, 3)
print("Pixels shape:", pixels.shape)


NameError: name 'img_array' is not defined

In [9]:
def segment_kmeans(pixels, h, w, k=5):
    """Segment an image by K-Means clustering of its pixel colors.

    Parameters
    ----------
    pixels : array of shape (h*w, 3)
        One row per pixel, one column per color channel.
    h, w : int
        Height and width used to reshape the flat result back into an image.
    k : int, default 5
        Number of clusters, i.e. number of distinct colors in the output.

    Returns
    -------
    segmented_img : uint8 array of shape (h, w, 3)
        Image in which every pixel is replaced by its cluster's center color.
    labels : array of shape (h*w,)
        Cluster index assigned to each pixel.
    """
    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
    labels = kmeans.fit_predict(pixels)

    # Centers are floating-point means. Round to the nearest integer level
    # (a bare uint8 cast truncates and darkens every channel by up to 1),
    # and clip so the cast can never wrap around.
    centers = np.clip(np.rint(kmeans.cluster_centers_), 0, 255).astype(np.uint8)

    # Fancy indexing paints each pixel with the color of its cluster.
    segmented_pixels = centers[labels]
    segmented_img = segmented_pixels.reshape(h, w, 3)

    return segmented_img, labels


In [8]:
# Sweep the number of K-Means clusters on the full-resolution image and
# show the resulting segmentations side by side.
k_values = [3, 5, 8]

# squeeze=False keeps `axes` two-dimensional for any number of panels.
fig, axes = plt.subplots(1, len(k_values), figsize=(15, 5), squeeze=False)

for ax, k in zip(axes[0], k_values):
    seg_img, labels = segment_kmeans(pixels, h, w, k=k)

    ax.imshow(seg_img)
    ax.set_title(f"K-Means (k={k})")
    ax.axis("off")

plt.show()


NameError: name 'segment_kmeans' is not defined

<Figure size 1500x500 with 0 Axes>

In [7]:
# Build a downsized copy of the image for the slower clustering methods.
small_img = img.resize((150, 150))  # reduce size for speed
small_array = np.array(small_img)

# Keep the reduced height/width so segmentations can be reshaped back later.
sh, sw = small_array.shape[:2]
small_pixels = np.reshape(small_array, (-1, 3))

print("Small image shape:", small_array.shape)
print("Small pixels shape:", small_pixels.shape)

fig, ax = plt.subplots(figsize=(5, 5))
ax.imshow(small_array)
ax.set_title("Resized Image for Agglomerative")
ax.axis("off")
plt.show()


NameError: name 'img' is not defined

In [6]:
def segment_agglomerative(pixels, h, w, n_clusters=5, linkage="ward"):
    """Segment an image by agglomerative clustering of its pixel colors.

    Parameters
    ----------
    pixels : array of shape (h*w, 3)
        One row per pixel, one column per color channel.
    h, w : int
        Height and width used to reshape the flat result back into an image.
    n_clusters : int, default 5
        Number of clusters to form.
    linkage : str, default "ward"
        Linkage criterion passed to AgglomerativeClustering.

    Returns
    -------
    segmented_img : uint8 array of shape (h, w, 3)
        Image in which every pixel is replaced by its cluster's mean color.
    labels : array of shape (h*w,)
        Cluster index assigned to each pixel.
    """
    # ward only works with euclidean distances
    model = AgglomerativeClustering(n_clusters=n_clusters, linkage=linkage)
    labels = model.fit_predict(pixels)

    # Replace each cluster by its mean color.
    segmented = np.zeros_like(pixels, dtype=np.uint8)

    for cluster_id in np.unique(labels):
        # Build the membership mask once and reuse it for read and write.
        mask = labels == cluster_id
        mean_color = pixels[mask].mean(axis=0)
        # Round to the nearest level; a bare uint8 cast would truncate and
        # darken every channel by up to 1.
        segmented[mask] = np.clip(np.rint(mean_color), 0, 255).astype(np.uint8)

    segmented_img = segmented.reshape(h, w, 3)
    return segmented_img, labels


In [5]:
# Sweep the number of agglomerative clusters on the downsized image and
# show the resulting segmentations side by side.
cluster_values = [3, 5, 8]

# squeeze=False keeps `axes` two-dimensional for any number of panels.
fig, axes = plt.subplots(1, len(cluster_values), figsize=(15, 5), squeeze=False)

for ax, k in zip(axes[0], cluster_values):
    seg_img, labels = segment_agglomerative(small_pixels, sh, sw, n_clusters=k, linkage="ward")

    ax.imshow(seg_img)
    ax.set_title(f"Agglomerative (clusters={k})")
    ax.axis("off")

plt.show()


NameError: name 'segment_agglomerative' is not defined

<Figure size 1500x500 with 0 Axes>

In [11]:
# Compare both methods on the same downsized image with the same number
# of clusters.
k = 5

kmeans_img, _ = segment_kmeans(small_pixels, sh, sw, k=k)
agg_img, _ = segment_agglomerative(small_pixels, sh, sw, n_clusters=k, linkage="ward")

fig, (ax_kmeans, ax_agg) = plt.subplots(1, 2, figsize=(12, 6))

# (axes, image, title) for each panel, left to right.
panels = (
    (ax_kmeans, kmeans_img, f"K-Means (k={k})"),
    (ax_agg, agg_img, f"Agglomerative (clusters={k})"),
)
for ax, panel_img, panel_title in panels:
    ax.imshow(panel_img)
    ax.set_title(panel_title)
    ax.axis("off")

plt.show()


NameError: name 'small_pixels' is not defined

In [3]:
def segment_dbscan(pixels, h, w, eps=5, min_samples=20):
    """Segment an image by DBSCAN clustering of its pixel colors.

    The colors are passed through StandardScaler before clustering, so
    ``eps`` is a distance in scaled units, not in raw channel values.

    Parameters
    ----------
    pixels : array of shape (h*w, 3)
        One row per pixel, one column per color channel.
    h, w : int
        Height and width used to reshape the flat result back into an image.
    eps : float, default 5
        Neighborhood radius passed to DBSCAN (applied to the scaled colors).
    min_samples : int, default 20
        Minimum neighborhood size passed to DBSCAN.

    Returns
    -------
    segmented_img : uint8 array of shape (h, w, 3)
        Image in which every clustered pixel is replaced by its cluster's
        mean color and every noise pixel (label -1) is black.
    labels : array of shape (h*w,)
        Cluster index per pixel; -1 marks noise.
    """
    # scale helps DBSCAN
    scaler = StandardScaler()
    pixels_scaled = scaler.fit_transform(pixels)

    db = DBSCAN(eps=eps, min_samples=min_samples)
    labels = db.fit_predict(pixels_scaled)

    # Starts all black, so noise pixels need no further work.
    segmented = np.zeros_like(pixels, dtype=np.uint8)

    for cluster_id in np.unique(labels):
        if cluster_id == -1:
            # noise -> black (already zero-initialized)
            continue

        mask = labels == cluster_id
        # Mean is taken over the original (unscaled) colors. Round to the
        # nearest level; a bare uint8 cast would truncate and darken every
        # channel by up to 1.
        mean_color = pixels[mask].mean(axis=0)
        segmented[mask] = np.clip(np.rint(mean_color), 0, 255).astype(np.uint8)

    segmented_img = segmented.reshape(h, w, 3)
    return segmented_img, labels


In [12]:
# Sweep the DBSCAN neighborhood radius on the downsized image and show
# the resulting segmentations side by side.
eps_values = [0.5, 1.0, 1.5]

# squeeze=False keeps `axes` two-dimensional for any number of panels.
fig, axes = plt.subplots(1, len(eps_values), figsize=(15, 5), squeeze=False)

for ax, e in zip(axes[0], eps_values):
    seg_img, labels = segment_dbscan(small_pixels, sh, sw, eps=e, min_samples=15)

    ax.imshow(seg_img)
    ax.set_title(f"DBSCAN (eps={e})")
    ax.axis("off")

plt.show()


NameError: name 'small_pixels' is not defined

<Figure size 1500x500 with 0 Axes>

Observations and Parameter Experimentation

K-Means

Increasing k increases the number of distinct colors in the segmented image.

With low k (e.g., 3), the image becomes overly simplified.

With higher k (e.g., 8), more detail appears but segmentation can become noisy.

Agglomerative Clustering

Produces segmentation similar to K-Means, but can group pixels differently.

It is slower than K-Means, so the image is resized to 150×150 before clustering to keep the run time manageable.

More clusters increase detail but can create rough boundaries.

DBSCAN (Optional)

DBSCAN is difficult to tune for image segmentation.

With small eps, many pixels are labeled as noise and appear black in the segmented image.

With larger eps, clusters merge and segmentation becomes less meaningful.

DBSCAN is better suited to spatial density problems than to pure RGB color segmentation.

Comparison Summary

K-Means is the most practical for color segmentation because it is fast and produces clear results.

Agglomerative clustering can give similar segmentation but is slower.

DBSCAN is not ideal for this image: its output is very sensitive to eps and min_samples, so the segmentation is often unstable.