# In [None]:
# Question 3: Image Compression with K-Means
# Description: Use K-Means clustering for basic image compression.
import numpy as np
from PIL import Image
from sklearn.cluster import KMeans

def compress_image(image_path: str, n_colors: int = 16) -> "Image.Image | None":
    """
    Reduce an image to at most ``n_colors`` colors using K-Means clustering.

    The image is converted to 8-bit RGB, its pixels are clustered in RGB
    space, and every pixel is replaced by the rounded center of the cluster
    it was assigned to.

    Args:
        image_path (str): The path to the input image.
        n_colors (int): The number of colors to reduce the image to.
                        This is also the number of clusters for K-Means
                        (capped at the number of pixels in the image).

    Returns:
        PIL.Image.Image: The color-reduced image, always in RGB mode
        (grayscale and palette inputs are expanded to RGB, and any alpha
        channel is discarded). Returns None, after printing an error
        message, if the image could not be loaded.
    """
    # Load the image as a (height, width, 3) uint8 array. Converting to RGB
    # up front gives every input mode (grayscale, palette, RGBA, CMYK, ...)
    # the same three-channel layout, so the reshape below can never split
    # pixels across channel boundaries. The `with` block closes the file
    # handle; `convert` returns an independent, fully loaded copy.
    try:
        with Image.open(image_path) as image:
            image_array = np.array(image.convert("RGB"))
    except FileNotFoundError:
        print(f"Error: Image not found at {image_path}")
        return None
    except Exception as e:
        # Deliberately broad: any load/decode failure is reported as None.
        print(f"Error loading image: {e}")
        return None

    original_shape = image_array.shape

    # Flatten to one row per pixel: (height * width, 3)
    reshaped_image = image_array.reshape(-1, 3)

    # K-Means cannot fit more clusters than there are samples, so cap the
    # cluster count for very small images instead of letting fit() raise.
    n_clusters = min(n_colors, reshaped_image.shape[0])

    # Apply K-Means clustering to find the dominant colors
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init='auto')
    kmeans.fit(reshaped_image)

    # The cluster centers are the representative colors. Round to the nearest
    # integer (a bare uint8 cast would truncate and darken every color).
    compressed_colors = np.clip(
        np.rint(kmeans.cluster_centers_), 0, 255
    ).astype(np.uint8)

    # fit() already stored each pixel's cluster index in labels_, so there is
    # no need for a second assignment pass via predict().
    new_image_array = compressed_colors[kmeans.labels_]

    # Restore the (height, width, 3) layout and wrap it in a PIL image
    return Image.fromarray(new_image_array.reshape(original_shape))

if __name__ == "__main__":
    # Example usage:
    # IMPORTANT: Replace 'path/to/your/image.jpg' with the actual path to an image on your system.
    # For instance, if you have an image named 'flower.jpg' in the same directory as your script, use 'flower.jpg'.

    input_image_path = "path/to/your/image.jpg"
    # NOTE: JPEG is a lossy format, so the saved file may contain more distinct
    # colors than requested; use a '.png' path to keep the reduced palette exact.
    output_image_path = "compressed_output_image.jpg"

    # You can experiment with different numbers of colors (e.g., 4, 8, 16, 32)
    desired_colors = 16

    print(f"Attempting to compress '{input_image_path}' to {desired_colors} colors...")

    compressed_img = compress_image(input_image_path, n_colors=desired_colors)

    # compress_image signals a load failure by returning None, so test for
    # None explicitly rather than relying on the image object's truthiness.
    if compressed_img is not None:
        compressed_img.save(output_image_path)
        print(f"Image successfully compressed and saved as '{output_image_path}'")
    else:
        print("Image compression failed.")