<a href="https://colab.research.google.com/github/RaziSidd/Collab-and-Jupyter-Notebook/blob/main/ImageCompress.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

In [2]:
def kmeans_numpy(X, n_clusters, max_iters=100):
    """
    Cluster the rows of ``X`` into ``n_clusters`` groups with Lloyd's k-means.

    Args:
        X: 2D array-like of shape (n_samples, n_features). For an RGB image
            this is the (height * width, 3) array of pixels.
        n_clusters: Number of centroids. The initial centroids are distinct
            rows of ``X`` drawn without replacement.
        max_iters: Upper bound on the number of assign/update rounds.

    Returns:
        A ``(centroids, labels)`` tuple. ``centroids`` is a float array of
        shape (n_clusters, n_features); ``labels`` is a 1D integer array of
        length n_samples holding the index of each row's nearest centroid.

    Raises:
        ValueError: Propagated from ``np.random.choice`` when ``n_clusters``
            exceeds the number of rows in ``X``.
    """
    # Work in floating point: image pixels arrive as uint8, and subtracting
    # uint8 values wraps around modulo 256, which corrupts the distances.
    points = np.asarray(X, dtype=np.float64)
    n_samples = points.shape[0]

    # Initialize cluster centroids from randomly chosen, distinct rows.
    centroids = points[np.random.choice(n_samples, n_clusters, replace=False)]
    labels = np.zeros(n_samples, dtype=np.intp)

    for _ in range(max_iters):
        # Squared distance from every point to every centroid. Looping over
        # the (few) centroids keeps peak memory at n_samples x n_clusters.
        sq_dist = np.empty((n_samples, n_clusters))
        for idx, centroid in enumerate(centroids):
            diff = points - centroid
            sq_dist[:, idx] = (diff * diff).sum(axis=1)

        # Assign each point to the nearest cluster (first one wins on ties).
        labels = sq_dist.argmin(axis=1)

        # Move each centroid to the mean of its members. A cluster that
        # received no points keeps its previous centroid so the result
        # always has exactly n_clusters rows.
        new_centroids = centroids.copy()
        for idx in range(n_clusters):
            members = points[labels == idx]
            if len(members):
                new_centroids[idx] = members.mean(axis=0)

        # Check for convergence: no centroid moved in this round.
        if np.array_equal(centroids, new_centroids):
            break
        centroids = new_centroids

    return centroids, labels

In [3]:
def create_compressed_image(labels, centroids, height, width, channels):
    """
    Build the quantised image by painting every pixel with its cluster colour.

    Args:
        labels: 1D sequence of cluster indices in row-major pixel order
            (pixel (i, j) is at position ``i * width + j``). Float labels
            are accepted and truncated to integers.
        centroids: Array of shape (n_clusters, channels) with one colour per
            cluster.
        height: Image height in pixels.
        width: Image width in pixels.
        channels: Number of colour channels in the output image.

    Returns:
        A uint8 array of shape (height, width, channels).

    Raises:
        IndexError: If ``labels`` has fewer than ``height * width`` entries
            or a label does not index a row of ``centroids``.
    """
    pixel_count = height * width
    flat_labels = np.asarray(labels).ravel()
    if flat_labels.size < pixel_count:
        raise IndexError(
            f"expected at least {pixel_count} labels, got {flat_labels.size}"
        )

    # One cluster index per pixel, laid out like the image itself.
    index_map = flat_labels[:pixel_count].astype(np.intp).reshape(height, width)

    # Round (rather than truncate) the float centroids to the nearest valid
    # 8-bit value so the palette is not systematically darkened.
    palette = np.clip(
        np.rint(np.asarray(centroids, dtype=np.float64)), 0, 255
    ).astype(np.uint8)

    # A single fancy-indexing lookup replaces the per-pixel Python loop.
    compressed_image = np.zeros((height, width, channels), dtype=np.uint8)
    compressed_image[...] = palette[index_map]
    return compressed_image

In [None]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

def kmeans_image_compression(image_path, num_clusters):
    """
    Quantise an image to ``num_clusters`` colours and report the size saving.

    Loads the image, clusters its pixels with ``kmeans_numpy``, rebuilds the
    image from the cluster centroids, writes both versions as PNG files in
    the current working directory ("original_image.png" and
    "compressed_image.png"), prints their sizes and ratio, and shows both
    images side by side with matplotlib.

    Args:
        image_path: Path of the image file to compress.
        num_clusters: Number of colours to keep in the compressed image.

    Raises:
        FileNotFoundError: If the image cannot be read from ``image_path``.
        OSError: If either PNG file cannot be written.
    """
    # Step 1: Load the image
    original_image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if original_image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # OpenCV loads pixels as BGR; convert to RGB for clustering and display.
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)

    # Get the dimensions of the image
    height, width, channels = original_image.shape

    # Step 2: Apply K-means clustering using numpy
    pixel_values = original_image.reshape(-1, channels)
    print("PART 1")
    centroids, labels = kmeans_numpy(pixel_values, num_clusters)

    # Step 3: Create a compressed image based on cluster centroids
    print("PART 2")
    compressed_image = create_compressed_image(labels, centroids, height, width, channels)

    # Step 4: Save the compressed image (converted back to OpenCV's BGR order)
    compressed_image_path = "compressed_image.png"
    if not cv2.imwrite(compressed_image_path, cv2.cvtColor(compressed_image, cv2.COLOR_RGB2BGR)):
        raise OSError(f"Could not write image: {compressed_image_path}")

    # Save the original in the same format so the file sizes are comparable
    original_image_path = "original_image.png"
    if not cv2.imwrite(original_image_path, cv2.cvtColor(original_image, cv2.COLOR_RGB2BGR)):
        raise OSError(f"Could not write image: {original_image_path}")

    # Calculate and print the compression ratio
    original_size = os.path.getsize(original_image_path)
    compressed_size = os.path.getsize(compressed_image_path)
    compression_ratio = original_size / compressed_size

    print(f"Original image size: {original_size} bytes")
    print(f"Compressed image size: {compressed_size} bytes")
    print(f"Compression ratio: {compression_ratio:.2f}")
    print("By increasing the number of clusters, we decrease the value of the compression ratio. \nThis makes sense as we are increasing the number of colors we want to use in our final compressed image \nFor example, 2 cluster means 2 colors, so we will have a very compressed image, almost like a 2-tone or grayscale but with the cluster colors \nBut, 10 clusters means 10 colors, so we will have an image closer to the original, but with less space taken up \nBy increasing the clusters, we get an image closer to the original, but we also get a compression which is less than with less colors")

    # Show the original and compressed images side by side using matplotlib
    plt.figure(figsize=(12, 6))

    plt.subplot(1, 2, 1)
    plt.imshow(original_image)
    plt.title("Original Image")
    plt.axis("off")

    plt.subplot(1, 2, 2)
    plt.imshow(compressed_image)
    plt.title("Compressed Image")
    plt.axis("off")

    plt.show()

if __name__ == "__main__":
    # Number of colours to keep; adjust as needed.
    num_clusters = 5
    # Sample image location inside the Colab runtime.
    image_path = "/content/sample_data/test_image.png"

    kmeans_image_compression(image_path=image_path, num_clusters=num_clusters)


PART 1
