# Image Compression Using K-Means Clustering (Exercise Notebook)

## Exercise Notebook Walkthrough

## Follow these steps to complete the exercise

## 1. Introduction (Section 1)

## 2. Setup and Required Libraries (Section 2):

In [None]:
# Your task: Import the required libraries.
import _____ as np        # Library for numerical operations
import _____ as plt       # Library for plotting
import _____ as cv2       # Library for image processing
from _____ import KMeans  # Import KMeans for clustering

In [None]:
# Your task: set the default figure size used by plots that do not specify
# their own. Fill in the width and height (in inches), e.g. (10, 6).
plt.rcParams['figure.figsize'] = (___, ___)

## 3. Load and Display the Image, EDA Analysis (Section 3):

In [None]:
# Load the image (replace '____.jpg' with your image file name)
image_path = '____.jpg'
image = cv2.imread(image_path)

# cv2.imread does not raise on a missing or unreadable file; it returns None.
# Fail here with a clear message instead of a cryptic error in cvtColor below.
if image is None:
    raise FileNotFoundError(f"Could not read image file: {image_path}")

# Convert BGR to RGB for correct color display
# (OpenCV loads color images in BGR order, while matplotlib expects RGB)
image = cv2.cvtColor(image, cv2.____)

# Display the image
plt.imshow(____)
plt.axis("off")  # Hides axis for a cleaner display
plt.title("Original Image")
plt.show()

In [None]:
# Check image dimensions and data type
# Your task: fill in the attribute names so that the array's dimensions and
# its element type are printed.
print(f"Image Shape: {image._____}")  # Hint: Use the shape attribute
print(f"Data Type: {image._____}")    # Hint: Use the dtype attribute

In [None]:
# Split the image into R, G, B channels
# Your task: pass the image loaded above. Assuming the BGR-to-RGB conversion
# in the loading cell was completed, the channels come back in R, G, B order.
r, g, b = cv2.split(____)

# Plot the histograms for each channel
# ravel() flattens each 2D channel so every pixel contributes one value;
# alpha=0.6 keeps the three overlapping histograms visible at the same time.
plt.figure(figsize=(____,____ ))  # Set the figure size (width, height)
plt.hist(r.ravel(), bins=256, color='___', alpha=0.6, label='Red')   # Set color for red channel
plt.hist(g.ravel(), bins=256, color='___', alpha=0.6, label='Green') # Set color for green channel
plt.hist(b.ravel(), bins=256, color='___', alpha=0.6, label='Blue')  # Set color for blue channel

plt.xlabel("Pixel Value")
plt.ylabel("Frequency")
plt.legend()
plt.title("Color Distribution in Image")
plt.show()

In [None]:
# Reshape the image to a 2D array (height * width, 3)
# This flattens the image into a list of pixels, one [R, G, B] row per pixel
pixels = image._____  # Hint: Use reshape method to flatten the image

# Count unique colors in the image
# Use np.unique to find all unique rows (colors) in the pixels array
unique_colors = np._____  # Hint: Use np.unique with axis=0 so whole rows are compared

# Print the number of unique colors
print(f"Number of unique colors: {_____}")  # Hint: Use len() to find the count

## 4. Preprocess the Image (Section 4):

In [None]:
# Reshape the image into a 2D array (height * width, 3).
#
# An image is typically a 3D NumPy array with the shape (height, width, 3):
#   - height is the number of pixel rows,
#   - width is the number of pixel columns,
#   - 3 corresponds to the three color channels (Red, Green, Blue - RGB).
#
# Using .reshape(-1, 3), the image is flattened into a 2D array where -1 lets
# NumPy calculate the number of rows as height × width (total pixels) and 3
# preserves the three color channels.
#
# For example, an image with dimensions (100, 100, 3) would be reshaped into
# (10000, 3), where each row represents a pixel with three values [R, G, B].
pixels = image.reshape(___, ____)

# Check the shape of the array to confirm that it has been converted into a
# 2D array of shape (num_pixels, 3).
print(f"Shape of pixel array: {pixels.shape}")

## 5. Apply K-Means Clustering (Section 5):

In [None]:
# Choose number of clusters (reduced colors)
k = 4  # You can experiment with different values

# Apply K-Means: every row of `pixels` (one color) is assigned to one of k
# clusters. random_state is fixed so repeated runs give the same palette.
kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)
kmeans.fit(pixels)

# Get the cluster centers (new colors). They are floating-point averages of
# the pixel values in each cluster.
cluster_colors = kmeans.cluster_centers_

# Convert back to 8-bit values. Casting to np.uint8 (rather than the platform
# `int`) gives the rebuilt image a true 8-bit-per-channel dtype; the clip
# guards against rounding ever leaving the valid 0-255 range.
# NOTE: assumes the source image is 8-bit, which is cv2.imread's default.
cluster_colors = np.clip(np.round(cluster_colors), 0, 255).astype(np.uint8)

print(f"Clustered Colors (New Color Palette with {k} colors):\n", cluster_colors)

# Get labels for each pixel (index of the cluster each pixel belongs to)
labels = kmeans.labels_

# Reconstruct the image using clustered colors: replace every pixel with the
# color of its cluster, then restore the original (height, width, 3) layout.
compressed_image = cluster_colors[labels]
compressed_image = compressed_image.reshape(image.shape)

# Display the compressed image
plt.figure(figsize=(6, 6))
plt.imshow(compressed_image)
plt.axis("off")
plt.title(f"Compressed Image with {k} Colors")
plt.show()

## 6. Evaluation and Discussion (Section 6):

## 7. Elbow Method (Section 7):

In [None]:
# Implementing the Elbow Method
#
# Your task: choose the range of k values to evaluate. Note that range() stops
# before its second argument, e.g. range(1, 11) tries k = 1 ... 10.
# A wider range gives a better chance of finding the optimal k, but takes
# longer to process because K-Means is fitted once per value.
K_values = range(___, ___)

# Fit one model per candidate k and record its inertia (the sum of squared
# distances from each pixel to its nearest cluster center).
# Loop-local names are used so that the `k` and `kmeans` chosen in Section 5
# are not overwritten by running this cell.
distortions = []
for n_clusters in K_values:
    elbow_model = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    elbow_model.fit(pixels)
    distortions.append(elbow_model.inertia_)

# Plot distortion against k; the "elbow" where the curve flattens suggests a
# good trade-off between number of colors and reconstruction error.
plt.plot(K_values, distortions, marker='o')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Distortion')
plt.title('Elbow Method for Optimal k')
plt.show()

## 8. Conclusion (Section 8):