In [None]:
import pandas as pd
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
image_path = 'image path'

# Load the picture with Pillow, then expose its pixels as a NumPy array.
img = Image.open(image_path)
img_array = np.array(img)

# Sanity check: the array dimensions and one sample pixel (row 32, column 13).
print(img_array.shape)
print(img_array[32, 13])


# Converting the 4-channel image into a grayscale image

In [None]:
# Pillow mode 'L' is single-channel 8-bit grayscale.
data_image = img.convert(mode='L')
# Pixel intensities as a NumPy array; later cells cluster its non-zero entries.
data = np.array(data_image)
print(data.shape)

In [None]:
# Inspect the second row (index 1) of the converted array.
print(data[1])

In [None]:

# Length of the array's second axis.
print(data.shape[1])

# Initialize Centroids

In [None]:
def initialize_centroids(data, k):
    """Draw ``k`` random starting centroids inside the bounding box of the non-zero entries.

    Parameters
    ----------
    data : array-like
        Array (at least 2-D) whose non-zero entries are the points to cluster.
        The first two index positions of each entry are used as its coordinates.
    k : int
        Number of centroids to generate.

    Returns
    -------
    list of tuple
        ``k`` pairs ``(x, y)``; ``x`` is drawn uniformly between the smallest and
        largest first index of the non-zero entries, ``y`` likewise for the
        second index.

    Raises
    ------
    ValueError
        If ``data`` contains no non-zero entry, because no bounding box exists.
    """
    # Index positions of every non-zero entry, one row per entry.
    coords = np.argwhere(np.asarray(data) != 0)
    if coords.shape[0] == 0:
        raise ValueError("data has no non-zero entries; cannot place centroids")

    x_min, x_max = coords[:, 0].min(), coords[:, 0].max()
    y_min, y_max = coords[:, 1].min(), coords[:, 1].max()

    centroids = []
    for _ in range(k):
        # Draw x before y so that a seeded run yields the same sequence of draws.
        x = np.random.uniform(x_min, x_max)
        y = np.random.uniform(y_min, y_max)
        centroids.append((x, y))

    return centroids

# Euclidean distance between a data point and a centroid

In [None]:
def distance(data_point, centroid):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components of ``data_point`` and ``centroid`` are used.
    """
    delta_first = data_point[0] - centroid[0]
    delta_second = data_point[1] - centroid[1]
    return np.sqrt(delta_first**2 + delta_second**2)

# Finding the closest Centroid to each data point

In [None]:
def centroids_indices(data, centroids, k):
    """Label every non-zero entry of ``data`` with the index of its nearest centroid.

    Parameters
    ----------
    data : array-like
        Array (at least 2-D); each non-zero entry is a point located at its
        first two index positions.
    centroids : sequence
        Sequence of ``(x, y)`` pairs; only the first ``k`` are considered.
    k : int
        Number of centroids to compare against.

    Returns
    -------
    list of tuple
        One ``(row, col, index)`` tuple per non-zero entry, in row-major order.
        ``index`` is the position of the closest centroid by Euclidean
        distance; on a tie the lowest index wins. When ``k <= 0`` there is
        nothing to compare against and ``index`` is ``None``.
    """
    # Index positions of all non-zero entries, in the same row-major order
    # that iterating over the array element by element would produce.
    coords = np.argwhere(np.asarray(data) != 0)
    rows = coords[:, 0].tolist()
    cols = coords[:, 1].tolist()

    if k <= 0:
        return [(r, c, None) for r, c in zip(rows, cols)]

    # Indexing one by one keeps the IndexError when fewer than k centroids are supplied.
    centres = np.array([centroids[i] for i in range(k)], dtype=float)

    # Broadcast (n_points, 1) against (k,) to get an (n_points, k) distance table.
    d_first = coords[:, 0, np.newaxis] - centres[:, 0]
    d_second = coords[:, 1, np.newaxis] - centres[:, 1]
    dist_table = np.sqrt(d_first**2 + d_second**2)

    # argmin returns the first minimum, i.e. the lowest centroid index on ties.
    nearest = np.argmin(dist_table, axis=1).tolist()
    return list(zip(rows, cols, nearest))


# Forming a Cluster

In [None]:
def form_clusters(indices, k):
    """Group labelled points into ``k`` clusters.

    Parameters
    ----------
    indices : sequence
        Entries of the form ``(x, y, label)``.
    k : int
        Number of clusters; labels are matched against ``0 .. k-1``.

    Returns
    -------
    list of numpy.ndarray
        ``k`` arrays; array ``i`` holds the ``(x, y)`` pairs whose label equals
        ``i``, in their original order. A cluster without members becomes an
        empty array.
    """
    clusters_numpy = []
    for label in range(k):
        members = [(entry[0], entry[1]) for entry in indices if entry[2] == label]
        clusters_numpy.append(np.array(members))
    return clusters_numpy

# Finding new centroid using mean of data points of a cluster

In [None]:
def new_centroids(clusters, k):
    """Compute the mean position of each of the first ``k`` clusters.

    Parameters
    ----------
    clusters : sequence
        Sequence of point collections; each point exposes its two coordinates
        at positions 0 and 1.
    k : int
        Number of clusters to process.

    Returns
    -------
    list
        One entry per cluster: a ``(mean_x, mean_y)`` tuple for a cluster with
        members, or the list ``[0, 0]`` for a cluster with none.
    """
    means = []
    for idx in range(k):
        cluster = clusters[idx]
        size = len(cluster)
        if size == 0:
            # Empty clusters keep the [0, 0] list placeholder.
            means.append([0, 0])
            continue
        total_first = sum(member[0] for member in cluster)
        total_second = sum(member[1] for member in cluster)
        means.append((total_first / size, total_second / size))
    return means

# Calculating the within-cluster sum of squares (WCSS)

In [None]:
def get_wcss(clusters, centroids):
    """Return the within-cluster sum of squares.

    For every centroid, the squared ``distance`` from each point of the
    matching cluster (same position in ``clusters``) to that centroid is
    added to a running total, which is returned.
    """
    total = 0
    for idx, centre in enumerate(centroids):
        for member in clusters[idx]:
            total += distance(member, centre) ** 2
    return total
             
    

# K_Means Algorithm

In [None]:
def K_Means(data, k):
    """Cluster the non-zero entries of ``data`` into ``k`` groups.

    Starting from random centroids, points are repeatedly assigned to their
    nearest centroid and the centroids recomputed as cluster means, until the
    centroids stop changing. If any cluster ends up empty, the whole procedure
    restarts from fresh random centroids.

    Parameters
    ----------
    data : array-like
        Array whose non-zero entries are the points to cluster.
    k : int
        Number of clusters.

    Returns
    -------
    tuple
        ``(clusters, centroids)`` as produced by ``form_clusters`` and
        ``new_centroids`` for the converged assignment.

    Raises
    ------
    ValueError
        If ``k`` is larger than the number of non-zero entries. In that case
        at least one cluster is always empty, so the restart loop could never
        finish.
    """
    n_points = int(np.count_nonzero(np.asarray(data)))
    if k > n_points:
        raise ValueError(
            f"k={k} exceeds the number of non-zero points ({n_points}); "
            "every run would leave an empty cluster"
        )

    while True:
        previous = initialize_centroids(data, k)
        indices = centroids_indices(data, previous, k)
        clusters = form_clusters(indices, k)  # one array of points per cluster
        centroids = new_centroids(clusters, k)

        # Iterate until recomputing the centroids no longer changes them.
        while previous != centroids:
            previous = centroids
            indices = centroids_indices(data, previous, k)
            clusters = form_clusters(indices, k)
            centroids = new_centroids(clusters, k)

        # Accept the result only if every cluster has at least one member;
        # otherwise start over with new random centroids.
        if all(len(cluster) > 0 for cluster in clusters):
            return clusters, centroids

# Plot of WCSS against the number of clusters

In [None]:
k_range = np.arange(1,7)
wcss = np.empty(len(k_range))
min_wcss = np.full(len(k_range), np.inf)

# K_Means starts from random centroids, so a single run per k can be unlucky.
# Run it 10 times for every k and keep the smallest WCSS seen; wcss[i] ends up
# holding the value of the last run only.
for i, n_clusters in enumerate(k_range):
    for iteration in range(10):
        clusters_wcss, centroids_wcss = K_Means(data, n_clusters)
        wcss[i] = get_wcss(clusters_wcss, centroids_wcss)
        min_wcss[i] = min(min_wcss[i], wcss[i])
        





In [None]:
# Elbow curve: best WCSS found for each candidate number of clusters.
fig, ax = plt.subplots()
ax.plot(k_range, min_wcss)
ax.set_xlabel('Number of clusters')
ax.set_ylabel('WCSS')
plt.show()

In [None]:
# Show the candidate cluster counts and the smallest WCSS recorded for each.
print(k_range)
print(min_wcss)
# Last expression of the cell, so the notebook displays the number of candidates.
len(k_range)

# Calculating perpendicular distance

In [None]:
def perpendicular_distance(point, line_point1, line_point2):
    """Return the distance from ``point`` to the line through two other points.

    Parameters
    ----------
    point : sequence of two numbers
        The point ``(x0, y0)`` whose distance is measured.
    line_point1, line_point2 : sequence of two numbers
        Two points ``(x1, y1)`` and ``(x2, y2)`` defining the line.

    Returns
    -------
    float
        The perpendicular distance from ``point`` to the line. If the two line
        points coincide no line is defined, and the plain distance from
        ``point`` to that single point is returned instead of dividing by zero.
    """
    x0, y0 = point
    x1, y1 = line_point1
    x2, y2 = line_point2

    dx = x2 - x1
    dy = y2 - y1
    line_length = np.sqrt(dy**2 + dx**2)

    if line_length == 0:
        # Degenerate line: both defining points are the same.
        return np.sqrt((x0 - x1)**2 + (y0 - y1)**2)

    # Standard point-to-line formula: |dy*x0 - dx*y0 + x2*y1 - y2*x1| / |P2 - P1|.
    numerator = abs(dy * x0 - dx * y0 + x2 * y1 - y2 * x1)
    return numerator / line_length

# Calculating the value of K

In [None]:
def calc_k(min_wcss, k_range):
    """Pick the elbow of the WCSS curve.

    The points ``(k, wcss)`` are compared with the straight line joining the
    first and last point of the curve. The interior point with the largest
    ``perpendicular_distance`` to that line wins (the earliest one on ties).

    Parameters
    ----------
    min_wcss : array-like
        WCSS value for each candidate in ``k_range``.
    k_range : array-like
        Candidate numbers of clusters.

    Returns
    -------
    The ``k`` value of the selected point, taken from the stacked
    ``(k, wcss)`` array. With fewer than three candidates there is no interior
    point and the first candidate is returned.
    """
    curve = np.column_stack((k_range, min_wcss))
    last = len(k_range) - 1
    first_point, last_point = curve[0], curve[last]

    best_gap = float('-inf')
    best_index = 0
    # Only interior points are candidates; the two end points define the line.
    for idx in range(1, last):
        gap = perpendicular_distance(curve[idx], first_point, last_point)
        if gap > best_gap:
            best_gap, best_index = gap, idx

    return curve[best_index][0]

# Getting clusters and centroids

In [None]:
# Use the elbow of the WCSS curve as the cluster count, then cluster once more with it.
k = int(calc_k(min_wcss, k_range))
clusters, centroids = K_Means(data, k)
for result in (clusters, centroids):
    print(result)


In [None]:
centroids_array = np.array(centroids)
n_centroids = len(centroids)

colors = plt.cm.rainbow(np.linspace(0, 1, n_centroids))
labels = [f'Cluster {number}' for number in range(1, n_centroids + 1)]

# One scatter series per cluster, each with its own colour and legend entry.
for array, color, label in zip(clusters, colors, labels):
    plt.scatter(array[:,0], array[:,1], color=color, label=label)

# Overlay the centroids as black crosses.
plt.scatter(centroids_array[:,0], centroids_array[:,1], color='black', marker='x', label = 'centroids')

plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.title('Scatter Plot of Points')
plt.legend()
plt.show()

# Pairwise Euclidean distances between centroids: entry [i, j] is the distance
# between centroid i and centroid j.
pairwise_diff = centroids_array[:, np.newaxis] - centroids_array
distances = np.linalg.norm(pairwise_diff, axis=2)

# Tabulate each unordered pair once (upper triangle of the matrix).
rows = [
    {"Clusters": f'Cluster {i+1} and Cluster {j+1}', "Distance": distances[i, j]}
    for i in range(n_centroids)
    for j in range(i+1, n_centroids)
]
distances = pd.DataFrame(rows)
print(distances)