In [1]:
import datetime
import os
import pickle
import glob
from pathlib import Path

import cv2
import faiss
import matplotlib.pyplot as plt
import numpy as np

from sklearn.metrics import  davies_bouldin_score, calinski_harabasz_score
from sklearn.preprocessing import MinMaxScaler


In [2]:
# Folder holding the input frames. NOTE(review): machine-specific absolute path;
# adjust it (or make it configurable) before running elsewhere.
source_path = "E:/taha/code/CanSat/images"

images = []

frames = Path(source_path)
# Sort the paths so that image index N always refers to the same file; glob
# itself gives no ordering guarantee.
for path in sorted(frames.glob('*.jpg')):
    image = cv2.imread(str(path))
    if image is None:
        # cv2.imread reports an unreadable/corrupt file by returning None
        # instead of raising, so skip it explicitly.
        print(f'skipping unreadable image: {path}')
        continue
    images.append(image)

if not images:
    raise FileNotFoundError(f'no readable .jpg images found in {source_path}')

# Array shape of the first frame; later cells use it for every frame.
shape = images[0].shape

In [3]:
# Number of loaded frames.
Size = len(images)

# Stack the frames into one array and flatten each frame into a single row.
images = np.asarray(images).reshape(Size, -1)

In [4]:
# Display the array shape of the first loaded frame (images[0].shape).
shape

(3456, 5184, 3)

In [5]:
# Per-pixel coordinates: channel 0 holds the row index, channel 1 the column
# index, giving an array of shape (shape[0], shape[1], 2).
pixel_rows, pixel_cols = np.indices((shape[0], shape[1]))
index_array = np.stack([pixel_rows, pixel_cols], axis=-1)

# One 5-channel float buffer per frame: 3 HSV channels followed by the
# 2 coordinate channels.
hsv_image = np.zeros((images.shape[0], shape[0], shape[1], 5))

# cv2.cvtColor works on (rows, cols, channels) frames, so temporarily
# un-flatten the stack of images.
images = images.reshape(Size, shape[0], shape[1], shape[2])
for i, frame in enumerate(images):
    hsv_image[i, ..., :3] = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
    hsv_image[i, ..., 3:] = index_array

# Restore the flat one-row-per-frame layout.
images = images.reshape(Size, shape[0] * shape[1] * shape[2])

In [6]:
# Preview the first frame as one row per pixel with 5 columns:
# the 3 HSV channels followed by the pixel's (row, column) index.
hsv_image.reshape(Size, shape[0] * shape[1], 5)[0]

array([[1.000e+01, 7.900e+01, 1.070e+02, 0.000e+00, 0.000e+00],
       [1.000e+01, 7.700e+01, 1.090e+02, 0.000e+00, 1.000e+00],
       [1.000e+01, 7.900e+01, 1.070e+02, 0.000e+00, 2.000e+00],
       ...,
       [1.100e+01, 6.600e+01, 1.350e+02, 3.455e+03, 5.181e+03],
       [1.100e+01, 6.500e+01, 1.380e+02, 3.455e+03, 5.182e+03],
       [1.100e+01, 6.500e+01, 1.370e+02, 3.455e+03, 5.183e+03]])

In [7]:
# Remember the 4-D layout of hsv_image (frames, rows, cols, features) so that
# later cells can reshape it freely and still recover these dimensions.
images_shape = hsv_image.shape

In [8]:
def set_priority(arr, color, pos, n_color=3):
    """Weight colour vs. position features by repeating their columns.

    The first ``n_color`` columns of ``arr`` are treated as colour features and
    the remaining columns as position features. Each group is tiled
    horizontally, so a feature repeated ``k`` times contributes ``k`` times to
    a squared Euclidean distance computed on the result.

    Parameters
    ----------
    arr : array-like, shape (n_samples, n_features)
        Feature matrix, colour columns first.
    color : int
        Number of times the colour column group is repeated.
    pos : int
        Number of times the position column group is repeated.
    n_color : int, default 3
        Number of leading columns that form the colour group. The default
        reproduces the original fixed split after the third column.

    Returns
    -------
    numpy.ndarray, shape (n_samples, n_color * color + (n_features - n_color) * pos)
        ``[colour group] * color`` followed by ``[position group] * pos``.
    """
    arr = np.asarray(arr)
    color_columns = np.tile(arr[:, :n_color], (1, color))
    pos_columns = np.tile(arr[:, n_color:], (1, pos))
    return np.concatenate((color_columns, pos_columns), axis=1)

In [9]:
# Fit a single MinMaxScaler over every pixel of every frame (one row per
# pixel), so all frames share the same per-feature scaling.
n_features = images_shape[3]

minMaxScaler = MinMaxScaler()
minMaxScaler.fit(hsv_image.reshape(-1, n_features))

# Keep hsv_image as (frames, pixels, features) for the per-frame clustering.
hsv_image = hsv_image.reshape(images_shape[0], images_shape[1] * images_shape[2], n_features)

In [44]:
# Clustering configuration used by the cells below.
k1, color, pos = (
    15,  # number of k-means clusters per image
    1,   # repeat count for the colour columns (passed to set_priority)
    5,   # repeat count for the position columns (passed to set_priority)
)

In [45]:
# Named display colours, one per cluster label, as 8-bit (R, G, B) triples.
_NAMED_COLORS = (
    ("Red", (255, 0, 0)),
    ("Green", (0, 255, 0)),
    ("Blue", (0, 0, 255)),
    ("Yellow", (255, 255, 0)),
    ("Cyan", (0, 255, 255)),
    ("Magenta", (255, 0, 255)),
    ("Orange", (255, 165, 0)),
    ("Purple", (128, 0, 128)),
    ("Teal", (0, 128, 128)),
    ("Pink", (255, 192, 203)),
    ("Lime", (190, 255, 0)),
    ("Brown", (165, 42, 42)),
    ("Gold", (255, 215, 0)),
    ("Slate Gray", (112, 128, 144)),
    ("Olive", (128, 128, 0)),
)

# Palette indexed by cluster label; the names above are documentation only.
rgb_codes = [rgb for _, rgb in _NAMED_COLORS]

In [46]:
# Cluster every frame with k-means on the weighted (colour, position) features,
# save two segmentation renderings and three per-cluster diagnostic plots per
# frame, and append clustering-quality metrics to CSV files.


def _mean_l1_spread(points):
    """Return the mean L1 distance of the rows of ``points`` from their column-wise mean.

    An empty cluster has no spread, so 0.0 is returned instead of raising.
    """
    if points.shape[0] == 0:
        return 0.0
    return np.abs(points - points.mean(axis=0)).sum(axis=1).mean()


def _mean_pairwise_gap(values):
    """Return the mean absolute difference over all ordered pairs of distinct entries."""
    count = values.shape[0]
    if count < 2:
        # With fewer than two clusters there are no pairs to compare.
        return 0.0
    return np.sum(np.abs(np.subtract.outer(values, values))) / (count * (count - 1))


def _save_line_plot(values, title, path):
    """Plot ``values`` as a line, title the figure, save it to ``path`` and close it."""
    plt.plot(values)
    plt.title(title)
    plt.savefig(path)
    plt.close()


result_dir = f'results/k{k1}_color{color}_pos{pos}'
per_image_csv = f'{result_dir}/res.csv'

# Create the output folder and start from a fresh per-image metrics file.
os.makedirs(result_dir, exist_ok=True)
if os.path.exists(per_image_csv):
    os.remove(per_image_csv)

# Display palette scaled to [0, 1] for plt.imshow.
palette = (np.asarray(rgb_codes) / 255).astype(np.float32)

list_of_all_clusters = []

for image in range(images_shape[0]):
    print(f'image {image} {datetime.datetime.now()}')

    hsv_image_scaled = minMaxScaler.transform(hsv_image[image])
    hsv_image_scaled = set_priority(hsv_image_scaled, color, pos)

    kmeans = faiss.Kmeans(d=hsv_image_scaled.shape[1], k=k1, nredo=4)  # d = color * 3 + pos * 2
    kmeans.train(hsv_image_scaled)

    # assign() returns (distances, labels); only the labels are needed.
    labels = kmeans.assign(hsv_image_scaled)[1]
    list_of_all_clusters.append(labels)
    labels_for_segment_image = labels.reshape(shape[0], shape[1])
    centers = kmeans.centroids

    # Raw (unscaled) feature rows of each cluster, selected with boolean masks
    # instead of a per-pixel Python loop.
    clusters = [hsv_image[image][labels == i] for i in range(k1)]

    # Rendering 1: each cluster in a fixed palette colour. The modulo reuses
    # colours instead of failing when k1 exceeds the palette size.
    segmented_image = palette[labels_for_segment_image % len(palette)]
    plt.imshow(segmented_image)
    plt.savefig(f'{result_dir}/RGB{image}.jpg')
    plt.close()

    # Rendering 2: each cluster in its centroid colour. The first three
    # centroid columns are the scaled H, S, V values (assumes color >= 1), so
    # undo the min-max scaling to get back to OpenCV's 8-bit HSV ranges.
    center_hsv = (centers[:, :3] - minMaxScaler.min_[:3]) / minMaxScaler.scale_[:3]
    center_hsv = np.clip(np.rint(center_hsv), 0, [179, 255, 255]).astype(np.uint8)
    # plt.imshow expects RGB channel order, hence HSV2RGB rather than HSV2BGR.
    segmented_image = cv2.cvtColor(center_hsv[labels_for_segment_image], cv2.COLOR_HSV2RGB)
    plt.imshow(segmented_image)
    plt.savefig(f'{result_dir}/HSV{image}.jpg')
    plt.close()

    # Spatial spread of each cluster (last two columns are the pixel indices).
    avg_dis_cluster = np.array([_mean_l1_spread(cluster[:, -2:]) for cluster in clusters])
    _save_line_plot(avg_dis_cluster, f"average of cluster distance {image}",
                    f'{result_dir}/{image}.avg_dis_cluster.jpg')
    avg_distance1 = _mean_pairwise_gap(avg_dis_cluster)

    # Colour spread of each cluster (first three columns are H, S, V).
    avg_hsv_cluster = np.array([_mean_l1_spread(cluster[:, :3]) for cluster in clusters])
    _save_line_plot(avg_hsv_cluster, f"average of cluster hsv {image}",
                    f'{result_dir}/{image}.avg_hsv_cluster.jpg')
    avg_distance2 = _mean_pairwise_gap(avg_hsv_cluster)

    # Pixel count per cluster; int64 so summed pairwise differences cannot
    # overflow on platforms whose default integer is 32-bit.
    size_of_cluster = np.array([len(cluster) for cluster in clusters], dtype=np.int64)
    _save_line_plot(size_of_cluster, f"size of cluster {image}",
                    f'{result_dir}/{image}.size_of_cluster.jpg')
    avg_distance3 = _mean_pairwise_gap(size_of_cluster)

    db_score = davies_bouldin_score(hsv_image_scaled, labels)
    ch_score = calinski_harabasz_score(hsv_image_scaled, labels)
    with open(per_image_csv, 'a') as csv_file:
        csv_file.write(f'{avg_distance1},{avg_distance2},{avg_distance3},{db_score},{ch_score}\n')

    # Release the large per-frame temporaries before the next iteration.
    del hsv_image_scaled, kmeans, centers, clusters, segmented_image


# Average every metric over all frames. atleast_2d keeps the row axis when only
# one frame was processed (genfromtxt returns a 1-D array for a single row).
per_image_metrics = np.atleast_2d(np.genfromtxt(per_image_csv, delimiter=','))
means = np.mean(per_image_metrics, axis=0)
with open('res.csv', 'a') as csv_file:
    csv_file.write(f'k{k1}_color{color}_pos{pos},{means[0]},{means[1]},{means[2]},{means[3]},{means[4]}\n')

image 0 2024-04-22 02:16:24.208368
centers = 0.42524048686027527
centers = 0.6247732639312744
centers = 0.8960421681404114
centers = 0.38515159487724304
centers = 0.15749256312847137
centers = 0.1674686223268509
centers = 0.8612996339797974
centers = 0.6052494049072266
centers = 0.3471258878707886
centers = 0.8729769587516785
centers = 0.12054547667503357
centers = 0.8294789791107178
centers = 0.6002093553543091
centers = 0.12625443935394287
centers = 0.43374350666999817
image 1 2024-04-22 02:17:21.178789
centers = 0.4330086410045624
centers = 0.610592246055603
centers = 0.887458086013794
centers = 0.3538898229598999
centers = 0.15135885775089264
centers = 0.15894363820552826
centers = 0.8837361335754395
centers = 0.6184254288673401
centers = 0.3356024920940399
centers = 0.888658881187439
centers = 0.11553747951984406
centers = 0.6720091700553894
centers = 0.651358962059021
centers = 0.12054523825645447
centers = 0.41314515471458435
image 2 2024-04-22 02:18:22.827055
centers = 0.144027