In [1]:
import datetime
import os
import pickle

import cv2
import faiss
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, davies_bouldin_score, calinski_harabasz_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


In [2]:
def load_dataset(image_pickle_file_path='images.pkl', label_pickle_file_path='label.pkl'):
    """Load the pickled images and labels and flatten every image to one row.

    Parameters
    ----------
    image_pickle_file_path : str
        Path of the pickle file holding the image array. Defaults to
        ``'images.pkl'`` (the location originally hard-coded here).
    label_pickle_file_path : str
        Path of the pickle file holding the labels. Defaults to ``'label.pkl'``.

    Returns
    -------
    tuple
        ``(images, labels)`` where ``images`` has been reshaped to
        ``(images.shape[0], -1)`` and ``labels`` is returned exactly as it was
        unpickled.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point these paths at files you created or otherwise trust.
    with open(image_pickle_file_path, 'rb') as file:
        images = pickle.load(file)

    with open(label_pickle_file_path, 'rb') as file:
        labels = pickle.load(file)

    # Keep the first axis (one entry per image) and collapse everything else.
    images = images.reshape(images.shape[0], -1)

    return images, labels

In [3]:
def classify(datapoints, labels, test_size=0.2):
    """Fit a 100-tree random forest (random_state=42) on the given data.

    Parameters
    ----------
    datapoints, labels
        Feature rows and their targets, handed to scikit-learn unchanged.
    test_size
        Hold-out share forwarded to ``train_test_split``. Passing ``0``
        disables the split and trains on every sample.

    Returns
    -------
    ``(clf, accuracy)`` when ``test_size`` is non-zero, where ``accuracy`` is
    measured on the held-out part; only ``clf`` when ``test_size`` is ``0``.
    """
    forest = RandomForestClassifier(n_estimators=100, random_state=42)

    # No hold-out requested: train on everything and hand back just the model.
    if test_size == 0:
        forest.fit(datapoints, labels)
        return forest

    X_train, X_test, y_train, y_test = train_test_split(
        datapoints, labels, test_size=test_size, random_state=42)
    forest.fit(X_train, y_train)

    held_out_accuracy = accuracy_score(y_test, forest.predict(X_test))
    # Only accuracy is reported; recall, F1, precision and a confusion-matrix
    # plot were sketched here earlier but are not computed.
    return forest, held_out_accuracy

In [None]:
# Read the image matrix (one flattened row per image) and its labels from the pickle files.
images, labels = load_dataset()

In [None]:
# Show the dimensions of the loaded image matrix as a quick sanity check.
images.shape

In [None]:
# Build the per-pixel feature array: three HSV channels followed by the
# pixel's two grid indices, for every image in the dataset.
image_side = 499  # each image is reshaped to image_side x image_side x 3 below
n_images = images.shape[0]  # derived from the data instead of a hard-coded 560

row_indices = np.arange(0, image_side)
column_indices = np.arange(0, image_side)

row_mesh, column_mesh = np.meshgrid(row_indices, column_indices)

# With meshgrid's default 'xy' indexing, column_mesh varies along axis 0 and
# row_mesh along axis 1, so index_array[r, c] == (r, c).
index_array = np.stack([column_mesh, row_mesh], axis=-1)

hsv_image = np.zeros((n_images, image_side, image_side, 5))

# Temporarily view the flattened rows as images so OpenCV can convert them.
images = images.reshape(n_images, image_side, image_side, 3)
for i in range(n_images):
    temp = cv2.cvtColor(images[i], cv2.COLOR_BGR2HSV)
    hsv_image[i] = np.concatenate((temp, index_array), axis=2)

# Restore the flattened layout that load_dataset produced.
images = images.reshape(n_images, image_side * image_side * 3)

In [8]:
# Preview the first image as a list of pixels: each row is three HSV values followed by two position indices.
hsv_image.reshape(560, 499 * 499, 5)[0]

array([[ 19.,  19., 107.,   0.,   0.],
       [ 19.,  19., 107.,   0.,   1.],
       [ 19.,  19., 107.,   0.,   2.],
       ...,
       [ 40., 209., 105., 498., 496.],
       [ 40., 209., 105., 498., 497.],
       [ 40., 209., 105., 498., 498.]])

In [9]:
# Remember the shape of hsv_image (expected here to be the 4-D (n_images, height, width, 5) layout)
# so later cells can reshape the array and loop over the images.
images_shape = hsv_image.shape

In [10]:
def set_priority(arr, color, pos):
    """Re-weight colour against position by repeating feature columns.

    The first three columns of ``arr`` are repeated ``color`` times and all
    remaining columns ``pos`` times. The result is a new array with every
    colour copy first, followed by every position copy; ``arr`` itself is
    not modified.
    """
    colour_part = arr[:, :3]
    position_part = arr[:, 3:]
    return np.hstack((
        np.tile(colour_part, (1, color)),
        np.tile(position_part, (1, pos)),
    ))

In [11]:
k1 = 15  # number of clusters requested from faiss.Kmeans for every image
color = 1  # how many times set_priority repeats the three colour columns
pos = 7  # how many times set_priority repeats the position columns

In [9]:
# Fit a single MinMaxScaler on the pixels of all images at once, so that every
# image is later scaled with the same per-feature minimum and maximum.
total_pixel_count = images_shape[0] * images_shape[1] * images_shape[2]
minMaxScaler = MinMaxScaler().fit(hsv_image.reshape(total_pixel_count, images_shape[3]))
del total_pixel_count

# Leave hsv_image as (n_images, pixels_per_image, n_features) for the per-image loop.
hsv_image = hsv_image.reshape(
    images_shape[0],
    images_shape[1] * images_shape[2],
    images_shape[3],
)

In [12]:
# One display colour per cluster index, as (R, G, B) triples in the 0-255 range.
# Every entry must be unique: two clusters sharing a colour cannot be told
# apart in the saved segmentation images. "Green" previously duplicated
# "Lime" as (0, 255, 0); it now uses the darker (0, 128, 0).
rgb_codes = [
    (255, 0, 0),  # Red
    (0, 128, 0),  # Green
    (0, 0, 255),  # Blue
    (255, 255, 0),  # Yellow
    (0, 255, 255),  # Cyan
    (255, 0, 255),  # Magenta
    (255, 165, 0),  # Orange
    (128, 0, 128),  # Purple
    (0, 128, 128),  # Teal
    (255, 192, 203),  # Pink
    (0, 255, 0),  # Lime
    (165, 42, 42),  # Brown
    (255, 215, 0),  # Gold
    (112, 128, 144),  # Slate Gray
    (128, 128, 0)  # Olive
]

In [None]:
# Cluster the pixels of every image with k-means, save diagnostic pictures and
# plots, and log five per-image quality figures to a CSV file. The column
# means of that CSV are finally appended to the top-level res.csv.


def _mean_l1_spread(points):
    """Mean L1 distance of the rows of ``points`` from their mean row (0.0 if there are no rows)."""
    if points.shape[0] == 0:
        return 0.0
    return np.abs(points - points.mean(axis=0)).sum(axis=1).mean()


def _mean_pairwise_abs_diff(values):
    """Mean absolute difference over all ordered pairs of distinct entries of a 1-D array."""
    values = np.asarray(values)
    n = values.shape[0]
    return np.sum(np.abs(np.subtract.outer(values, values))) / (n * (n - 1))


def _save_line_plot(values, title, path):
    """Plot ``values`` as a line, title the figure, write it to ``path`` and close it."""
    plt.plot(values)
    plt.title(title)
    plt.savefig(path)
    plt.close()


out_dir = f'images/k{k1}_color{color}_pos{pos}'
per_image_csv = f'{out_dir}/res.csv'

# Create the output folders if needed and start from an empty per-image CSV,
# because the loop below appends one line per image.
os.makedirs(out_dir, exist_ok=True)
if os.path.exists(per_image_csv):
    os.remove(per_image_csv)

height, width = images_shape[1], images_shape[2]
# Cluster colours scaled to [0, 1] for imshow.
palette = np.array(rgb_codes, dtype=np.float32) / 255

list_of_all_clusters = []

for image in range(images_shape[0]):
    print(f'image {image} {datetime.datetime.now()}')

    pixels = hsv_image[image]  # (height * width, 5): HSV followed by position
    hsv_image_scaled = set_priority(minMaxScaler.transform(pixels), color, pos)

    kmeans = faiss.Kmeans(d=hsv_image_scaled.shape[1], k=k1)
    kmeans.train(hsv_image_scaled)

    # Element [1] of assign() holds the cluster index chosen for every pixel.
    assignments = kmeans.assign(hsv_image_scaled)[1]
    list_of_all_clusters.append(assignments)

    # Unscaled pixels of each cluster, selected with boolean masks instead of
    # appending pixel by pixel in a Python loop. Clusters without pixels
    # become empty (0, 5) arrays.
    clusters = [pixels[assignments == cluster_id] for cluster_id in range(k1)]

    labels_for_segment_image = assignments.reshape(height, width)
    centers = kmeans.centroids

    # Picture 1: every pixel painted with the palette colour of its cluster.
    # The modulo keeps this working when k1 exceeds the palette length.
    segmented_image = palette[labels_for_segment_image % len(palette)]
    plt.imshow(segmented_image)
    plt.savefig(f'{out_dir}/RGB{image}.jpg')
    plt.close()

    # Picture 2: every pixel painted with the HSV colour of its cluster centre.
    # The centroids live in MinMax-scaled space, so map their first three
    # coordinates back with the scaler's own parameters
    # (X_scaled = X * scale_ + min_). Previously the single scaled value
    # centers[i][0] was written to all three channels.
    # This relies on color >= 1, so that the first three centroid coordinates
    # are the colour columns.
    # NOTE(review): assumes the HSV features came from 8-bit images, as the
    # previewed values (e.g. S = 209) suggest; TODO confirm.
    center_hsv = (centers[:, :3] - minMaxScaler.min_[:3]) / minMaxScaler.scale_[:3]
    center_hsv = np.clip(np.rint(center_hsv), 0, 255).astype(np.uint8)
    segmented_image = center_hsv[labels_for_segment_image]
    # NOTE(review): HSV2BGR mirrors the BGR2HSV conversion used to build the
    # features. If the source images really are BGR, matplotlib (which expects
    # RGB) needs cv2.COLOR_HSV2RGB here instead; verify against the data.
    plt.imshow(cv2.cvtColor(segmented_image, cv2.COLOR_HSV2BGR))
    plt.savefig(f'{out_dir}/HSV{image}.jpg')
    plt.close()

    # Spatial spread of every cluster (last two columns are the position).
    avg_dis_cluster = np.array([_mean_l1_spread(members[:, -2:]) for members in clusters])
    _save_line_plot(avg_dis_cluster, f"average of cluster distance {image}",
                    f'{out_dir}/{image}.avg_dis_cluster.jpg')
    avg_distance1 = _mean_pairwise_abs_diff(avg_dis_cluster)

    # Colour spread of every cluster (first three columns are H, S, V).
    avg_hsv_cluster = np.array([_mean_l1_spread(members[:, :3]) for members in clusters])
    _save_line_plot(avg_hsv_cluster, f"average of cluster hsv {image}",
                    f'{out_dir}/{image}.avg_hsv_cluster.jpg')
    avg_distance2 = _mean_pairwise_abs_diff(avg_hsv_cluster)

    # Number of pixels in every cluster.
    size_of_cluster = np.array([len(members) for members in clusters])
    _save_line_plot(size_of_cluster, f"size of cluster {image}",
                    f'{out_dir}/{image}.size_of_cluster.jpg')
    avg_distance3 = _mean_pairwise_abs_diff(size_of_cluster)

    db_score = davies_bouldin_score(hsv_image_scaled, assignments)
    ch_score = calinski_harabasz_score(hsv_image_scaled, assignments)
    with open(per_image_csv, 'a') as csv_file:
        csv_file.write(f'{avg_distance1},{avg_distance2},{avg_distance3},{db_score},{ch_score}\n')

    # Release the large per-image temporaries before the next iteration.
    del size_of_cluster, segmented_image, avg_hsv_cluster, avg_dis_cluster, kmeans, clusters, centers, \
        center_hsv, pixels, avg_distance1, avg_distance2, avg_distance3, hsv_image_scaled

# atleast_2d keeps the column means well-defined when the CSV has a single row.
means = np.mean(np.atleast_2d(np.genfromtxt(per_image_csv, delimiter=',')), axis=0)
with open('res.csv', 'a') as csv_file:
    csv_file.write(f'k{k1}_color{color}_pos{pos},{means[0]},{means[1]},{means[2]},{means[3]},{means[4]}\n')