# Image Segmentation using graph kernels

# Import Libraries

In [None]:
# This cell imports all the libraries needed 
from skimage import color
from sklearn.metrics.cluster import adjusted_rand_score
from skimage import data, segmentation, color
from skimage import graph
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from skimage.segmentation import slic
from skimage.color import rgb2lab
from skimage.segmentation import mark_boundaries
from grakel.kernels import ShortestPath
from grakel import Graph
import scipy.io
import os
import glob
from grakel import GraphKernel
from grakel.kernels import ShortestPathAttr, SubgraphMatching, PropagationAttr
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import pairwise_distances
import cv2
import pickle
from grakel.kernels import WeisfeilerLehman, VertexHistogram, NeighborhoodHash, WeisfeilerLehmanOptimalAssignment, EdgeHistogram
import csv
import statistics
from skimage import io, color
import math
from scipy.stats import wilcoxon

# Number of images to process from the validation set
# (used further down to slice the list of image paths)
num_images = 100

# Seed NumPy's global random number generator so repeated runs are reproducible
np.random.seed(42)

## Define Helper Functions

In [None]:
def downsample_image(image:np.array)->np.array:
    """ Halve the resolution of an image in order to speed up calculations.
        Only every second row and every second column is kept, starting at index 0.
    :param image: The original image of type `np.array`.
    :return: The same image sub-sampled along its first two axes.
    """
    every_second = slice(0, None, 2)
    return image[every_second, every_second]


def load_image(path: str)->np.array:
    """
     Read the image stored at `path` and return it downsampled.

     The file is opened with PIL, turned into a NumPy array and then passed through
     `downsample_image`. For an RGB file the channels are laid out as:
     image[:,:,0] - Red channel
     image[:,:,1] - Green channel
     image[:,:,2] - Blue channel
     (presumably 8-bit intensities in the range [0,255]; this depends on the file itself).

    :param path: The path of the image of type `str`.
    :return: The downsampled image stored as `np.array`.

    """
    pixels = np.array(Image.open(path))
    return downsample_image(pixels)


def load_ground_truth_images(
    folder_path: str = "C:/Users/giwrg/Desktop/Master/Modules/Thesis/Reading Material & Data/2- Data/Data Used - 500/groundTruth/val",
)-> dict:
    """ Create a dictionary with one ground-truth segmentation per `.mat` file in a folder.

    Every `.mat` file may hold several human segmentations of the same image. The one
    with the FEWEST distinct regions is selected (see the TODO below; an earlier version
    of this docstring said "most", which did not match the code), downsampled with
    `downsample_image`, and stored.

    :param folder_path: Folder that contains the ground-truth `.mat` files. Defaults to the
    validation folder used throughout this notebook.
    :return groundtruth: Dictionary of the form key:[segmentation,number of segments,chosen_truth].
    Key is the image number (file name without extension) of type `str`, `segmentation` is the
    downsampled label map, `number of segments` is the count of distinct labels left after
    downsampling and `chosen_truth` is the index of the selected annotation inside the file.
    """
    groundtruth = {}
    for mat_path in glob.glob(os.path.join(folder_path, '*.mat')):
        # The file name without directory and extension is the image number
        file_name = os.path.splitext(os.path.basename(mat_path))[0]
        image_number = file_name.split("\\")[-1]

        annotations = scipy.io.loadmat(mat_path)['groundTruth'][0]
        segmentations = [annotations[i][0][0][0] for i in range(annotations.shape[0])]
        # Number of distinct regions in every available annotation
        num_regions = [len(np.unique(segmentation)) for segmentation in segmentations]
        chosen_index = num_regions.index(min(num_regions))  # TODO: Modify this accordingly

        # Downsample true segmentation so that it matches the downsampled images
        selected_truth = downsample_image(segmentations[chosen_index])
        groundtruth[image_number] = [selected_truth, len(np.unique(selected_truth)), chosen_index]
    return groundtruth
    

def superpixel_SLIC(image: np.array, n_segments:int, compactness:int) ->np.array:
    """
    Create superpixels using SLIC algorithm.

    The RGB image is handed to `slic` directly and `slic` performs the RGB -> Lab
    conversion itself (`convert2lab=True`). Converting to Lab beforehand and then
    calling `slic` with its defaults would apply the conversion twice, because
    `slic` converts any 3-channel input to Lab unless told otherwise.

    :param image: Image of type 'np.array' having all 3 channels (RGB format)
    :param n_segments: Approximate number of superpixels of type `int`.
    :param compactness: Defines the tradeoff between space and color proximity of type `int`.
    High m  -> more square superpixels.
    Small m -> arbitrary shapes for superpixels but more sensitive to boundaries.
    :return superpixels: The superpixel regions of type 'np.array'. Labels start at 1.
    """
    superpixels = slic(
        image,
        n_segments=n_segments,
        compactness=compactness,
        start_label=1,
        convert2lab=True,  # single, explicit RGB -> Lab conversion inside slic
    )
    return superpixels

def Euclidean_distance(vector_a:list, vector_b:list)->float:
    """ Euclidean distance between the 2-D co-ordinates of two pixels.

    Only the first two components of each vector are used.

    :param vector_a: Co-ordinates of first node of type `list`.
    :param vector_b: Co-ordinates of second node of type `list`.
    :return: The Euclidean distance between the two co-ordinates as a float.
    """
    delta_first = vector_a[0] - vector_b[0]
    delta_second = vector_a[1] - vector_b[1]
    return np.sqrt(delta_first**2 + delta_second**2)

def rgb_to_grayscale(rgb):
    """ Turn an RGB triple into a single grayscale intensity.

    The channels are combined with the weights 0.2989 (red), 0.5870 (green) and
    0.1140 (blue) and the result is truncated to an integer.

    :param rgb: The RGB value of type `list`.
    :return: The grayscale pixel intensity of type `int` (within [0,255] for 8-bit input).
    """
    red, green, blue = rgb[0], rgb[1], rgb[2]
    weighted_sum = 0.2989 * red + 0.5870 * green + 0.1140 * blue
    return int(weighted_sum)


def edge_labels_calc(vector_a:list, vector_b:list)->str:
    """ Calculate an edge label from the grayscale intensities of its two end points.

    The two intensities are joined with an underscore. Without a separator the
    label would be ambiguous: the pairs (1, 23) and (12, 3) would both produce "123".

    :param vector_a: RGB values of first node of type `list`.
    :param vector_b: RGB values of second node of type `list`.
    :return: String with edge label of the form "<intensity_a>_<intensity_b>".
    """
    A = rgb_to_grayscale(vector_a)
    B = rgb_to_grayscale(vector_b)
    return f"{A}_{B}"


def get_image_number(path: str)-> str:
    """ Extract the image number (file name without folder and extension) from a path.
    :param path: The path of the image of type `str`.
    :return image_number: Return the image number of type `str`.
    """
    # Drop the directory part, then the extension
    stem, _extension = os.path.splitext(os.path.basename(path))
    # Keep only what follows the last backslash, if any is left
    return stem.split("\\")[-1]


def create_graphs(image: np.array, superpixels: np.array)->list:
    """
    Create a list of Graphs where each graph corresponds to a superpixel. Every node in each graph has
    as attributes the RGB values. Each node is connected with at most 8 neighbours.

    Pixels of a superpixel are numbered in the row-major order returned by `np.argwhere`.
    Neighbours are found through a coordinate -> vertex lookup, so the cost per superpixel
    is linear in its number of pixels instead of comparing every pair of pixels.

    :param image: Image of type 'np.array' having all 3 channels (RGB format).
    :param superpixels: The superpixel regions of type 'np.array' (2-D label map; labels are
    expected to be 1..N, as produced with `start_label=1`).
    :return image_graphs: A `list` with graphs for each superpixel. Each graph is an instance of the class
    `Graph` within the Grakel library.
    """
    # Offsets of the 8-neighbours that come BEFORE a pixel in row-major order.
    # The other four neighbours are linked later, when they are visited themselves.
    earlier_neighbours = ((-1, -1), (-1, 0), (-1, 1), (0, -1))

    num_of_superpixels = len(np.unique(superpixels))  # The number of superpixels-graphs
    image_graphs = []
    for graph_num in range(1, num_of_superpixels + 1):
        indices = np.argwhere(superpixels == graph_num)  # Pixels that belong to this superpixel
        adj_matrix = np.zeros((len(indices), len(indices)))  # Adjacency matrix
        node_attributes = {}  # Dictionary with attributes for each vertex.
        vertex_of_pixel = {}  # (row, col) -> vertex index of the pixels visited so far
        for vertex_index, (row, col) in enumerate(indices.tolist()):
            node_attributes[vertex_index] = [image[row, col, 0], image[row, col, 1], image[row, col, 2]]
            for row_offset, col_offset in earlier_neighbours:
                neighbour = vertex_of_pixel.get((row + row_offset, col + col_offset))
                if neighbour is not None:
                    adj_matrix[neighbour, vertex_index] = 1
                    adj_matrix[vertex_index, neighbour] = 1
            vertex_of_pixel[(row, col)] = vertex_index
        G = Graph(adj_matrix, node_labels=node_attributes)
        image_graphs.append(G)
    return image_graphs


def create_graphs_grayscale(image: np.array, superpixels: np.array)->list:
    """
    Create a list of Graphs where each graph corresponds to a superpixel. Every node in each graph has
    as label the grayscale pixel intensity value. Each node is connected with at most 8 neighbours.

    The grayscale conversion is done once for the whole image (it used to be repeated for
    every single pixel). Pixels of a superpixel are numbered in the row-major order returned
    by `np.argwhere` and neighbours are found through a coordinate -> vertex lookup.

    :param image: Image of type 'np.array' having all 3 channels (RGB format).
    :param superpixels: The superpixel regions of type 'np.array' (2-D label map; labels are
    expected to be 1..N, as produced with `start_label=1`).
    :return image_graphs: A `list` with graphs for each superpixel. Each graph is an instance of the class
    `Graph` within the Grakel library.
    """
    # Convert once; the result does not depend on the pixel being processed
    image_gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Offsets of the 8-neighbours that come BEFORE a pixel in row-major order.
    # The other four neighbours are linked later, when they are visited themselves.
    earlier_neighbours = ((-1, -1), (-1, 0), (-1, 1), (0, -1))

    num_of_superpixels = len(np.unique(superpixels))  # The number of superpixels-graphs
    image_graphs = []
    for graph_num in range(1, num_of_superpixels + 1):
        indices = np.argwhere(superpixels == graph_num)  # Pixels that belong to this superpixel
        adj_matrix = np.zeros((len(indices), len(indices)))  # Adjacency matrix
        node_labels = {}  # Dictionary with labels for each vertex.
        vertex_of_pixel = {}  # (row, col) -> vertex index of the pixels visited so far
        for vertex_index, (row, col) in enumerate(indices.tolist()):
            node_labels[vertex_index] = image_gray[row, col]
            for row_offset, col_offset in earlier_neighbours:
                neighbour = vertex_of_pixel.get((row + row_offset, col + col_offset))
                if neighbour is not None:
                    adj_matrix[neighbour, vertex_index] = 1
                    adj_matrix[vertex_index, neighbour] = 1
            vertex_of_pixel[(row, col)] = vertex_index
        G = Graph(adj_matrix, node_labels=node_labels)
        image_graphs.append(G)
    return image_graphs


def create_graphs_edges(image: np.array, superpixels: np.array)->list:
    """
    Create a list of Graphs where each graph corresponds to a superpixel. Every graph has
    edge labels built by `edge_labels_calc` from the RGB values of the two end points.
    Each node is connected with at most 8 neighbours.

    Graphs are built from an edge dictionary, so a pixel without any neighbour inside its
    superpixel does not appear in the graph. Both directions of an edge carry the same
    label, computed as `edge_labels_calc(earlier_pixel, later_pixel)` where "earlier"
    refers to the row-major order returned by `np.argwhere`.

    :param image: Image of type 'np.array' having all 3 channels (RGB format).
    :param superpixels: The superpixel regions of type 'np.array' (2-D label map; labels are
    expected to be 1..N, as produced with `start_label=1`).
    :return image_graphs: A `list` with graphs for each superpixel.
    """
    # Offsets of the 8-neighbours that come BEFORE a pixel in row-major order, listed in
    # increasing vertex order so that edges are inserted in a stable order.
    earlier_neighbours = ((-1, -1), (-1, 0), (-1, 1), (0, -1))

    num_of_superpixels = len(np.unique(superpixels))  # The number of superpixels-graphs
    image_graphs = []
    for graph_num in range(1, num_of_superpixels + 1):
        indices = np.argwhere(superpixels == graph_num)  # Pixels that belong to this superpixel
        node_attributes = {}  # RGB values of every vertex, needed for the edge labels
        edges = {}
        edge_labels = {}
        vertex_of_pixel = {}  # (row, col) -> vertex index of the pixels visited so far
        for vertex_index, (row, col) in enumerate(indices.tolist()):
            node_attributes[vertex_index] = [image[row, col, 0], image[row, col, 1], image[row, col, 2]]
            for row_offset, col_offset in earlier_neighbours:
                neighbour = vertex_of_pixel.get((row + row_offset, col + col_offset))
                if neighbour is None:
                    continue
                label = edge_labels_calc(node_attributes[neighbour], node_attributes[vertex_index])
                edges[(neighbour, vertex_index)] = 1
                edges[(vertex_index, neighbour)] = 1
                edge_labels[(neighbour, vertex_index)] = label
                edge_labels[(vertex_index, neighbour)] = label
            vertex_of_pixel[(row, col)] = vertex_index
        G = Graph(edges, edge_labels=edge_labels)
        image_graphs.append(G)
    return image_graphs


def convert_seg_to_boundaries(seg:np.array)->np.array:
    """ Convert a segmentation in region format (one number per segment) into boundary format:
        an array of the same shape that holds 1 for boundary pixels and 0 elsewhere.

    A pixel is marked as boundary when the horizontal or vertical Sobel response of the
    (padded) label map is non-zero at that position.

    :param seg: The segmentation in region format of type 'np.array'.
    :return result: The segmentation in boundary format of type `np.array` (dtype `uint8`).
    """
    # Add one row on top and one column on the left, filled with the bottom-right label of `seg`.
    # NOTE(review): the padding is added at the top/left while the crop below removes the
    # bottom/right, so the map appears shifted by one pixel relative to `seg`, and the padded
    # value may differ from the labels next to it - confirm this is intended.
    seg_padded = np.pad(seg, ((1, 0), (1, 0)), mode='constant', constant_values=seg[-1, -1])
    
    # First-order Sobel derivatives along x and along y (3x3 kernel, float64 output)
    dx = cv2.Sobel(seg_padded, cv2.CV_64F, 1, 0, ksize=3)
    dy = cv2.Sobel(seg_padded, cv2.CV_64F, 0, 1, ksize=3)
    boundaries = np.abs(dx) + np.abs(dy)
    
    # Drop the last row and column so that the output has the shape of the input again
    boundaries = boundaries[:-1, :-1]
    # Binarise: every strictly positive response becomes 1
    boundaries = cv2.threshold(boundaries, 0, 1, cv2.THRESH_BINARY)[1]
    result = boundaries.astype(np.uint8)

    return result


def evaluate(Predictions: np.array, Human: np.array)->float:
    """ Calculate the F1-score between the predicted boundaries and the ground truth.

    Both inputs are expected to be binary maps (1 = boundary pixel, 0 = other pixel).
    The counts are obtained with vectorised NumPy operations instead of a Python loop
    over every pixel.

    :param Predictions: The predicted boundaries of type `np.array`.
    :param Human: The human labelled boundaries of type `np.array`.
    :return f1score: F1 score which takes values between 0 and 1. Value is rounded to 4 decimal points and the
    closer the value to 1 the better. When neither map contains a boundary pixel the score is
    undefined (0/0); 0.0 is returned in that case.
    """
    predictions = np.asarray(Predictions)
    human = np.asarray(Human)

    numTP = int(np.count_nonzero((predictions == 1) & (human == 1)))
    numFP = int(np.count_nonzero((predictions == 1) & (human == 0)))
    numFN = int(np.count_nonzero((predictions == 0) & (human == 1)))

    denominator = 2 * numTP + numFP + numFN
    if denominator == 0:
        # No boundary pixel in either map: avoid dividing by zero
        return 0.0

    f1score = 2 * numTP / denominator
    return round(f1score, 4)

# Data Transformation - Convert Images to Graphs with RGB Node Attributes

In [None]:
#########################################################################
# Create a list with all the paths for the images in the validation set #
#########################################################################

folder_path_train = "C:/Users/giwrg/Desktop/Master/Modules/Thesis/Reading Material & Data/2- Data/Data Used - 500/images/val"
# Pattern that matches every JPEG file inside the folder
image_pattern = os.path.join(folder_path_train, '*.jpg')
# Resolve the pattern into the list of matching file paths
image_paths = glob.glob(image_pattern)

# Normalise every path: the last backslash (if there is one) becomes a forward slash
train_paths = ["/".join(found_path.rsplit("\\", 1)) for found_path in image_paths]

#################################################################################
# Create a list of lists containing graphs for each image in the validation set #
#################################################################################

images_to_graphs = []
original_images = []
superpixel_segmentations = []
for path in train_paths[:num_images]:
    # Load (and downsample) the image
    current_image = load_image(path)
    original_images.append(current_image)
    # Over-segment it into roughly 800 superpixels with compactness 10
    segments = superpixel_SLIC(current_image, 800, 10)
    superpixel_segmentations.append(segments)
    # One graph with RGB node attributes per superpixel
    images_to_graphs.append(create_graphs(current_image, segments))
    


## Load Ground Truth Segmentations of Test Set

In [None]:
###############################################################################
#                      Load Ground Truth Segmentations                        #
###############################################################################
# Build the dictionary of ground-truth segmentations, keyed by image number.
# Each value has the form [segmentation, number of segments, chosen annotation index].
ground_truth_segmentations = load_ground_truth_images()

# Plot Superpixels for first 5 Images 

In [None]:
# One large figure that holds a 5 x 2 grid of panels
plt.figure(figsize=(25, 25))

# One row per image: the original on the left, its superpixels on the right
for i, image in enumerate(original_images[0:5]):
    left_panel = 2 * i + 1
    right_panel = left_panel + 1

    # Left: the original image
    plt.subplot(5, 2, left_panel)
    plt.imshow(image)
    plt.axis('off')
    plt.title('Original', fontsize=18)

    # Right: the image with the superpixel boundaries drawn on top
    overlay = mark_boundaries(image, superpixel_segmentations[i])
    plt.subplot(5, 2, right_panel)
    plt.imshow(overlay)
    plt.axis('off')
    plt.title('Superpixels', fontsize=18)

# Adjust the layout and spacing
plt.tight_layout()

# Calculate Basic Statistics for Graphs 

In [None]:
###############################################################
##### Number of Nodes for Each graph in each Image      #######
###############################################################
node_counts = [
    [graph.get_adjacency_matrix().shape[0] for graph in image]
    for image in images_to_graphs
]

# Percentage of the image that each graph covers.
# The shape is read from the images that are already in memory instead of
# loading every file from disk a second time.
node_counts_freq = []
for current_image, counts in zip(original_images, node_counts):
    n_row, n_col = current_image.shape[:2]
    total_vertices = n_row * n_col  # Number of pixels of the (downsampled) image
    node_counts_freq.append([round((x / total_vertices) * 100, 2) for x in counts])


###############################################################
##### Number of Edges for Each graph in each Image      #######
###############################################################
edge_counts = []
for image in images_to_graphs:
    count_edges = []
    for graph in image:
        adjacency = graph.get_adjacency_matrix()
        # The matrix is symmetric, so every edge is counted twice
        count_edges.append(np.sum(adjacency == 1) / 2)
    edge_counts.append(count_edges)

# Calculate density (in percent) for each graph
graph_density = []  # A list of lists where each inner list has the density of each subgraph
for i, image in enumerate(edge_counts):
    temp_density = []
    for j, num_edges in enumerate(image):
        n = node_counts[i][j]
        possible_edges = n * (n - 1) / 2
        if possible_edges == 0:
            # A graph with fewer than two nodes cannot have edges; avoid 0/0
            density = 0.0
        else:
            density = round((num_edges / possible_edges) * 100, 2)
        temp_density.append(density)
    graph_density.append(temp_density)

# Average Density of Each Graph of Each Image

In [None]:
###############################################################
############# Plot Average Graph Density or Each Image    #####
###############################################################

# Mean density of the superpixel graphs, one value per image
mean_density_per_image = [sum(sublist) / len(sublist) for sublist in graph_density]

# Histogram of the per-image mean densities
sns.set(style='ticks', context='paper')
sns.histplot(data=mean_density_per_image)

# Axis labels and title
plt.xlabel("Average Graph Density")
plt.ylabel("Frequency")
plt.title("Average Density of Graphs for each Image")

# Tighten the layout for better presentation
plt.tight_layout()

# Average Graph size for Each Image

In [None]:
#########################################################################
############# Plot Average Graph size for Each Image     ###############
#########################################################################

# Mean number of nodes of the superpixel graphs, one value per image
mean_size_per_image = [sum(sublist) / len(sublist) for sublist in node_counts]

# Histogram of the per-image mean graph sizes
sns.set(style='ticks', context='paper')
sns.histplot(data=mean_size_per_image)

# Axis labels and title
plt.xlabel("Average Graph Size")
plt.ylabel("Frequency")
plt.title("Average Size of Graphs for each Image")

# Tighten the layout for better presentation
plt.tight_layout()

# Average Edge Number for Each Image 

In [None]:
#########################################################################
############# Plot Average Edge Number for Each Image     ###############
#########################################################################

# Mean number of edges of the superpixel graphs, one value per image
mean_edges_per_image = [sum(sublist) / len(sublist) for sublist in edge_counts]

# Histogram of the per-image mean edge counts
sns.set(style='ticks', context='paper')
sns.histplot(data=mean_edges_per_image)

# Axis labels and title
plt.xlabel("Average Edge Number")
plt.ylabel("Frequency")
plt.title("Average Edge Number of Graphs for each Image")

# Tighten the layout for better presentation
plt.tight_layout()

## Scatterplot of Avg Graph Size vs Avg Graph Density

In [None]:
# Larger graphs are sparser: a pixel has at most 8 neighbours, so the number
# of edges grows far more slowly than the number of possible node pairs.

avg_size = []
avg_density = []
for sizes, densities in zip(node_counts, graph_density):
    avg_size.append(sum(sizes) / len(sizes))
    avg_density.append(sum(densities) / len(densities))

sns.scatterplot(x=avg_size, y=avg_density)
plt.xlabel("Average Graph Size")
plt.ylabel("Average Graph Density")
plt.title("Scatterplot of Avg Graph Size vs Avg Graph Density")

# Calculate Kernel Matrix Using Propagation Kernel

In [None]:
# Propagation kernel (attributed version): one kernel matrix per image,
# computed between all superpixel graphs of that image.
gk = PropagationAttr(normalize=True,  t_max=2, w=0.001)
Kernel_matrix = []
for image in images_to_graphs:
    similarity = gk.fit_transform(image)
    # The kernel is normalised, so "1 - similarity" serves as dissimilarity
    Kernel_matrix.append(1 - similarity)

# Perform Clustering Using Propagation Kernel Matrices and Hierarchical Clustering

In [None]:
# Use as many clusters as the chosen ground truth has regions, so that the results are comparable.
# The image number extracted from the path is the key into the ground-truth dictionary.
predicted_clusters = []
for i, dissimilarity in enumerate(Kernel_matrix):
    Image_number = get_image_number(train_paths[i])
    k = ground_truth_segmentations[Image_number][1]  # Number of regions of the chosen ground truth
    # Hierarchical clustering with group-average linkage on the precomputed dissimilarities
    clustering = AgglomerativeClustering(n_clusters=k, linkage='average',metric='precomputed')
    predicted_clusters.append(clustering.fit_predict(dissimilarity))

In [None]:
# We need to convert the cluster labels to pixel labels.
# Each graph in images_to_graphs is assigned a cluster and each graph stands for the pixels
# of one superpixel, so every pixel receives the cluster of the superpixel it belongs to.
# The output shape is taken from the superpixel map (no need to load the image from disk again)
# and whole superpixels are written at once through a boolean mask.
image_segmentations = []
for i in range(len(Kernel_matrix)):
    superpixels = superpixel_segmentations[i]
    image_segmentation = np.zeros(superpixels.shape)  # Final Segmentation
    num_of_superpixels = len(np.unique(superpixels))  # The number of superpixels-graphs
    for graph_num in range(1, num_of_superpixels + 1):
        # Graph number `graph_num` is stored at position `graph_num - 1`
        image_segmentation[superpixels == graph_num] = predicted_clusters[i][graph_num - 1]

    image_segmentations.append(image_segmentation)

## Plot Results for first 5 Images

In [None]:
# Set the figure size
plt.figure(figsize=(25, 25))

# Create comparison plots for the images: original | predicted | ground truth
for i, image in enumerate(original_images[0:5]):

    # Get the truth also
    Image_number = get_image_number(train_paths[i])
    human        = ground_truth_segmentations[Image_number][0]
    # Replace each region with its average RGB value
    human        = color.label2rgb(human, image, kind='avg', bg_label=0)

    # Plot the original image
    plt.subplot(5, 3, 3*i+1)
    plt.imshow(image)
    plt.axis('off')
    plt.title('Original', fontsize=18)

    # Plot the segmented image
    plt.subplot(5, 3, 3*i+2)
    # Replace each region with its average RGB value.
    # Cluster ids start at 0, so 0 is a real segment and must not be treated as background
    # (with bg_label=0 that segment would be painted black). -1 never occurs as a cluster id.
    colored_segmentation = color.label2rgb(image_segmentations[i], image, kind='avg', bg_label=-1)
    plt.imshow(colored_segmentation)
    plt.axis('off')
    plt.title('Segmented Image', fontsize=18)

    # Plot the truth
    plt.subplot(5, 3, 3*i+3)
    plt.imshow(human)
    plt.axis('off')
    plt.title('True Segmentation', fontsize=18)

# Adjust the layout and spacing
plt.tight_layout()

# Performance Evaluation - Propagation

In [None]:
F1_scores = []
# Keep the propagation-kernel segmentations under their own name
propagation_segmentations = image_segmentations.copy()
for i, predicted_segmentation in enumerate(image_segmentations[:len(Kernel_matrix)]):
    Image_number = get_image_number(train_paths[i])
    # The chosen ground-truth segmentation is the first entry of the dictionary value
    ground_truth = ground_truth_segmentations[Image_number][0]

    # Both segmentations are compared in boundary format
    ground_truth_boundry = convert_seg_to_boundaries(ground_truth)
    image_segmentation_boundry = convert_seg_to_boundaries(predicted_segmentation)

    # F1 score of the predicted boundaries against the human ones
    F_score = evaluate(image_segmentation_boundry, ground_truth_boundry)
    F1_scores.append(F_score)

# Name of the CSV file that receives the scores
csv_file = 'Propagation_test_set.csv'

# One score per row
with open(csv_file, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerows([score] for score in F1_scores)

F1_scores_propagation = F1_scores.copy()

# Also store the list of scores as a pickle file
with open('propagation_test_results.pkl', 'wb') as file:
    pickle.dump(F1_scores, file)

# Repeat for WL Kernel

# Data Transformation - Convert Images to Graphs with Node Labels

In [None]:
#######################################################################
# Create a list with all the paths for the images in the test set     #
#######################################################################

folder_path_train = "C:/Users/giwrg/Desktop/Master/Modules/Thesis/Reading Material & Data/2- Data/Data Used - 500/images/val"
# Pattern that matches every JPEG file inside the folder
image_pattern = os.path.join(folder_path_train, '*.jpg')
# Resolve the pattern into the list of matching file paths
image_paths = glob.glob(image_pattern)

# Normalise every path: the last backslash (if there is one) becomes a forward slash
train_paths = ["/".join(found_path.rsplit("\\", 1)) for found_path in image_paths]

###############################################################################
# Create a list of lists containing graphs for each image in the test set     #
###############################################################################

images_to_graphs = []
original_images = []
superpixel_segmentations = []
for path in train_paths[:num_images]:
    # Load (and downsample) the image
    current_image = load_image(path)
    original_images.append(current_image)
    # Over-segment it into roughly 800 superpixels with compactness 10
    segments = superpixel_SLIC(current_image, 800, 10)
    superpixel_segmentations.append(segments)
    # One graph with grayscale node labels per superpixel
    images_to_graphs.append(create_graphs_grayscale(current_image, segments))

# Calculate Kernel Matrix Using WeisfeilerLehman Kernel

In [None]:
# Weisfeiler-Lehman kernel: one kernel matrix per image,
# computed between all superpixel graphs of that image.
gk = WeisfeilerLehman(normalize=False, n_iter=4)
Kernel_matrix = []
for image in images_to_graphs:
    similarity = gk.fit_transform(image)
    # The kernel is not normalised: subtract it from its largest entry to get a dissimilarity
    largest_similarity = np.max(similarity)
    Kernel_matrix.append(largest_similarity - similarity)

# Perform Clustering Using WL Kernel Matrices and Hierarchical Clustering

In [None]:
# Use as many clusters as the chosen ground truth has regions, so that the results are comparable.
# The image number extracted from the path is the key into the ground-truth dictionary.
predicted_clusters = []
for i, dissimilarity in enumerate(Kernel_matrix):
    Image_number = get_image_number(train_paths[i])
    k = ground_truth_segmentations[Image_number][1]  # Number of regions of the chosen ground truth
    # Hierarchical clustering with group-average linkage on the precomputed dissimilarities
    clustering = AgglomerativeClustering(n_clusters=k, linkage='average',metric='precomputed')
    predicted_clusters.append(clustering.fit_predict(dissimilarity))

In [None]:
# We need to convert the cluster labels to pixel labels.
# Each graph in images_to_graphs is assigned a cluster and each graph stands for the pixels
# of one superpixel, so every pixel receives the cluster of the superpixel it belongs to.
# The output shape is taken from the superpixel map (no need to load the image from disk again)
# and whole superpixels are written at once through a boolean mask.
image_segmentations = []
for i in range(len(Kernel_matrix)):
    superpixels = superpixel_segmentations[i]
    image_segmentation = np.zeros(superpixels.shape)  # Final Segmentation
    num_of_superpixels = len(np.unique(superpixels))  # The number of superpixels-graphs
    for graph_num in range(1, num_of_superpixels + 1):
        # Graph number `graph_num` is stored at position `graph_num - 1`
        image_segmentation[superpixels == graph_num] = predicted_clusters[i][graph_num - 1]

    image_segmentations.append(image_segmentation)

## Plot Results for first 5 Images

In [None]:
# Set the figure size
plt.figure(figsize=(25, 25))

# Create comparison plots for the images: original | predicted | ground truth
for i, image in enumerate(original_images[0:5]):

    # Get the truth also
    Image_number = get_image_number(train_paths[i])
    human        = ground_truth_segmentations[Image_number][0]
    # Replace each region with its average RGB value
    human        = color.label2rgb(human, image, kind='avg', bg_label=0)

    # Plot the original image
    plt.subplot(5, 3, 3*i+1)
    plt.imshow(image)
    plt.axis('off')
    plt.title('Original', fontsize=18)

    # Plot the segmented image
    plt.subplot(5, 3, 3*i+2)
    # Replace each region with its average RGB value.
    # Cluster ids start at 0, so 0 is a real segment and must not be treated as background
    # (with bg_label=0 that segment would be painted black). -1 never occurs as a cluster id.
    colored_segmentation = color.label2rgb(image_segmentations[i], image, kind='avg', bg_label=-1)
    plt.imshow(colored_segmentation)
    plt.axis('off')
    plt.title('Segmented Image', fontsize=18)

    # Plot the truth
    plt.subplot(5, 3, 3*i+3)
    plt.imshow(human)
    plt.axis('off')
    plt.title('True Segmentation', fontsize=18)

# Adjust the layout and spacing
plt.tight_layout()

# Performance Evaluation - WL

In [None]:
# Score every WL segmentation against the chosen human segmentation and
# persist the scores (CSV for inspection, pickle for the comparison cells).
WL_segmentations = list(image_segmentations)  # kept for the later side-by-side plots

F1_scores = []
for i in range(len(Kernel_matrix)):
    Image_number = get_image_number(train_paths[i])
    # Entry [0] of the ground-truth record is the chosen human segmentation
    ground_truth = ground_truth_segmentations[Image_number][0]

    # Both segmentations are compared in boundary format
    ground_truth_boundry = convert_seg_to_boundaries(ground_truth)
    image_segmentation_boundry = convert_seg_to_boundaries(image_segmentations[i])

    F1_scores.append(evaluate(image_segmentation_boundry, ground_truth_boundry))

# One score per row
csv_file = 'WL_test_set.csv'
with open(csv_file, 'w', newline='') as file:
    csv.writer(file).writerows([score] for score in F1_scores)

F1_scores_WL = list(F1_scores)

# Save the list as a pickle file
with open('WL_test_results.pkl', 'wb') as file:
    pickle.dump(F1_scores, file)

# Repeat for Shortest Path Kernel

# Calculate Kernel Matrix Using Shortest Path Kernel

In [None]:
# Shortest Path kernel: one matrix per image, computed over that image's graphs.
gk = ShortestPath(normalize=False)
Kernel_matrix = []
for image in images_to_graphs:
    similarities = gk.fit_transform(image)
    # Turn similarities into dissimilarities by subtracting from the largest entry
    Kernel_matrix.append(np.max(similarities) - similarities)

# Perform Clustering Using Shortest Path Kernel Matrices and Hierarchical Clustering

In [None]:
# Cluster the superpixel graphs of every image. The number of clusters is taken
# from the ground truth so that the results are comparable.
predicted_clusters = []
for i, distance_matrix in enumerate(Kernel_matrix):
    # The image number is the key into the ground-truth dictionary
    Image_number = get_image_number(train_paths[i])
    k = ground_truth_segmentations[Image_number][1]  # Chosen number of regions
    # Hierarchical clustering with group-average linkage on the precomputed matrix
    clustering = AgglomerativeClustering(n_clusters=k, linkage='average', metric='precomputed')
    predicted_clusters.append(clustering.fit_predict(distance_matrix))

In [None]:
# Convert the cluster labels to pixel labels.
# Every graph (superpixel) received a cluster; every superpixel covers several
# pixels, so each pixel inherits the cluster of its superpixel.
# NOTE(review): like the original loop, this assumes superpixel ids run 1..N and
# that predicted_clusters[i][j] belongs to superpixel j+1 - confirm against the
# helper that builds the graphs.
image_segmentations = []
for i in range(len(Kernel_matrix)):
    segments = superpixel_segmentations[i]
    # original_images[i] was loaded from train_paths[i]; reuse its shape instead
    # of reading the file from disk a second time.
    n_row, n_col = original_images[i].shape[:2]
    image_segmentation = np.zeros((n_row, n_col))  # Final segmentation (float labels)
    num_of_superpixels = len(np.unique(segments))  # The number of superpixels-graphs
    for graph_num in range(1, num_of_superpixels + 1):
        # One vectorised assignment per superpixel replaces the per-pixel Python loop.
        member_pixels = np.nonzero(segments == graph_num)
        image_segmentation[member_pixels] = predicted_clusters[i][graph_num - 1]

    image_segmentations.append(image_segmentation)

# Plot Results for first 5 Images

In [None]:
# Side-by-side comparison (original / predicted / human) for the first 5 images.
plt.figure(figsize=(25, 25))

for i, image in enumerate(original_images[0:5]):

    # Human segmentation, rendered with the average RGB value of each region
    Image_number = get_image_number(train_paths[i])
    human        = ground_truth_segmentations[Image_number][0]
    human        = color.label2rgb(human, image, kind='avg', bg_label=0)

    # Predicted segmentation, rendered the same way.
    # Cluster ids start at 0, so 0 is a genuine region: with bg_label=0 that whole
    # cluster was painted black as "background". -1 never occurs as a label, so
    # every cluster now receives its average colour.
    colored_segmentation = color.label2rgb(image_segmentations[i], image, kind='avg', bg_label=-1)

    panels = [
        (image, 'Original'),
        (colored_segmentation, 'Segmented Image'),
        (human, 'True Segmentation'),
    ]
    for column, (picture, title) in enumerate(panels, start=1):
        plt.subplot(5, 3, 3*i + column)
        plt.imshow(picture)
        plt.axis('off')
        plt.title(title, fontsize=18)

# Adjust the layout and spacing
plt.tight_layout()

# Performance Evaluation - Shortest Path

In [None]:
# Score every Shortest Path segmentation against the chosen human segmentation
# and persist the scores (CSV for inspection, pickle for the comparison cells).
F1_scores = []
for i in range(len(Kernel_matrix)):
    Image_number = get_image_number(train_paths[i])
    # Entry [0] of the ground-truth record is the chosen human segmentation
    ground_truth = ground_truth_segmentations[Image_number][0]

    # Both segmentations are compared in boundary format
    ground_truth_boundry = convert_seg_to_boundaries(ground_truth)
    image_segmentation_boundry = convert_seg_to_boundaries(image_segmentations[i])

    F1_scores.append(evaluate(image_segmentation_boundry, ground_truth_boundry))

# One score per row
csv_file = 'SP_test_set.csv'
with open(csv_file, 'w', newline='') as file:
    csv.writer(file).writerows([score] for score in F1_scores)

F1_scores_SP = list(F1_scores)

# Save the list as a pickle file
with open('SP_test_results.pkl', 'wb') as file:
    pickle.dump(F1_scores, file)

# Repeat for Vertex Histogram Kernel

In [None]:
# Vertex Histogram kernel: one matrix per image, computed over that image's graphs.
gk = VertexHistogram(normalize=False)
Kernel_matrix = []
for image in images_to_graphs:
    similarities = gk.fit_transform(image)
    # Turn similarities into dissimilarities by subtracting from the largest entry
    Kernel_matrix.append(np.max(similarities) - similarities)

# Perform Clustering Using Vertex Histogram Kernel Matrices and Hierarchical Clustering

In [None]:
# Cluster the superpixel graphs of every image. The number of clusters is taken
# from the ground truth so that the results are comparable.
predicted_clusters = []
for i, distance_matrix in enumerate(Kernel_matrix):
    # The image number is the key into the ground-truth dictionary
    Image_number = get_image_number(train_paths[i])
    k = ground_truth_segmentations[Image_number][1]  # Chosen number of regions
    # Hierarchical clustering with group-average linkage on the precomputed matrix
    clustering = AgglomerativeClustering(n_clusters=k, linkage='average', metric='precomputed')
    predicted_clusters.append(clustering.fit_predict(distance_matrix))

In [None]:
# Convert the cluster labels to pixel labels.
# Every graph (superpixel) received a cluster; every superpixel covers several
# pixels, so each pixel inherits the cluster of its superpixel.
# NOTE(review): like the original loop, this assumes superpixel ids run 1..N and
# that predicted_clusters[i][j] belongs to superpixel j+1 - confirm against the
# helper that builds the graphs.
image_segmentations = []
for i in range(len(Kernel_matrix)):
    segments = superpixel_segmentations[i]
    # original_images[i] was loaded from train_paths[i]; reuse its shape instead
    # of reading the file from disk a second time.
    n_row, n_col = original_images[i].shape[:2]
    image_segmentation = np.zeros((n_row, n_col))  # Final segmentation (float labels)
    num_of_superpixels = len(np.unique(segments))  # The number of superpixels-graphs
    for graph_num in range(1, num_of_superpixels + 1):
        # One vectorised assignment per superpixel replaces the per-pixel Python loop.
        member_pixels = np.nonzero(segments == graph_num)
        image_segmentation[member_pixels] = predicted_clusters[i][graph_num - 1]

    image_segmentations.append(image_segmentation)

# Plot Results for first 5 Images

In [None]:
# Side-by-side comparison (original / predicted / human) for the first 5 images.
plt.figure(figsize=(25, 25))

for i, image in enumerate(original_images[0:5]):

    # Human segmentation, rendered with the average RGB value of each region
    Image_number = get_image_number(train_paths[i])
    human        = ground_truth_segmentations[Image_number][0]
    human        = color.label2rgb(human, image, kind='avg', bg_label=0)

    # Predicted segmentation, rendered the same way.
    # Cluster ids start at 0, so 0 is a genuine region: with bg_label=0 that whole
    # cluster was painted black as "background". -1 never occurs as a label, so
    # every cluster now receives its average colour.
    colored_segmentation = color.label2rgb(image_segmentations[i], image, kind='avg', bg_label=-1)

    panels = [
        (image, 'Original'),
        (colored_segmentation, 'Segmented Image'),
        (human, 'True Segmentation'),
    ]
    for column, (picture, title) in enumerate(panels, start=1):
        plt.subplot(5, 3, 3*i + column)
        plt.imshow(picture)
        plt.axis('off')
        plt.title(title, fontsize=18)

# Adjust the layout and spacing
plt.tight_layout()

# Performance Evaluation - Vertex Histogram Kernel

In [None]:
# Score every Vertex Histogram segmentation against the chosen human segmentation
# and persist the scores (CSV for inspection, pickle for the comparison cells).
F1_scores = []
for i in range(len(Kernel_matrix)):
    Image_number = get_image_number(train_paths[i])
    # Entry [0] of the ground-truth record is the chosen human segmentation
    ground_truth = ground_truth_segmentations[Image_number][0]

    # Both segmentations are compared in boundary format
    ground_truth_boundry = convert_seg_to_boundaries(ground_truth)
    image_segmentation_boundry = convert_seg_to_boundaries(image_segmentations[i])

    F1_scores.append(evaluate(image_segmentation_boundry, ground_truth_boundry))

# One score per row
csv_file = 'vertex_histogram_test_set.csv'
with open(csv_file, 'w', newline='') as file:
    csv.writer(file).writerows([score] for score in F1_scores)

F1_scores_vertex = list(F1_scores)

# Save the list as a pickle file
with open('vertex_test_results.pkl', 'wb') as file:
    pickle.dump(F1_scores, file)

# Repeat for Edge Histogram Kernel

# Data Transformation - Convert Images to Graphs with Edge Labels

In [None]:
#######################################################################
# Collect the paths of all .jpg images in the `val` image folder      #
#######################################################################

folder_path_train = "C:/Users/giwrg/Desktop/Master/Modules/Thesis/Reading Material & Data/2- Data/Data Used - 500/images/val"  
# Pattern that matches the image files, expanded with glob
image_pattern = os.path.join(folder_path_train, '*.jpg')  
image_paths = glob.glob(image_pattern)

# Replace only the last backslash of every path with a forward slash
train_paths = ["/".join(path.rsplit("\\", 1)) for path in image_paths]
    
###############################################################################
# Build, per image, the list of edge-labelled graphs (first num_images only)  #
###############################################################################

images_to_graphs = []
original_images = []
superpixel_segmentations = []
for path in train_paths[:num_images]:  
    current_image = load_image(path)
    original_images.append(current_image)
    # NOTE(review): 800 and 10 are presumably the SLIC segment count and
    # compactness - confirm against superpixel_SLIC.
    segments = superpixel_SLIC(current_image, 800, 10)
    superpixel_segmentations.append(segments)
    images_to_graphs.append(create_graphs_edges(current_image, segments))

# Calculate Kernel Matrix Using Edge Histogram Kernel

In [None]:
# Edge Histogram kernel: one matrix per image, computed over that image's graphs.
gk = EdgeHistogram(normalize=False)
Kernel_matrix = []
for image in images_to_graphs:
    similarities = gk.fit_transform(image)
    # Turn similarities into dissimilarities by subtracting from the largest entry
    Kernel_matrix.append(np.max(similarities) - similarities)

# Perform Clustering Using Edge Histogram Kernel Matrices and Hierarchical Clustering

In [None]:
# Cluster the superpixel graphs of every image. The number of clusters is taken
# from the ground truth so that the results are comparable.
predicted_clusters = []
for i, distance_matrix in enumerate(Kernel_matrix):
    # The image number is the key into the ground-truth dictionary
    Image_number = get_image_number(train_paths[i])
    k = ground_truth_segmentations[Image_number][1]  # Chosen number of regions
    # Hierarchical clustering with group-average linkage on the precomputed matrix
    clustering = AgglomerativeClustering(n_clusters=k, linkage='average', metric='precomputed')
    predicted_clusters.append(clustering.fit_predict(distance_matrix))

In [None]:
# Convert the cluster labels to pixel labels.
# Every graph (superpixel) received a cluster; every superpixel covers several
# pixels, so each pixel inherits the cluster of its superpixel.
# NOTE(review): like the original loop, this assumes superpixel ids run 1..N and
# that predicted_clusters[i][j] belongs to superpixel j+1 - confirm against the
# helper that builds the graphs.
image_segmentations = []
for i in range(len(Kernel_matrix)):
    segments = superpixel_segmentations[i]
    # original_images[i] was loaded from train_paths[i]; reuse its shape instead
    # of reading the file from disk a second time.
    n_row, n_col = original_images[i].shape[:2]
    image_segmentation = np.zeros((n_row, n_col))  # Final segmentation (float labels)
    num_of_superpixels = len(np.unique(segments))  # The number of superpixels-graphs
    for graph_num in range(1, num_of_superpixels + 1):
        # One vectorised assignment per superpixel replaces the per-pixel Python loop.
        member_pixels = np.nonzero(segments == graph_num)
        image_segmentation[member_pixels] = predicted_clusters[i][graph_num - 1]

    image_segmentations.append(image_segmentation)

# Plot Results for first 5 Images

In [None]:
# Side-by-side comparison (original / predicted / human) for the first 5 images.
plt.figure(figsize=(25, 25))

for i, image in enumerate(original_images[0:5]):

    # Human segmentation, rendered with the average RGB value of each region
    Image_number = get_image_number(train_paths[i])
    human        = ground_truth_segmentations[Image_number][0]
    human        = color.label2rgb(human, image, kind='avg', bg_label=0)

    # Predicted segmentation, rendered the same way.
    # Cluster ids start at 0, so 0 is a genuine region: with bg_label=0 that whole
    # cluster was painted black as "background". -1 never occurs as a label, so
    # every cluster now receives its average colour.
    colored_segmentation = color.label2rgb(image_segmentations[i], image, kind='avg', bg_label=-1)

    panels = [
        (image, 'Original'),
        (colored_segmentation, 'Segmented Image'),
        (human, 'True Segmentation'),
    ]
    for column, (picture, title) in enumerate(panels, start=1):
        plt.subplot(5, 3, 3*i + column)
        plt.imshow(picture)
        plt.axis('off')
        plt.title(title, fontsize=18)

# Adjust the layout and spacing
plt.tight_layout()

# Performance Evaluation - Edge Histogram Kernel

In [None]:
# Score every Edge Histogram segmentation against the chosen human segmentation
# and persist the scores (CSV for inspection, pickle for the comparison cells).
edge_segmentations = list(image_segmentations)  # snapshot of this kernel's segmentations

F1_scores = []
for i in range(len(Kernel_matrix)):
    Image_number = get_image_number(train_paths[i])
    # Entry [0] of the ground-truth record is the chosen human segmentation
    ground_truth = ground_truth_segmentations[Image_number][0]

    # Both segmentations are compared in boundary format
    ground_truth_boundry = convert_seg_to_boundaries(ground_truth)
    image_segmentation_boundry = convert_seg_to_boundaries(image_segmentations[i])

    F1_scores.append(evaluate(image_segmentation_boundry, ground_truth_boundry))

# One score per row
csv_file = 'edge_histogram_test_set.csv'
with open(csv_file, 'w', newline='') as file:
    csv.writer(file).writerows([score] for score in F1_scores)

F1_scores_edge = list(F1_scores)

# Save the list as a pickle file
with open('edge_test_results.pkl', 'wb') as file:
    pickle.dump(F1_scores, file)

# Compare Results Between Different Kernels

In [None]:
# Box plot of the per-image F1 scores of all five kernels.
# The scores are read back from the pickle files written by the evaluation
# cells (only load pickle files that this notebook produced itself).

with open('propagation_test_results.pkl', 'rb') as file:
    F1_scores_propagation = pickle.load(file)

with open('WL_test_results.pkl', 'rb') as file:
    F1_scores_WL = pickle.load(file)

with open('SP_test_results.pkl', 'rb') as file:
    F1_scores_SP = pickle.load(file)

with open('vertex_test_results.pkl', 'rb') as file:
    F1_scores_vertex = pickle.load(file)

with open('edge_test_results.pkl', 'rb') as file:
    F1_scores_edge = pickle.load(file)

# One list of scores per kernel, in the same order as `labels`
F1_scores = [
    F1_scores_propagation,
    F1_scores_WL,
    F1_scores_SP,
    F1_scores_vertex,
    F1_scores_edge,
]
labels = ['Propagation', 'WL', 'SP', 'Vertex Histogram', 'Edge Histogram']

# Larger figure, paper-style theme
plt.figure(figsize=(10, 6))
sns.set(style='ticks', context='paper')

ax = sns.boxplot(data=F1_scores)
ax.set_xticklabels(labels, rotation=90)
plt.title("F1 scores for different Graph Kernels")
plt.xlabel('Graph Kernel')
plt.ylabel('F1 score')

# Mean score per kernel, written just above the highest point of each box
averages = [np.mean(category) for category in F1_scores]
for i, (category, average) in enumerate(zip(F1_scores, averages)):
    ax.text(i, np.max(category) + 0.005 , f'Avg: {average:.2f}', ha='center', va='bottom', fontweight='bold')

plt.tight_layout()

# Although the Propagation kernel achieves much better performance overall, it can underperform in some cases. 
# Based on what we saw from the hyperparameter tuning, it is sensitive to the choice of the parameters.
# On the other hand, we see that kernels that utilize node labels are more stable but tend to underperform.
# We can also see that including structural information about the graphs does not improve the result in the case 
# of node-labelled images.

# Perform Wilcoxon signed-rank test

In [None]:
# Wilcoxon signed-rank test on the paired per-image F1 scores
# Sources used: 
# https://en.wikipedia.org/wiki/Wilcoxon_signed-rank_test
# https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.wilcoxon.html
# https://www.statisticssolutions.com/free-resources/directory-of-statistical-analyses/how-to-conduct-the-wilcox-sign-test/#:~:text=The%20next%20step%20of%20the,where%20the%20difference%20is%20zero.

# The test is applied to the per-image differences (Propagation minus WL).
# The one-sided alternative is that the median of these differences is greater
# than zero, i.e. that the Propagation kernel scores higher than the WL kernel.
# NOTE(review): the pairing assumes both score lists follow the same image order.

d = [propagation_score - wl_score
     for propagation_score, wl_score in zip(F1_scores_propagation, F1_scores_WL)]
res = wilcoxon(d, alternative='greater')
(res.statistic, res.pvalue)

# This shows that the null hypothesis that the median of the differences is zero can be rejected at a significance level of 5% 
# in favor of the alternative that the median is greater than zero.
# Therefore, based on the sample of 100 images, we have enough statistical evidence to say that the 
# Propagation kernel performs better than the WL kernel.
# Of course, the use of RGB values plays an important role in this result, as we use more information for measuring the similarity 
# between pixels. The Propagation kernel also takes advantage of the distance between the RGB values.


## Comparison Plots of Performance

In [None]:
# Rank plot: the F1 scores of every kernel sorted in increasing order, so the
# x axis is the rank of an image within that kernel's own results.
sorted_propagation = sorted(F1_scores_propagation)
sorted_WL = sorted(F1_scores_WL)
sorted_SP = sorted(F1_scores_SP)
sorted_vertex = sorted(F1_scores_vertex)
sorted_edge = sorted(F1_scores_edge)

# One seaborn line per kernel, each with its own marker
ranked_series = [
    (sorted_propagation, 'o', 'Propagation'),
    (sorted_WL, 'x', 'WL'),
    (sorted_SP, 's', 'SP'),
    (sorted_vertex, '^', 'Vertex'),
    (sorted_edge, 'd', 'Edge'),
]
for ranked_scores, marker_symbol, kernel_name in ranked_series:
    sns.lineplot(x=range(len(ranked_scores)), y=ranked_scores, marker=marker_symbol, label=kernel_name)

# Place the legend outside the plot
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.title("Comparison of performance among different kernels")
plt.xlabel("Image Rank")
plt.ylabel("F1 score")


#####################################################################################################
#####################################   Barplot #####################################################
#####################################################################################################

# Grouped bar chart: for every F1-score bin, how many images each kernel has in it.
# The scores are taken per kernel from the named lists, so the grouping no longer
# depends on every list holding exactly 100 scores.
kernel_scores = {
    'Propagation': F1_scores_propagation,
    'WL': F1_scores_WL,
    'SP': F1_scores_SP,
    'Vertex Histogram': F1_scores_vertex,
    'Edge Histogram': F1_scores_edge,
}

# Bar colour of each kernel
color_dict = {'Propagation': 'blue', 'WL': 'green', 'SP': 'orange', 'Vertex Histogram': 'red', 'Edge Histogram': 'black' }

# Binning parameters (single definition, used for both counting and tick labels).
# np.histogram ignores values outside [first edge, last edge], so a score above
# 0.80 would not be counted in any bin.
bin_edges = np.array([0, 0.15, 0.3, 0.45, 0.80])
num_bins = len(bin_edges) - 1

# Number of images per bin, for each kernel
histograms = {
    label: np.histogram(scores, bins=bin_edges)[0]
    for label, scores in kernel_scores.items()
}

# Plot the grouped histogram: kernel j (1-based) is drawn j bar-widths right of the bin index
fig, ax = plt.subplots()
width = 0.8 / len(kernel_scores)
for position, (label, counts) in enumerate(histograms.items(), start=1):
    ax.bar(np.arange(num_bins) + position * width, counts, width=width, color=color_dict[label], label=label)

# Tick labels show the bin limits. Bars sit at offsets 1..n bar-widths, so the
# centre of each group is at (n + 1) / 2 bar-widths.
bin_labels = [f'{bin_edges[i]:.2f}-{bin_edges[i+1]:.2f}' for i in range(num_bins)]
plt.xticks(np.arange(num_bins) + width * (len(kernel_scores) + 1) / 2, bin_labels, rotation=45)

# Place the legend outside the plot
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Set labels and title
plt.xlabel('F1 Score')
plt.ylabel('Frequency')
plt.title('Comparison of performance among different kernels')


# Plot 10 top performing for Propagation vs WL 

In [None]:
# The 10 images on which the Propagation kernel scored highest (ascending order,
# so the best image is in the last row), shown next to the WL result and the
# human segmentation.
Best_indexes = list(np.argsort(F1_scores_propagation)[-10:])

# plt.subplots creates its own figure, so no separate plt.figure call is made
# (it only produced an extra empty figure). squeeze=False keeps `axs`
# two-dimensional even when there is a single row.
fig, axs = plt.subplots(nrows=len(Best_indexes), ncols=4, figsize=(15, 5*len(Best_indexes)), squeeze=False)

for i, image_index in enumerate(Best_indexes):
    # Load Original Image
    image = original_images[image_index]

    # Render both predicted segmentations with the average RGB value per region.
    # Cluster ids start at 0, so 0 is a genuine region: with bg_label=0 that whole
    # cluster was painted black as "background". -1 never occurs as a label.
    # NOTE(review): assumes propagation_segmentations uses the same 0-based
    # cluster ids as WL_segmentations - confirm where it is built.
    colored_segmentation_propagation = color.label2rgb(propagation_segmentations[image_index], image, kind='avg', bg_label=-1)
    colored_segmentation_WL = color.label2rgb(WL_segmentations[image_index], image, kind='avg', bg_label=-1)

    # Convert Ground Truth to average RGB values
    Image_number = get_image_number(train_paths[image_index])
    human        = ground_truth_segmentations[Image_number][0]
    human        = color.label2rgb(human, image, kind='avg', bg_label=0)

    panels = [
        (image, 'Original'),
        (colored_segmentation_propagation, 'Propagation Kernel'),
        (colored_segmentation_WL, 'WL Kernel'),
        (human, 'Human'),
    ]
    for column, (picture, title) in enumerate(panels):
        axs[i, column].imshow(picture)
        axs[i, column].axis('off')
        axs[i, column].set_title(title, fontsize=18)

plt.tight_layout()

# Combining Graph Kernels - Average of Kernels

# Average of WL - PG

In [None]:
#############################################################################
# Collect the paths of all .jpg images in the validation (`val`) folder     #
#############################################################################

folder_path_train = "C:/Users/giwrg/Desktop/Master/Modules/Thesis/Reading Material & Data/2- Data/Data Used - 500/images/val"  
# Pattern that matches the image files, expanded with glob
image_pattern = os.path.join(folder_path_train, '*.jpg')  
image_paths = glob.glob(image_pattern)

# Replace only the last backslash of every path with a forward slash
train_paths = ["/".join(path.rsplit("\\", 1)) for path in image_paths]
    
#################################################################################
# Build, per image, two graph lists over the same superpixels:                  #
# one from create_graphs and one from create_graphs_grayscale                   #
#################################################################################

images_to_graphs_attributes = []
images_to_graphs_labels = []
original_images = []
superpixel_segmentations = []
for path in train_paths[:num_images]:  
    current_image = load_image(path)
    original_images.append(current_image)
    # NOTE(review): 800 and 10 are presumably the SLIC segment count and
    # compactness - confirm against superpixel_SLIC.
    segments = superpixel_SLIC(current_image, 800, 10)
    superpixel_segmentations.append(segments)
    graphs = create_graphs(current_image, segments)              # fed to the attribute kernel
    graphs2 = create_graphs_grayscale(current_image, segments)   # fed to the label kernel
    images_to_graphs_attributes.append(graphs)
    images_to_graphs_labels.append(graphs2)


###############################################################################
#                      Load Ground Truth Segmentations                        #
###############################################################################
ground_truth_segmentations = load_ground_truth_images()


# Combined kernel: equal-weight average of the Propagation kernel (attribute
# graphs) and the Weisfeiler-Lehman kernel (label graphs), one matrix per image.
gk = PropagationAttr(normalize=True,  t_max=2, w=0.001)
wl = WeisfeilerLehman(normalize=True, n_iter=4)
Kernel_matrix = []
for attribute_graphs, label_graphs in zip(images_to_graphs_attributes, images_to_graphs_labels):
    current_matrix1 = gk.fit_transform(attribute_graphs)
    current_matrix2 = wl.fit_transform(label_graphs)
    # Average the two kernel matrices, then convert similarity to dissimilarity
    averaged_kernel = 0.5*current_matrix1 + 0.5*current_matrix2
    Kernel_matrix.append(1 - averaged_kernel)


# Cluster the superpixel graphs of every image. The number of clusters is taken
# from the ground truth so that the results are comparable.
predicted_clusters = []
for i, distance_matrix in enumerate(Kernel_matrix):
    # The image number is the key into the ground-truth dictionary
    Image_number = get_image_number(train_paths[i])
    k = ground_truth_segmentations[Image_number][1]  # Chosen number of regions
    # Hierarchical clustering with group-average linkage on the precomputed matrix
    clustering = AgglomerativeClustering(n_clusters=k, linkage='average', metric='precomputed')
    predicted_clusters.append(clustering.fit_predict(distance_matrix))


# Convert the cluster labels to pixel labels.
# Every graph (superpixel) received a cluster; every superpixel covers several
# pixels, so each pixel inherits the cluster of its superpixel.
# NOTE(review): like the original loop, this assumes superpixel ids run 1..N and
# that predicted_clusters[i][j] belongs to superpixel j+1 - confirm against the
# helper that builds the graphs.
image_segmentations = []
for i in range(len(Kernel_matrix)):
    segments = superpixel_segmentations[i]
    # original_images[i] was loaded from train_paths[i]; reuse its shape instead
    # of reading the file from disk a second time.
    n_row, n_col = original_images[i].shape[:2]
    image_segmentation = np.zeros((n_row, n_col))  # Final segmentation (float labels)
    num_of_superpixels = len(np.unique(segments))  # The number of superpixels-graphs
    for graph_num in range(1, num_of_superpixels + 1):
        # One vectorised assignment per superpixel replaces the per-pixel Python loop.
        member_pixels = np.nonzero(segments == graph_num)
        image_segmentation[member_pixels] = predicted_clusters[i][graph_num - 1]

    image_segmentations.append(image_segmentation)

# Keep a copy of the segmentations for visualisation purposes
segmentations_PG_WL = list(image_segmentations)

# Original image next to its raw label map, for the first 5 images
plt.figure(figsize=(25, 25))

for i, image in enumerate(original_images[0:5]):
    panels = [
        (image, 'Original'),
        (image_segmentations[i], 'Segmented Image'),
    ]
    for column, (picture, title) in enumerate(panels, start=1):
        plt.subplot(5, 2, 2*i + column)
        plt.imshow(picture)
        plt.axis('off')
        plt.title(title, fontsize=18)

# Adjust the layout and spacing
plt.tight_layout()


# Score every PG+WL segmentation against the chosen human segmentation and
# persist the scores (CSV for inspection, pickle for later comparison).
F1_scores = []
for i in range(len(Kernel_matrix)):
    Image_number = get_image_number(train_paths[i])
    # Entry [0] of the ground-truth record is the chosen human segmentation
    ground_truth = ground_truth_segmentations[Image_number][0]

    # Both segmentations are compared in boundary format
    ground_truth_boundry = convert_seg_to_boundaries(ground_truth)
    image_segmentation_boundry = convert_seg_to_boundaries(image_segmentations[i])

    F1_scores.append(evaluate(image_segmentation_boundry, ground_truth_boundry))

# One score per row
csv_file = 'PG_WL_test_set.csv'
with open(csv_file, 'w', newline='') as file:
    csv.writer(file).writerows([score] for score in F1_scores)

F1_scores_PG_WL = list(F1_scores)

# Save the list as a pickle file
with open('PG_WL_results.pkl', 'wb') as file:
    pickle.dump(F1_scores, file)


# Average of PG - Shortest Path 

In [None]:
###########################################################################
# Create a list with all the paths for the images in the validation set ###
###########################################################################

folder_path_train = "C:/Users/giwrg/Desktop/Master/Modules/Thesis/Reading Material & Data/2- Data/Data Used - 500/images/val"  
# Construct the pattern to match image files
image_pattern = os.path.join(folder_path_train, '*.jpg')  
# Use glob to find all matching image file paths
image_paths = glob.glob(image_pattern)
train_paths = []

# Print the paths for each image
for path in image_paths:
    updated_string = path.rsplit("\\", 1)
    updated_string = "/".join(updated_string)
    train_paths.append(updated_string)
    
#################################################################################
# Create a list of lists containing graphs for each image in the validation set #
#################################################################################

# Per-image containers; index j of every list refers to train_paths[j].
original_images, superpixel_segmentations = [], []
images_to_graphs_attributes, images_to_graphs_labels = [], []

for path in train_paths[:num_images]:
    current_image = load_image(path)
    original_images.append(current_image)

    # NOTE(review): 800 and 10 are presumably the requested number of superpixels
    # and the SLIC compactness — confirm against superpixel_SLIC.
    segments = superpixel_SLIC(current_image, 800, 10)
    superpixel_segmentations.append(segments)

    # Two graph collections are built from the same superpixels: one from
    # create_graphs (fed to the propagation kernel) and one from
    # create_graphs_grayscale (fed to the second kernel of the average).
    images_to_graphs_attributes.append(create_graphs(current_image, segments))
    images_to_graphs_labels.append(create_graphs_grayscale(current_image, segments))



###############################################################################
#                      Load Ground Truth Segmentations                        #
###############################################################################
# Get dictionary of ground-truth-segmentations
ground_truth_segmentations = load_ground_truth_images()


# For every image, average the propagation kernel (computed on the attributed
# graphs) with the shortest-path kernel (computed on the labelled graphs).
gk = PropagationAttr(normalize=True,  t_max=2, w=0.001)
sp = ShortestPath(normalize=True)
Kernel_matrix = []
for attribute_graphs, label_graphs in zip(images_to_graphs_attributes, images_to_graphs_labels):
    propagation_similarity = gk.fit_transform(attribute_graphs)
    shortest_path_similarity = sp.fit_transform(label_graphs)
    # Equal-weight average of the two kernels, turned into a dissimilarity
    # by subtracting it from 1.
    Kernel_matrix.append(1 - (0.5*propagation_similarity + 0.5*shortest_path_similarity))


# Cluster the superpixel graphs of every image. The number of clusters is taken
# from the ground truth entry of the image so that the results are comparable.
predicted_clusters = []
for i, dissimilarity in enumerate(Kernel_matrix):
    # The image number is the key into the ground-truth dictionary
    Image_number = get_image_number(train_paths[i])
    k = ground_truth_segmentations[Image_number][1]  # Chosen number of regions
    # Average-linkage hierarchical clustering on the precomputed dissimilarities
    clustering = AgglomerativeClustering(n_clusters=k, linkage='average', metric='precomputed')
    predicted_clusters.append(clustering.fit_predict(dissimilarity))


# Convert the cluster labels of the graphs into pixel labels.
# Entry j of predicted_clusters[i] is the cluster of the j-th graph of image i,
# and that graph is taken to describe the superpixel labelled j+1, so every
# pixel of that superpixel receives the cluster of its graph.
# NOTE(review): this assumes the superpixel labels are exactly 1..N without gaps
# (N = number of distinct labels) — confirm against superpixel_SLIC.
image_segmentations = []
for i in range(len(Kernel_matrix)):
    # original_images was filled in the same order as train_paths, so the image
    # is already in memory; only its shape is needed here, which makes reading
    # the file from disk a second time unnecessary.
    current_image = original_images[i]
    n_row, n_col, _ = current_image.shape
    image_segmentation = np.zeros((n_row, n_col))  # Final segmentation
    num_of_superpixels = len(np.unique(superpixel_segmentations[i]))  # Number of superpixels-graphs
    for graph_num in range(1, num_of_superpixels + 1):
        # (row, column) positions of the pixels that belong to this superpixel
        indices = np.argwhere(superpixel_segmentations[i] == graph_num)
        # One indexed write labels the whole superpixel instead of a Python
        # loop over its individual pixels.
        image_segmentation[indices[:, 0], indices[:, 1]] = predicted_clusters[i][graph_num - 1]

    image_segmentations.append(image_segmentation)

segmentations_PG_SP = image_segmentations.copy()  # Store the segmentations for visualisation purposes

# Show the first five images next to their segmentations (5 rows x 2 columns).
plt.figure(figsize=(25, 25))

for row, original in enumerate(original_images[0:5]):
    for column, caption in enumerate(('Original', 'Segmented Image'), start=1):
        # Left column: the original image, right column: its segmentation
        picture = original if column == 1 else image_segmentations[row]
        plt.subplot(5, 2, 2*row + column)
        plt.imshow(picture)
        plt.axis('off')
        plt.title(caption, fontsize=18)

# Adjust the layout and spacing
plt.tight_layout()


# Score every predicted segmentation against the chosen ground truth.
F1_scores = []
for i in range(len(Kernel_matrix)):
    Image_number = get_image_number(train_paths[i])
    # Element 0 of the ground-truth entry is the chosen human segmentation
    ground_truth = ground_truth_segmentations[Image_number][0]

    # Both segmentations are compared in boundary format
    ground_truth_boundry = convert_seg_to_boundaries(ground_truth)
    image_segmentation_boundry = convert_seg_to_boundaries(image_segmentations[i])

    F1_scores.append(evaluate(image_segmentation_boundry, ground_truth_boundry))

# Write the scores to a CSV file, one score per row
csv_file = 'PG_SP_test_set.csv'
with open(csv_file, 'w', newline='') as file:
    csv.writer(file).writerows([score] for score in F1_scores)

# Keep a copy under a kernel-specific name, because F1_scores is reused below.
F1_scores_PG_SP = F1_scores.copy()

# Pickle the same list so that later cells can reload it without recomputing.
with open('PG_SP_results.pkl', 'wb') as file:
    pickle.dump(F1_scores, file)

# Average of PG - Node Histogram

In [None]:
#######################################################################
# Create a list with all the paths for the images in the test set     #
#######################################################################

# Folder that holds the images to be segmented (the "val" split of the data set).
folder_path_train = "C:/Users/giwrg/Desktop/Master/Modules/Thesis/Reading Material & Data/2- Data/Data Used - 500/images/val"
# Pattern matching every .jpg file that lies directly inside that folder.
image_pattern = os.path.join(folder_path_train, '*.jpg')
image_paths = glob.glob(image_pattern)

# Normalise each path: its last backslash (the separator os.path.join adds on
# Windows) is replaced by a forward slash; paths without a backslash are unchanged.
train_paths = ["/".join(path.rsplit("\\", 1)) for path in image_paths]
    
#################################################################################
# Create a list of lists containing graphs for each image in the validation set #
#################################################################################

# Per-image containers; index j of every list refers to train_paths[j].
original_images, superpixel_segmentations = [], []
images_to_graphs_attributes, images_to_graphs_labels = [], []

for path in train_paths[:num_images]:
    current_image = load_image(path)
    original_images.append(current_image)

    # NOTE(review): 800 and 10 are presumably the requested number of superpixels
    # and the SLIC compactness — confirm against superpixel_SLIC.
    segments = superpixel_SLIC(current_image, 800, 10)
    superpixel_segmentations.append(segments)

    # Two graph collections are built from the same superpixels: one from
    # create_graphs (fed to the propagation kernel) and one from
    # create_graphs_grayscale (fed to the second kernel of the average).
    images_to_graphs_attributes.append(create_graphs(current_image, segments))
    images_to_graphs_labels.append(create_graphs_grayscale(current_image, segments))



###############################################################################
#                      Load Ground Truth Segmentations                        #
###############################################################################
# Get dictionary of ground-truth-segmentations
ground_truth_segmentations = load_ground_truth_images()


# For every image, average the propagation kernel (computed on the attributed
# graphs) with the vertex-histogram kernel (computed on the labelled graphs).
gk = PropagationAttr(normalize=True,  t_max=2, w=0.001)
vh = VertexHistogram(normalize=True)
Kernel_matrix = []
for attribute_graphs, label_graphs in zip(images_to_graphs_attributes, images_to_graphs_labels):
    propagation_similarity = gk.fit_transform(attribute_graphs)
    vertex_histogram_similarity = vh.fit_transform(label_graphs)
    # Equal-weight average of the two kernels, turned into a dissimilarity
    # by subtracting it from 1.
    Kernel_matrix.append(1 - (0.5*propagation_similarity + 0.5*vertex_histogram_similarity))


# Cluster the superpixel graphs of every image. The number of clusters is taken
# from the ground truth entry of the image so that the results are comparable.
predicted_clusters = []
for i, dissimilarity in enumerate(Kernel_matrix):
    # The image number is the key into the ground-truth dictionary
    Image_number = get_image_number(train_paths[i])
    k = ground_truth_segmentations[Image_number][1]  # Chosen number of regions
    # Average-linkage hierarchical clustering on the precomputed dissimilarities
    clustering = AgglomerativeClustering(n_clusters=k, linkage='average', metric='precomputed')
    predicted_clusters.append(clustering.fit_predict(dissimilarity))


# Convert the cluster labels of the graphs into pixel labels.
# Entry j of predicted_clusters[i] is the cluster of the j-th graph of image i,
# and that graph is taken to describe the superpixel labelled j+1, so every
# pixel of that superpixel receives the cluster of its graph.
# NOTE(review): this assumes the superpixel labels are exactly 1..N without gaps
# (N = number of distinct labels) — confirm against superpixel_SLIC.
image_segmentations = []
for i in range(len(Kernel_matrix)):
    # original_images was filled in the same order as train_paths, so the image
    # is already in memory; only its shape is needed here, which makes reading
    # the file from disk a second time unnecessary.
    current_image = original_images[i]
    n_row, n_col, _ = current_image.shape
    image_segmentation = np.zeros((n_row, n_col))  # Final segmentation
    num_of_superpixels = len(np.unique(superpixel_segmentations[i]))  # Number of superpixels-graphs
    for graph_num in range(1, num_of_superpixels + 1):
        # (row, column) positions of the pixels that belong to this superpixel
        indices = np.argwhere(superpixel_segmentations[i] == graph_num)
        # One indexed write labels the whole superpixel instead of a Python
        # loop over its individual pixels.
        image_segmentation[indices[:, 0], indices[:, 1]] = predicted_clusters[i][graph_num - 1]

    image_segmentations.append(image_segmentation)

segmentations_PG_VH = image_segmentations.copy()  # Store the segmentations for visualisation purposes

# Show the first five images next to their segmentations (5 rows x 2 columns).
plt.figure(figsize=(25, 25))

for row, original in enumerate(original_images[0:5]):
    for column, caption in enumerate(('Original', 'Segmented Image'), start=1):
        # Left column: the original image, right column: its segmentation
        picture = original if column == 1 else image_segmentations[row]
        plt.subplot(5, 2, 2*row + column)
        plt.imshow(picture)
        plt.axis('off')
        plt.title(caption, fontsize=18)

# Adjust the layout and spacing
plt.tight_layout()


# Score every predicted segmentation against the chosen ground truth.
F1_scores = []
for i in range(len(Kernel_matrix)):
    Image_number = get_image_number(train_paths[i])
    # Element 0 of the ground-truth entry is the chosen human segmentation
    ground_truth = ground_truth_segmentations[Image_number][0]

    # Both segmentations are compared in boundary format
    ground_truth_boundry = convert_seg_to_boundaries(ground_truth)
    image_segmentation_boundry = convert_seg_to_boundaries(image_segmentations[i])

    F1_scores.append(evaluate(image_segmentation_boundry, ground_truth_boundry))

# Write the scores to a CSV file, one score per row
csv_file = 'PG_VH_test_set.csv'
with open(csv_file, 'w', newline='') as file:
    csv.writer(file).writerows([score] for score in F1_scores)

# Keep a copy under a kernel-specific name, because F1_scores is reused below.
F1_scores_PG_VH = F1_scores.copy()

# Pickle the same list so that later cells can reload it without recomputing.
with open('PG_VH_results.pkl', 'wb') as file:
    pickle.dump(F1_scores, file)

# Compare Results Between Propagation Kernel and Combinations 

In [None]:
# F1_scores_propagation
# F1_scores_PG_WL
# F1_scores_PG_SP
# F1_scores_PG_VH


# Reload the stored F1 scores of the propagation kernel and of its three combinations
with open('propagation_test_results.pkl', 'rb') as file:
    F1_scores_propagation = pickle.load(file)

with open('PG_WL_results.pkl', 'rb') as file:
    F1_scores_PG_WL = pickle.load(file)

with open('PG_SP_results.pkl', 'rb') as file:
    F1_scores_PG_SP = pickle.load(file)

with open('PG_VH_results.pkl', 'rb') as file:
    F1_scores_PG_VH = pickle.load(file)


# One list of scores per method, in the same order as the tick labels below
F1_scores = [
    F1_scores_propagation,
    F1_scores_PG_WL,
    F1_scores_PG_SP,
    F1_scores_PG_VH,
]

labels = ['Propagation', 'Propagation-WL', 'Propagation-Shortest Path', 'Propagation-Vertex Histogram']

# One box per method
plt.figure(figsize=(10, 6))
sns.set(style='ticks', context='paper')
ax = sns.boxplot(data=F1_scores)
ax.set_xticklabels(labels, rotation=90)
plt.title("F1 scores for different Graph Kernels")

# Axis descriptions
plt.xlabel('Graph Kernel')
plt.ylabel('F1 score')

# Mean F1 score of every method
averages = [np.mean(method_scores) for method_scores in F1_scores]

# Write the mean slightly above the largest score of the corresponding box
for position, (method_scores, average) in enumerate(zip(F1_scores, averages)):
    ax.text(position, np.max(method_scores) + 0.005 , f'Avg: {average:.2f}', ha='center', va='bottom', fontweight='bold')

plt.tight_layout()

# Although the Propagation kernel achieves much better performance, it can underperform in some cases.
# Based on what we saw from the hyperparameter tuning, it is sensitive to the choice of the parameters.
# Simple averages of kernels do not seem to further improve the results. However, they reduce the interquartile range.

## Comparison Plots of Performance among PG and Averages

In [None]:
# Rank the scores of every method from worst to best
sorted_propagation = sorted(F1_scores_propagation)
sorted_PG_WL = sorted(F1_scores_PG_WL)
sorted_PG_SP = sorted(F1_scores_PG_SP)
sorted_PG_VH = sorted(F1_scores_PG_VH)

# One seaborn line per method: x is the rank of the image, y its F1 score
ranked_series = (
    ('Propagation', sorted_propagation),
    ('PG-WL', sorted_PG_WL),
    ('PG-SP', sorted_PG_SP),
    ('PG-VH', sorted_PG_VH),
)
for series_label, ranked_scores in ranked_series:
    sns.lineplot(x=range(len(ranked_scores)), y=ranked_scores, marker='o', label=series_label)

# Place the legend outside the plot, to the right of the axes
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.title("Comparison of performance among combinations of kernels")
plt.xlabel("Image Rank")
plt.ylabel("F1 score")


#####################################################################################################
#####################################   Barplot #####################################################
#####################################################################################################



# Grouped histogram of the F1 scores: one group of bars per score interval and
# one bar per method inside every group.

# Names of the compared methods, in the same order as the lists inside F1_scores
method_names = ['Propagation', 'PG-WL', 'PG-SP', 'PG-VH']

# All scores in one flat list, with a parallel list naming the method of each
# score. The labels follow the real length of every score list, so they stay
# aligned even when a method was not evaluated on exactly 100 images.
F1_scores_flattened = [score for method_scores in F1_scores for score in method_scores]
labels = [name for name, method_scores in zip(method_names, F1_scores) for _ in method_scores]

# Colour of every method
color_dict = {'Propagation': 'blue', 'PG-WL': 'green', 'PG-SP': 'orange', 'PG-VH': 'red'}

# Binning parameters (single definition, shared by the histogram and the tick labels)
# NOTE(review): scores above 0.80 fall outside the last bin and are not counted
# — confirm that this is intended.
bin_edges = np.array([0, 0.15, 0.3, 0.45, 0.80])
num_bins = len(bin_edges) - 1

# Number of scores per bin, for every method
histograms = {
    name: np.histogram(method_scores, bins=bin_edges)[0]
    for name, method_scores in zip(method_names, F1_scores)
}

# Plot the grouped histogram: the bars of one group are centred at
# 1*width, 2*width, ... to the right of the integer position of the bin
fig, ax = plt.subplots()
width = 0.8 / len(method_names)
bin_positions = np.arange(num_bins)
for offset, name in enumerate(method_names, start=1):
    ax.bar(bin_positions + offset * width, histograms[name], width=width, color=color_dict[name], label=name)

# Label every group with the limits of its bin. The bars are centred at offsets
# 1..n (in units of width), so the centre of a group lies at (n + 1) / 2.
bin_labels = [f'{bin_edges[i]:.2f}-{bin_edges[i+1]:.2f}' for i in range(num_bins)]
plt.xticks(bin_positions + width * (len(method_names) + 1) / 2, bin_labels, rotation=45)

# Place the legend outside the plot
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Set labels and title
plt.xlabel('F1 Score')
plt.ylabel('Frequency')
plt.title('Comparison of performance among combinations of kernels')


# Comparison of Suggested Approach with Ncut Algorithm

# Define helper functions 

In [None]:
def Normalised_cuts(image: np.array)->np.array:
    """ Segment an image with the normalised cuts algorithm.
    The image is over-segmented with SLIC, a region adjacency graph weighted by
    mean-colour similarity is built on the superpixels and the graph is
    partitioned with normalised cuts.
    Source: https://scikit-image.org/docs/stable/auto_examples/segmentation/plot_ncut.html#sphx-glr-download-auto-examples-segmentation-plot-ncut-py
    
    :param image: The image to be segmented with RGB values of type `np.array`.
    :return: The segmentation of the image of type `np.array`.
    """

    # Over-segmentation into superpixels, labelled starting from 1
    superpixels = segmentation.slic(image, compactness=10, n_segments=800, start_label=1)
    # Region adjacency graph whose edge weights express mean-colour similarity
    region_graph = graph.rag_mean_color(image, superpixels, mode='similarity')
    return graph.cut_normalized(superpixels, region_graph, thresh=0.01)
    

###############################################################################
#                      Load ALL Ground Truth Segmentations                    #
###############################################################################
# Get dictionary of ground-truth-segmentations
def load_all_ground_truth_images(folder_path: str = "C:/Users/giwrg/Desktop/Master/Modules/Thesis/Reading Material & Data/2- Data/Data Used - 500/groundTruth/val")->dict:
    """ Create a dictionary with all the groundtruth segmentations of every `.mat` file in a folder.
    
    :param folder_path: The folder that contains the `.mat` groundtruth files, of type `str`.
    Defaults to the folder that was previously hard-coded in this function.
    :return groundtruth: Dictionary of the form key:[segmentation1,segmentation2,...,segmentationk], where 
    the key is the file name without its extension. For each image we might have more than 1 ground truth 
    segmentations. Every segmentation is downsampled with `downsample_image`. The dictionary is empty when
    the folder contains no `.mat` file.
    """

    groundtruth = {}
    for mat_path in glob.glob(os.path.join(folder_path, '*.mat')):
        # File name without folder and extension
        file_name = os.path.splitext(os.path.basename(mat_path))[0]
        # Drop anything that precedes a remaining backslash (Windows separator)
        image_number = file_name.split("\\")[-1]
        # One entry of this array per human annotator
        annotations = scipy.io.loadmat(mat_path)['groundTruth'][0]
        groundtruth[image_number] = [
            # The nested indexing unwraps the MATLAB struct down to the label
            # map, which is then downsampled
            downsample_image(annotations[i][0][0][0])
            for i in range(annotations.shape[0])
        ]
    return groundtruth


def original_Images(folder_path_train: str = "C:/Users/giwrg/Desktop/Master/Modules/Thesis/Reading Material & Data/2- Data/Data Used - 500/images/val")->dict:
    
    """ Create a dictionary of the form key:value where key = image code and value is the image
    
    :param folder_path_train: The folder that contains the `.jpg` images, of type `str`. Defaults to 
    the folder that was previously hard-coded in this function.
    :return original_images: A dictionary of type `dict` of the form image_number:original_image, where
    image_number is the file name without its extension and original_image is the result of `load_image`.
    The dictionary is empty when the folder contains no `.jpg` file.
    """

    original_images = {}
    for path in glob.glob(os.path.join(folder_path_train, '*.jpg')):
        # Replace the last backslash (Windows separator) with a forward slash
        path = "/".join(path.rsplit("\\", 1))
        # File name without folder and extension
        file_name = os.path.splitext(os.path.basename(path))[0]
        # Drop anything that precedes a remaining backslash
        image_number = file_name.split("\\")[-1]
        original_images[image_number] = load_image(path)
    return original_images



# Find all segmentations of normalised cuts
def segmentations_Ncuts()->dict:
    """ Find all segmentations using normalised cuts algorithms and store the results 
    in a dictionary of type `dict`. 
    
    :param segmentations_Ncut: A dictionary with the segmentations of Ncut algorithm of the form 
    image_number:segmentation. 
    """

    segmentations_Ncut = {}
    for image_code in original_Images_dict.keys():
        segmentations_Ncut[image_code] = Normalised_cuts(original_Images_dict[image_code]).astype(np.uint8)
        
    return segmentations_Ncut


def calc_score(predicted:dict, truth:dict)->dict:
    """ Measure performance using F1 score between the predicted segmentation and the truth.
    
    For each image only one human segmentation is used: the one whose index is stored in the 
    global `chosen_truths[code][2]`.
    
    :param predicted: The predicted segmentations of type `dict` of the form key:value where
    key is the image code and value is the predicted segmentation.
    :param truth: The true segmentations of type `dict` of the form key:list[segmentations] where
    key is the image code and list[segmentations] is a list of all the human generated segmentations.
    :return scores: A `dict` of the form image_number: F1_score with the score of the prediction 
    against the chosen human segmentation. A score that is not greater than 0 is stored as 0.
    """
    scores = {}
    for code, truths in truth.items():
        # Index of the human segmentation that was chosen for this image
        chosen_truth = chosen_truths[code][2]

        # Convert segmentations into boundary formats
        ground_truth_boundry = convert_seg_to_boundaries(truths[chosen_truth])
        image_segmentation_boundry = convert_seg_to_boundaries(predicted[code])

        # Calculate performance measures
        score = evaluate(image_segmentation_boundry, ground_truth_boundry)

        # Same floor at 0 as the previous running maximum that started from 0
        scores[code] = score if score > 0 else 0
    return scores


# Load the original images as a dictionary image_number:image
original_Images_dict = original_Images()
# Load all the ground truths for the original images as a dictionary
# image_number:[segmentation1, segmentation2, ...]
load_all_ground_truth_images_dict = load_all_ground_truth_images()
# Load the chosen ground truth of every image. calc_score reads element 2 of
# an entry as the index of the chosen segmentation.
chosen_truths = load_ground_truth_images()


# Ncut Results

In [None]:
###########################################################################################
############################## Measure the performance ####################################
###########################################################################################

# Segment every image with normalised cuts and score the result.
# The conversion to boundaries happens inside calc_score.

# Normalised Cuts Algorithm
Ncuts  = segmentations_Ncuts()
# NOTE: despite the `ARI_` prefix, calc_score is documented as returning F1 scores.
ARI_scores_Ncuts = calc_score(Ncuts, load_all_ground_truth_images_dict)


# Propagation Algorithm Results

In [None]:
# Folder that holds the images to be segmented (the "val" split of the data set).
folder_path_train = "C:/Users/giwrg/Desktop/Master/Modules/Thesis/Reading Material & Data/2- Data/Data Used - 500/images/val"
# Pattern matching every .jpg file that lies directly inside that folder.
image_pattern = os.path.join(folder_path_train, '*.jpg')
image_paths = glob.glob(image_pattern)

# Normalise each path: its last backslash (the separator os.path.join adds on
# Windows) is replaced by a forward slash; paths without a backslash are unchanged.
train_paths = ["/".join(path.rsplit("\\", 1)) for path in image_paths]
    
###############################################################################
# Create a list of lists containing graphs for each image in the validation set #
###############################################################################

# Per-image containers; index j of every list refers to train_paths[j].
images_to_graphs, original_images, superpixel_segmentations = [], [], []
for path in train_paths:
    current_image = load_image(path)
    original_images.append(current_image)

    # NOTE(review): 800 and 10 are presumably the requested number of superpixels
    # and the SLIC compactness — confirm against superpixel_SLIC.
    segments = superpixel_SLIC(current_image, 800, 10)
    superpixel_segmentations.append(segments)

    images_to_graphs.append(create_graphs(current_image, segments))


# For every image, compute the propagation kernel between its graphs and turn
# the kernel matrix into a dissimilarity matrix by subtracting it from 1.
gk = PropagationAttr(normalize=True,  t_max=2, w=0.001)
Kernel_matrix = [1 - gk.fit_transform(image_graphs) for image_graphs in images_to_graphs]

# Cluster the superpixel graphs of every image. The number of clusters equals the
# number of regions of the chosen ground truth so that the results are comparable.
predicted_clusters = {}
for i, dissimilarity in enumerate(Kernel_matrix):
    # The image number is the key into the ground-truth dictionaries
    Image_number = get_image_number(train_paths[i])
    predicted_clusters[Image_number] = []

    # Keep only the chosen truth (presumably the one with the fewest segments —
    # confirm against load_ground_truth_images)
    chosen_truth = chosen_truths[Image_number][2]
    selected_truth = load_all_ground_truth_images_dict[Image_number][chosen_truth]

    # Number of regions of the selected human segmentation
    k = len(np.unique(selected_truth))

    # Average-linkage hierarchical clustering on the precomputed dissimilarities
    clustering = AgglomerativeClustering(n_clusters=k, linkage='average', metric='precomputed')
    predicted_clusters[Image_number].append(clustering.fit_predict(dissimilarity))

# Convert the cluster labels of the graphs into pixel labels.
# Entry j of a prediction is the cluster of the j-th graph of the image, and that
# graph is taken to describe the superpixel labelled j+1, so every pixel of
# that superpixel receives the cluster of its graph.
# NOTE(review): this assumes the superpixel labels are exactly 1..N without gaps
# (N = number of distinct labels) — confirm against superpixel_SLIC.
image_segmentations = {}
for i in range(len(Kernel_matrix)):
    # original_images was filled in the same order as train_paths, so the image
    # is already in memory; only its shape is needed here, which makes reading
    # the file from disk a second time unnecessary.
    current_image = original_images[i]
    Image_number = get_image_number(train_paths[i])
    image_segmentations[Image_number] = []
    n_row, n_col, _ = current_image.shape
    predictions = predicted_clusters[Image_number]
    num_of_superpixels = len(np.unique(superpixel_segmentations[i]))  # Number of superpixels-graphs
    # One pixel-level segmentation per predicted clustering of the image
    for prediction in predictions:
        image_segmentation = np.zeros((n_row, n_col))  # Final segmentation
        for graph_num in range(1, num_of_superpixels + 1):
            # (row, column) positions of the pixels that belong to this superpixel
            indices = np.argwhere(superpixel_segmentations[i] == graph_num)
            # One indexed write labels the whole superpixel instead of a
            # Python loop over its individual pixels.
            image_segmentation[indices[:, 0], indices[:, 1]] = prediction[graph_num - 1]

        image_segmentations[Image_number].append(image_segmentation)
        
## Find the F1 score for each image
F1_scores = {}
truth = load_all_ground_truth_images_dict
predicted = image_segmentations
for code, human_truths in truth.items():
    # Keep only the chosen truth (index stored in element 2 of its entry)
    chosen_truth = chosen_truths[code][2]
    # Only one truth is kept, so only the first predicted segmentation is scored
    prediction = predicted[code][0]

    # Convert segmentations into boundary formats
    ground_truth_boundry = convert_seg_to_boundaries(human_truths[chosen_truth])
    image_segmentation_boundry = convert_seg_to_boundaries(prediction)

    # Calculate performance measures
    score = evaluate(image_segmentation_boundry, ground_truth_boundry)

    # A score that is not greater than 0 is stored as 0
    F1_scores[code] = score if score > 0 else 0

# NOTE: despite the `ARI_` prefix, the stored values are the F1 scores computed above.
ARI_scores_Suggested_Approach = F1_scores.copy()

# K-means clustering 

In [None]:
# Source: https://www.thepythoncode.com/article/kmeans-for-image-segmentation-opencv-python

def K_means_segmentation(image:np.array, n_clusters: int)->np.array:
    """ Given an image with RGB values perform K-means clustering
        with specified number of clusters. Every pixel is described by its 
        three colour values and its (row, column) position.
        
    :param image: The image to be segmented of type `np.array`. It must have three channels.
    :param n_clusters: The number of clusters to use in the clustering of type `int`.
    :return segmented_image: The segmented image of type `np.array` where each pixel has 
    an integer value for the segment it belongs.
    """

    height, width = image.shape[:2]

    # One row per pixel with its three colour values
    pixel_colours = image.reshape((-1, 3))

    # One row per pixel with its (row, column) coordinates, in the same order
    coordinates = np.indices((height, width)).reshape(2, -1).T

    # Colour and position form the feature vector; cv2.kmeans requires float32
    data = np.float32(np.concatenate((pixel_colours, coordinates), axis=1))

    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 100, 0.85) # Stopping criteria
    # Apply K-means 10 times with the requested number of clusters. The number
    # was previously fixed to 3, which ignored `n_clusters`. int() also accepts
    # NumPy integer scalars.
    _, labels, _ = cv2.kmeans(data, int(n_clusters), None, criteria, 10, cv2.KMEANS_RANDOM_CENTERS)

    # Reshape the labels into the original image dimensions
    return labels.reshape((height, width))
  
# Load the original images and the chosen ground truth of every image
original_Images_dict = original_Images()
chosen_truths = load_ground_truth_images()

###########################################    
# Perform image segmentation using k-means 
###########################################

segmentations_Kmeans = {}
F1_scores_Kmeans = []
for image_code, current_image in original_Images_dict.items():
    # Element 0 of a ground-truth entry is the human segmentation,
    # element 1 its number of segments
    truth = chosen_truths[image_code][0]
    num_segments = chosen_truths[image_code][1]
    kmeans_labels = K_means_segmentation(current_image, num_segments)
    segmentations_Kmeans[image_code] = kmeans_labels.astype(np.uint8)

    # Performance evaluation: both segmentations are compared in boundary format
    ground_truth_boundry = convert_seg_to_boundaries(truth)
    image_segmentation_boundry = convert_seg_to_boundaries(segmentations_Kmeans[image_code])

    score = evaluate(image_segmentation_boundry, ground_truth_boundry)
    F1_scores_Kmeans.append(score)
    


# Canny Edge Detector

In [None]:
def Canny_detector(image: np.ndarray, threshold1: float = 200, threshold2: float = 500)->np.ndarray:
    """ Perform Canny edge detection on the given image.

    The thresholds used to be hard-coded; they are now parameters whose defaults
    reproduce the previous behaviour, so existing calls are unaffected.

    :param image: The image to process of type `np.ndarray` (`cv2.Canny` expects an 8-bit image).
    :param threshold1: First threshold of the hysteresis procedure, forwarded to `cv2.Canny`.
    :param threshold2: Second threshold of the hysteresis procedure, forwarded to `cv2.Canny`.
    :return edges: The edge map returned by `cv2.Canny` for the given image.
    """
    edges = cv2.Canny(image, threshold1, threshold2)
    return edges


# Load the original images together with the human (ground truth) annotations
original_Images_dict = original_Images()
chosen_truths = load_ground_truth_images()

##################################################
# Perform edge detection using Canny Edge Detector
##################################################

edges_Canny = {}        # image code -> edge map returned by Canny_detector
F1_scores_Canny = []    # one score per image, in dictionary iteration order
for image_code, current_image in original_Images_dict.items():
    # Index 0 of a ground-truth entry is the human segmentation
    truth = chosen_truths[image_code][0]

    canny_edges = Canny_detector(current_image)
    edges_Canny[image_code] = canny_edges

    # Performance evaluation: both inputs go through the same boundary conversion.
    # NOTE(review): the Canny output is already an edge map rather than a label
    # map -- confirm convert_seg_to_boundaries handles that as intended.
    ground_truth_boundry = convert_seg_to_boundaries(truth)
    image_segmentation_boundry = convert_seg_to_boundaries(canny_edges)

    score = evaluate(image_segmentation_boundry, ground_truth_boundry)
    F1_scores_Canny.append(score)



# Plot Results

In [None]:
#################################################################################
##########################  Line Plot   #########################################
#################################################################################

# Per-image scores of every method, each sorted independently in increasing
# order. The x-axis is therefore a rank within a method, not an image id.
# NOTE(review): the first two series are read from dictionaries named
# ARI_scores_* while the axis is labelled "F1 Score" -- confirm that all four
# series really hold the same metric.
Suggested_Approach = sorted(ARI_scores_Suggested_Approach.values())
Ncut               = sorted(ARI_scores_Ncuts.values())
Kmeans             = sorted(F1_scores_Kmeans)
Canny              = sorted(F1_scores_Canny)

# Plot one curve per method using seaborn; every method gets its own marker
# so the curves stay distinguishable without relying on colour alone.
sns.lineplot(x=range(len(Ncut)),               y=Ncut,               marker='o', label='Ncut')
sns.lineplot(x=range(len(Suggested_Approach)), y=Suggested_Approach, marker='^', label='Propagation Kernel')
sns.lineplot(x=range(len(Kmeans)),             y=Kmeans,             marker='x', label='K means')
sns.lineplot(x=range(len(Canny)),              y=Canny,              marker='s', label='Canny')

# Place the legend outside the plot
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.title("Comparison of performance among different approaches")
plt.xlabel("Image Rank")
plt.ylabel("F1 Score")


#####################################################################################################
#####################################   Barplot #####################################################
#####################################################################################################

# Scores of every method, listed in the order the bars are drawn inside a group
scores_by_method = {
    'Propagation Kernel': Suggested_Approach,
    'Ncut': Ncut,
    'Kmeans': Kmeans,
    'Canny': Canny,
}

F1_scores_flattened = Suggested_Approach + Ncut + Kmeans + Canny

# One label per score. The counts follow the real list lengths, so labels and
# scores stay aligned even when a method was evaluated on fewer/more images.
labels = [method for method, scores in scores_by_method.items() for _ in scores]

# Create a dictionary to map labels to colors
color_dict = {'Propagation Kernel': 'blue', 'Ncut': 'green', 'Kmeans':'red', 'Canny':'orange'}

# Colour palette in the (deterministic) drawing order of the methods
colors = [color_dict[method] for method in scores_by_method]

# Binning parameters: the edges are defined once and the bin count follows.
# NOTE(review): np.histogram does not count values outside the outer edges, so
# scores below 0 or above 0.80 are dropped from the chart -- confirm this is intended.
bin_edges = np.array([0, 0.15, 0.3, 0.45, 0.80])
num_bins = len(bin_edges) - 1

# Calculate the histogram (counts per bin) for each method
histograms = {
    method: np.histogram(scores, bins=bin_edges)[0]
    for method, scores in scores_by_method.items()
}

# Plot the grouped histogram: within a group, bar k is centred at k * width
fig, ax = plt.subplots()
width = 0.8 / len(scores_by_method)
for position, method in enumerate(scores_by_method, start=1):
    ax.bar(np.arange(num_bins) + position * width, histograms[method],
           width=width, color=color_dict[method], label=method)

# Set the x-tick labels to display the bin limits. The bars of a group are
# centred on 1*width ... n*width, hence the group centre is (n + 1) / 2 * width.
bin_labels = [f'{low:.2f}-{high:.2f}' for low, high in zip(bin_edges[:-1], bin_edges[1:])]
plt.xticks(np.arange(num_bins) + width * (len(scores_by_method) + 1) / 2, bin_labels, rotation=45)

# Place the legend outside the plot
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Set labels and title
plt.xlabel('F1 Score')
plt.ylabel('Frequency')
plt.title('Comparison of performance among different approaches')


# Compare results for each Image

In [None]:
# Scores in dictionary order. Entry i of every list below is compared as if it
# belonged to the same image.
# NOTE(review): this assumes all four collections share the same image
# ordering -- verify against the cells that build them.
Suggested_Approach = list(ARI_scores_Suggested_Approach.values())
Ncut               = list(ARI_scores_Ncuts.values())

# Number of images on which a method is strictly better than all the others
# (an image where the top score is tied is counted for nobody).
sum_count_Propagation = 0
sum_count_Ncut = 0
sum_count_Kmean = 0
sum_count_Canny = 0

for idx, propagation_score in enumerate(Suggested_Approach):
    ncut_score = Ncut[idx]
    kmeans_score = F1_scores_Kmeans[idx]
    canny_score = F1_scores_Canny[idx]

    if propagation_score > ncut_score and propagation_score > kmeans_score and propagation_score > canny_score:
        sum_count_Propagation += 1

    if ncut_score > propagation_score and ncut_score > kmeans_score and ncut_score > canny_score:
        sum_count_Ncut += 1

    if kmeans_score > propagation_score and kmeans_score > ncut_score and kmeans_score > canny_score:
        sum_count_Kmean += 1

    if canny_score > propagation_score and canny_score > ncut_score and canny_score > kmeans_score:
        sum_count_Canny += 1


print('Propagation Kernel produces better results in: ', sum_count_Propagation)
print('Ncut produces better results in: '              , sum_count_Ncut)
print('Kmeans produces better results in: '            , sum_count_Kmean)
print('Canny produces better results in: '             , sum_count_Canny)

# Create Comparison Plots 

In [None]:
# Get the segmentations of the baseline methods as lists so they can be
# addressed by position.
# NOTE(review): the same positional index is used below for original_images,
# propagation_segmentations, train_paths and these lists -- this assumes they
# all share one image ordering; confirm against the cells that build them.
segmentations_Canny   = list(edges_Canny.values())
segmentations_Kmeans_ = list(segmentations_Kmeans.values())
segmentations_Ncut    = list(Ncuts.values())

# Find the indices of the 10 highest propagation scores (ascending, best last)
Best_indexes = list(np.argsort(F1_scores_propagation)[-10:])

# Create the comparison grid: one row per image, one column per panel.
# squeeze=False keeps axs two-dimensional even when only one row is requested.
# (No separate plt.figure() call: plt.subplots creates the figure itself, and an
# extra call would only leave an empty figure behind.)
fig, axs = plt.subplots(nrows=len(Best_indexes), ncols=6,
                        figsize=(15, 5*len(Best_indexes)), squeeze=False)

for i, image_index in enumerate(Best_indexes):
    # Load Original Image
    image = original_images[image_index]

    # Human ground truth, shown in the same boundary format as the methods
    Image_number = get_image_number(train_paths[image_index])
    human        = ground_truth_segmentations[Image_number][0]
    human        = convert_seg_to_boundaries(human)

    # Boundaries of every method's result.
    # Alternative view: color.label2rgb(segmentation, image, kind='avg', bg_label=0)
    # renders average-colour regions instead of boundaries.
    colored_segmentation_propagation = convert_seg_to_boundaries(propagation_segmentations[image_index])
    colored_segmentation_Ncut        = convert_seg_to_boundaries(segmentations_Ncut[image_index])
    colored_segmentation_Kmeans      = convert_seg_to_boundaries(segmentations_Kmeans_[image_index])
    colored_segmentation_Canny       = convert_seg_to_boundaries(segmentations_Canny[image_index])

    # Column order of the row: (title, picture)
    panels = [
        ('Original',           image),
        ('Human',              human),
        ('Propagation Kernel', colored_segmentation_propagation),
        ('Ncut',               colored_segmentation_Ncut),
        ('Kmeans',             colored_segmentation_Kmeans),
        ('Canny',              colored_segmentation_Canny),
    ]

    # Draw every panel of the row without axes and with its title
    for panel_ax, (panel_title, picture) in zip(axs[i], panels):
        panel_ax.imshow(picture)
        panel_ax.axis('off')
        panel_ax.set_title(panel_title, fontsize=18)


plt.tight_layout()
