## Homework #2

Implement the Czekanowski-Dice index. 

\begin{equation}
C=2\frac{P\times R}{P+R}.
\end{equation}
Precision $P$ (the positive predictive value, PPV) is defined as:
\begin{equation}
PPV=\frac{\#TP}{\#TP+\#FP}.
\end{equation}
Recall $R$ (the true positive rate, TPR) is defined as:
\begin{equation}
TPR=\frac{\#TP}{\#TP+\#FN}.
\end{equation}

In [1]:
import sys
!conda install --yes --prefix {sys.prefix} pillow

Solving environment: done

# All requested packages already installed.



In [2]:
class ImageConversion:
    """Load an image, list its distinct colours and save a segmented copy."""

    def get_image_from_url(self, img_url):
        """Decode the image file found at ``img_url``.

        Despite its name, the argument is handed to the built-in ``open``,
        so it has to be a path on the local filesystem.

        Returns whatever ``img.imread`` produces for the opened file.
        """
        # The context manager guarantees the handle is closed once decoding
        # is done; it used to be left open.
        with open(img_url, 'rb') as image_file:
            return img.imread(image_file)

    def get_unique_colours(self, image_matrix):
        """Flatten the image into a pixel list and pick its distinct colours.

        Returns a pair ``(unique_colours, pixels)``:

        * ``pixels`` is a list holding ``image_matrix[row, col]`` for every
          position, visited row by row;
        * ``unique_colours`` is a numpy array with one entry per distinct
          pixel. Pixels are compared through their ``str`` representation
          and the result is ordered by that string.
        """
        feature_matrix = [
            image_matrix[row, col]
            for row in range(len(image_matrix))
            for col in range(len(image_matrix[0]))
        ]
        feature_matrix_np = numpy.array(feature_matrix)
        # return_index gives the position of the first occurrence of every
        # distinct string, which maps back to the numeric pixel values.
        _, first_occurrence = numpy.unique(
            [str(pixel) for pixel in feature_matrix_np], return_index=True)
        return feature_matrix_np[first_occurrence], feature_matrix

    def save_image(self, size, pixel_matrix, unique_matrix, assignation_matrix, colours, output):
        """Recolour every pixel with the centre of its group and save the image.

        size: size passed to ``Image.new`` for the output image.
        pixel_matrix: sequence of pixels (each must offer ``tolist()``).
        unique_matrix: the distinct colours, in the order used for
            ``assignation_matrix``.
        assignation_matrix: ``assignation_matrix[j][k] == 1`` marks unique
            colour ``j`` as a member of group ``k``.
        colours: one colour per group; each is multiplied by 255 and
            truncated to integers before being written.
        output: destination handed to ``Image.save``.
        """
        image_out = Image.new("RGB", size)
        # Convert the unique colours once instead of once per pixel.
        unique_lists = [unique.tolist() for unique in unique_matrix]
        pixels = []
        for pixel in pixel_matrix:
            pixel_list = pixel.tolist()
            for unique_id, unique_list in enumerate(unique_lists):
                if pixel_list != unique_list:
                    continue
                for group_id in range(len(colours)):
                    if assignation_matrix[unique_id][group_id] == 1:
                        pixels.append(
                            tuple(int(channel) for channel in colours[group_id] * 255))
        image_out.putdata(pixels)
        image_out.save(output)

In [3]:
class Segmentation:
    """Hard k-means clustering of colour vectors with Czekanowski-Dice scoring."""

    def __init__(self, feature_matrix, groups):
        """Store the data, zero the assignment matrix and draw initial centers.

        feature_matrix: indexable collection of colour vectors; distances
            only look at the first three components of each vector.
        groups: number of clusters to look for.
        """
        self.__data_set = feature_matrix
        self.__groups = groups
        self.__space = [[0, 255], [0, 255], [0, 255]]
        # The loop in do_segmentation stops once the summed change of the
        # assignment matrix drops below this value.
        self.__error_margin = 0.5
        self.assignation = numpy.zeros((len(self.__data_set), self.__groups))
        self.centers = []
        self.select_centers()

    def select_centers(self):
        """Draw one random colour per group, unless centers already exist."""
        if len(self.centers) != 0:
            return
        for _ in range(self.__groups):
            # Three channels, each drawn from {0, ..., 254} and scaled by 1/255.
            self.centers.append(
                tuple(random.randrange(0, 255) * 1.0 / 255 for _ in range(3)))

    def calculate_distance(self, x, v):
        """Return the Euclidean distance between the first three components of x and v."""
        return math.sqrt((x[0]-v[0])**2+(x[1]-v[1])**2+(x[2]-v[2])**2)

    def calculate_u(self, x, i):
        """Return the one-hot membership vector of ``x``.

        The entry of the nearest center is 1, all others are 0; on a tie the
        center with the lowest index wins.

        ``i`` is not used; it is kept so existing callers keep working.
        """
        nearest_id = 0
        nearest_distance = float(self.calculate_distance(x, self.centers[0]))
        for group_id in range(1, self.__groups):
            distance = self.calculate_distance(x, self.centers[group_id])
            if distance < nearest_distance:
                nearest_id, nearest_distance = group_id, distance
        membership = numpy.zeros(self.__groups)
        membership[nearest_id] = 1
        return membership

    def calculate_new_centers(self, u):
        """Replace every center by the mean of the samples assigned to it.

        u: one membership vector per sample of the data set.

        A group without any member keeps its previous center. Dividing by
        its zero member count would yield a NaN center, and a NaN center at
        index 0 makes calculate_u assign every sample to it.
        """
        new_centers = []
        for c in range(self.__groups):
            weighted_sum = numpy.zeros(len(self.centers[0]))
            weight_total = 0
            for sample_id, membership in enumerate(u):
                weight_total = weight_total + membership[c]
                weighted_sum = numpy.add(
                    weighted_sum,
                    numpy.multiply(membership[c], self.__data_set[sample_id]))
            if weight_total == 0:
                new_centers.append(numpy.asarray(self.centers[c], dtype=float))
            else:
                new_centers.append(numpy.divide(weighted_sum, weight_total))
        self.centers = new_centers

    def calculate_differences(self, new_assignation):
        """Return the summed absolute difference to the current assignment matrix."""
        return sum(
            abs(float(new_assignation[i][j]) - float(self.assignation[i][j]))
            for i in range(len(self.assignation))
            for j in range(self.__groups)
        )

    def do_segmentation(self):
        """Alternate assignment and center update until the assignment is stable.

        At least two passes are made, because the first pass has no previous
        assignment to compare with.
        """
        iteration = 0
        converged = False
        while not converged:
            new_assignation = [
                self.calculate_u(sample, iteration) for sample in self.__data_set
            ]
            self.calculate_new_centers(new_assignation)

            if iteration > 0:
                converged = (
                    self.calculate_differences(new_assignation) < self.__error_margin)
            self.assignation = new_assignation
            iteration += 1

    def get_results(self):
        """Return the pair ``(centers, assignation)``."""
        return self.centers, self.assignation

    def print_results(self):
        """Print the assignment matrix followed by the centers."""
        print(self.assignation)
        print(self.centers)

    def czekanowski_dice_index(self, data_set, assignation, labels):
        """Score an assignment against reference centers.

        First method: ``labels`` holds predefined centers. A sample is a
        true positive when the index of its nearest reference center equals
        the group it was assigned to. Any other sample is counted once: as
        a false negative when the index of its *farthest* reference center
        occurs among the assigned groups, as a false positive otherwise.

        data_set: the samples, in the same order as ``assignation``.
        assignation: one membership vector per sample.
        labels: reference centers, indexed like the groups.

        Returns ``2*TP / (2*TP + FP + FN)``.
        Raises ZeroDivisionError when ``assignation`` is empty.
        """
        assigned_groups = [numpy.argmax(row) for row in assignation]
        # Loop invariant: the set of groups that received at least one sample.
        present_groups = numpy.unique(assigned_groups)
        TP = 0
        FP = 0
        FN = 0
        for sample_id, assigned in enumerate(assigned_groups):
            distances = [
                self.calculate_distance(data_set[sample_id], label)
                for label in labels
            ]
            if numpy.argmin(distances) == assigned:
                TP += 1
            elif numpy.argmax(distances) in present_groups:
                FN += 1
            else:
                FP += 1

        return 2*TP / (2*TP + FP + FN)

    def czekanowski_dice_index_with_conf(self, data_set, assignation, labels):
        """Score an assignment against ground-truth group ids.

        Second method, which seems more suitable for common usage. Based on
        http://www0.cs.ucl.ac.uk/staff/G.Brostow/classes/IP2008/L1_Segmentation_02.pdf

        data_set: not used.
        assignation: one membership vector per sample.
        labels: ground-truth group id of every sample.

        Prints the list of assigned group ids, then returns
        ``2*TP / (2*TP + E)``, where TP is the diagonal of the confusion
        matrix and E the sum of all its off-diagonal entries (every
        misclassified sample is counted once).
        """
        assigned_groups = [numpy.argmax(row) for row in assignation]
        # sklearn builds a square matrix with the true positives on the
        # diagonal; every other cell is a misclassification.
        conf_matrix = confusion_matrix(labels, assigned_groups)
        print(assigned_groups)
        # Use the full matrix: its size follows the classes found in either
        # labels or assigned_groups, which can exceed the number of distinct
        # assigned groups when a ground-truth class was never predicted.
        TP = numpy.trace(conf_matrix)
        FP_and_FN = conf_matrix.sum() - TP
        return 2*TP / (2*TP + FP_and_FN)


In [4]:
import numpy
import random
import math
import matplotlib.image as img
from PIL import Image
from sklearn.metrics import confusion_matrix

# Segment the logo into `groups` colour clusters and write the recoloured
# image to output.png.
image_to_segment = "logo_krakow.png"
image_converter = ImageConversion()
image_data = image_converter.get_image_from_url(image_to_segment)
unique_image_data, image_data_list = image_converter.get_unique_colours(image_data)

groups = 3

# Clustering runs on the distinct colours only, not on every pixel.
segmentation = Segmentation(unique_image_data, groups)
segmentation.do_segmentation()
centers, assignation_matrix = segmentation.get_results()

# Take the output size from the decoded image instead of hard-coding it:
# the array is indexed (row, column), while PIL expects (width, height).
image_size = (image_data.shape[1], image_data.shape[0])
image_converter.save_image(image_size, image_data_list, unique_image_data, assignation_matrix, centers, "output.png")

# The labels were taken from the assignment of an earlier run and serve as the
# ground-truth segmentation (one group id per unique colour).
# Normally they would be produced with dedicated tools, e.g. during the
# training process of a face recognition system.
# NOTE(review): the initial centers are random, so the group ids of a new run
# may be permuted relative to these labels (the recorded output has 2 where the
# labels have 1), which lowers the index -- confirm this is intended.

labels = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
          0, 1, 1, 1, 1, 1, 0, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2,
          1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 2, 2, 1, 2, 1,
          1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1,
          2, 1, 1, 2, 2, 1, 1, 1, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2,
          2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 2, 1, 1, 1, 2, 1, 2,
          1, 2, 1, 2, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
          2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1]
# Score the final assignment against the labels; the bare name on the last
# line makes the notebook display the value.
CDI_CONF = segmentation.czekanowski_dice_index_with_conf(unique_image_data, assignation_matrix, labels)
CDI_CONF

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 1, 2, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 1, 2, 2, 1, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 1, 2]


0.6010928961748634

The Czekanowski-Dice index is commonly used in image processing and binary processing, which is why the tests here are based on image segmentation. The resulting index value is not very good. However, the output image loses its white background and some pixels end up in the wrong groups, so, judging by the converted image, the index value seems plausible.