## Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
from skimage import morphology
import matplotlib.image as mpimg
import random
from sklearn.cluster import KMeans
import cv2
from collections import Counter
from skimage.color import rgb2lab, deltaE_cie76
import os
from PIL import ImageColor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
from skimage import morphology
import matplotlib.image as mpimg
import random
from sklearn.cluster import KMeans
import cv2
from collections import Counter
from skimage.color import rgb2lab, deltaE_cie76
import os
from PIL import ImageColor

## Loading data

In [2]:
features = pd.read_csv(r'../features/features.csv')
ground_truth = pd.read_csv(r"../data/example_ground_truth.csv")


## Functions

In [3]:
def calculate_area(image):
    """
    image = a numpy array
    """
    return np.sum(image)

def calculate_border(image,thickness):
    """
    image = np array of a picture
    thicness = an integer of the thiccness of the border
    returns = the border of the figure
    """
    struct_el = morphology.disk(thickness)
    mask_eroded = morphology.binary_erosion(image, struct_el)
    return calculate_area(image)-np.sum(mask_eroded)


def compactness(area, border):
    """
    area = the area of the figure
    border = the circumference of the figure
    return = returns the compactness
    """
    top = border**2
    bot = 4*math.pi*area
    return top/bot

def RGB2HEX(color):
    return "#{:02x}{:02x}{:02x}".format(int(color[0]), int(color[1]), int(color[2]))



def average_colour(image,segmentation):
    mask = segmentation == 1
    copy = image.copy()
    r = np.mean(copy[mask,0])
    g = np.mean(copy[mask,1])
    b = np.mean(copy[mask,2])
    #num_of_pixels = copy[mask].shape[0]
    return [r,g,b]

def deviation_of_colour(image,segmentation):
    mask = segmentation == 1
    copy = image.copy()
    r = np.std(copy[mask,0])
    g = np.std(copy[mask,1])
    b = np.std(copy[mask,2])
    return [r,g,b]

def average_luminosity(image,segmentation):
    avg_colour = average_colour(image,segmentation)
    return 0.299*avg_colour[0] + 0.587*avg_colour[1] + 0.114*avg_colour[2]

def deviation_luminosity(image,segmentation):
    deviation_colour = deviation_of_colour(image,segmentation)
    return np.sqrt(0.299**2*deviation_colour[0]**2 + 0.587**2*deviation_colour[1]**2 + 0.114**2*deviation_colour[2]**2)

def findImage(image_id):
    image = plt.imread('../data/example_image/'+image_id+'.jpg')
    image_seg = plt.imread('../data/example_segmentation/'+image_id+'_segmentation.png')
    return image,image_seg

def make_knn_prediction(k):
    neigh = KNeighborsClassifier(n_neighbors=k)
    neigh.fit(x_train, y_train)
    return neigh.predict(x_test)

def from_rgb_to_int(df):
    placeholder = [RGB2HEX(elm) for elm in df]
    hex_val = [int("0x" + elm[1:],0) for elm in placeholder]
    return hex_val

def knn_model_constructor(k,training,labels):
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(training, labels)
    return model

## Task 0: Exploring the data

In [7]:
#rename column 'id' to 'image_id'
features.rename(columns={'id': 'image_id'}, inplace=True)

In [8]:
# Here we merge the two data frames
all_feature_merge = features.merge(ground_truth, on='image_id', how='right')

In [9]:
# Here we plot the dataframe
all_feature_merge

Unnamed: 0,image_id,area,perimeter,melanoma,seborrheic_keratosis
0,ISIC_0001769,216160.0,2013.0,0.0,0.0
1,ISIC_0001852,130493.0,1372.0,0.0,0.0
2,ISIC_0001871,205116.0,1720.0,0.0,0.0
3,ISIC_0003462,161705.0,1344.0,0.0,0.0
4,ISIC_0003539,317040.0,2063.0,0.0,0.0
...,...,...,...,...,...
145,ISIC_0015443,9732988.0,13775.0,0.0,0.0
146,ISIC_0015445,12049938.0,19517.0,0.0,0.0
147,ISIC_0015483,8856478.0,13813.0,0.0,0.0
148,ISIC_0015496,8951522.0,13742.0,0.0,0.0


In [10]:
# Here we check for missing values, which we do not have
all_feature_merge.isna().any()

image_id                False
area                    False
perimeter               False
melanoma                False
seborrheic_keratosis    False
dtype: bool

## Task 1: implement two features

In [11]:
mask = ground_truth['melanoma'] == 1
melanoma_mask = ground_truth[mask]

In [13]:
mask = ground_truth['seborrheic_keratosis'] == 1
keratosis_mask = ground_truth[mask]

In [15]:


total_data = []
melanoma_data = []
keratosis_data = []
non_infected_data = []
x = 0

for image_id in ground_truth['image_id']:
    print(image_id,x)
    image = plt.imread('../data/example_image/'+image_id+'.jpg') # Here we load the image
    image_seg = plt.imread('../data/example_segmentation/'+image_id+'_segmentation.png') # Here we load the mask
    if image_id in list(melanoma_mask['image_id']): # If the image id is in the melanoma list, label it as melanoma
        melanoma_data.append((image_id,image,image_seg)) #else label it as keratosis
    if image_id in list(keratosis_mask['image_id']):
        keratosis_data.append((image_id,image,image_seg))
    else:
        non_infected_data.append((image_id,image,image_seg))
    total_data.append((image_id,image,image_seg)) #Here we have a list of all the data together
    x += 1

ISIC_0001769 0
ISIC_0001852 1
ISIC_0001871 2
ISIC_0003462 3
ISIC_0003539 4
ISIC_0003582 5
ISIC_0003657 6
ISIC_0003805 7
ISIC_0004337 8
ISIC_0006651 9
ISIC_0006671 10
ISIC_0006815 11
ISIC_0006914 12
ISIC_0007141 13
ISIC_0007156 14
ISIC_0007235 15
ISIC_0007241 16
ISIC_0007332 17
ISIC_0007344 18
ISIC_0007528 19
ISIC_0007796 20
ISIC_0008025 21
ISIC_0008524 22
ISIC_0009995 23
ISIC_0010459 24
ISIC_0012099 25
ISIC_0012109 26
ISIC_0012126 27
ISIC_0012127 28
ISIC_0012143 29
ISIC_0012151 30
ISIC_0012159 31
ISIC_0012160 32
ISIC_0012191 33
ISIC_0012201 34
ISIC_0012204 35
ISIC_0012206 36
ISIC_0012210 37
ISIC_0012221 38
ISIC_0012222 39
ISIC_0012254 40
ISIC_0012256 41
ISIC_0012288 42
ISIC_0012306 43
ISIC_0012313 44
ISIC_0012316 45
ISIC_0012335 46
ISIC_0012380 47
ISIC_0012383 48
ISIC_0012400 49
ISIC_0012417 50
ISIC_0012434 51
ISIC_0012492 52
ISIC_0012513 53
ISIC_0012538 54
ISIC_0012547 55
ISIC_0012660 56
ISIC_0012684 57
ISIC_0012720 58
ISIC_0012746 59
ISIC_0012876 60
ISIC_0012927 61
ISIC_0012956 62
IS

In [17]:
len(total_data)

150

In [18]:
len(keratosis_data)

42

In [19]:
len(melanoma_data)

30

In [20]:
len(non_infected_data)

108

In [22]:
108 + 42 + 30

180

In [None]:
# Here we use our functions area and border to calculate the compactness of the first image
area1 = calculate_area(melanoma_data[0][2])
#border1 = calculate_border(melanoma_data[0][2],4)
#compactness(area1,border1)