In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import math
from skimage import morphology
import matplotlib.image as mpimg
import random
from sklearn.cluster import KMeans
import cv2
from collections import Counter
from skimage.color import rgb2lab, deltaE_cie76
import os
from PIL import ImageColor



In [None]:
# Feature table and ground-truth table, both read from paths relative to the notebook's folder.
features = pd.read_csv(r'../features/features.csv')
ground_truth = pd.read_csv(r"../data/example_ground_truth.csv")


## Functions

In [None]:
def calculate_area(image):
    """
    Area of a mask, measured as the sum of all its values.

    image = a numpy array (for a 0/1 mask the sum equals the number of set pixels)
    returns = the sum over every element of the array
    """
    total = np.sum(image)
    return total

def calculate_border(image,thickness):
    """
    Size of the border band of a segmentation mask, in pixels.

    The mask is eroded with a disk and the border is the number of mask
    pixels that the erosion removed.

    image = np array of a segmentation mask; every non-zero value counts as inside the figure
    thickness = an integer, the radius of the disk used for the erosion
    returns = the number of pixels in the border band of the figure
    """
    # Binarise first so both terms below are pixel counts. Subtracting a
    # boolean count from the raw sum is only right for strict 0/1 masks.
    mask = np.asarray(image) != 0
    struct_el = morphology.disk(thickness)
    mask_eroded = morphology.binary_erosion(mask, struct_el)
    return np.sum(mask) - np.sum(mask_eroded)


def compactness(area, border):
    """
    Compactness of a figure: border squared divided by (4 * pi * area).

    area = the area of the figure
    border = the circumference of the figure
    return = the compactness; a circle with its exact circumference gives 1
    """
    return border**2 / (4*math.pi*area)

def RGB2HEX(color):
    """
    Turn an RGB colour into a lowercase '#rrggbb' string.

    color = an indexable with at least three numeric entries (r, g, b)
    returns = the hex string; each channel is truncated to an int first
    """
    channels = (int(color[0]), int(color[1]), int(color[2]))
    return "#{:02x}{:02x}{:02x}".format(*channels)



def average_colour(image,segmentation):
    """
    Mean colour of the pixels selected by a segmentation mask.

    image = np array of a picture, indexed as [row, column, channel]
    segmentation = np array mask; pixels equal to 1 are selected
    returns = [r, g, b], the mean of channels 0, 1 and 2 over the selected pixels
    """
    inside = segmentation == 1
    return [np.mean(image[inside, channel]) for channel in range(3)]

def deviation_of_colour(image,segmentation):
    """
    Standard deviation of each colour channel inside a segmentation mask.

    image = np array of a picture, indexed as [row, column, channel]
    segmentation = np array mask; pixels equal to 1 are selected
    returns = [r, g, b], the standard deviation of channels 0, 1 and 2 over the selected pixels
    """
    inside = segmentation == 1
    return [np.std(image[inside, channel]) for channel in range(3)]

def average_luminosity(image,segmentation):
    """
    Luminosity of the mean colour inside a segmentation mask.

    image = np array of a picture
    segmentation = np array mask; pixels equal to 1 are selected
    returns = 0.299*r + 0.587*g + 0.114*b of the mean colour from average_colour
    """
    red, green, blue = average_colour(image,segmentation)
    return 0.299*red + 0.587*green + 0.114*blue

def deviation_luminosity(image,segmentation):
    """
    Standard deviation of the luminosity inside a segmentation mask.

    image = np array of a picture, indexed as [row, column, channel]
    segmentation = np array mask; pixels equal to 1 are selected
    returns = the standard deviation of 0.299*r + 0.587*g + 0.114*b over the selected pixels
    """
    mask = segmentation == 1
    pixels = image[mask]
    # Compute the luminosity per pixel and then take its deviation.
    # Combining the per-channel deviations as sqrt(sum(w^2 * sd^2)) is only
    # valid for uncorrelated channels, which colour channels of a photo are not.
    luminosity = 0.299*pixels[:, 0] + 0.587*pixels[:, 1] + 0.114*pixels[:, 2]
    return np.std(luminosity)

def findImage(image_id):
    """
    Load a picture and its segmentation mask from the example data folders.

    image_id = the id used in the file names (a string)
    returns = (image, image_seg) as read by plt.imread
    """
    image_path = '../data/example_image/'+image_id+'.jpg'
    segmentation_path = '../data/example_segmentation/'+image_id+'_segmentation.png'
    return plt.imread(image_path), plt.imread(segmentation_path)

In [None]:
# Here we load the data.
# Every entry of the data lists is a tuple with the following structure:
# (image_id, picture as a numpy array, segmentation mask of the picture)

In [None]:
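# Training data is a list of tuples structured the following way:
# (picture as a numpy array, label)
# In our case
# 0 == melanoma
# 1 == not melanoma (every image whose melanoma flag is not 1; these are collected in keratosis_data)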

In [None]:
# Rows of the ground truth whose melanoma flag is set.
mask = ground_truth['melanoma'] == 1
melanoma_mask = ground_truth[mask]
# A set gives constant-time membership tests inside the loop.
melanoma_ids = set(melanoma_mask['image_id'])

total_data = []      # (image_id, image, segmentation) for every image
melanoma_data = []   # the same triples, melanoma images only
keratosis_data = []  # the same triples, every image that is not melanoma
training_data = []   # (image, label) with 0 = melanoma and 1 = not melanoma

for x, image_id in enumerate(ground_truth['image_id']):
    print(image_id,x)
    image, image_seg = findImage(image_id) # Here we load the image and its mask
    record = (image_id,image,image_seg)
    if image_id in melanoma_ids: # If the image id is in the melanoma set, label it as melanoma
        melanoma_data.append(record)
        training_data.append((image,0))
    else: # else label it as keratosis
        keratosis_data.append(record)
        training_data.append((image,1))
    total_data.append(record) #Here we have a list of all the data together

In [None]:
#read file
# NOTE(review): this is the same CSV that is already loaded into `ground_truth` near the top of the notebook.
file_input = pd.read_csv("../data/example_ground_truth.csv")
file_input.shape

In [None]:
# data frame for images with no diagnosis:
# rows where both the melanoma and the seborrheic_keratosis flag are 0
non_malignant_df = file_input.loc[(file_input['melanoma'] == 0.0) & (file_input["seborrheic_keratosis"] == 0.0)]
non_malignant_df

In [None]:
#keep only the rows whose melanoma flag is 1 (all other images are filtered out)
melanoma = file_input[file_input["melanoma"] == 1.0]

#remove column for keratosis; drop returns a new frame, `melanoma` itself keeps the column
melanoma_df = melanoma.drop(["seborrheic_keratosis"], axis = 1)
melanoma_df

In [None]:
#number of images left: (rows, columns) of the melanoma-only frame
melanoma.shape

In [None]:
#read file features
# NOTE(review): this is the same CSV that is already loaded into `features` near the top of the notebook.
file_features_df = pd.read_csv("../features/features.csv")
file_features_df.shape

#150 rows, 3 columns

In [None]:
#data frame for features (displays the full frame as the cell output)
file_features_df

In [None]:
#rename column 'id' to 'image_id' so the feature table shares its key with the ground truth
#(rebinding instead of inplace=True leaves any frame that was already displayed untouched)
file_features_df = file_features_df.rename(columns={'id': 'image_id'})

#merge data frame for file_features_df with melanoma by column 'image_id'
#how='right' keeps every melanoma row, also when it has no matching feature row
merge_feature_melanoma = file_features_df.merge(melanoma_df, on='image_id', how='right')

In [None]:
#data frame for merged features and melanoma
#this data frame shows only data related to melanoma
#(one row per row of melanoma_df, because the merge above is a right merge)

merge_feature_melanoma

In [None]:
# size of data frame as (rows, columns)
merge_feature_melanoma.shape

In [None]:
# Right merge: keeps every non-malignant row, also when it has no matching feature row.
merge_feature_non_malignant_df = file_features_df.merge(non_malignant_df, on='image_id', how='right')
#Here we use the function isna() to check for missing values in the different columns
#(the result is one True/False per column: True when the column holds at least one missing value)
merge_feature_non_malignant_df.isna().any()

## Final DF

In [None]:
#Export features from images
# NOTE(review): `all_feature_merge` is not assigned in any cell shown in this notebook.
# It presumably is the feature table merged with the ground truth on 'image_id';
# confirm and create it before this cell.
rows = []
# Iterating over the records avoids a boolean-mask lookup per column and the
# deprecated int() conversion of a one-element Series.
for count, record in enumerate(all_feature_merge.to_dict('records')):
    image_id = record['image_id']
    print(image_id,count)

    image, image_seg = findImage(image_id)
    real_area = int(record['area'])
    real_perimeter = int(record['perimeter'])
    # Separate names, so the `melanoma` data frame built earlier is not overwritten by an int.
    melanoma_label = int(record['melanoma'])
    keratosis_label = int(record['seborrheic_keratosis'])

    mask_area = calculate_area(image_seg)  # computed once, used for the area and the compactness
    calculated_area = int(mask_area)
    calculated_perimeter = calculate_border(image_seg,2)
    # NOTE(review): the compactness uses a border of thickness 1 while
    # calculated_perimeter uses thickness 2; confirm this is intended.
    imgCompactness = compactness(mask_area,calculate_border(image_seg,1))
    average_rgb = average_colour(image,image_seg)
    deviation_of_rgb = deviation_of_colour(image,image_seg)
    average_luminosity_img = average_luminosity(image,image_seg)
    deviation_luminosity_img = deviation_luminosity(image,image_seg)
    rows.append([image_id, real_area, real_perimeter,melanoma_label,keratosis_label,calculated_area,calculated_perimeter,imgCompactness,average_rgb,deviation_of_rgb,average_luminosity_img,deviation_luminosity_img])

# Build the frame once from the collected rows.
data = pd.DataFrame(rows, columns = ['image_id','real_area','real_perimeter','melanoma','seborrheic_keratosis','calculated_area','calculated_perimeter','compactness','average_rgb','deviation_of_colour','average_luminosity','deviation_luminosity'])

In [None]:
# Here we use our functions area and border to calculate the compactness of the first image
# (the first melanoma image; index 2 of each entry is its segmentation mask, the border uses a disk of radius 4)
area1 = calculate_area(melanoma_data[0][2])
border1 = calculate_border(melanoma_data[0][2],4)
compactness(area1,border1)

In [None]:


#Structural element, that we will use as a "brush" on our mask
struct_el = morphology.disk(4)

first_mask = melanoma_data[0][2]  # segmentation mask of the first melanoma image
mask_eroded = morphology.binary_erosion(first_mask, struct_el)

# The border is the part of the mask that the erosion removed
border = first_mask-mask_eroded

# Verify it's smaller: number of pixels the erosion removed
print(calculate_area(first_mask)-np.sum(mask_eroded))

# Show side by side; both panels show the same crop of the image.
# Each panel is drawn on its own axes instead of relying on pyplot's current axes.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5))
axes[0].imshow(first_mask[500:1200,1000:2000], cmap='gray')
axes[0].set_title('Mask')
axes[1].imshow(border[500:1200,1000:2000])
axes[1].set_title('Border')
fig.tight_layout()


In [None]:
def masking(image,mask):
    """
    Black out everything outside the mask.

    image = np array of a picture with three colour channels
    mask = np array mask; pixels equal to 0 are treated as background
    returns = a copy of the picture with the background set to [0,0,0]; the input is not modified
    """
    background = mask == 0
    masked = image.copy()
    masked[background] = [0,0,0]
    return masked
# Masked version of the first loaded image (index 1 = picture, index 2 = segmentation mask).
test = masking(total_data[0][1],total_data[0][2])

In [None]:
# NOTE(review): these are the same steps as the masking() function, applied to the first image;
# the result `copy` is not used by any later cell shown here.
mask2 = total_data[0][2] == 0
copy = total_data[0][1].copy()
copy[mask2] = [0,0,0]

In [None]:
# Show the masked first image.
plt.imshow(test)

In [None]:
# From here on `image` refers to the masked first image (no copy is made).
image = test

In [None]:
# Shrink the picture, then flatten it into one row of three channel values per pixel.
resized = cv2.resize(image, (600, 400), interpolation = cv2.INTER_AREA)
modified_image = resized.reshape(-1, 3)

In [None]:
# Flattened pixel table: one row per pixel, three colour values per row.
modified_image

In [None]:
# Number of clusters (colours) that KMeans is asked to find.
number_of_colors = 5

In [None]:
# A fixed random_state makes the clusters, and therefore the colours and counts
# shown below, the same on every run.
clf = KMeans(n_clusters = number_of_colors, random_state = 0)
# One cluster label per pixel row of modified_image.
labels = clf.fit_predict(modified_image)

In [None]:
# Number of pixels per cluster label. This iterates the label array element by element,
# so it has to run while `labels` is still one-dimensional.
counts = Counter(labels)

In [None]:
# Number of labels, one per pixel of the resized picture.
len(labels)

In [None]:
# Number of labels divided by 600, the width passed to cv2.resize above.
len(labels)/600

In [None]:
# cv2.resize was given (width, height) = (600, 400), so the resized picture has
# 400 rows of 600 pixels. The labels have to be folded back the same way.
# reshape returns a new array and does not change the label array in place.
labels = labels.reshape(400, 600)

In [None]:
# Cluster labels arranged as a two-dimensional label image.
labels

In [None]:
# Show the label image; every cluster label is drawn in its own colour of the default colormap.
plt.imshow(labels)

In [None]:

center_colors = clf.cluster_centers_
# We get ordered colors by iterating through the keys, so that the colours come
# in the same order as counts.values()
ordered_colors = [center_colors[i] for i in counts.keys()]
# ordered_colors is already in key order, so it is walked directly. Indexing it
# a second time with the cluster ids would pair the counts with the wrong colours.
hex_colors = [RGB2HEX(color) for color in ordered_colors]
rgb_colors = list(ordered_colors)

In [None]:
# Collect every colour and its pixel count, leaving out pure black ('#000000').
color_list = []
color_count = []
for pixel_count, hex_code in zip(counts.values(),hex_colors):
    if hex_code != '#000000':
        color_list.append(hex_code)
        color_count.append(pixel_count)

In [None]:
# Pie chart of the pixel counts; every slice is labelled with and drawn in its own hex colour.
plt.figure(figsize = (8, 6))
plt.pie(color_count, labels = color_list, colors = color_list);

In [None]:
# Print every (pixel count, hex colour) pair, black included.
for elm in zip(counts.values(),hex_colors):
    print(elm)

In [None]:
# The same pairs as [pixel count, hex colour] lists, so the colour entry can be replaced later.
color_data = [list(elm) for elm in zip(counts.values(),hex_colors) ]

In [None]:
# [pixel count, hex colour] for every cluster.
color_data

In [None]:
# Replace every hex colour string in color_data by its RGB tuple.
# Entries that are no longer strings are skipped, so the cell can be run again safely.
for entry in color_data:
    if isinstance(entry[1], str):
        entry[1] = ImageColor.getcolor(entry[1], "RGB")

In [None]:
# color_data again, now that the colour entries have been converted.
color_data

In [None]:


# Build one 3x3 swatch (a list of rows of RGB tuples) per colour in color_data.
kat = []
for entry in color_data:
    colour = entry[1]
    # The colours may already have been converted to RGB tuples by an earlier cell;
    # only hex strings still need the conversion.
    if isinstance(colour, str):
        colour = ImageColor.getcolor(colour, "RGB")
    # A separate name is used so the masked image stored in `test` is not overwritten.
    swatch = [[colour for _ in range(3)] for _ in range(3)]
    kat.append(swatch)

In [None]:
# Seed Python's random module, which getcentroids below draws its values from.
random.seed(420)

In [None]:
# NOTE(review): absolute path on one specific machine; this cell only runs where that file exists.
# NOTE(review): the second positional argument (0) is passed as imread's `format`; confirm it is intended.
img = mpimg.imread(r'C:\Users\tjupp\Desktop\ITU\Intro_programing\Exercise24\ex24pick.png',0)
# Keep the first three channels only. This rebinds `data`, the name that held the feature data frame.
data = img[:,:,:3]
plt.imshow(data)

In [None]:
# Shape of the picture currently held in `data`.
data.shape

In [None]:
# Crop of the first loaded picture: rows 600-1199 and columns 1300-2099. Rebinds `data` again.
data = (total_data[0][1][600:1200,1300:2100])
plt.imshow(data)

In [None]:
# Shape of the picture read with mpimg.imread.
img.shape

In [None]:
# Top-left pixel (row 0, column 0) of the first loaded picture.
total_data[0][1][0][0]

In [None]:
def rgb2gray(rgb):
    """
    Convert a colour picture to greyscale with a weighted sum of its channels.

    rgb = np array indexed as [row, column, channel]; channels 0, 1 and 2 are used
    returns = np array [row, column] with 0.2989*r + 0.5870*g + 0.1140*b per pixel
    """
    red = rgb[:,:,0]
    green = rgb[:,:,1]
    blue = rgb[:,:,2]
    return 0.2989 * red + 0.5870 * green + 0.1140 * blue

# Greyscale version of the first loaded picture.
grey_image = rgb2gray(total_data[0][1])

In [None]:
# rgb2gray is already defined earlier in the notebook, so it is not defined a second time here.
# NOTE(review): the picture converted here used to be `im`, a name that is not assigned in any
# cell shown in this notebook; the first loaded picture is used instead. Confirm.
gray = rgb2gray(total_data[0][1])
plt.imshow(gray, cmap='gray')

In [None]:
# (rows, columns) of the greyscale picture.
grey_image.shape

In [None]:
# No cmap is given, so the greyscale values are drawn with matplotlib's default colormap.
plt.imshow(grey_image)

In [None]:
# NOTE(review): KMeans.fit treats every row of a two-dimensional input as one sample, so this
# clusters the rows of the picture, not its single pixels. Confirm that is intended;
# grey_image.reshape(-1, 1) would cluster the pixels.
kmeans = KMeans(n_clusters=5, random_state=0).fit(grey_image)

In [None]:
# One cluster label per sample that was passed to fit.
kmeans.labels_

In [None]:
# Settings for the hand-written clustering below: number of centroids and coordinates per centroid.
amount_of_clusters = 5
n_dimensions = 3
def getcentroids(amount_of_clusters,n_dimensions,minpoint,maxpoint):
    """
    Draw random starting centroids.

    amount_of_clusters = how many centroids to create
    n_dimensions = how many coordinates every centroid has
    minpoint, maxpoint = inclusive integer bounds of every coordinate
    returns = a list with amount_of_clusters lists of n_dimensions random integers
    """
    return [
        [random.randint(minpoint,maxpoint) for _ in range(n_dimensions)]
        for _ in range(amount_of_clusters)
    ]

#centroids = getcentroids(amount_of_clusters,n_dimensions,0,255)
#print(centroids)

In [None]:
def distance(pointA,pointB):
    """
    Euclidean distance between two points of n dimensions.

    pointA, pointB = iterables of numbers; when the lengths differ, the extra
                     coordinates of the longer point are ignored
    returns = the distance as a float
    """
    # Convert to float before subtracting: pixel values are often unsigned
    # 8-bit integers, which wrap around in the subtraction and in the square.
    squared = [(float(a) - float(b))**2 for a, b in zip(pointA, pointB)]
    return math.sqrt(sum(squared))

In [None]:

def linkpointtocentroid(data,centroids):
    """
    Label every pixel with the number of its closest centroid.

    data = np array of a picture, indexed as [row, column, channel]
    centroids = a list of points; the first centroid is label 1, the second label 2, ...
    returns = np array [row, column] of labels; when two centroids are equally close
              the first one wins, and with no centroids every label is 0
    """
    # Work in float so unsigned 8-bit pixel values cannot wrap around.
    pixels = np.asarray(data, dtype=float)
    grid_shape = pixels.shape[:-1]
    final_list = np.zeros(grid_shape, dtype=int)
    closest = np.full(grid_shape, np.inf)  # smallest distance seen so far per pixel
    for label, centroid in enumerate(centroids, start=1):
        centre = np.asarray(centroid, dtype=float)
        # Compare only the coordinates that both the pixels and the centroid have.
        dims = min(pixels.shape[-1], centre.shape[0])
        offsets = pixels[..., :dims] - centre[:dims]
        # One distance map for the whole picture per centroid.
        dist = np.sqrt(np.sum(offsets**2, axis=-1))
        nearer = dist < closest  # strictly smaller, so the first of two equal centroids is kept
        final_list[nearer] = label
        closest[nearer] = dist[nearer]
    return final_list
#linkpointtocentroid(data,centroids)

#labels = linkpointtocentroid(data,centroids)




#labels, points_linked_to_centroid = linkpointtocentroid(data,centroids)
#only uncomment line above once you get to step 3
#print(labels.shape)
#print(len(points_linked_to_centroid_dict))

#print(np.unique(labels))
#print(labels)
#plt.imshow(labels)

In [None]:
#labels.shape