<a href="https://colab.research.google.com/github/yasminsarkhosh/fyp2021p3/blob/main/FYP_project_3_introduction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Get some images and import packages

In [None]:
if True:      #A weird trick needed for Google Colab
  # Clone repository with example images 
  !rm -rf fyp2021p3
  !git clone https://github.com/vcheplygina/fyp2021p3.git





# Other useful packages might be skimage or PIL


In [None]:
import matplotlib.pyplot as plt
import numpy as np

***

### READING IN FILE: EXAMPLE_GROUND_TRUTH.CSV

In [None]:
import pandas as pd
import os

#read file
file_input = pd.read_csv("../data/example_ground_truth.csv")
file_input.shape

#150 images in total
#3 columns: image_id, melanoma, keratosis


### DATA FRAME FOR EXAMPLE_GROUND_TRUTH FILE

In [None]:
file_input


***

### DATA FRAME FOR IMAGES WITH NO DIAGNOSIS, MEANING MELANOMA AND KERATOSIS == 0.0

In [None]:
# data frame for images with no diagnosis
non_malignant_df = file_input.loc[(file_input['melanoma'] == 0.0) & (file_input["seborrheic_keratosis"] == 0.0)]
non_malignant_df


## DATA FRAME FOR IMAGES WITH MELANOMA

In [None]:
#filter out all images that are not melanoma
melanoma = file_input[file_input["melanoma"] == 1.0]

#remove column for keratosis
melanoma_df = melanoma.drop(["seborrheic_keratosis"], axis = 1)
melanoma_df

In [None]:
#number of images left
melanoma.shape

### READING IN FILE FOR FEATURES.CSV

In [None]:
#read file features
file_features_df = pd.read_csv("../features/features.csv")
file_features_df.shape

#150 rows, 3 columns

## DATA FRAME FOR FEATURES

In [None]:
#data frame for features
file_features_df

### Merge data frame for features with data frame for melanoma to filter out non-related images

In [None]:
#rename column 'id' to 'image_id'
file_features_df.rename(columns={'id': 'image_id'}, inplace=True)

#merge data frame for file_features_df with melanoma by column 'image_id'
merge_feature_melanoma = file_features_df.merge(melanoma_df, on='image_id', how='right')

## DATA FRAME FOR FEATURES AND MELANOMA IMAGES

In [None]:
#data frame for merged features and melanoma
#this data frame shows only data related to melanoma 

merge_feature_melanoma

In [None]:
# size of data frame
merge_feature_melanoma.shape

## DATA FRAME FOR FEATURE AND NO DIAGNOSIS IMAGES

In [None]:
merge_feature_non_malignant_df = file_features_df.merge(non_malignant_df, on='image_id', how='right')
merge_feature_non_malignant_df

***

# Explore an image from the dataset

In [None]:
# Load an image and display it

im = plt.imread('../data/example_image/ISIC_0012099.jpg')
plt.imshow(im)

***

# Function for basic properties

### Basic Properties of an Image

In [None]:
def prob_img(image):
    """Print the basic properties of an image array.

    Parameters
    ----------
    image : array exposing ``shape`` and ``ndim`` (e.g. ``numpy.ndarray``)
        Axis 0 is reported as the height and axis 1 as the width, so the
        array needs at least two dimensions.

    Returns
    -------
    None
        The properties are printed; nothing is returned.
    """
    print('Type of the image : ', type(image))
    print('Shape of the image : {}'.format(image.shape))
    print('Image Height {}'.format(image.shape[0]))
    print('Image Width {}'.format(image.shape[1]))
    # ndim is the number of array axes (3 for a colour image: rows, columns,
    # channels); it is not the number of colour channels.
    print('Dimension of Image {}'.format(image.ndim))

prob_img(im)

In [None]:
def RGB_max_min(image):
    """Print the number of elements and the largest and smallest value of an image array.

    Parameters
    ----------
    image : array exposing ``size``, ``max()`` and ``min()`` (e.g. ``numpy.ndarray``)

    Returns
    -------
    None
        The three values are printed one per line.
    """
    # `size` counts every element of the array, i.e. all pixels times all channels
    print(f'Image size {image.size}')
    print(f'Maximum RGB value in this image {image.max()}')
    print(f'Minimum RGB value in this image {image.min()}')

RGB_max_min(im)

### Subplots


In [None]:
# Show each colour channel of `im` in its own panel.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(15, 5))
for channel, channel_ax in enumerate(axes):
    # Start from an all-zero (black) image; uint8 is requested explicitly
    # because np.zeros would otherwise create a float64 array.
    split_img = np.zeros(im.shape, dtype="uint8")
    # Copy a single channel so the other two stay zero.
    split_img[:, :, channel] = im[:, :, channel]
    channel_ax.imshow(split_img)

***


## Erosion

_All pixels near the boundary of an object are discarded, depending on the size of the kernel. Useful for removing small white noise._

In [None]:
import cv2 


In [None]:
img = cv2.imread('../data/example_image/ISIC_0012099.jpg',0)
kernel = np.ones((5,5),np.uint8)
erosion = cv2.erode(img,kernel,iterations = 1)


plt.imshow(erosion)

## Dilation

_Opposite of erosion: it increases the white region in the image. Erosion removes white noise but also shrinks the objects, so we dilate afterwards to restore them. Also useful for joining broken parts of an object together._

In [None]:
dilation = cv2.dilate(img, kernel, iterations = 1)
plt.imshow(dilation)

## Opening

_Also known as erosion followed by dilation_

In [None]:
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
plt.imshow(opening)

## Closing

_Reverse of opening, that is, dilation followed by erosion. Useful for closing small holes inside the foreground objects or small black points on the object._

In [None]:
closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
plt.imshow(closing)

## Morphological Gradient

_The difference between dilation and erosion of an image. The result will look like the outline of the object_

In [None]:
gradient = cv2.morphologyEx(img, cv2.MORPH_GRADIENT, kernel)
plt.imshow(gradient)

***

In [None]:
#A color image is a array with 3 dimensions (x, y, R-G-B color channels) of integers

print(im.shape)
print(im.dtype)

#Other packages might wrap the image in a different class - you are allowed to use those if you want



In [None]:
#3-dimension array of the data. 
#All of the data is the image: 
# each matrix block is a row of data, and each element within that is the pixel values in RGB-A (Red Green Blue Alpha)

print(np.array(im))

In [None]:

# Get a single RGB value from the blue circle (marker used by dermatologist)
print(im[1500,2000,:])

# Show only the red channel
plt.imshow(im[:,:,0], cmap='gray')



In [None]:
# Display only a part of the image

im_part = im[60:120,130:220,:]
plt.imshow(im_part)



In [None]:
# Modify the image by setting some pixels to black

im_copy = im_part.copy()


im_copy[0:10,0:10,:] = np.tile(0, [10, 10, 3])
plt.imshow(im_copy)


# Explore the segmentation mask

In [None]:
# Load the mask and display it

mask = plt.imread('../data/example_segmentation/ISIC_0012099_segmentation.png')
plt.imshow(mask, cmap='gray')
    



In [None]:
# Show image and mask side by side

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))
axes[0].imshow(im)
axes[1].imshow(mask, cmap='gray')
fig.tight_layout()

In [None]:
# Show the images overlayed, for this we can use PIL 

#!pip install pillow 
from PIL import Image 

# Load images as Image objects  
img1 = Image.open('../data/example_image/ISIC_0012099.jpg') 
img2 = Image.open('../data/example_segmentation/ISIC_0012099_segmentation.png') 
  
# Overlay - more options such as transparency should be available here  
img2.paste(img1, (0,0), mask = img2) 
  
# Display 
img2.show()  # This doesn't actually display an image in Google Colab :(
plt.imshow(img2, cmap='gray')


# Note that this is a single channel image
print(img2.size)          

In [None]:
# Alternative: replace the non-lesion pixels

img1 = im.copy()
img1[mask==0] = 0
  
# Display 
plt.imshow(img1)

# You can use any package you prefer, but beware you might need to convert between formats

In [None]:
def rgb2gray(rgb):
    """Convert a colour image to grayscale with a weighted sum of its channels.

    Parameters
    ----------
    rgb : numpy.ndarray
        Three-dimensional array whose last axis holds the channels; channels
        0, 1 and 2 are weighted as red, green and blue respectively.

    Returns
    -------
    numpy.ndarray
        Two-dimensional array with one weighted intensity per pixel.
    """
    red = rgb[:, :, 0]
    green = rgb[:, :, 1]
    blue = rgb[:, :, 2]
    return 0.2989 * red + 0.5870 * green + 0.1140 * blue

img1 = plt.imread('../data/example_image/ISIC_0012099.jpg')
gray = rgb2gray(img1)

plt.imshow(gray, cmap='gray')

In [None]:
#plt.hist(gray)

In [None]:
img2 = gray < 120
plt.imshow(img2, cmap='gray')

# Measurements and masks

In [None]:
im = plt.imread('../data/example_image/ISIC_0012099.jpg')

mask=plt.imread('../data/example_segmentation/ISIC_0012099_segmentation.png')


In [None]:
# total size of the image

total = mask.shape[0] * mask.shape[1]
print("total size of the image is ", total)


In [None]:
# size of mask only: sum of all pixel values in the mask

area = np.sum(mask)
print("size of area is", area)

In [None]:
# as percentage

print(area/total*100)
plt.imshow(mask)

## Measurement: width/height

In [None]:
# Summing over axis 0 collapses the rows and leaves one count per column; the
# largest count is the most mask pixels stacked in a single column, used here
# as the height of the lesion.
pixels_in_col = np.max(np.sum(mask, axis=0))
# Summing over axis 1 collapses the columns and leaves one count per row; the
# largest count is the most mask pixels in a single row, used here as the
# width of the lesion.
pixels_in_row = np.max(np.sum(mask, axis=1))
print("Number of pixels in column is, also known as height \n", pixels_in_col, '\n')

print("Number of pixels in row is, also known as width \n", pixels_in_row)

## Measurement: diameter at an angle

In [None]:
from skimage import transform

rot_im = transform.rotate(mask, 30)
plt.imshow(rot_im, cmap='gray')

# Find perimeter using morphology

_perimeter is the sum of pixels on the border_

In [None]:
from skimage import morphology

#Structural element, that we will use as a "brush" on our mask
struct_el = morphology.disk(20)

print(struct_el)

In [None]:
mask_eroded = morphology.binary_erosion(mask, struct_el)

# Show side by side

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(8, 5))
axes[0].imshow(mask, cmap='gray')
axes[1].imshow(mask_eroded, cmap='gray')
fig.tight_layout()

# Verify it's smaller
print(area)
print(np.sum(mask_eroded))

In [None]:
# Subtract the two masks from each other to get the border/perimeter

image_perimeter = mask - mask_eroded

plt.imshow(image_perimeter, cmap='gray') #The perimeter is very thin so it might be difficult to see on the screen

#What is the length? 
print('The perimeter or border of the area is', np.sum(image_perimeter))

# Creating your own mask

In [None]:
# Work with color image as grayscale

def rgb2gray(rgb):
    """Convert a colour image to grayscale with a weighted sum of its channels.

    This redefines the ``rgb2gray`` declared earlier in the notebook. Only the
    first three entries of the last axis are used, so a fourth (alpha) channel
    is ignored.

    Parameters
    ----------
    rgb : numpy.ndarray
        Array whose last axis holds at least three channels.

    Returns
    -------
    numpy.ndarray
        Array without the channel axis, one weighted intensity per pixel.
    """
    luma_weights = [0.2989, 0.5870, 0.1140]
    colour_channels = rgb[..., :3]
    return np.dot(colour_channels, luma_weights)

gray = rgb2gray(im)
plt.imshow(gray, cmap='gray')



In [None]:
# Let's get rid of the marker
gray2 = gray[0:1400,:]
plt.imshow(gray2, cmap='gray')

mask2 = mask[0:1500,:]


In [None]:
#Look at intensities of image
#plt.hist(gray2)


In [None]:

# Threshold
mymask = gray2 < 120  #Pixels with lower intensities will be equal to 1 in the mask
plt.imshow(mymask, cmap='gray')

In [None]:
# There is some noise, we can get rid of it by morphological operators

# NOTE(review): this import rebinds the name `opening`, which an earlier cell
# assigned to the result image of cv2.morphologyEx.
from skimage.morphology import opening

# Opening = first EROSION, then DILATION

# Erosion will get rid of hairs but also make the lesion smaller.
# Dilation will restore the lesion (but not the hairs)

# Disk-shaped structuring element of radius 5; this overwrites the
# disk(20) element created in the perimeter section.
struct_el = morphology.disk(5)
# NOTE(review): this opens `mask2` (a crop of the provided segmentation mask),
# not the thresholded `mymask` from the previous cell that the noise remark
# above seems to refer to - confirm which one is intended.
opened = opening(mask2, struct_el)

plt.imshow(opened, cmap='gray')

# Filtering

In [None]:
# Gaussian filtering (blur)

from skimage import filters

blurred = filters.gaussian(mask,sigma=10)

plt.imshow(blurred, cmap='gray')

#What kind of values are in the image now?



In [None]:
# Threshold again

mask2 = blurred > 0.5
plt.imshow(mask2, cmap='gray')


In [None]:
# Blur color image - this could be useful for measuring color (variability)

blurred = filters.gaussian(im,sigma=25)

plt.imshow(blurred)


# General purpose features

In [None]:
# Many examples in https://scikit-image.org/docs/dev/api/skimage.feature.html 

# Crop image first

im2 = im[700:1150,1250:1700,:]
mask2 = mask[700:1150,1250:1700]


plt.imshow(im2)


In [None]:
# Gaussian features recently available (might need to update version)

# Example segmentation for microscopy image: https://scikit-image.org/docs/dev/auto_examples/segmentation/plot_trainable_segmentation.html#sphx-glr-auto-examples-segmentation-plot-trainable-segmentation-py 

!pip install scikit-image==0.18.0

In [None]:


from skimage import feature
from functools import partial 

#Extract feature images
feat_im = feature.multiscale_basic_features(im2, multichannel=True, intensity=False, edges=False, texture=True)
print(feat_im.shape)


In [None]:
plt.imshow(feat_im[:,:,3], cmap='gray')


In [None]:
# We measured X features for every pixel in the image - this is good for segmentation, but not image classification yet

# For classification we need to aggregate the outputs for each feature type into one vector

feat_vec, bin_edges = np.histogram(feat_im[:,:,8], bins=16)

plt.bar(np.arange(0,16), feat_vec)
print(feat_vec)

In [None]:
# Determine bins based on intensities instead... 
# plt.hist(feat_im[:,:,8], bins='auto')     # Very slow for large images

flat_im = np.ndarray.flatten(feat_im[:,:,8])
flat_mask = np.ndarray.flatten(mask2)

# Only pixels inside the mask
flat_im = flat_im[flat_mask==1]

quantile_bins = np.quantile(flat_im, np.arange(0,1,0.1))

# Bins have different widths
print(quantile_bins)

#feat_vec, bin_edges = np.histogram(flat_im, bins=quantile_bins)
print(feat_vec)

In [None]:
# Note that bins should be the same across images (for a particular feature). 

# Define bins once on "representative image" (how?), then use for all images

***

In [None]:
"""
Main script for the FYP 2021 project 3
"""

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from skimage import morphology
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import accuracy_score




file_data = 'data/example_ground_truth.csv'
path_image = 'data/example_image'
path_mask = 'data/example_segmentation'

file_features = 'features/features.csv'






***

In [None]:
import cv2

# Flag 1 loads the file as a 3-channel colour image.
im_test = cv2.imread('../data/example_image/ISIC_0012099.jpg', 1)
# cv2.imread returns None instead of raising when the file cannot be read
if im_test is None:
    raise FileNotFoundError('../data/example_image/ISIC_0012099.jpg')
# OpenCV stores the channels in BGR order while plt.imshow expects RGB, so
# convert once here to get the right colours in the cells that display it.
im_test = cv2.cvtColor(im_test, cv2.COLOR_BGR2RGB)
#plt.imshow(im_test)

In [None]:
dimensions = im_test.shape
print(dimensions)

In [None]:
# Read the height and width from the image itself instead of hard-coding them
h, w = im_test.shape[:2]
# Width of the resized image in pixels
new_width = 500
# Scale factor that keeps the aspect ratio of the original image
ratio = new_width / w
# cv2.resize expects the target size as (width, height)
dim = (new_width, int(h * ratio))
# The resized image is stored in resized_2
resized_2 = cv2.resize(im_test, dim)
plt.imshow(resized_2)

In [None]:
height, width = im_test.shape[:2]
# Negative values of tx will shift the image to the left
# Positive values will shift the image to the right
# Negative values of ty will shift the image up
# Positive values will shift the image down
M = np.float32([[1, 0, 100], [0, 1, 50]])
translated = cv2.warpAffine(im_test, M, (width, height))
plt.imshow(translated)

In [None]:
# `im_test` is reused here for the segmentation mask; `img` is the lesion photo.
# Flag 1 loads both files as 3-channel images.
im_test = cv2.imread('../data/example_segmentation/ISIC_0012099_segmentation.png', 1)
img = cv2.imread('../data/example_image/ISIC_0012099.jpg', 1)
# cv2.imread returns None instead of raising when a file cannot be read
if im_test is None or img is None:
    raise FileNotFoundError('could not read the example image or its segmentation mask')
# OpenCV returns BGR channel order; convert the photo to RGB so that
# plt.imshow shows the right colours and rgb2gray weights the right channels.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Crop image first (same window for the mask and the photo)
mask_crop = im_test[700:1150, 1250:1700]
img_crop = img[700:1150, 1250:1700]


plt.imshow(img_crop)

In [None]:
# Flipping the image around y-axis (horizontal flipping)
flipped_y = cv2.flip(img_crop, 1)
plt.imshow(flipped_y)

In [None]:
# Flipping the image around x-axis (vertical flipping)
flipped_x = cv2.flip(img_crop, 0)
plt.imshow(flipped_x)

In [None]:
# Flipping the image around both axes
flipped_both = cv2.flip(img_crop, -1)
plt.imshow(flipped_both)

In [None]:
#def hist_plt(mask):
   # hist = plt.hist(mask)
   # return hist


#hist_orignal_img = hist_plt(original_img)
#hist_flip_y = hist_plt(mask_flipped_y)
#hist_flip_x = hist_plt(mask_flipped_x)
#hist_flip_both = hist_plt(mask_flipped_both)

In [None]:

#cropped version
input_img = img_crop #original image
mask_img  = mask_crop #segmentation image

# select only masked area below
masked = input_img.copy()
masked[mask_img == 0 ] = 0

fig, axes = plt.subplots(1, 3, figsize=(16, 12))
ax = axes.flatten()

ax[0].imshow(input_img, cmap="gray")
ax[0].set_axis_off()
ax[0].set_title("Original Imput Image", fontsize=12, c = 'w')

ax[1].imshow(mask_img, cmap="gray")
ax[1].set_axis_off()
ax[1].set_title("Segmentation Mask", fontsize=12, c = 'w')

ax[2].imshow(masked, cmap="gray")
ax[2].set_axis_off()
ax[2].set_title("Masked Image", fontsize=12, c = 'w')

plt.show()



# TASK 1: ABC FEATURES - ASYMMETRY SHAPE

In [None]:
# same images flipped vertically and horizontally 

original_img = rgb2gray(input_img)
mask_flipped_y = rgb2gray(flipped_y)
mask_flipped_x = rgb2gray(flipped_x)
mask_flipped_both  = rgb2gray(flipped_both)

fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(8, 10))
axes[0].imshow(original_img, cmap='gray')
axes[1].imshow(mask_flipped_y, cmap='gray')
axes[2].imshow(mask_flipped_x, cmap='gray')
axes[3].imshow(mask_flipped_both, cmap='gray')
fig.tight_layout()


In [None]:
# Threshold: for customizing our own masks for each image
def threshold_mask(mask, threshold=125):
    """Return a boolean mask that is True where the input is below ``threshold``.

    Parameters
    ----------
    mask : numpy.ndarray
        Intensity image to threshold.
    threshold : number, optional
        Cut-off value; entries strictly below it become True. The default of
        125 was chosen from the intensity histogram (plt.hist).

    Returns
    -------
    numpy.ndarray
        Boolean array with the same shape as ``mask``.
    """
    custom_mask = mask < threshold
    return custom_mask

cust_mask_orignal = threshold_mask(original_img)
cust_mask_y = threshold_mask(mask_flipped_y)
cust_mask_x = threshold_mask(mask_flipped_x)
cust_mask_both = threshold_mask(mask_flipped_both)


# subplot of same flipped masked image
fig, axes = plt.subplots(nrows=1, ncols=4, figsize=(8, 10))
axes[0].imshow(cust_mask_orignal, cmap='gray')
axes[1].imshow(cust_mask_y, cmap='gray')
axes[2].imshow(cust_mask_x, cmap='gray')
axes[3].imshow(cust_mask_both, cmap='gray')
fig.tight_layout()


## Find center points of each segmentation masked image
_Code by Gino Franco Fazzi_

In [None]:
def centerpoint(mask):
    """Return the center of the bounding box around the pixels equal to 1.

    Parameters
    ----------
    mask : numpy.ndarray
        Two-dimensional mask; pixels that compare equal to 1 (or True) are
        treated as part of the lesion.

    Returns
    -------
    tuple of int
        ``(row, column)`` of the bounding-box center, using integer division.

    Raises
    ------
    ValueError
        If no pixel of ``mask`` equals 1.
    """
    # np.where returns one index array per axis: rows first, then columns
    borders = np.where(np.asarray(mask) == 1)
    rows, cols = borders[0], borders[1]
    if rows.size == 0:
        raise ValueError("centerpoint: mask contains no pixels equal to 1")
    # Smallest row index is the top edge, largest is the bottom edge
    top, bottom = rows.min(), rows.max()
    left, right = cols.min(), cols.max()
    # Plain Python ints keep the result easy to print and to use as an index
    center = (int((top + bottom) // 2), int((left + right) // 2))
    return center

#print(borders)
#print(up, down, left, right)
#print(center)

center_original = centerpoint(cust_mask_orignal)

#center_flip_y = centerpoint(mask_flipped_y)
#center_flip_x = centerpoint(mask_flipped_x)
#center_flip_both = centerpoint(mask_flipped_both)

print('Center point coordinates of masked image is:', center_original)

#print('Coordinates for the center:', center_flip_y)
#print('Coordinates for the center:', center_flip_x)
#print('Coordinates for the center:', center_flip_both)

***

## Original image

In [None]:
cust_mask_orignal[:,center_original[0]:]
plt.imshow(cust_mask_orignal)

## Subplot: Left part of center line and right part mirrored over center line

In [None]:

# centerpoint() returns (row, column); the vertical center line is a column,
# so the second coordinate is the one to split on.
center_col = center_original[1]

# Left part of center line (the center column is its last column)
left = cust_mask_orignal[:, :center_col + 1]

# Right part mirrored over the center line: flipping only the right half puts
# the center column last, so it lines up with the center column of `left`.
right_mirrored = np.fliplr(cust_mask_orignal[:, center_col:])

# Pad the narrower half with empty columns on the side away from the center
# line so both halves have the same shape and can be overlaid pixel by pixel.
full_width = max(left.shape[1], right_mirrored.shape[1])
left = np.pad(left, ((0, 0), (full_width - left.shape[1], 0)), mode='constant')
right_mirrored = np.pad(
    right_mirrored, ((0, 0), (full_width - right_mirrored.shape[1], 0)), mode='constant'
)

fig, axes = plt.subplots(1, 2, figsize=(6, 8))
ax = axes.flatten()


ax[0].imshow(left, cmap="gray")
ax[0].set_axis_off()
ax[0].set_title("Left part of center line", fontsize=12, c = 'w')

ax[1].imshow(right_mirrored, cmap="gray")
ax[1].set_axis_off()
ax[1].set_title("Right part mirrored over center line", fontsize=12, c = 'w')

fig.tight_layout()

## Subplot: Right part of center line and left part mirrored over center line

In [None]:
# centerpoint() returns (row, column); the vertical center line is a column,
# so the second coordinate is the one to split on.
center_col = center_original[1]

# Right part of the center line (the center column is its first column)
right = cust_mask_orignal[:, center_col:]

# Left part mirrored over the center line: flipping only the left half puts
# the center column first, so it lines up with the center column of `right`.
left_mirrored = np.fliplr(cust_mask_orignal[:, :center_col + 1])

# Pad the narrower half with empty columns on the side away from the center
# line so both halves have the same shape and can be overlaid pixel by pixel.
full_width = max(right.shape[1], left_mirrored.shape[1])
right = np.pad(right, ((0, 0), (0, full_width - right.shape[1])), mode='constant')
left_mirrored = np.pad(
    left_mirrored, ((0, 0), (0, full_width - left_mirrored.shape[1])), mode='constant'
)

fig, axes = plt.subplots(1, 2, figsize=(6, 8))
ax = axes.flatten()

ax[0].imshow(right, cmap="gray")
ax[0].set_axis_off()
ax[0].set_title("Right part of center line", fontsize=12, c = 'w')

ax[1].imshow(left_mirrored, cmap="gray")
ax[1].set_axis_off()
ax[1].set_title("Left part mirrored over center line", fontsize=12, c = 'w')


fig.tight_layout()

## Symmetric area of: 

_a) the right part and the left part mirrored over the center line, and
b) the left part and the right part mirrored over the center line_

In [None]:
# a)
def find_right_left_mirrored(right, left_mirrored):
    """Overlay the right half of the mask with the mirrored left half.

    Parameters
    ----------
    right, left_mirrored : numpy.ndarray
        Two mask halves of identical shape.

    Returns
    -------
    right_left_mirrored : numpy.ndarray
        Element-wise sum of the two halves. For boolean input the values are
        0 (neither half), 1 (exactly one half) and 2 (both halves overlap).
    symmetric_area_right : int
        Number of non-zero pixels in the sum, i.e. pixels covered by at least
        one of the two halves.
    """
    layers = []
    for part in (right, left_mirrored):
        part = np.asarray(part)
        # Adding boolean arrays is a logical OR, which would hide where the
        # halves overlap; count with small integers instead.
        layers.append(part.astype(np.uint8) if part.dtype == bool else part)
    right_left_mirrored = layers[0] + layers[1]
    symmetric_area_right = np.count_nonzero(right_left_mirrored)
    return right_left_mirrored, symmetric_area_right

right_left_mirrored, symmetric_area_right = find_right_left_mirrored(right, left_mirrored)
print("Symmetric area for right and left mirrored parts: \n", symmetric_area_right)

# b)
def find_left_right_mirrored(left, right_mirrored):
    """Overlay the left half of the mask with the mirrored right half.

    Parameters
    ----------
    left, right_mirrored : numpy.ndarray
        Two mask halves of identical shape.

    Returns
    -------
    left_right_mirrored : numpy.ndarray
        Element-wise sum of the two halves. For boolean input the values are
        0 (neither half), 1 (exactly one half) and 2 (both halves overlap).
    symmetric_area_left : int
        Number of non-zero pixels in the sum, i.e. pixels covered by at least
        one of the two halves.
    """
    layers = []
    for part in (left, right_mirrored):
        part = np.asarray(part)
        # Adding boolean arrays is a logical OR, which would hide where the
        # halves overlap; count with small integers instead.
        layers.append(part.astype(np.uint8) if part.dtype == bool else part)
    left_right_mirrored = layers[0] + layers[1]
    symmetric_area_left = np.count_nonzero(left_right_mirrored)
    return left_right_mirrored, symmetric_area_left

left_right_mirrored, symmetric_area_left = find_left_right_mirrored(left, right_mirrored)

print("\nSymmetric area for left and right mirrored parts: \n", symmetric_area_left)


print("\nTotal size of image: \n", cust_mask_orignal.size)


## Subplots of a) and b): visualization of overlapping and non-overlapping parts of images

In [None]:
def subplot_asymmetry(left_right_mirrored, right_left_mirrored):
    """Show the two overlay images of the asymmetry feature side by side.

    Parameters
    ----------
    left_right_mirrored : array-like image
        Overlay of the left half and the mirrored right half (left panel).
    right_left_mirrored : array-like image
        Overlay of the right half and the mirrored left half (right panel).

    Returns
    -------
    None
        The figure is drawn with matplotlib; nothing is returned.
    """
    fig, axes = plt.subplots(1, 2, figsize=(14, 12))
    ax = axes.flatten()

    # Titles are drawn in white (c='w') - presumably for a dark notebook theme
    ax[0].imshow(left_right_mirrored)
    ax[0].set_axis_off()
    ax[0].set_title("Symmetry: Left of center", fontsize=12, c = 'w')

    ax[1].imshow(right_left_mirrored)
    ax[1].set_axis_off()
    ax[1].set_title("Symmetry: Right of center ", fontsize=12, c = 'w')


# The function used to be defined under this name while the call below used
# `subplot_asymmetry`, which raised a NameError; keep both names available.
subplot_A_featur = subplot_asymmetry

subplot_asymmetry(left_right_mirrored, right_left_mirrored)
# Left part plus the right part - see below
# The yellow part of the figure are where the two parts intersect (overlapping)
# The green part is where the figures are non-overlapping

## Size of area: according to the center line

_Here we look at each part of the segmentation mask image (`right`, `left`, `right_mirrored` and `left_mirrored`) and find the size of each part separately._

In [None]:
# size of mask only: sum of all pixel values in the mask

def size_mask_centerline(right, left, right_mirrored, left_mirrored):
    """Sum the pixel values of each of the four mask parts.

    Parameters
    ----------
    right, left, right_mirrored, left_mirrored : numpy.ndarray
        The mask parts on either side of the center line and their mirrored
        counterparts.

    Returns
    -------
    tuple
        ``(area_right, area_left, area_right_mirrored, area_left_mirrored)``,
        each being ``np.sum`` of the corresponding part.
    """
    # Keep the order of the arguments so the result lines up with the names
    parts = (right, left, right_mirrored, left_mirrored)
    return tuple(np.sum(part) for part in parts)
    

# Areas of the four parts, in the order size_mask_centerline returns them
size_mask_values = list(size_mask_centerline(right, left, right_mirrored, left_mirrored))

# Names of the parts, in the same order as the values above
name_key = ['right', 'left', 'right_mirrored', 'left_mirrored']

# Map every part name to the size of its masked area
res = dict(zip(name_key, size_mask_values))

# `key` is the whole (name, size) pair, so both are printed together
for key in res.items():
    print('\nSize of masked area over center line is:\n', key)

    


