# Project 2 - Medical Imaging 

## Imports

In [159]:
import os 
import re 

import numpy as np 
import pandas as pd 
from PIL import Image 
from matplotlib import pyplot as plt 
import seaborn as sns 

from skimage import morphology 
from skimage.transform import rotate 


from time import time 

## Global Variables

In [160]:
# Initial parameters for running the notebook 
PREPROCESS = True 
COMPUTE_FEATURES = True 

In [161]:
# Paths to directories used 

ROOT_DIR = "../"
DATA_DIR = ROOT_DIR + "data"
IM_DIR = DATA_DIR + "/ISIC-2017_Training_Data"
MASK_DIR = DATA_DIR + "/ISIC-2017_Training_Part1_GroundTruth"
DIAGNOSIS_PATH = DATA_DIR + "/ISIC-2017_Training_Part3_GroundTruth.csv"


In [162]:
# Load all filenames into a dictionary
FILENAMES = {}
FILENAMES['images'] = sorted([IM_DIR + "/" + i for i in list(os.walk(IM_DIR))[0][2]])
FILENAMES['masks'] = sorted([f"{MASK_DIR}/{i}" for i in list(os.walk(MASK_DIR))[0][2]])
FILENAMES['image_num'] = len(FILENAMES['images'])

## Helper functions 

In [163]:
def make_sides_even(image): 
    '''
    Function to make the numbers of columns and rows in 
    an image even. 
    Input: An image 
    Output: An image
    '''
    # Convert image to numpy array
    image = np.array(image)
    
    # Check if the number of rows is even 
    if image.shape[0] % 2 != 0: 
        # Delete the first row
        image = np.delete(image,0,axis = 0)
    
    # Check if the number of columns is even 
    if image.shape[1] % 2 != 0: 
        # Delete the first column
        image = np.delete(image,0,axis = 1)
    
    # Convert numpy array back to image 
    image = Image.fromarray(image)
    # Return the updated image 
    return image

In [164]:
def filter_and_crop_image(image_path,mask_path): 
    image = Image.open(image_path) 
    mask = Image.open(mask_path)
    
    image_crop = image.crop(mask.getbbox())
    mask_crop = mask.crop(mask.getbbox())
    
    image_crop = make_sides_even(image_crop)
    mask_crop = make_sides_even(mask_crop)

    tmp_image = Image.new("RGB",image_crop.size, 0)
    filtered_image = Image.composite(image_crop,tmp_image,mask_crop)
    
    return filtered_image, mask_crop, image_crop

## Preprocessing

In [165]:
start = time()
if PREPROCESS: 
    try: 
        os.makedirs(DATA_DIR + "/filtered_images")
        os.makedirs(DATA_DIR + "/filtered_masks")
        os.makedirs(DATA_DIR + "/cropped_images")

    except FileExistsError:
        print("Directories exist")
    except IsADirectoryError: 
        print("Directories exist")
    IMAGE_FILTER_DIR = DATA_DIR + "/filtered_images"
    MASK_FILTER_DIR = DATA_DIR + "/filtered_masks"
    IMAGE_CROP_DIR = DATA_DIR + "/cropped_images"
    for i in range(FILENAMES['image_num']):
        mask_path = FILENAMES['masks'][i]
        image_path = FILENAMES['images'][i]
        image, mask, crop = filter_and_crop_image(image_path,mask_path)
        image_name = image_path.split("/")[-1].split(".")[-2]
        mask_name = mask_path.split("/")[-1].split(".")[-2]
        image.save(IMAGE_FILTER_DIR + "/" + image_name + ".jpg")
        mask.save(MASK_FILTER_DIR + "/" + mask_name + ".png")
        crop.save(IMAGE_CROP_DIR + "/" + image_name + ".jpg")
        del mask_path, image_path, image, mask, crop, image_name, mask_name
    
    FILENAMES['filtered_images'] = sorted([f"{IMAGE_FILTER_DIR}/{i}" for i in list(os.walk(IMAGE_FILTER_DIR))[0][2]])
    FILENAMES['cropped_images'] = sorted([f"{IMAGE_CROP_DIR}/{i}" for i in list(os.walk(IMAGE_CROP_DIR))[0][2]])
    FILENAMES['cropped_masks'] = sorted([f"{MASK_FILTER_DIR}/{i}" for i in list(os.walk(MASK_FILTER_DIR))[0][2]])
else: 
    IMAGE_FILTER_DIR = DATA_DIR + "/filtered_images"
    MASK_FILTER_DIR = DATA_DIR + "/filtered_masks"
    IMAGE_CROP_DIR = DATA_DIR + "/cropped_images"
    FILENAMES['filtered_images'] = sorted([f"{IMAGE_FILTER_DIR}/{i}" for i in list(os.walk(IMAGE_FILTER_DIR))[0][2]])
    FILENAMES['cropped_images'] = sorted([f"{IMAGE_CROP_DIR}/{i}" for i in list(os.walk(IMAGE_CROP_DIR))[0][2]])
    FILENAMES['cropped_masks'] = sorted([f"{MASK_FILTER_DIR}/{i}" for i in list(os.walk(MASK_FILTER_DIR))[0][2]])

end = time()
print(f"The entire operation took {end - start:6.3f} seconds")

Directories exist
The entire operation took  1.609 seconds


## Feature Extraction Functions

In [166]:
## Find the area and perimeter of the mask 
def get_area_perimeter(mask): 
    '''
    Function which takes in a mask for a
    given image and returns the area and 
    perimeter of the mask. 
    '''

    mask = np.where(np.array(mask) ==255,1,0)
    area = np.sum(mask)
    mask_erosion = morphology.binary_erosion(mask,morphology.disk(1))
    perimeter = np.sum(mask - mask_erosion)
    return area, perimeter 

In [167]:
def get_compactness(mask): 
    '''
    Function which takes in a mask for a given 
    image, calls the get_area_perimeter function
    to get the area and perimeter, and returns a 
    compactness score based upon [CITATION NEEDED]. 
    '''
    area, perimeter = get_area_perimeter(mask) 
    compactness = perimeter ** 2 / (4 * np.pi * area)
    return compactness 

In [168]:
def get_asymmetry(mask, rotation=45): 
    '''
    Takes in a mask for a given image, rotates it 
    180 times by one degree, compares the left and 
    right half and returns an average asymmetry score.
    '''
    mask = np.array(mask)
    axes = 0 
    length, width = mask.shape 
    pad_size = int(max((length, width))/2)
    if pad_size % 2 != 0: 
        pad_size += 1 
    mask = np.pad(mask,pad_size)
    
    diffs = []

    while axes * rotation < 180:
        temp_mask = rotate(mask, axes * rotation)
        length_lesion = np.nonzero(np.sum(temp_mask, axis = 0))[0][-1] - np.nonzero(np.sum(temp_mask, axis = 0))[0][0]
        left_mask = temp_mask[0: width, 0:np.nonzero(np.sum(temp_mask, axis = 0))[0][0] + length_lesion//2]
        right_mask = temp_mask[0: width, np.nonzero(np.sum(temp_mask, axis = 0))[0][0] + length_lesion//2:length]
        diffs.append(np.abs(np.sum(left_mask)-np.sum(right_mask)))
        
        axes += 1
    
    diff = np.mean(diffs)
    
    return diff / np.sum(mask)



In [169]:
def get_average_luminance(image): 
    '''
    A function which takes in an image and
    returns the average luminance. 
    Input: A PIL Image 
    Output: Average luminance of the image
    '''
    grayscale = np.array(image.convert('L'))
    average_luminance = round(np.mean(grayscale[grayscale > 0]))
    return average_luminance

In [170]:
def get_luminance_variability(image,measure="variance"): 
    grayscale = np.array(image.convert('L'))
    if measure == 'variance': 
        return round(np.var(grayscale[grayscale > 0]))
    elif measure == "standard_deviation": 
        return round(np.std(grayscale[grayscale > 0]))
    else: 
        raise ValueError("Only 'variance' or 'standard_deviation' accepted.") 

In [171]:
def get_avg_color(image): 
    '''
    A function which takes in an image and returns 
    the average color of the image. 
    '''
    r, g, b = image.split()
    r = np.array(r)
    g = np.array(g)
    b = np.array(b)
    average_color = (
        round(np.mean(r[r > 0])), 
        round(np.mean(g[g > 0])), 
        round(np.mean(b[b > 0]))
    )
    return average_color

In [172]:
def get_color_variance(image,measure="variance"): 
    '''
    A function which takes in an image and 
    returns the variance of the color. 
    '''
    r,g,b = image.split()
    r = np.array(r)
    g = np.array(g)
    b = np.array(b)
    if measure == "variance": 
        rgb = (
            np.var(r[r>0]),
            np.var(g[g>0]),
            np.var(b[b>0])
        )
    elif measure == "standard_deviation": 
        rgb = (
            np.std(r[r>0]),
            np.std(g[g>0]),
            np.std(b[b>0])
        )
    else: 
        raise ValueError("Only 'variance' or 'standard_deviation' accepted.") 
    return np.mean(rgb)

## Feature Extraction 

In [174]:
if COMPUTE_FEATURES: 
    feature_dictionary = {
        "image_id": [],
        "area": [], 
        "perimeter": [],
        "compactness": [], 
        "asymmetry": [], 
        "luminance_average": [],
        "luminance_variance": [],
        "red_average": [],
        "green_average": [],
        "blue_average": [],
        "color_variance": [],
    }
    for i in range(FILENAMES['image_num']): 
        filtered_image_path = FILENAMES['filtered_images'][i]
        cropped_image_path = FILENAMES['cropped_images'][i]
        cropped_mask_path = FILENAMES['cropped_masks'][i]

        image_name = filtered_image_path.split("/")[-1].split(".")[-2]
        print(f"Currently working on {i} - Image id: {image_name}")
        filtered_image = Image.open(filtered_image_path)
        cropped_image = Image.open(cropped_image_path)
        cropped_mask = Image.open(cropped_mask_path)

        feature_dictionary['image_id'].append(image_name)
        area, perimeter = get_area_perimeter(cropped_mask)
        feature_dictionary['area'].append(area)
        feature_dictionary['perimeter'].append(perimeter)
        feature_dictionary['compactness'].append(get_compactness(cropped_mask))
        feature_dictionary['asymmetry'].append(get_asymmetry(cropped_mask))
        feature_dictionary['luminance_average'].append(get_average_luminance(filtered_image))
        feature_dictionary['luminance_variance'].append(get_luminance_variability(filtered_image))
        red, green, blue = get_avg_color(filtered_image)
        feature_dictionary['red_average'].append(red)
        feature_dictionary['green_average'].append(green)
        feature_dictionary['blue_average'].append(blue)
        feature_dictionary['color_variance'].append(get_color_variance(filtered_image))
    features = pd.DataFrame(feature_dictionary)
    features.to_csv(ROOT_DIR + "/features/feature_set.csv",sep=";",index=False)
else: 
    features = pd.read_csv(ROOT_DIR + "/features/feature_set.csv", sep=";", index=False)

    

Currently working on 0 - Image id: ISIC_0000000
Currently working on 1 - Image id: ISIC_0000001
Currently working on 2 - Image id: ISIC_0000002
Currently working on 3 - Image id: ISIC_0000003
Currently working on 4 - Image id: ISIC_0000004
Currently working on 5 - Image id: ISIC_0000006
Currently working on 6 - Image id: ISIC_0000007
Currently working on 7 - Image id: ISIC_0000008
Currently working on 8 - Image id: ISIC_0000009
Currently working on 9 - Image id: ISIC_0000010
Currently working on 10 - Image id: ISIC_0000011
Currently working on 11 - Image id: ISIC_0000012
Currently working on 12 - Image id: ISIC_0000013
Currently working on 13 - Image id: ISIC_0000014
Currently working on 14 - Image id: ISIC_0000015
Currently working on 15 - Image id: ISIC_0000016
Currently working on 16 - Image id: ISIC_0000017
Currently working on 17 - Image id: ISIC_0000018
Currently working on 18 - Image id: ISIC_0000019
Currently working on 19 - Image id: ISIC_0000020
Currently working on 20 - Imag

In [175]:
features[0:10]

Unnamed: 0,image_id,area,perimeter,compactness,asymmetry,luminance_average,luminance_variance,red_average,green_average,blue_average,color_variance
0,ISIC_0000000,364956,1959,0.836794,0.001191,79,1618,85,77,82,1649.854004
1,ISIC_0000001,53426,890,1.179825,0.001186,54,891,73,50,40,839.756753
2,ISIC_0000002,237402,2061,1.423842,0.001273,110,1191,126,105,106,1204.156624
3,ISIC_0000003,260708,1824,1.015513,0.000692,100,1499,130,93,69,1402.423134
4,ISIC_0000004,269476,1696,0.849419,0.000284,143,1571,183,125,150,1730.049722
5,ISIC_0000006,69443,1073,1.319353,0.000297,101,934,115,97,87,1038.776818
6,ISIC_0000007,67559,922,1.00131,0.000956,87,891,101,82,74,930.860672
7,ISIC_0000008,261530,1964,1.173685,0.000749,105,1551,141,95,71,1456.725603
8,ISIC_0000009,178245,1644,1.206636,0.001442,100,1052,129,93,72,980.315309
9,ISIC_0000010,62084,906,1.052124,0.000155,91,1208,110,88,72,1147.859299
