<h1>Skin detector</h1>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image, ImageOps
from tensorflow.keras import backend as K
from keras.utils import np_utils
import os

# Dataset locations, relative to the notebook's working directory.
TRAIN_PATH = '../datasets/skin_cancer/train'
TEST_PATH = '../datasets/skin_cancer/test'

# Show which Keras backend is active, then pin the image layout explicitly.
# TensorFlow's convention is channels-last; Theano's is channels-first.
print(K.backend())
K.set_image_data_format('channels_last')

tensorflow


"The data consists of two folders with each <strong>1800</strong> pictures <strong>(224x224)</strong> of the two types of moles."<br>
<strong>The goal is to predict if a skin spot is either benign or malignant.</strong>

<h2>Building datasets</h2>

In [2]:
def create_array(train_test_path, benign_malign_path, width, height):
    """
        Load every image of one class folder as grayscale and build its labels.

        Parameters
        ----------
        train_test_path : str
            Root folder of the split (e.g. TRAIN_PATH or TEST_PATH).
        benign_malign_path : str
            Class sub-folder; must be 'benign' (label 0) or 'malignant' (label 1).
        width, height : int
            Size of the per-image slot in the output array.

        Returns
        -------
        X : np.ndarray of shape (n_images, width, height)
            Grayscale pixel values, one image per row (default float dtype).
        y : np.ndarray of shape (n_images,)
            All zeros for 'benign', all ones for 'malignant'.

        Raises
        ------
        ValueError
            If benign_malign_path is not one of the two accepted class names.
    """
    # Reject a bad class name up front, before any image is read from disk.
    if benign_malign_path not in ('benign', 'malignant'):
        raise ValueError(
            f'benign_malign_path must be "benign" or "malignant", got {benign_malign_path!r}'
        )

    complete_path = f'{train_test_path}/{benign_malign_path}/'
    # List the folder once so the array size and the loop see the same files.
    image_files = os.listdir(complete_path)

    # The source images are RGB; grayscale is used to keep computation cheap.
    # NOTE(review): the buffer is (n, width, height) while arrays built from
    # PIL images are presumably (rows, cols); this only lines up for square
    # images such as the 224x224 ones used here -- confirm before other sizes.
    X = np.empty(shape=(len(image_files), width, height))
    for i, image_file in enumerate(image_files):
        # The context manager closes the underlying file once it is converted.
        with Image.open(f'{complete_path}{image_file}') as im:
            X[i] = np.array(ImageOps.grayscale(im))  # (224, 224, 3) -> (224, 224)

    if benign_malign_path == 'benign':
        y = np.zeros(shape=X.shape[0])
    else:
        y = np.ones(shape=X.shape[0])
    return X, y

def shuffle_in_unison(a, b):
    """
        Shuffle two arrays in place so that they end up in the same order.

        The global NumPy RNG state is captured once and restored before each
        shuffle, so both shuffles consume the same sequence of random draws
        (see: https://stackoverflow.com/questions/4601373/better-way-to-shuffle-two-numpy-arrays-in-unison)
    """
    saved_state = np.random.get_state()
    for array in (a, b):
        # Rewind the generator so every array is shuffled from the same state.
        np.random.set_state(saved_state)
        np.random.shuffle(array)

In [3]:
# Training split: one grayscale image array and one label vector per class.
X_train_benign, y_train_benign = create_array(TRAIN_PATH, 'benign', width=224, height=224)
X_train_malign, y_train_malign = create_array(TRAIN_PATH, 'malignant', width=224, height=224)

In [4]:
# Test split: one grayscale image array and one label vector per class.
X_test_benign, y_test_benign = create_array(TEST_PATH, 'benign', width=224, height=224)
X_test_malign, y_test_malign = create_array(TEST_PATH, 'malignant', width=224, height=224)

In [5]:
# Per-class shapes before merging.
print(f'{X_train_benign.shape} {y_train_benign.shape}')
print(f'{X_train_malign.shape} {y_train_malign.shape}')

# Stack both classes along the sample axis: benign first, then malignant.
X_train = np.concatenate([X_train_benign, X_train_malign])
y_train = np.concatenate([y_train_benign, y_train_malign])
print(f'{X_train.shape} {y_train.shape}')

(1440, 224, 224) (1440,)
(1197, 224, 224) (1197,)
(2637, 224, 224) (2637,)


In [6]:
# Per-class shapes before merging.
print(f'{X_test_benign.shape} {y_test_benign.shape}')
print(f'{X_test_malign.shape} {y_test_malign.shape}')

# Stack both classes along the sample axis: benign first, then malignant.
X_test = np.concatenate([X_test_benign, X_test_malign])
y_test = np.concatenate([y_test_benign, y_test_malign])
print(f'{X_test.shape} {y_test.shape}')

(360, 224, 224) (360,)
(300, 224, 224) (300,)
(660, 224, 224) (660,)


In [7]:
# Seed NumPy's global RNG so the shuffled order is the same on every
# Restart & Run All; without it each run produces a different ordering.
SHUFFLE_SEED = 42
np.random.seed(SHUFFLE_SEED)

# After concatenation the samples are grouped by class (benign, then
# malignant); shuffle images and labels together to mix the classes.
shuffle_in_unison(X_train, y_train)
shuffle_in_unison(X_test, y_test)

<h2>Processing data</h2>

In [8]:
def resize(X, y):
    """
        Add the axes the model expects; no pixel data is resampled.

        Parameters
        ----------
        X : np.ndarray
            Image array indexed as (n, dim1, dim2).
        y : np.ndarray
            Label vector with n entries.

        Returns
        -------
        X : np.ndarray of shape (n, dim1, dim2, 1)
            Same data with a trailing single-channel axis (channels_last).
        y : np.ndarray of shape (n, 1)
            Labels as a column vector.
    """
    # Append a length-1 channel axis after the two image axes.
    X = X[:, :, :, np.newaxis]
    # The shape is passed positionally: the `newshape` keyword of np.reshape
    # is deprecated in recent NumPy releases.
    y = np.reshape(y, (y.shape[0], 1))
    return X, y

def rescale(X, y):
    """
        Scale pixel values by 1/255 and convert the labels with to_categorical.

        Returns a tuple (X_scaled, y_categorical): X cast to float32 and
        divided by 255, and y passed through np_utils.to_categorical.
    """
    scaled = X.astype('float32') / 255
    categorical = np_utils.to_categorical(y)
    return scaled, categorical

<h3>Resizing :</h3>

In [9]:
# The model input must be n samples of shape (width, height, 1), i.e.
# channels_last, so add the channel axis to both splits.
X_train, y_train = resize(X_train, y_train)
X_test, y_test = resize(X_test, y_test)

print(f'{X_train.shape} {X_test.shape}')
print(f'{y_train.shape} {y_test.shape}')

(2637, 224, 224, 1) (660, 224, 224, 1)
(2637, 1) (660, 1)


<h3>Rescaling :</h3>

In [None]:
# Divide the training pixels by 255 (as float32) and pass the labels through
# to_categorical. NOTE: the names are rebound from themselves, so running this
# cell a second time would rescale and re-encode already-processed data.
X_train, y_train = rescale(X_train, y_train)

In [10]:
# Same preprocessing for the test split. NOTE: the names are rebound from
# themselves, so running this cell a second time would rescale and re-encode
# already-processed data.
X_test, y_test = rescale(X_test, y_test)

<h2>Modelling :</h2>