Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from tensorflow.keras import models, layers 
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
image_info = pd.read_csv('../processed_data/ISIC_2019_Training_GroundTruth_Processed_Balanced.csv')

In [3]:
image_info

Unnamed: 0,image,MEL,NV,BCC,AK,BKL,DF,VASC,SCC,UNK,Cat,y
0,ISIC_0000001,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,0
1,ISIC_0000002,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,MEL,1
2,ISIC_0000003,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,0
3,ISIC_0000004,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,MEL,1
4,ISIC_0000007,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,0
...,...,...,...,...,...,...,...,...,...,...,...,...
24801,ISIC_0073153_flipped,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,AK,1
24802,ISIC_0073157_flipped,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,AK,1
24803,ISIC_0073198_flipped,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,AK,1
24804,ISIC_0073214_flipped,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,AK,1


In [4]:
def get_image(df):
    return np.asarray(Image.open((f"../processed_data/{df['Cat']}/{df['image']}.jpg")), dtype=np.float32)

In [5]:
image_info.shape

(24806, 12)

In [6]:
X = image_info[:1000].apply(get_image, axis=1)
X

0      [[[204.0, 204.0, 214.0], [204.0, 204.0, 214.0]...
1      [[[135.0, 146.0, 178.0], [135.0, 146.0, 178.0]...
2      [[[235.0, 235.0, 237.0], [235.0, 235.0, 237.0]...
3      [[[115.0, 115.0, 115.0], [115.0, 115.0, 115.0]...
4      [[[126.0, 135.0, 144.0], [128.0, 137.0, 146.0]...
                             ...                        
995    [[[188.0, 144.0, 117.0], [187.0, 143.0, 116.0]...
996    [[[205.0, 178.0, 171.0], [206.0, 179.0, 172.0]...
997    [[[199.0, 163.0, 149.0], [206.0, 170.0, 156.0]...
998    [[[157.0, 127.0, 125.0], [157.0, 129.0, 126.0]...
999    [[[144.0, 116.0, 102.0], [144.0, 116.0, 102.0]...
Length: 1000, dtype: object

In [7]:
X[0].shape

(400, 400, 3)

In [8]:
y = image_info['y'][:1000]
y

0      0
1      1
2      0
3      1
4      0
      ..
995    1
996    1
997    1
998    1
999    1
Name: y, Length: 1000, dtype: int64

In [9]:
from sklearn.model_selection import train_test_split   

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

Dummy model

In [11]:
def initialize_model():
    
    model = models.Sequential()

    model.add(layers.Conv2D(16, (3, 3), input_shape = (400,400,3), activation = 'relu', padding='same'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))

    model.add(layers.Conv2D(32, (3, 3), activation = 'relu', padding='same' ))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    
    model.add(layers.Conv2D(64, (3, 3), activation = 'relu', padding='same'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    
    model.add(layers.Conv2D(64, (2, 2), activation = 'relu', padding='same'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))

    model.add(layers.Flatten())
    model.add(layers.Dense(units = 100, activation = 'relu'))
    model.add(layers.Dense(units = 1, activation = 'sigmoid'))

    model.compile(loss='binary_crossentropy',
                 optimizer='adam',
                 metrics =['accuracy'])   

    return model

In [12]:
model_dummy = initialize_model()
model_dummy.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 400, 400, 16)      448       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 200, 200, 16)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 200, 200, 32)      4640      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 100, 100, 32)     0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 100, 100, 64)      18496     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 50, 50, 64)       0

In [13]:
X_train.shape

(700,)

In [14]:
np.stack(X_train).shape

(700, 400, 400, 3)

In [15]:
X_train.iloc[0].shape

(400, 400, 3)

In [16]:
es = EarlyStopping(patience=5, verbose=1)

history_dummy = model_dummy.fit(np.stack(X_train), y_train,
                    validation_split = 0.3,
                    batch_size = 16,
                    epochs=10,
                    callbacks=[es],
                    verbose = 1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
 6/31 [====>.........................] - ETA: 35s - loss: 0.6878 - accuracy: 0.6146

: 

: 

### Evaluate our CNN ###

print(model_dummy.evaluate(X_test, y_test, verbose=1))