<h1> Melanoma classification </h1>

<h3> Imports </h3>

In [1]:
import os, shutil
import random
from pathlib import Path

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras import backend as K
from keras import models, layers, optimizers, regularizers
from keras.applications import VGG16
from keras.callbacks import ReduceLROnPlateau , ModelCheckpoint, EarlyStopping
from keras.preprocessing.image import ImageDataGenerator
from skimage import exposure,color,io,transform,filters,util
from sklearn.metrics import recall_score
from sklearn.preprocessing import StandardScaler

Using TensorFlow backend.


<h3> Data directories </h3>

In [2]:
# Root folder of the dataset. Set the MELANOMA_DATA_DIR environment variable to
# run the notebook on another machine; the original location stays the default.
data_dir = Path(os.environ.get('MELANOMA_DATA_DIR', 'D:/DATASETS/MELANOMA'))

# Image folders (train_dir is the image_dir passed to mixed_data_generator).
train_dir = data_dir / 'train'
test_dir = data_dir / 'test'

# Per-image CSV folders (train_csv_dir is the csv_dir passed to mixed_data_generator).
train_csv_dir = data_dir / 'train_csv'
test_csv_dir = data_dir / 'test_csv'

<h4> Structured data load and preprocessing </h4>

In [3]:
# Load the raw train/test metadata tables from the dataset root folder.
train, test = (
    pd.read_csv(data_dir / csv_name)
    for csv_name in ('train.csv', 'test.csv')
)

In [4]:
# Re-read the raw tables so this cell gives the same result every time it is run.
train = pd.read_csv(data_dir / 'train.csv')
test = pd.read_csv(data_dir / 'test.csv')

# Columns removed before encoding.
train = train.drop(columns=['benign_malignant', 'patient_id', 'diagnosis'])
test = test.drop(columns=['patient_id'])

# A missing body site becomes its own category. The result is assigned back to
# the column: `df[col].fillna(..., inplace=True)` operates on a temporary Series
# and does not update the frame when pandas Copy-on-Write is active.
site_col = 'anatom_site_general_challenge'
train[site_col] = train[site_col].fillna('Not_especified')
test[site_col] = test[site_col].fillna('Not_especified')

# Rows that still contain a missing value are discarded.
# NOTE(review): this also removes rows from `test`; confirm that no prediction
# is required for those images.
train = train.dropna()
test = test.dropna()

# One-hot encode the categorical columns.
categorical_cols = ['sex', site_col]
train = pd.get_dummies(train, columns=categorical_cols)
test = pd.get_dummies(test, columns=categorical_cols)

# The two frames are encoded independently, so a category present in only one
# of them would yield different columns. Give `test` exactly the feature columns
# of `train` (everything except the label), filling absent dummies with 0.
test = test.reindex(columns=[col for col in train.columns if col != 'target'], fill_value=0)

# Standardise age with statistics computed on the training rows only.
scaler = StandardScaler()
scaler.fit(train['age_approx'].values.reshape(-1, 1))

train['age_approx'] = scaler.transform(train['age_approx'].values.reshape(-1, 1)).ravel()
test['age_approx'] = scaler.transform(test['age_approx'].values.reshape(-1, 1)).ravel()

<h4> Elimino imágenes con NaN </h4>

In [5]:
# Disabled step (the whole cell is commented out).
# If re-enabled, it deletes every file in train_dir / 'BENIGN' whose name,
# minus its last four characters, is not listed in train.image_name.
# NOTE(review): destructive (os.remove) -- presumably run once by hand after
# the NaN rows were dropped; confirm before re-enabling.
# for fname in os.listdir(train_dir / 'BENIGN'):
    
#     img_name = fname[:-4]
    
#     if img_name not in train.image_name.values:
        
#         os.remove(train_dir / 'BENIGN' / fname)

<h4> Separo las imágenes de entrenamiento según el target: benigno o maligno </h4>

In [6]:
# Disabled step (the whole cell is commented out).
# If re-enabled, it walks train_dir and test_dir and, for every entry other
# than the 'BENIGN' / 'MALIGN' folders, moves the file into 'BENIGN' when its
# name (minus the last four characters) belongs to a train row with
# target == 0, or into 'MALIGN' when it belongs to a row with target == 1.
# NOTE(review): labels are looked up in `train` for both directories, so files
# in test_dir only move if their names also appear in `train` -- confirm intent.
# directories = [train_dir, test_dir]

# for direct in directories:
    
#     for fname in os.listdir(direct):
        
#         if fname not in ["BENIGN", "MALIGN"]:

#             src = direct / fname

#             if fname[:-4] in train[train["target"] == 0]["image_name"].values:

#                 dst = direct / 'BENIGN' / fname
#                 shutil.move(src,dst)

#             if fname[:-4] in train[train["target"] == 1]["image_name"].values:

#                 dst = direct / 'MALIGN' / fname
#                 shutil.move(src,dst)

In [7]:
# Preview the first rows of the preprocessed training table.
train.head()

Unnamed: 0,image_name,age_approx,target,sex_female,sex_male,anatom_site_general_challenge_Not_especified,anatom_site_general_challenge_head/neck,anatom_site_general_challenge_lower extremity,anatom_site_general_challenge_oral/genital,anatom_site_general_challenge_palms/soles,anatom_site_general_challenge_torso,anatom_site_general_challenge_upper extremity
0,ISIC_2637011,-0.269122,0,0,1,0,1,0,0,0,0,0
1,ISIC_0015719,-0.269122,0,1,0,0,0,0,0,0,0,1
2,ISIC_0052212,0.078579,0,1,0,0,0,1,0,0,0,0
3,ISIC_0068279,-0.269122,0,1,0,0,1,0,0,0,0,0
4,ISIC_0074268,0.426281,0,1,0,0,0,0,0,0,0,1


<h4> Calculo los pesos </h4>

In [9]:
# Per-class loss weights, keyed by the value of the `target` column.
# NOTE(review): 56 is hard-coded; presumably it approximates the ratio of
# target == 0 to target == 1 rows in the training data -- confirm.
class_weights = {
    0: 1,
    1: 56,
}

<h4> Creo el generador de datos </h4>

In [10]:
def mixed_data_generator(image_dir, csv_dir, batch_size):
    """Yield endless batches of ``([tabular_features, images], labels)``.

    Every entry of ``image_dir`` is paired with the CSV file in ``csv_dir``
    that has the same base name with ``.jpg`` replaced by ``.csv``. The CSV's
    ``target`` column is the label; all its remaining values are the tabular
    features of that sample.

    Parameters
    ----------
    image_dir : path-like
        Folder whose entries are the image files.
    csv_dir : path-like
        Folder holding one CSV file per image.
    batch_size : int
        Number of samples per yielded batch.

    Yields
    ------
    tuple
        ``([other_feats, images], targets)`` where ``other_feats`` has shape
        ``(batch_size, 10)``, ``images`` stacks the 128x128 resized images and
        ``targets`` has shape ``(batch_size, 1)``.

    Raises
    ------
    ValueError
        If ``image_dir`` contains no entries.
    OSError
        If an image cannot be read by ``cv2.imread``.

    Notes
    -----
    The files are consumed in ``os.listdir`` order on the first pass and are
    reshuffled at the start of every later pass.
    NOTE(review): ``procesar_imagenes`` is not defined in the visible part of
    the notebook; it must exist in the kernel before the generator is consumed.
    """
    image_file_list = os.listdir(image_dir)
    if not image_file_list:
        raise ValueError('No image files found in {}'.format(image_dir))

    i = 0  # position of the next file to read within the current pass

    while True:

        images, other_feats, targets = [], [], []

        for _ in range(batch_size):

            # End of a pass over the data: start over in a new random order.
            if i == len(image_file_list):
                i = 0
                random.shuffle(image_file_list)

            image_name = os.path.basename(image_file_list[i])
            i += 1

            image_path = os.path.join(image_dir, image_name)
            csv_file_path = os.path.join(csv_dir, image_name.replace('.jpg', '.csv'))

            # cv2.imread signals failure by returning None instead of raising.
            raw_image = cv2.imread(image_path)
            if raw_image is None:
                raise OSError('Could not read image: {}'.format(image_path))

            image = cv2.resize(procesar_imagenes(raw_image), (128, 128))

            csv_file = pd.read_csv(csv_file_path)
            targets.append(csv_file['target'].values)
            other_feats.append(csv_file.drop('target', axis=1).values.reshape(-1, 1))
            images.append(image)

        # The tabular branch of the model expects exactly 10 features per sample.
        batch_feats = np.array(other_feats).reshape(batch_size, 10)
        batch_images = np.array(images)
        batch_y = np.array(targets).reshape(batch_size, 1)

        yield [batch_feats, batch_images], batch_y

In [11]:
# Number of samples in each batch produced by the training generator.
batch_size = 32

# Endless stream of ([tabular features, images], labels) training batches.
train_generator = mixed_data_generator(
    image_dir=train_dir,
    csv_dir=train_csv_dir,
    batch_size=batch_size,
)

<h4> Creo el modelo para datos estructurados </h4>

In [12]:
# Tabular branch: two Dense -> BatchNorm -> ReLU stages (32 then 16 units) on
# the 10 structured features, with L2 weight decay on both Dense kernels.
mlp_model = models.Sequential([
    layers.Dense(32,
                 input_shape=(10,),
                 kernel_initializer='glorot_normal',
                 kernel_regularizer=regularizers.l2(0.001)),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    layers.Dense(16,
                 kernel_initializer='glorot_normal',
                 kernel_regularizer=regularizers.l2(0.001)),
    layers.BatchNormalization(),
    layers.Activation('relu'),
])

mlp_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 32)                352       
_________________________________________________________________
batch_normalization_1 (Batch (None, 32)                128       
_________________________________________________________________
activation_1 (Activation)    (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 16)                528       
_________________________________________________________________
batch_normalization_2 (Batch (None, 16)                64        
_________________________________________________________________
activation_2 (Activation)    (None, 16)                0         
Total params: 1,072
Trainable params: 976
Non-trainable params: 96
_____________________________________________________

<h4> Creo el modelo convolucional </h4>

In [13]:
# VGG16 convolutional stack with ImageNet weights and no classifier head.
conv_base = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(128, 128, 3),  # same 128x128 size mixed_data_generator resizes to
)
conv_base.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 128, 128, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 128, 128, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 128, 128, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 64, 64, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 64, 64, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 64, 64, 128)       147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 32, 32, 128)       0     

In [14]:
# Image branch: VGG16 features -> Flatten -> three Dense/BatchNorm/ReLU stages.
conv_model = models.Sequential()
conv_model.add(conv_base)
conv_model.add(layers.Flatten())

# (units, kernel regularizer, dropout rate after the activation) per stage.
# The last stage has neither a regularizer nor dropout.
head_stages = [
    (512, regularizers.l2(0.001), 0.4),
    (256, regularizers.l2(0.001), 0.2),
    (128, None, None),
]

for units, kernel_reg, drop_rate in head_stages:
    conv_model.add(layers.Dense(units,
                                kernel_initializer='glorot_normal',
                                kernel_regularizer=kernel_reg))
    conv_model.add(layers.BatchNormalization())
    conv_model.add(layers.Activation('relu'))
    if drop_rate is not None:
        conv_model.add(layers.Dropout(drop_rate))

# Freeze the VGG16 weights so that only the layers added above are trained.
conv_base.trainable = False

conv_model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 4, 4, 512)         14714688  
_________________________________________________________________
flatten_1 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 512)               4194816   
_________________________________________________________________
batch_normalization_3 (Batch (None, 512)               2048      
_________________________________________________________________
activation_3 (Activation)    (None, 512)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 512)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 256)              

In [15]:
# Fuse the outputs of the tabular and image branches into one feature vector.
combinedInput = layers.Concatenate()([mlp_model.output, conv_model.output])

print(combinedInput.shape)

# Joint classifier: Dense -> BatchNorm -> ReLU stages with L2 weight decay.
# Each tuple is (units, dropout rate after the activation); the last stage
# has no dropout.
x = combinedInput
for units, drop_rate in ((256, 0.4), (128, 0.4), (64, None)):
    x = layers.Dense(units,
                     kernel_initializer='glorot_normal',
                     kernel_regularizer=regularizers.l2(0.001))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    if drop_rate is not None:
        x = layers.Dropout(drop_rate)(x)

# Single sigmoid unit: the binary prediction for `target`.
x = layers.Dense(1, kernel_initializer='glorot_normal', activation='sigmoid')(x)

(None, 144)


In [16]:
# Two-input model ending in the single sigmoid output `x`. The input order
# (tabular features first, images second) matches the list that
# mixed_data_generator yields.
model = models.Model(inputs=[mlp_model.input, conv_model.input], outputs=x)

In [17]:
# File that receives the weights of the best epoch seen so far.
filepath = "melanoma_v1.hdf5"

# All three callbacks track the training 'accuracy' metric; the fit call in
# this notebook passes no validation data.
# NOTE(review): with epochs=5 in the fit call, patience=5 and patience=10 are
# never reached, so only the checkpoint has an effect -- confirm intent.
checkpoint = ModelCheckpoint(filepath,
                             monitor='accuracy',
                             mode='max',
                             save_best_only=True,
                             verbose=1)

early_stop = EarlyStopping(monitor='accuracy',
                           mode='max',
                           patience=10,
                           verbose=1)

lr_reduce = ReduceLROnPlateau(monitor='accuracy',
                              factor=0.1,
                              patience=5,
                              cooldown=3,
                              min_delta=0.001,
                              verbose=1)

# RMSprop with a small learning rate; binary cross-entropy for the 0/1 target.
opt = optimizers.RMSprop(learning_rate=1e-5, rho=0.9)

model.compile(optimizer=opt,
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
%%time
# Train from the endless generator; an epoch is defined as 1033 batches.
# NOTE(review): 1033 is hard-coded -- presumably the number of training images
# divided by batch_size (32); confirm if the data set or batch size changes.
model.fit_generator(
    train_generator,
    steps_per_epoch=1033,
    epochs=5,
    class_weight=class_weights,
    callbacks=[lr_reduce, checkpoint, early_stop],
)