In [1]:
import tensorflow as tf
import numpy as np

import pandas as pd
import cv2
import os
import math
import scipy as sp
import PIL

# Tensorflow
from tensorflow.keras import models, layers, Model
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Dropout, Flatten, GlobalAveragePooling2D
from tensorflow.keras.layers import Flatten, Dense, Dropout, ZeroPadding2D

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from tensorflow.keras import optimizers
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications import EfficientNetB4, EfficientNetB6, ResNet50V2
#from keras_tuner.tuners import RandomSearch

import scikitplot as skplt
from sklearn.metrics import roc_auc_score
from matplotlib import pyplot as plt
import matplotlib.pyplot as plt

In [2]:
# Number of samples pushed through one forward & backward pass.
batch_size = 12

# Number of training epochs.
epochs = 50

# Parameters controlling how the weights are adjusted.
LearningRate = 0.001
Decay = 1e-6

# Image width and height in pixels (used for the model's input shape).
img_width = img_height = 224

# Data Information in DataFrame

In [3]:
# The directory path has to be adjusted to the local environment.
CurrentDirectory = "./"

# Read the training and test metadata tables from the working directory.
# NOTE(review): the previews further down show only dx == 'akiec' in the leading
# rows; if the CSV is sorted by class, a position-based validation split may be
# dominated by a single class — confirm, and shuffle the rows once if so.
TRAIN_DF, TEST_DF = (pd.read_csv(csv_name) for csv_name in ("TRAIN.csv", "TEST.csv"))

In [4]:
def _female_indicator(sex_column):
    """Encode a sex column as a float indicator: 1.0 for 'female', else 0.0.

    A column that is already numeric is assumed to be encoded and is returned
    as float unchanged. This makes the cell safe to re-run: the previous
    ``pd.get_dummies(...)['female']`` form raised ``KeyError`` on a second run
    (and whenever no 'female' value was present in the column).
    """
    if pd.api.types.is_numeric_dtype(sex_column):
        return sex_column.astype('float')
    return (sex_column == 'female').astype('float')


TRAIN_DF['sex'] = _female_indicator(TRAIN_DF['sex'])
TEST_DF['sex'] = _female_indicator(TEST_DF['sex'])

In [5]:
# Preview the first rows of the training metadata (the 'sex' column is now numeric).
TRAIN_DF.head()

Unnamed: 0,filename,lesion_id,image_id,dx,dx_type,age,sex,localization
0,akiec/ISIC_0024372.jpg,HAM_0005389,ISIC_0024372,akiec,histo,70,0.0,lower extremity
1,akiec/ISIC_0024418.jpg,HAM_0003380,ISIC_0024418,akiec,histo,75,1.0,lower extremity
2,akiec/ISIC_0024450.jpg,HAM_0005505,ISIC_0024450,akiec,histo,50,0.0,upper extremity
3,akiec/ISIC_0024463.jpg,HAM_0004568,ISIC_0024463,akiec,histo,50,0.0,upper extremity
4,akiec/ISIC_0024468.jpg,HAM_0006301,ISIC_0024468,akiec,histo,75,0.0,neck


In [6]:
# Preview the first rows of the test metadata (the 'sex' column is now numeric).
TEST_DF.head()

Unnamed: 0,filename,lesion_id,image_id,dx,dx_type,age,sex,localization
0,akiec/ISIC_0024329.jpg,HAM_0002954,ISIC_0024329,akiec,histo,75,1.0,lower extremity
1,akiec/ISIC_0024707.jpg,HAM_0005448,ISIC_0024707,akiec,histo,60,0.0,face
2,akiec/ISIC_0024710.jpg,HAM_0004609,ISIC_0024710,akiec,histo,75,0.0,back
3,akiec/ISIC_0025247.jpg,HAM_0005231,ISIC_0025247,akiec,histo,70,0.0,lower extremity
4,akiec/ISIC_0025368.jpg,HAM_0004472,ISIC_0025368,akiec,histo,40,1.0,face


# Custom Generator with flow from dataframe

In [7]:
class DataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence yielding ``([images, clinical_features], labels)`` batches.

    Wraps the iterator returned by ``Generator.flow_from_dataframe`` and pairs
    every image batch with the ``age`` and ``sex`` values of the same samples.

    Parameters
    ----------
    Generator
        Object exposing ``flow_from_dataframe`` (e.g. an ``ImageDataGenerator``).
    dataFrame
        Table holding ``x_col``, ``y_col`` and the ``age`` / ``sex`` columns.
    directory, x_col, y_col, target_size, batch_size, class_mode, subset
        Forwarded unchanged to ``flow_from_dataframe``.
    shuffle
        Accepted for interface compatibility but NOT forwarded: the wrapped
        iterator is always created with ``shuffle=False`` because clinical rows
        are looked up by batch position. TODO: add shuffle support.

    Notes
    -----
    The wrapped iterator may cover only part of ``dataFrame`` (for example when
    ``subset`` is given), so clinical rows are matched through the iterator's
    ``filenames`` instead of by position in the original frame. This assumes
    that the values in ``x_col`` identify rows uniquely; for a repeated value
    the first matching row is used.
    """

    # Clinical columns returned next to every image batch.
    CLINICAL_COLUMNS = ['age', 'sex']

    def __init__(self, Generator, dataFrame, directory, x_col, y_col, target_size, batch_size, shuffle, class_mode, subset=None):
        self.GeneratorObject = Generator.flow_from_dataframe(
                dataframe=dataFrame,
                directory = directory,
                x_col=x_col,
                y_col=y_col,
                target_size = target_size,
                batch_size = batch_size,
                shuffle = False,  # see class docstring: `shuffle` is intentionally ignored
                class_mode = class_mode,
                subset = subset)
        self.n = self.GeneratorObject.n
        self.dataframe = dataFrame
        self.batch_size = batch_size
        self.classes = self.GeneratorObject.classes
        self.filenames = self.GeneratorObject.filenames
        # Rows of `dataFrame` re-ordered to match the iterator's sample order.
        self._aligned = self._align_rows(dataFrame, x_col, self.filenames)

    @staticmethod
    def _align_rows(dataFrame, x_col, filenames):
        """Return the rows of ``dataFrame`` ordered like ``filenames``."""
        first_position = {}
        for position, name in enumerate(dataFrame[x_col].tolist()):
            first_position.setdefault(name, position)
        return dataFrame.iloc[[first_position[name] for name in filenames]]

    def _clinical_rows(self, start, count):
        """Return the clinical columns for ``count`` samples beginning at ``start``."""
        return self._aligned.iloc[start : start + count][self.CLINICAL_COLUMNS]

    def __len__(self):
        """Number of batches per epoch, as reported by the wrapped iterator."""
        return len(self.GeneratorObject)

    def getBatchIndex(self):
        """Return the wrapped iterator's current ``batch_index``."""
        return self.GeneratorObject.batch_index

    def __getitem__(self, index):
        """Return batch ``index`` as ``([images, clinical ndarray], labels)``."""
        Images, Labels = self.GeneratorObject[index]
        # Use the instance's batch size (not a module-level global) for the offset;
        # the last batch may be shorter, hence the length is taken from the images.
        clinical = self._clinical_rows(index * self.batch_size, Images.shape[0])
        return [Images, np.array(clinical)], Labels  # images and clinical variables together

    def next(self):
        """Return the next batch as ``([images, clinical DataFrame], labels)``.

        Unlike ``__getitem__`` the clinical part is returned as a DataFrame,
        which is what this method has always returned.
        """
        # Read the offset BEFORE advancing the wrapped iterator.
        start = self.GeneratorObject.batch_index * self.batch_size
        Images, Labels = self.GeneratorObject.next()
        # The last batch only holds the remaining samples.
        return [Images, self._clinical_rows(start, Images.shape[0])], Labels

    def on_epoch_end(self):
        """Forward the end-of-epoch hook to the wrapped iterator."""
        # Actually call the hook; previously the bound method was returned uncalled.
        self.GeneratorObject.on_epoch_end()

In [8]:
# Generators with online augmentation, used because
# 1. the images cannot all be loaded into RAM at once, and
# 2. augmentation is beneficial for image data.
#
# No `rescale=1./255` here: the Keras EfficientNet applications contain their own
# rescaling layer and expect pixel values in the [0, 255] range, so rescaling in
# the generator would scale the inputs twice. If the backbone is replaced by a
# network without built-in preprocessing, add the matching preprocessing here.

DATAGEN_TRAIN = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    data_format="channels_last",
    validation_split=0.10) # Train / Validation

DATAGEN_TEST = ImageDataGenerator(
    data_format="channels_last",)

In [13]:
# Training batches. Image size and batch size come from the configuration cell
# instead of repeated literals, so they cannot drift apart from the values the
# rest of the notebook uses.
TRAIN_GENERATOR = DataGenerator(DATAGEN_TRAIN,
    dataFrame=TRAIN_DF,
    directory='./SkinCancer/TRAIN',
    target_size = (img_height, img_width),  # Keras expects (height, width)
    batch_size = batch_size,
    x_col="filename",
    y_col="dx",
    class_mode='categorical',
    shuffle = True,
    subset='training')

Found 719 validated image filenames belonging to 3 classes.


In [14]:
# Validation images must not be randomly augmented, otherwise the validation
# metrics are noisy and not comparable between epochs. Build a generator that
# shares the training generator's rescaling and data format but applies no
# random transformations.
DATAGEN_VALID = ImageDataGenerator(
    rescale=DATAGEN_TRAIN.rescale,
    data_format=DATAGEN_TRAIN.data_format,
    validation_split=0.10)  # must equal the validation_split of DATAGEN_TRAIN

VALID_GENERATOR = DataGenerator(DATAGEN_VALID,
    dataFrame=TRAIN_DF,
    directory='./SkinCancer/TRAIN',
    target_size = (img_height, img_width),  # Keras expects (height, width)
    batch_size = batch_size,
    x_col="filename",
    y_col="dx",
    class_mode='categorical',
    shuffle = True,
    subset='validation')

Found 79 validated image filenames belonging to 3 classes.


In [15]:
# Test batches: never shuffled, so predictions stay in the same order as
# TEST_GENERATOR.classes / TEST_GENERATOR.filenames. Sizes come from the
# configuration cell.
TEST_GENERATOR = DataGenerator(DATAGEN_TEST,
    dataFrame=TEST_DF,
    directory='./SkinCancer/TEST',
    target_size = (img_height, img_width),  # Keras expects (height, width)
    batch_size = batch_size,
    x_col="filename",
    y_col="dx",
    class_mode='categorical',
    shuffle = False,)

Found 106 validated image filenames belonging to 3 classes.


In [16]:
# Pull the first training batch to inspect its structure: ([images, clinical], labels).
res = TRAIN_GENERATOR[0]

In [17]:
# Shape of the image part of the batch.
res[0][0].shape

(12, 224, 224, 3)

In [18]:
# Shape of the clinical part of the batch (the 'age' and 'sex' columns).
print(res[0][1].shape)

(12, 2)


In [19]:
# Shape of the label part of the batch.
res[1].shape

(12, 3)

In [20]:
# Image branch: ImageNet-pretrained EfficientNetB0 without its classification
# head, reduced to one feature vector per image by global average pooling.
BaseModel = tf.keras.applications.EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=(img_height, img_width, 3),  # Keras expects (height, width, channels)
    pooling=None)
CNN = GlobalAveragePooling2D()(BaseModel.output)

# Clinical branch: the two values per sample supplied next to each image
# ('age' and 'sex'), passed through a small dense layer.
# NOTE(review): 'age' appears to be fed unscaled — consider normalising it; confirm.
InputB = tf.keras.layers.Input(shape=(2,), name='inputB')
OutputB = Dense(2, activation='relu')(InputB)

# Concatenate both branches and classify into the 3 classes with softmax.
Merged = tf.keras.layers.Concatenate(axis=1)([CNN, OutputB])
Output = Dense(3, activation='softmax')(Merged)

# Two-input model; the input order is [image, clinical features].
DeepLearning = tf.keras.Model(inputs=[BaseModel.input, InputB], outputs=Output)

In [21]:
# SGD with Nesterov momentum. `learning_rate` replaces the deprecated `lr`
# argument, which is what triggered the optimizer warning shown for this cell.
# Author note (translated from Korean): "age, MSE" — presumably an idea to
# model age with an MSE loss; confirm before acting on it.
DeepLearning.compile(
    optimizer=SGD(learning_rate=LearningRate, decay=Decay, momentum=0.9, nesterov=True),
    loss='categorical_crossentropy',
    metrics=['acc'],
)

  super(SGD, self).__init__(name, **kwargs)


In [None]:
# Draw the model graph, including the tensor shapes of every layer.
tf.keras.utils.plot_model(DeepLearning, show_shapes=True)

In [None]:
# Print the layer-by-layer summary of the combined model.
DeepLearning.summary()

In [20]:
# Train for a single epoch, validating on the held-out part of the training data.
# NOTE(review): the `epochs` constant from the configuration cell is not used here.
DeepLearning.fit(
    TRAIN_GENERATOR,
    validation_data=VALID_GENERATOR,
    epochs=1,
)



<keras.callbacks.History at 0x1602564d660>