# Histopathologic Cancer Detection

In [22]:
# import package
import numpy as np
import pandas as pd
from glob import glob
import os
import tensorflow
import keras
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras.layers import Input, Dense, Conv2D, MaxPooling2D,Flatten, BatchNormalization,Dropout,Reshape,Activation, GlobalMaxPooling2D
from keras.layers import ZeroPadding2D, AveragePooling2D, GlobalAveragePooling2D, Add
from keras.models import Model
from skimage.io import imread 
from keras.preprocessing.image import ImageDataGenerator

In [19]:
# Hyperparameters shared by the cells below.
SAMPLE_COUNT = 20      # rows drawn per class label when sub-sampling the data
TRAINING_RATIO = 0.7   # share of the sampled rows meant for the training split
IMAGE_SIZE = 96        # edge length in pixels, used as the generators' target_size
EPOCHS = 10            # presumably the number of training epochs; not consumed in the visible cells
BATCH_SIZE = 5         # images per batch produced by the directory generators


In [3]:
# Load data: list every training image and derive its id from the file name.
input_dir = 'data/'
training_dir = os.path.join(input_dir, 'train/')


def image_id_from_path(path):
    """Return the image id encoded in *path*.

    The id is the file name up to its first dot, e.g.
    ``data/train/abc.tif`` -> ``abc``.  ``os.path.basename`` is used instead
    of splitting on hard-coded separators, so the result no longer depends
    on whether glob returned ``/`` or ``\\`` separated paths.
    """
    return os.path.basename(path).split('.')[0]


data_frame = pd.DataFrame({'path': glob(os.path.join(training_dir, '*.tif'))})
# data_frame['path'] is a Series; map() applies the helper element-wise.
data_frame['id'] = data_frame['path'].map(image_id_from_path)


In [4]:
# Attach the ground-truth label to every image row by joining on 'id'.
labels_csv = os.path.join(input_dir, 'train_labels.csv')
labels = pd.read_csv(labels_csv)
data_frame = pd.merge(data_frame, labels, on='id')


In [5]:
# Draw SAMPLE_COUNT random rows from each class so the subset is balanced.
is_negative = data_frame['label'] == 0
is_positive = data_frame['label'] == 1
negatives = data_frame[is_negative].sample(n=SAMPLE_COUNT)
positives = data_frame[is_positive].sample(n=SAMPLE_COUNT)

In [6]:
# Stack both class samples and renumber the rows 0..n-1; drop=True discards
# the old index instead of keeping it as an extra column.
combined = pd.concat([negatives, positives])
data_frame = combined.reset_index(drop=True)
data_frame = data_frame[['path', 'id', 'label']]

In [7]:
# Read every sampled file with skimage's imread and keep the result per row.
pixel_arrays = data_frame['path'].map(imread)
data_frame['image'] = pixel_arrays

In [8]:
# Split the sampled rows into training/validation sets and copy the image
# files into a <split>/<label>/ folder layout for flow_from_directory.
from sklearn.model_selection import train_test_split
import shutil
training_path = 'data/training/'
validation_path = 'data/validation/'

# One sub-folder per class label inside each split folder.
for folder in (training_path, validation_path):
    for subfolder in ('0', '1'):
        os.makedirs(os.path.join(folder, subfolder), exist_ok=True)

# Use the TRAINING_RATIO hyperparameter rather than a second hard-coded 0.7;
# stratifying on the label keeps the class balance in both splits.
training, validation = train_test_split(data_frame,
                                        train_size=TRAINING_RATIO,
                                        stratify=data_frame['label'])

# Later cells display data_frame indexed by 'id'.
data_frame.set_index('id', inplace=True)

# NOTE(review): files copied by earlier runs are never removed, so after
# re-sampling an image can sit in both the training and validation folders
# (the generators below report more files than 2 * SAMPLE_COUNT). Consider
# clearing both folders before copying.
for images, path in ((training, training_path), (validation, validation_path)):
    # Each split row already carries its label, so no lookup by id is needed.
    for image, label in zip(images['id'], images['label']):
        file_name = image + '.tif'
        destination = os.path.join(path, str(label), file_name)
        if not os.path.exists(destination):
            source = os.path.join(training_dir, file_name)
            shutil.copyfile(source, destination)

In [24]:
# Preview the first rows; 'id' is the index here because of the earlier set_index call.
data_frame.head()

Unnamed: 0_level_0,path,label,image
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
31d44a4b746c879c35da7bfa1e2ba05f88794976,data/train\31d44a4b746c879c35da7bfa1e2ba05f887...,0,"[[[252, 252, 252], [252, 252, 252], [252, 252,..."
57807e856a84c17ac886032ebac22f2499210039,data/train\57807e856a84c17ac886032ebac22f24992...,0,"[[[240, 235, 239], [239, 234, 238], [239, 234,..."
5cdb808784c230a31ac3a84006b5d34e36728363,data/train\5cdb808784c230a31ac3a84006b5d34e367...,0,"[[[164, 119, 148], [163, 122, 154], [36, 0, 38..."
90a56681dfa6d7b689856dc40ce9dd76e67bdbd7,data/train\90a56681dfa6d7b689856dc40ce9dd76e67...,0,"[[[233, 238, 232], [233, 233, 233], [241, 230,..."
06528e66d433eadb384204100c0c79a1dbbb8e20,data/train\06528e66d433eadb384204100c0c79a1dbb...,0,"[[[246, 245, 243], [246, 245, 243], [245, 244,..."


In [15]:
# Data augmentation: random transforms applied to training images only.
# NOTE(review): channel_shift_range=0.3 is a very small shift if it is applied
# on the 0-255 scale before rescale -- confirm against the Keras docs.
augmentation_options = dict(
    rescale=1./255,            # multiply pixel values by 1/255
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=180,
    zoom_range=0.4,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.3,
    channel_shift_range=0.3,
)
training_data_generator = ImageDataGenerator(**augmentation_options)

In [20]:
# Options shared by all three directory iterators.
flow_options = dict(target_size=(IMAGE_SIZE, IMAGE_SIZE),
                    batch_size=BATCH_SIZE,
                    class_mode='binary')

# Training batches come from the augmenting generator.
training_generator = training_data_generator.flow_from_directory(training_path, **flow_options)

# Validation batches are only rescaled, not augmented.
validation_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(validation_path, **flow_options)

# The "testing" iterator reads the same validation folder, with shuffle=False.
testing_generator = ImageDataGenerator(rescale=1./255).flow_from_directory(validation_path,
                                                                           shuffle=False,
                                                                           **flow_options)

Found 84 images belonging to 2 classes.
Found 36 images belonging to 2 classes.
Found 36 images belonging to 2 classes.


In [None]:
# Building blocks of the neural network architecture (ResNet-50)
def Conv2d_BN(Input, n_filter, kernel_size, strides=(1,1), padding='same', *, stage, block, n_serial):
    """Apply a ReLU-activated Conv2D layer followed by BatchNormalization.

    ``stage``, ``block`` and ``n_serial`` are keyword-only: in the original
    signature they followed defaulted parameters without defaults of their
    own, which is a SyntaxError.

    Args:
        Input: tensor fed to the convolution. Inside this function the name
            shadows the ``Input`` layer imported from keras.
        n_filter: number of convolution filters.
        kernel_size: convolution kernel size.
        strides: convolution strides.
        padding: convolution padding mode.
        stage: integer, current stage label, used for generating layer names.
        block: 'a', 'b', ..., current block label, used for generating layer names.
        n_serial: string suffix that makes the layer names unique in a block.

    Returns:
        The output tensor of the BatchNormalization layer.
    """
    conv_name_base = 'res' + str(stage) + block + '_branch'
    bn_name_base = 'bn' + str(stage) + block + '_branch'
    # NOTE(review): ReLU is applied inside the convolution, i.e. before batch
    # normalization; the reference ResNet applies it afterwards -- confirm intent.
    x = Conv2D(n_filter, kernel_size, padding=padding, strides=strides,
               activation='relu', name=conv_name_base + n_serial)(Input)
    x = BatchNormalization(name=bn_name_base + n_serial)(x)
    return x

def identity_Block(Input, n_filters, kernel_size=3, strides=(1,1), *, stage, block, with_conv_shortcut=False):
    """Build one bottleneck residual block (1x1 -> kxk -> 1x1 plus shortcut).

    ``stage`` and ``block`` are keyword-only: in the original signature they
    followed a defaulted parameter without defaults of their own, which is a
    SyntaxError.

    Args:
        Input: tensor entering the block. Inside this function the name
            shadows the ``Input`` layer imported from keras.
        n_filters: three filter counts, one per convolution on the main path.
        kernel_size: kernel size of the middle convolution, default 3.
        strides: strides of the first main-path convolution and of the
            convolutional shortcut, so both branches keep the same shape.
        stage: integer stage label, used for generating layer names.
        block: block label ('a', 'b', ...), used for generating layer names.
        with_conv_shortcut: if True the shortcut is a 1x1 Conv2d_BN projection
            of ``Input``; otherwise ``Input`` is added unchanged, so its shape
            must already match the main path.

    Returns:
        The ReLU-activated sum of the main path and the shortcut.
    """
    filter1, filter2, filter3 = n_filters
    # Each layer in the block needs a distinct name suffix: 2a, 2b, 2c.
    x = Conv2d_BN(Input, n_filter=filter1, kernel_size=1, strides=strides,
                  stage=stage, block=block, n_serial='2a')
    x = Conv2d_BN(x, n_filter=filter2, kernel_size=kernel_size,
                  stage=stage, block=block, n_serial='2b')
    x = Conv2d_BN(x, n_filter=filter3, kernel_size=1,
                  stage=stage, block=block, n_serial='2c')

    if with_conv_shortcut:
        # Project the input to filter3 channels so it can be added to x.
        shortcut = Conv2d_BN(Input, n_filter=filter3, kernel_size=1, strides=strides,
                             stage=stage, block=block, n_serial='1')
    else:
        shortcut = Input

    x = Add()([x, shortcut])
    return Activation('relu')(x)
    
    
    

In [None]:
#model
def model_cnn():
    """Build the CNN model (not implemented yet).

    The original definition had no body, which is a syntax error. Until the
    architecture is written, fail loudly instead of returning None.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError("model_cnn() has not been implemented yet")

    

In [13]:
import matplotlib.pyplot as plt