# InceptionV3 Cancer Classification

In [None]:
# Import some packages to use
import cv2 # OpenCV; 이미지 처리
import numpy as np # Mathematical lib.; 다차원 배열과 매트릭스의 빠른 계산 지원
import pandas as pd # Data manipulation and analysis; 특히 숫자표와 시계열 데이터 조작

import matplotlib.pyplot as plt # Plotting & Image-displaying
%matplotlib inline 
# Notebook에 plot을 표시

# To see our directory
import os # computer/file 시스템에 접근
import random # data set split&shuffle
import gc # 불필요한 변수 및 메모리 관리

#### Image set preparation

In [None]:
# Print the notebook kernel's current working directory.
print(os.getcwd())

In [None]:
# Collect the negative (normal-tissue) images.
negative_path = '/home/ssj0921/SSJ/Data/20190731_Classification/TCIA/TCGA_Normal_256/'

# Walk the tree recursively and keep every file whose extension is exactly
# '.jpg' (the comparison is case-sensitive). The unused directory list is
# bound to `_subdirs` so the builtin `dir` is not shadowed for the rest of
# the notebook, and os.path.join avoids the doubled '/' that manual
# formatting produced because `negative_path` already ends with a separator.
negative_list = [
    os.path.join(root, filename)
    for root, _subdirs, files in os.walk(negative_path)
    for filename in files
    if os.path.splitext(filename)[-1] == '.jpg'
]

# Build the data frame; negative samples are labelled 0.
negative_df = pd.DataFrame(data={'filename': negative_list,
                                 'label': [0] * len(negative_list)})

len(negative_list), len(negative_df)

In [None]:
# Collect the positive (cancer) images.
positive_path = '/home/ssj0921/SSJ/Data/20190731_Classification/TCIA/TCGA_Cancer_256/'

# Walk the tree recursively and keep every file whose extension is exactly
# '.jpg' (the comparison is case-sensitive). The unused directory list is
# bound to `_subdirs` so the builtin `dir` is not shadowed for the rest of
# the notebook, and os.path.join avoids the doubled '/' that manual
# formatting produced because `positive_path` already ends with a separator.
positive_list = [
    os.path.join(root, filename)
    for root, _subdirs, files in os.walk(positive_path)
    for filename in files
    if os.path.splitext(filename)[-1] == '.jpg'
]

# Build the data frame; positive samples are labelled 1.
positive_df = pd.DataFrame(data={'filename': positive_list,
                                 'label': [1] * len(positive_list)})

len(positive_list), len(positive_df)

In [None]:
# Merge the normal (negative) and cancer (positive) image lists.
train_img = negative_list + positive_list

# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat stacks the two frames the same way (each frame keeps its own
# row index, which is discarded by reset_index below).
roi_img_df = pd.concat([negative_df, positive_df])

# Shuffle all rows and renumber the index from 0.
sh_df = roi_img_df.sample(frac=1).reset_index(drop=True)

len(train_img), len(roi_img_df), len(sh_df)

In [None]:
#sh_df = sh_df[0:100]

In [None]:
# Number of rows in the shuffled data frame.
len(sh_df)

In [None]:
# Display the shuffled data frame ('filename' and 'label' columns).
sh_df

In [None]:
# Matplotlib을 이용하여 Image 읽기
from matplotlib.image import imread

# Load every image, following the row order of the shuffled data frame.
im_list = [imread(image_path) for image_path in sh_df['filename']]

In [None]:
import matplotlib.pyplot as plt

def showImage(img):
    """Draw `img` with plt.imshow and display the figure with plt.show."""
    plt.imshow(img)    
    plt.show()

# Draw the first loaded image
showImage(im_list[0])

In [None]:
# Separate color-channel view
def showImages(input_img):
    """Show the first three channels of `input_img` on a 2x2 subplot grid.

    Channels 0, 1 and 2 (treated as R, G and B) are drawn on the top-left,
    top-right and bottom-left axes respectively; the bottom-right axes is
    left empty. Returns None.
    """
    _figure, axes_grid = plt.subplots(2, 2)

    # `flat` walks the grid row by row, so zip pairs the first three axes
    # with channel indices 0..2 and never touches the fourth axes.
    for axes, channel in zip(axes_grid.flat, range(3)):
        axes.imshow(input_img[..., channel])


showImages(im_list[0])

In [None]:
import numpy as np

# Convert the image list and the label column to NumPy arrays. Both follow
# the row order of sh_df, so x[i] and y[i] describe the same file.
# NOTE(review): assumes every image has the same shape — TODO confirm
x = np.array(im_list)
y = np.array(sh_df['label'])

#### Image visualization

In [None]:
# Visualization
def visualize_data(cancer_images, normal_images, N_TO_VISUALIZE):
    """Plot positive images on the top row and negative images below them.

    Parameters
    ----------
    cancer_images : array indexed by image along axis 0
        Images whose label is 1 (positive).
    normal_images : array indexed by image along axis 0
        Images whose label is 0 (negative).
    N_TO_VISUALIZE : int
        Maximum number of images drawn per row.

    Both rows share one 2 x n_cols grid. Previously the negative images
    were placed on a separate 1-row grid at the same index, so they were
    drawn on top of the positive ones. The two inputs may hold different
    numbers of images; each row simply draws what it has. Nothing is drawn
    when there are no images at all.
    """
    n_positive = min(N_TO_VISUALIZE, cancer_images.shape[0])
    n_negative = min(N_TO_VISUALIZE, normal_images.shape[0])
    n_cols = max(n_positive, n_negative)
    if n_cols <= 0:
        # add_subplot cannot build a grid with zero columns
        return

    figure = plt.figure()

    # (row title, images, number to draw, extra plt.title keyword arguments)
    rows = (
        ("Positive", cancer_images, n_positive, {}),
        # y=-0.3 puts the caption underneath the negative images
        ("Negative", normal_images, n_negative, {"y": -0.3}),
    )

    for row, (title, images, n_images, title_kwargs) in enumerate(rows):
        for col in range(n_images):
            # Subplot indices are 1-based and run row by row
            figure.add_subplot(2, n_cols, row * n_cols + col + 1)
            plt.imshow(images[col])
            plt.axis('off')
            plt.title(title, **title_kwargs)

    plt.show()

In [None]:
# How many positive and negative examples to display
N_TO_VISUALIZE = 5

# Boolean masks over the label vector (1 = positive, 0 = negative)
positive_example_indices = (y == 1)
negative_example_indices = (y == 0)

# Keep the first N matching images of each class
positive_examples = x[positive_example_indices, :, :][:N_TO_VISUALIZE]
negative_examples = x[negative_example_indices, :, :][:N_TO_VISUALIZE]

# Draw both groups
visualize_data(positive_examples, negative_examples, N_TO_VISUALIZE)

### [3] Image resize and data set split

In [None]:
from PIL import Image

# Resize every image to 299x299 via PIL and stack the results into one array
TARGET_SIZE = (299, 299)

train = np.array([
    np.array(Image.fromarray(image).resize(TARGET_SIZE))
    for image in x
])

In [None]:
from sklearn.model_selection import train_test_split

# Split images and labels into train (80%) and test (20%) subsets.
# NOTE(review): no random_state or stratify is passed, so the split differs
# between runs and the class ratio of each subset is not controlled.
x_train, x_test, y_train, y_test = train_test_split(train, y, test_size=0.2)

In [None]:
# Normalize pixel values (rescale [0,255] --> [0,1]) as float32
PIXEL_MAX = 255.0

x_train = x_train.astype(np.float32) / PIXEL_MAX
x_test = x_test.astype(np.float32) / PIXEL_MAX

In [None]:
# Report how many samples and labels ended up in each split
for caption, split in (
    ('training dataset =', x_train),
    ('training label =', y_train),
    ('test dataset =', x_test),
    ('test label =', y_test),
):
    print(caption, len(split))

## Call an Inception pretrained model

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals
#!pip install -q tensorflow-gpu==2.0.0-rc1
import tensorflow as tf

In [None]:
# Switch the working directory; the relative path given to model.save
# further down is resolved against it.
os.chdir('/home/ssj0921/SSJ/Model/')

In [None]:
# List the contents of the current working directory.
os.listdir()

In [None]:
from keras.applications import InceptionV3
import keras
from keras import layers
from keras import models
from keras import regularizers
from keras import optimizers
from livelossplot.keras import PlotLossesCallback
from keras.models import load_model

# NOTE(review): the model is built with the standalone `keras` package while
# the distribution strategy comes from `tf`; confirm the installed versions
# support this combination.
mirrored_strategy = tf.distribute.MirroredStrategy()

with mirrored_strategy.scope():

    # InceptionV3 convolutional base with ImageNet weights and without its
    # classification head; the input size matches the 299x299 resize above.
    conv_base = InceptionV3(weights='imagenet',
                            include_top=False,
                            input_shape=(299, 299, 3))
    # The base is left trainable, so its weights are updated during fit.
    conv_base.trainable = True

    # Binary head: flatten -> L2-regularized Dense(256, relu) -> one sigmoid
    # unit. (A stray trailing comma after the Dense(256) line, which only
    # built a throwaway tuple, has been removed.)
    model = models.Sequential()
    model.add(conv_base)
    model.add(layers.Flatten())
    model.add(layers.Dense(256, activation='relu',
                           kernel_regularizer=regularizers.l2(0.01)))
    model.add(layers.Dense(1, activation='sigmoid'))

    # metrics=['acc'] determines the history keys ('acc', 'val_acc') that
    # the plotting cell reads.
    # NOTE(review): newer Keras releases name the `lr` argument
    # `learning_rate` — verify against the installed version.
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizers.RMSprop(lr=2e-5), metrics=['acc'])

    # Live loss/metric plot, updated by the callback during training.
    plot_losses_call_back = PlotLossesCallback()

    # The last 20% of the training data is held out for validation.
    hist = model.fit(x_train, y_train, epochs=2, batch_size=10,
                     validation_split=0.2, verbose=0,
                     callbacks=[plot_losses_call_back])

    # Saved relative to the current working directory.
    model.save('Model_InceptionV3_20191226.h5')

In [None]:
# Plot training/validation loss and accuracy per epoch on a shared x axis.
fig, loss_ax = plt.subplots()

# Second y axis so accuracy keeps its own scale next to the loss.
acc_ax = loss_ax.twinx()

plt.title('Training Process', fontsize=15)

loss_ax.plot(hist.history['loss'], 'y', label='train loss')
loss_ax.plot(hist.history['val_loss'], 'r', label='val loss')

# The 'acc' / 'val_acc' keys come from metrics=['acc'] in model.compile.
acc_ax.plot(hist.history['acc'], 'b', label='train acc')
acc_ax.plot(hist.history['val_acc'], 'g', label='val acc')

loss_ax.set_xlabel('epoch', fontsize=11)
loss_ax.set_ylabel('loss', fontsize=11)
acc_ax.set_ylabel('accuracy', fontsize=11)  # label previously read 'accuray'

loss_ax.legend(loc='upper left')
acc_ax.legend(loc='lower left')

# `fig` from plt.subplots() above is already the current figure, so the
# former `fig = plt.gcf()` rebinding was redundant.
plt.show()
#fig.savefig('Class_InceptionV3_20190803.png')

In [None]:
# Check the Normal/Cancer ratio of the test data set.
# Labels follow the data frames built at the top of the notebook:
# 1 = positive (cancer), 0 = negative (normal). The counters were previously
# swapped, reporting label 1 as Normal.
Cancer = int(np.count_nonzero(y_test == 1))
# Everything that is not labelled 1 counts as Normal.
Normal = len(y_test) - Cancer

print('Normal:', Normal, ', Cancer:', Cancer)

In [None]:
# Evaluate on the test data; `score` holds the loss followed by the accuracy
score = model.evaluate(x_test, y_test, verbose=1)
for caption, value in zip(('loss=', 'accuracy='), score):
    print(caption, value)

In [None]:
from sklearn.metrics import roc_auc_score, auc, roc_curve

# Containers for ROC data (false-positive rate, true-positive rate, AUC).
fpr = dict()
tpr = dict()
roc_auc = dict()

# Predicted probability from the single sigmoid output unit. `predict` is
# used because `Sequential.predict_proba` has been removed from recent
# Keras/TensorFlow releases; for this model both return the same values.
y_score = model.predict(x_test)
# NOTE(review): despite its name this holds the number of test samples
# (y_test.shape[0]), not the number of classes; the value is kept unchanged
# in case later cells rely on it.
n_classes = y_test.shape[0]

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

# Line width shared by the ROC curve and the chance-level baseline.
lw = 2
class_F = y_test    # ground-truth labels (1 = positive)
proba_F = y_score   # predicted probability of the positive class

# ROC points over all score thresholds, and the area under that curve.
false_positive_rate_F, true_positive_rate_F, thresholds_F = roc_curve(class_F, proba_F)
roc_auc_F = auc(false_positive_rate_F, true_positive_rate_F)

# Title previously misspelled the data set name as 'TICA'.
plt.title('Cancer Classification AUROC\n(trained by TCIA)\n(Internal Validation with TCIA)', fontsize=15)
plt.xlabel('False Positive Rate(1 - Specificity)', fontsize=13)
plt.ylabel('True Positive Rate(Sensitivity)', fontsize=13)

plt.plot(false_positive_rate_F, true_positive_rate_F, 'darkorange', lw=lw,
         label='ROC curve (AUC = %0.5f)' % roc_auc_F)

# Diagonal = performance of a random classifier.
plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--', label='Random guess')

plt.legend(loc='lower right')
plt.show()
#fig.savefig('Cancer_AUROC_InceptionV3_E50_TCGA_20191118.png')