In [1]:
# Use original+Aug images 
# Dermoscopic - Malignant
# Dermoscopic - Benign

# Training - 80%
# Validation - 20% of the training 80%
# Testing - 20%

# References:
#     Very Deep Convolutional Networks for Large-Scale Image Recognition.
#     K. Simonyan, A. Zisserman. arXiv technical report, 2014.

In [2]:
import cv2
import numpy as np
import os
from random import shuffle
from tqdm import tqdm


In [3]:
# Dataset folders read by the create_*/process_* helpers (blank in this copy).
TRAIN_DIR, VALIDATION_DIR, TEST_DIR = '', '', ''

# Images are resized to IMG_SIZE x IMG_SIZE pixels before entering the network.
IMG_SIZE = 50
# Learning rate handed to the optimizer.
LR = 1e-6
# Run tag: used as the TensorBoard directory and embedded in MODEL_NAME.
FILE_NAME = 'MelanomaCNN_AIrootNet'

# Checkpoint name; encodes the learning rate and the run tag.
MODEL_NAME = 'MelanomaBenVsMal-{}-{}.model'.format(LR, FILE_NAME)

In [4]:
def label_img(img):
    """Return the one-hot class label encoded in an image file name.

    The class tag is the third dot-separated field counted from the end of
    the name, e.g. ``Ben.17.jpg`` -> ``Ben``.

    Args:
        img: File name of the image (a string).

    Returns:
        ``[1, 0]`` when the tag is ``Ben`` and ``[0, 1]`` when it is ``Mal``.

    Raises:
        ValueError: If the name has fewer than three dot-separated fields or
            the tag is neither ``Ben`` nor ``Mal``. Failing loudly keeps
            unlabeled samples out of the dataset instead of returning None.
    """
    fields = img.split('.')
    if len(fields) < 3:
        raise ValueError(
            "Cannot extract a class tag from file name {!r}: expected at "
            "least three dot-separated fields".format(img))

    word_label = fields[-3]
    if word_label == 'Ben':
        return [1, 0]
    if word_label == 'Mal':
        return [0, 1]
    raise ValueError(
        "Unknown class tag {!r} in file name {!r}: expected 'Ben' or "
        "'Mal'".format(word_label, img))
    
    

In [5]:
def create_train_data():
    """Build, shuffle and cache the training set from ``TRAIN_DIR``.

    Every file listed in ``TRAIN_DIR`` is read as a grayscale image, resized
    to ``IMG_SIZE`` x ``IMG_SIZE`` and paired with the label returned by
    ``label_img``. The shuffled samples are written to
    ``Aug50_train_data.npy``.

    Returns:
        list: ``[image_array, label_array]`` pairs in shuffled order.

    Raises:
        ValueError: If ``label_img`` yields no label for a file name.
        IOError: If OpenCV cannot decode a file as an image.
    """
    training_data = []
    for file_name in tqdm(os.listdir(TRAIN_DIR)):
        label = label_img(file_name)
        if label is None:
            # Without this check np.array(None) would be stored as the label.
            raise ValueError('No class label for file: {}'.format(file_name))

        path = os.path.join(TRAIN_DIR, file_name)
        pixels = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if pixels is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; cv2.resize would then fail with an opaque error.
            raise IOError('Could not read image: {}'.format(path))

        pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        training_data.append([np.array(pixels), np.array(label)])

    shuffle(training_data)

    # Store the pairs in an explicit (N, 2) object array: handing the ragged
    # nested list straight to np.save raises ValueError on NumPy >= 1.24.
    packed = np.empty((len(training_data), 2), dtype=object)
    for row, (pixels, label) in enumerate(training_data):
        packed[row, 0] = pixels
        packed[row, 1] = label
    np.save('Aug50_train_data.npy', packed)
    return training_data

In [6]:
def create_validation_data():
    """Build, shuffle and cache the validation set from ``VALIDATION_DIR``.

    Every file listed in ``VALIDATION_DIR`` is read as a grayscale image,
    resized to ``IMG_SIZE`` x ``IMG_SIZE`` and paired with the label returned
    by ``label_img``. The shuffled samples are written to
    ``Aug50_validation_data.npy``.

    Returns:
        list: ``[image_array, label_array]`` pairs in shuffled order.

    Raises:
        ValueError: If ``label_img`` yields no label for a file name.
        IOError: If OpenCV cannot decode a file as an image.
    """
    validation_data = []
    for file_name in tqdm(os.listdir(VALIDATION_DIR)):
        label = label_img(file_name)
        if label is None:
            # Without this check np.array(None) would be stored as the label.
            raise ValueError('No class label for file: {}'.format(file_name))

        path = os.path.join(VALIDATION_DIR, file_name)
        pixels = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if pixels is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; cv2.resize would then fail with an opaque error.
            raise IOError('Could not read image: {}'.format(path))

        pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        validation_data.append([np.array(pixels), np.array(label)])

    shuffle(validation_data)

    # Store the pairs in an explicit (N, 2) object array: handing the ragged
    # nested list straight to np.save raises ValueError on NumPy >= 1.24.
    packed = np.empty((len(validation_data), 2), dtype=object)
    for row, (pixels, label) in enumerate(validation_data):
        packed[row, 0] = pixels
        packed[row, 1] = label
    np.save('Aug50_validation_data.npy', packed)
    return validation_data

In [7]:
def process_test_data():
    """Build and cache the unlabeled test set from ``TEST_DIR``.

    Every file listed in ``TEST_DIR`` is read as a grayscale image, resized
    to ``IMG_SIZE`` x ``IMG_SIZE`` and paired with the part of its file name
    before the first dot. The samples are written, in directory-listing
    order, to ``BenOritest_data.npy``.

    Returns:
        list: ``[image_array, name_prefix]`` pairs.

    Raises:
        IOError: If OpenCV cannot decode a file as an image.
    """
    testing_data = []
    for file_name in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR, file_name)
        img_num = file_name.split('.')[0]

        pixels = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if pixels is None:
            # cv2.imread returns None instead of raising when a file cannot
            # be decoded; cv2.resize would then fail with an opaque error.
            raise IOError('Could not read image: {}'.format(path))

        pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(pixels), img_num])

    # Store the pairs in an explicit (N, 2) object array: handing the mixed
    # array/string nested list straight to np.save raises ValueError on
    # NumPy >= 1.24.
    packed = np.empty((len(testing_data), 2), dtype=object)
    for row, (pixels, img_num) in enumerate(testing_data):
        packed[row, 0] = pixels
        packed[row, 1] = img_num
    np.save('BenOritest_data.npy', packed)
    return testing_data

In [8]:
# To rebuild the caches from the image folders, run these instead:
# train_data = create_train_data()
# validate_data = create_validation_data()

# Otherwise reuse the cached arrays. They hold [image, label] pairs as Python
# objects, so NumPy >= 1.16.3 refuses to load them unless allow_pickle=True.
# NOTE: unpickling can execute arbitrary code - only load files you created.
train_data = np.load('Aug50_train_data.npy', allow_pickle=True)
validate_data = np.load('Aug50_validation_data.npy', allow_pickle=True)


In [9]:
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d, highway_conv_2d
from tflearn.layers.core import input_data, dropout, fully_connected

from tflearn.layers.normalization import local_response_normalization, batch_normalization
from tflearn.layers.estimator import regression


# Clear TensorFlow's default graph before the network is (re)built.
import tensorflow as tf
tf.reset_default_graph()

# Network architecture: a VGG-style stack (see the reference in the notebook
# header). Each stage is `n_convs` 3x3 ReLU convolutions with `n_filters`
# filters, followed by a 2x2 max-pool (stride 2) and batch normalization.
conv_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

for n_filters, n_convs in conv_stages:
    for _ in range(n_convs):
        convnet = conv_2d(convnet, n_filters, 3, activation='relu')
    convnet = max_pool_2d(convnet, 2, strides=2)
    convnet = batch_normalization(convnet)

# Classifier head: two 4096-unit ReLU layers, each followed by dropout.
for _ in range(2):
    convnet = fully_connected(convnet, 4096, activation='relu')
    convnet = dropout(convnet, 0.5)

# Two-way softmax output trained with RMSProp on categorical cross-entropy.
convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(
    convnet,
    optimizer='rmsprop',
    learning_rate=LR,
    loss='categorical_crossentropy',
    name='targets',
)

model = tflearn.DNN(convnet, tensorboard_dir=FILE_NAME)




# #highway convolutions with pooling and dropout
# for i in range(4):
#     for j in [3, 2, 1]: 
#         convnet = conv_2d(convnet, 32, j, activation='elu')
#     convnet = max_pool_2d(convnet, 2)
#     convnet = batch_normalization(convnet)
    
# convnet = fully_connected(convnet, 128, activation='elu')
# # convnet = dropout(convnet, 0.5)
# convnet = fully_connected(convnet, 256, activation='elu')
# convnet = dropout(convnet, 0.8)

# convnet = fully_connected(convnet, 2, activation='softmax')
# convnet = regression(convnet, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')


# model = tflearn.DNN(convnet, tensorboard_dir=FILE_NAME)


In [10]:
# Restore a previously saved model when its '.meta' file is present on disk.
meta_file = '{}.meta'.format(MODEL_NAME)
if os.path.exists(meta_file):
    model.load(MODEL_NAME)
    print('model loaded!')

In [11]:
# Short aliases used by the following cells; `test` is the validation split.
train, test = train_data, validate_data


In [12]:
# Each sample is an [image, label] pair. Images are stacked and reshaped to
# (N, IMG_SIZE, IMG_SIZE, 1) to match the 'input' layer; labels stay as lists.
X = np.array([sample[0] for sample in train])
X = X.reshape(-1, IMG_SIZE, IMG_SIZE, 1)
Y = [sample[1] for sample in train]

test_x = np.array([sample[0] for sample in test])
test_x = test_x.reshape(-1, IMG_SIZE, IMG_SIZE, 1)
test_y = [sample[1] for sample in test]

In [13]:
# Train for 16 epochs, reporting metrics against the validation tensors.
model.fit(
    {'input': X},
    {'targets': Y},
    n_epoch=16,
    validation_set=({'input': test_x}, {'targets': test_y}),
    snapshot_step=200,
    show_metric=True,
    run_id=MODEL_NAME,
)

Training Step: 5023  | total loss: 0.58863 | time: 4639.780s
| RMSProp | epoch: 016 | loss: 0.58863 - acc: 0.7140 -- iter: 20032/20046
Training Step: 5024  | total loss: 0.57602 | time: 4911.848s
| RMSProp | epoch: 016 | loss: 0.57602 - acc: 0.7238 | val_loss: 0.71888 - val_acc: 0.6163 -- iter: 20046/20046
--


In [14]:
# Save the model under MODEL_NAME; the earlier `model.load(MODEL_NAME)` cell
# restores it on a later run when the matching '.meta' file exists.
model.save(MODEL_NAME)

INFO:tensorflow:/home/airoot/CNN_Melanona/AIrootNet/MelanomaBenVsMal-1e-06-MelanomaCNN_AIrootNet.model is not in all_model_checkpoint_paths. Manually adding it.


In [15]:
# True positive: Sick people correctly identified as sick
# False positive: Healthy people incorrectly identified as sick
# True negative: Healthy people correctly identified as healthy
# False negative: Sick people incorrectly identified as healthy
# In general, Positive = identified and negative = rejected. Therefore:

# True positive = correctly identified
# False positive = incorrectly identified
# True negative = correctly rejected
# False negative = incorrectly rejected

def _count_predictions(samples):
    """Classify every sample and tally the predicted classes.

    Args:
        samples: Sequence whose items carry the image array at index 0.

    Returns:
        tuple: ``(predicted_mal, predicted_ben)`` - the number of samples for
        which the arg-max of the model output is index 1 ('Mal') and the
        number for which it is not ('Ben').
    """
    predicted_mal = 0
    predicted_ben = 0
    for sample in samples:
        # The network expects one (IMG_SIZE, IMG_SIZE, 1) tensor per image.
        pixels = sample[0].reshape(IMG_SIZE, IMG_SIZE, 1)
        if np.argmax(model.predict([pixels])) == 1:
            predicted_mal += 1
        else:
            predicted_ben += 1
    return predicted_mal, predicted_ben


# The cached test sets hold Python objects, so NumPy >= 1.16.3 refuses to
# load them unless allow_pickle=True.
# NOTE: unpickling can execute arbitrary code - only load files you created.
Mal_test_data = np.load('Mal_test_data.npy', allow_pickle=True)
Ben_test_data = np.load('Ben_test_data.npy', allow_pickle=True)

# Malignant images: a 'Mal' prediction is a true positive, 'Ben' a false negative.
Mal_True_Positive, Mal_False_Negative = _count_predictions(Mal_test_data)
# Benign images: a 'Mal' prediction is a false positive, 'Ben' a true negative.
Ben_False_Positive, Ben_True_Negative = _count_predictions(Ben_test_data)

print("True_Positive : " + str(Mal_True_Positive))
print("False_Negative : " + str(Mal_False_Negative))
print("True_Negative : " + str(Ben_True_Negative))
print("False_Positive : " + str(Ben_False_Positive))

True_Positive : 1149
False_Negative : 852
True_Negative : 1276
False_Positive : 736
