In [1]:
import os
from glob import glob

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from imblearn.over_sampling import RandomOverSampler
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.models import Sequential
from PIL import Image
from sklearn import decomposition
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical,normalize



In [None]:
# Mount Google Drive inside the Colab runtime so the dataset files can be read.
# NOTE(review): later cells use the relative prefix 'gdrive/...'; that resolves to
# this mount point only while the working directory is /content - confirm.
from google.colab import drive

drive_mount_point = '/content/gdrive'
drive.mount(drive_mount_point)

Mounted at /content/gdrive


In [None]:
# Load the HAM10000 metadata CSV into a DataFrame.
metadata_csv_path = "gdrive/Shareddrives/FIT3163 - Group 6 Assignment/CNN/HAM10000_metadata.csv"
data = pd.read_csv(metadata_csv_path)

In [None]:
# Build a lookup from image id (file name without extension) to file path,
# covering every .jpg that sits exactly one folder level below data_dir.
data_dir = 'gdrive/Shareddrives/FIT3163 - Group 6 Assignment/CNN'
jpg_pattern = os.path.join(data_dir, '*', '*.jpg')
all_image_path = glob(jpg_pattern)
imageid_path_dict = {
    os.path.splitext(os.path.basename(image_path))[0]: image_path
    for image_path in all_image_path
}

In [None]:
# Fill missing ages with the mean age.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on data['age']: that chained in-place form is deprecated
# in recent pandas and leaves `data` unchanged when copy-on-write is active.
data['age'] = data['age'].fillna(data['age'].mean())

In [None]:
# Maps the short diagnosis code stored in the 'dx' column to a readable label.
# Labels carry no surrounding whitespace so they can be compared and displayed
# as-is; the alphabetical order of the labels (which determines the categorical
# codes derived from them) is unaffected.
cancer_type = {
    'nv': 'Melanocytic nevi',
    'mel': 'Melanoma',
    'bkl': 'Benign keratosis-like lesions',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma'
}

In [None]:
# Create 3 derived columns.
data['path'] = data['image_id'].map(imageid_path_dict.get) # the path for the jpg file
data['cell_type'] = data['dx'].map(cancer_type.get) # the corresponding cancer type
data['cell_type_idx'] = pd.Categorical(data['cell_type']).codes # the index representing the cancer type

# Fail fast on rows the lookups could not resolve. dict.get returns None for an
# unknown key, which would otherwise surface later as an opaque image-loading
# error (path) or as a categorical code of -1 (cell type), and -1 would be
# one-hot encoded as the last class.
missing_path_count = int(data['path'].isna().sum())
if missing_path_count:
    raise FileNotFoundError(
        f"{missing_path_count} image_id value(s) have no matching .jpg file; "
        "check the Drive mount and the dataset folder layout"
    )
unknown_dx = sorted(data.loc[data['cell_type'].isna(), 'dx'].astype(str).unique())
if unknown_dx:
    raise ValueError(f"dx code(s) missing from cancer_type: {unknown_dx}")

In [None]:
# Preview the first five rows, including the derived path / cell_type / cell_type_idx columns.
data.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,path,cell_type,cell_type_idx
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,gdrive/Shareddrives/FIT3163 - Group 6 Assignme...,Benign keratosis-like lesions,2
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,gdrive/Shareddrives/FIT3163 - Group 6 Assignme...,Benign keratosis-like lesions,2
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,gdrive/Shareddrives/FIT3163 - Group 6 Assignme...,Benign keratosis-like lesions,2
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,gdrive/Shareddrives/FIT3163 - Group 6 Assignme...,Benign keratosis-like lesions,2
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,gdrive/Shareddrives/FIT3163 - Group 6 Assignme...,Benign keratosis-like lesions,2


In [None]:
def load_resized_image(path, size=(100, 75)):
    """Read one image file and return its pixels as an RGB array.

    Parameters
    ----------
    path : str
        Location of the image file.
    size : tuple of int
        Target (width, height) passed to PIL's resize, so the returned array
        is shaped (height, width, 3).

    Returns
    -------
    numpy.ndarray
        Pixel data of the resized image.
    """
    # The context manager releases the file handle deterministically.
    # convert('RGB') guarantees three channels, which the later reshape to
    # (75, 100, 3) relies on.
    with Image.open(path) as img:
        return np.asarray(img.convert('RGB').resize(size))


# Add the 'image' column: each jpg is read from its path and resized to
# 100 (width) x 75 (height).
data['image'] = data['path'].map(load_resized_image)

In [None]:
# Target: the integer class code. Features: every other column of the table.
y = data['cell_type_idx']
x = data.drop(columns=['cell_type_idx'])

In [None]:
# Split into train / validation / test sets (60% / 20% / 20% of all rows).
# Both splits are stratified on the integer class code so each subset keeps
# the label proportions of the full table.
# NOTE(review): the metadata preview shows several image_ids sharing one
# lesion_id; a row-level split can put images of the same lesion in both the
# training and the test set - consider a group-aware split.

x_train_val, x_test, y_train_val, y_test = train_test_split(
    x, y, test_size=0.2, random_state=8, stratify=y)

# Stack the per-row image arrays of the 'image' column into one ndarray per subset
x_train_val = np.asarray(x_train_val['image'].tolist())
x_test = np.asarray(x_test['image'].tolist())

# 0.25 of the remaining 80% gives a validation set of 20% of all rows.
# Stratification needs the integer labels, so one-hot encoding happens afterwards.
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_val, y_train_val, test_size=0.25, random_state=8, stratify=y_train_val)

# One-hot encode the 7 class codes for categorical cross-entropy
y_train_val = to_categorical(y_train_val, num_classes = 7)
y_test = to_categorical(y_test, num_classes = 7)
y_train = to_categorical(y_train, num_classes = 7)
y_valid = to_categorical(y_valid, num_classes = 7)

In [None]:
# Give every split the explicit layout (samples, 75, 100, 3):
# 75 rows x 100 columns x 3 channels per image.
image_shape = (75, 100, 3)
x_train = x_train.reshape((len(x_train),) + image_shape)
x_test = x_test.reshape((len(x_test),) + image_shape)
x_valid = x_valid.reshape((len(x_valid),) + image_shape)

In [None]:
# Random augmentation settings used to generate varied versions of the images.
augmentation_settings = dict(
    rotation_range=15,      # random rotation within a 15 degree range
    shear_range=0.1,        # random shear transform with intensity up to 0.1
    zoom_range=0.05,        # random zoom factor between 0.95 and 1.05
    horizontal_flip=True,   # random left-right flip
    vertical_flip=True,     # random up-down flip
    fill_mode='nearest',    # pixels exposed by a transform copy the nearest edge value
)
img_generate = ImageDataGenerator(**augmentation_settings)

In [None]:
# fit() computes the dataset statistics needed by the data-dependent options
# (featurewise_center, featurewise_std_normalization, zca_whitening); none of
# those are enabled on img_generate. This call does not add images to x_train:
# augmented batches are only produced when the generator is iterated,
# e.g. through img_generate.flow(...).
img_generate.fit(x_train)

In [None]:
# Training callbacks. Both monitor validation accuracy, which Keras logs under
# the key 'val_accuracy' when the model is compiled with metrics=["accuracy"].
# Halve the learning rate after 5 epochs without improvement, down to a floor of 1e-5.
learning_rate_reduction = ReduceLROnPlateau(monitor='val_accuracy', patience=5, verbose=0, factor=0.5, min_lr=0.00001)
# Stop training after 5 epochs without improvement.
early_stopping_monitor = EarlyStopping(patience=5,monitor='val_accuracy')

In [5]:
# ImageNet-pretrained ResNet50 without its classification head; global average
# pooling turns the final feature map into one vector per image.
# NOTE(review): input_shape here is (96, 128, 3) while the image arrays prepared
# above are (75, 100, 3); base_model is also assigned again in the
# model-building cell.
base_model = ResNet50(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(96, 128, 3),
)

In [6]:
# Print the layer-by-layer structure of the ResNet50 backbone.
base_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 96, 128, 3)] 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 102, 134, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 48, 64, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 48, 64, 64)   256         conv1_conv[0][0]                 
___________________________________________________________________________________________

In [3]:
# Model Building

# The image arrays are shaped (75, 100, 3): PIL's resize((100, 75)) yields
# width 100 and height 75. The backbone must be built for that same shape,
# otherwise fit() / evaluate() reject the data with a shape error.
model_input_shape = (75, 100, 3)
base_model = ResNet50(include_top=False, input_shape=model_input_shape, pooling='avg', weights='imagenet')
# NOTE(review): no resnet50.preprocess_input call is visible in this notebook;
# the ImageNet weights expect inputs prepared that way - consider applying it.

# Classification head on top of the pooled backbone features:
# dropout -> 128-unit ReLU layer -> dropout -> 7-way softmax.
ResNet50model = Sequential()
ResNet50model.add(base_model)
ResNet50model.add(Dropout(0.5))
ResNet50model.add(Dense(128, activation="relu"))
ResNet50model.add(Dropout(0.5))
ResNet50model.add(Dense(7, activation = 'softmax'))

# Freeze every backbone layer except the last 8, so only those layers and the
# new head are updated during training.
for layer in base_model.layers[:-8]:
    layer.trainable = False


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [4]:
# Print the layers, output shapes and parameter counts of the assembled classifier.
ResNet50model.summary() # backbone + classification head

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Functional)        (None, 2048)              23587712  
_________________________________________________________________
dropout (Dropout)            (None, 2048)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               262272    
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 7)                 903       
Total params: 23,850,887
Trainable params: 3,678,727
Non-trainable params: 20,172,160
_________________________________________________________________


In [None]:
# Adam optimizer, starting from a learning rate of 1e-3.
initial_learning_rate = 0.001
optimizer = tf.keras.optimizers.Adam(learning_rate=initial_learning_rate)

In [None]:
# Configure training: categorical cross-entropy over the one-hot labels,
# optimised with `optimizer`, reporting accuracy.
ResNet50model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

In [None]:
# Train on augmented batches: img_generate.flow() applies the random transforms
# on the fly, so the augmentation configured earlier is actually used.
# batch_size is given to flow() because fit() does not accept it for generator input.
# Validation uses the untouched validation images.
train_batches = img_generate.flow(x_train, y_train, batch_size=128)
ResNet50model.fit(train_batches, epochs = 10, validation_data=(x_valid, y_valid),
          # learning_rate_reduction was created above but never passed to fit()
          callbacks=[early_stopping_monitor, learning_rate_reduction])

In [None]:
# Score the trained model on the held-out test split; with the single
# "accuracy" metric, evaluate() returns the loss followed by the accuracy.
test_metrics = ResNet50model.evaluate(x_test, y_test, verbose=2)
test_loss, test_acc = test_metrics