<a href="https://colab.research.google.com/github/Mullen44/CapstoneProject_SkinLesionClassification/blob/main/imageClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# GPU Allocation

In [None]:
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
# retrieve GPU type
GPUs = GPU.getGPUs()
gpu = GPUs[0]
# check allocation of GPU resources
def printm():
 process = psutil.Process(os.getpid())
 print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
 print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))
printm()

# Import Modules and Mount Drive

In [None]:
# Mounting Google Drive
# Installs PyDrive, mounts Google Drive under /content/gdrive and then builds
# an authenticated PyDrive client.
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Mount google drive (force_remount=True remounts even if already mounted)
from google.colab import drive
drive.mount('/content/gdrive', force_remount = True)

# Authenticate and create the PyDrive client
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# NOTE: this rebinds `drive`. Up to this line the name refers to the
# google.colab `drive` module imported above; afterwards it is the PyDrive
# client, so `drive.mount(...)` can only be called again after re-running
# the import.
drive = GoogleDrive(gauth)

In [None]:
# Import Necessary Libraries and personal scripts (Utils.py & Models.py)
# Tensorflow, matplotlib, numpy etc
%cd /content/gdrive/Shared drives/CAPSTONE/DeepLearning/Scripts/
from Utils import *
from models import *
import os
import cv2
import numpy as np
from matplotlib.pyplot import imsave, imread
import matplotlib.pyplot as plt
import math as mth
import pandas as pd
import tensorflow as tf
from tensorflow.keras.applications import ResNet50, VGG16, VGG19, InceptionV3, InceptionResNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger
import random


In [None]:
%cd /content/gdrive/Shared drives/CAPSTONE/DeepLearning/
print(len(os.listdir('Data/deep/bkl/')))
print(len(os.listdir('Data/deep/mel/')))
print(len(os.listdir('Data/deep/nv/')))
print(len(os.listdir('Data/deep/test/')))
print(len(os.listdir('Data/deep/train/')))
print(len(os.listdir('Data/log/Features/bkl/')))
print(len(os.listdir('Data/log/Features/mel/')))
print(len(os.listdir('Data/log/Features/nv')))

train = np.load('Data/log/Features/train.npy')
print(len(train))
test = np.load('Data/log/Features/test.npy')
print(len(test))


# Divide Data

In [None]:
# Change to Directory where data is stored
%cd /content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData/

# Read in the metadata information about the images
meta = pd.read_csv('HAM10000_metadata.csv')
# Create a dictionary to store images
data = dict()

# Loop through and fill in data variable with key=filename and value=image array
for filename in os.listdir('HAM10000_images_part_1/') :
  data[filename[:-4]] = np.array(imread('HAM10000_images_part_1/' + filename))

for filename in os.listdir('HAM10000_images_part_2/') :
  data[filename[:-4]] = np.array(imread('HAM10000_images_part_2/' + filename))

# Specify path to save labelled images in
melPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData/mel/'
bklPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData/bkl/'
akiecPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData/akiec/'
bccPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData/bcc/'
dfPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData/df/'
nvPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData/nv/'
vascPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData/vasc/'

for i in range(len(meta)) :
  if meta.image_id[i] in data :
    if meta.dx[i] == 'akiec' :
      # Save in akiec Folder
      name = akiecPath + meta.image_id[i] + '.jpg'
      imsave(name, data[meta.image_id[i]])
    if meta.dx[i] == 'mel' :
      # Save in Mel Folder
      name = melPath + meta.image_id[i] + '.jpg'
      imsave(name, data[meta.image_id[i]])
    if meta.dx[i] == 'bkl' :
      # Save in BKL folder
      name = bklPath + meta.image_id[i] + '.jpg'
      imsave(name, data[meta.image_id[i]])
    if meta.dx[i] == 'bcc' :
      # Save in bcc Folder
      name = bccPath + meta.image_id[i] + '.jpg'
      imsave(name, data[meta.image_id[i]])
    if meta.dx[i] == 'df' :
      # Save in df Folder
      name = dfPath + meta.image_id[i] + '.jpg'
      imsave(name, data[meta.image_id[i]])
    if meta.dx[i] == 'nv' :
      # Save in nv Folder
      name = nvPath + meta.image_id[i] + '.jpg'
      imsave(name, data[meta.image_id[i]])
    if meta.dx[i] == 'vasc' :
      # Save in vasc Folder
      name = vascPath + meta.image_id[i] + '.jpg'
      imsave(name, data[meta.image_id[i]])

/content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData


In [None]:
# Load Preprocessed images
melPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData/mel/'
bklPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData/bkl/'
nvPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData/nv/'

# One dict per class: file name -> image array
mel = dict()
bkl = dict()
nv = dict()

# `i` is a running count over all three classes, printed as progress output
i = 0
for classPath, classImages in ((melPath, mel), (bklPath, bkl), (nvPath, nv)) :
  for filename in os.listdir(classPath) :
    classImages[filename] = imread(classPath + filename)
    print(i)
    i = i+1


# Divide data (Images from 3 classes) into training(60%), Validation(20%), Testing(20%)
# NOTE(review): how createIndex combines these fractions is defined in Utils, not
# here - confirm that train_percent=0.8 with val_percent=0.2 yields the 60/20/20
# split described above.
train_percent = 0.8
val_percent = 0.25 * train_percent 
test_percent = 0.2

# Create index variables
indexMel = createIndex(mel, train_percent, val_percent, test_percent)
indexBkl = createIndex(bkl, train_percent, val_percent, test_percent)
indexNv = createIndex(nv, train_percent, val_percent, test_percent)

# Make sure its randomized to avoid any bias
# Save divided data into directories
dataPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/Data/'
saveImagesToFolders(mel, indexMel, 'mel/', dataPath)
saveImagesToFolders(bkl, indexBkl, 'bkl/', dataPath)
# Each class must be saved with the index built from that same class; the nv
# images previously reused indexMel, which was created for the mel images.
saveImagesToFolders(nv, indexNv, 'nv/', dataPath)


In [None]:
# Print the number of entries in each per-class folder under DummyData
dummyDataRoot = '/content/gdrive/Shareddrives/CAPSTONE/DeepLearning/DummyData/'
for label in ('akiec', 'mel', 'bcc', 'bkl', 'df', 'nv', 'vasc') :
  print(label, len(os.listdir(dummyDataRoot + label)))

# New Data Division

In [None]:
# Folders holding the per-class images
melPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData/mel/'
bklPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData/bkl/'
nvPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/DummyData/nv/'

# Read every file of each class into a dict keyed by file name
mel = {fname: imread(melPath + fname) for fname in os.listdir(melPath)}
bkl = {fname: imread(bklPath + fname) for fname in os.listdir(bklPath)}
nv = {fname: imread(nvPath + fname) for fname in os.listdir(nvPath)}


In [None]:
# Half of the number of loaded bkl images (integer division); displayed as the cell output
len(bkl)//2

In [None]:
# DeepLearning folders
melDeepPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/Data/deep/mel/'
bklDeepPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/Data/deep/bkl/'
nvDeepPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/Data/deep/nv/'
# Logistic Regression Folders
melLogPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/Data/log/mel/'
bklLogPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/Data/log/bkl/'
nvLogPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/Data/log/nv/'
# Dictionaries holding the images assigned to the deep-learning half
bklDeep = dict()
melDeep = dict()
nvDeep = dict()

# Split every class in two: the first len(images)//2 + 1 images (in dict order)
# are written to the deep-learning folder and remembered in the *Deep dict,
# the remaining ones are written to the logistic-regression folder.
for label, images, deepPath, logPath, deepImages in (
    ('BKL', bkl, bklDeepPath, bklLogPath, bklDeep),
    ('MEL', mel, melDeepPath, melLogPath, melDeep),
    ('NV', nv, nvDeepPath, nvLogPath, nvDeep),
) :
  half = len(images)//2
  for i, (key, value) in enumerate(images.items()) :
    if i <= half :
      imsave(deepPath + key, value)
      deepImages[key] = value
    else :
      imsave(logPath + key, value)
  print('Done ' + label)

# Save into train val and test
# Divide data (Images from 3 classes) into training(60%), Validation(20%), Testing(20%)
# NOTE(review): how createIndex combines these fractions is defined in Utils, not
# here - confirm that train_percent=0.8 with val_percent=0.2 yields the 60/20/20
# split described above.
train_percent = 0.8
val_percent = 0.25 * train_percent 
test_percent = 0.2

# Create index variables
indexMel = createIndex(melDeep, train_percent, val_percent, test_percent)
indexBkl = createIndex(bklDeep, train_percent, val_percent, test_percent)
indexNv = createIndex(nvDeep, train_percent, val_percent, test_percent)
print('Indexes Created')

# Make sure its randomized to avoid any bias
# Save divided data into directories
dataPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/Data/deep/'
saveImagesToFolders(melDeep, indexMel, 'mel/', dataPath)
print('Saved MEL')
saveImagesToFolders(bklDeep, indexBkl, 'bkl/', dataPath)
print('Saved BKL')
# Each class must be saved with the index built from that same class; the nv
# images previously reused indexMel, which was created for the mel images.
saveImagesToFolders(nvDeep, indexNv, 'nv/', dataPath)
print('Saved NV')

# Train Models

In [None]:
# Select Architecture
# Choices = 'ResNet50', 'VGG16', 'InceptionResNetV2'
# For transfer learning have load_weights = 'imagenet', for non transfer learning load_weights = None
model_choice = 'ResNet50'
load_weights = 'imagenet'
model = get_model(model_choice, load_weights)
model.summary()

# Set arguments for the training and validation generator
# Probably add vertical and horizontal flip
# NOTE(review): Keras reads rotation_range in degrees, so 0.2 is a barely
# visible rotation - confirm this value is intended.
train_args = dict(
              rotation_range=0.2,
              width_shift_range=0.05,
              height_shift_range=0.05,
              shear_range=0.05,
              zoom_range=0.05,
              fill_mode='nearest'
)
val_args = dict()

# Set paths for training and validation data
trainPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/Data/deep/train'
valPath = '/content/gdrive/Shared drives/CAPSTONE/DeepLearning/Data/deep/val'

# Create Data Generators w/ data augmentations (shift, shear, zoom, rotation)
# The dicts must be unpacked as keyword arguments: passed positionally, the
# whole dict lands in the first parameter (featurewise_center) and none of
# the augmentation settings are applied.
train_datagen = ImageDataGenerator(**train_args)
val_datagen = ImageDataGenerator(**val_args)

# Set Hyperparameters (epochs, learning rate, batchsize, cost function)
epochs = 100
batchsize = 16
learningrate = 0.000001
model_name = 'e' + str(epochs) + '_BS' + str(batchsize) + '_lr' + str(learningrate) + '/'
ModelPath = '/content/gdrive/Shareddrives/CAPSTONE/DeepLearning/Models/TransferLearning/' + model_choice + '/' + model_name

# Specify where the input images are coming from
train_gen = train_datagen.flow_from_directory(trainPath, target_size=(224,224), batch_size=batchsize)
val_gen = val_datagen.flow_from_directory(valPath, target_size=(224,224), batch_size=batchsize)

# Derive the number of batches per epoch from the images actually found on
# disk rather than from hard-coded image counts.
steps = mth.ceil(train_gen.samples/batchsize)
val_steps = mth.ceil(val_gen.samples/batchsize)

# Create the output directory for this model; makedirs also creates any
# missing parent folders (e.g. the per-architecture folder).
if not os.path.isdir(ModelPath) :
  os.makedirs(ModelPath)
  print('Creating Directory')

# Set callbacks using tensorflow functions (csvlogger, early stopping, model checkpoint)
model_checkpoint = ModelCheckpoint(ModelPath + 'model.hdf5', monitor='val_loss', save_best_only=True)
early_stop = EarlyStopping(monitor = 'val_loss', patience = 10)
csv_logger = CSVLogger(ModelPath + 'model.csv')

# Compile Model Using Hyperparameters [model.compile()]
# NOTE(review): flow_from_directory defaults to class_mode='categorical'
# (one-hot labels). Whether BinaryCrossentropy(from_logits=True) is the right
# loss depends on the output layer built by get_model - confirm.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True), optimizer=tf.keras.optimizers.Adam(learning_rate=learningrate), metrics=['accuracy'])

# Train The model [model.fit()] - This trains the model, logs the data by epoch and saves the best version
# model.fit accepts generators directly; fit_generator is deprecated.
model_history = model.fit(train_gen,
                          steps_per_epoch = steps,
                          validation_data = val_gen,
                          validation_steps = val_steps,
                          epochs = epochs,
                          callbacks = [model_checkpoint, early_stop, csv_logger])

# Once finished training...
# Display loss curves to double check it looks good

# Testing



In [None]:
# TestDataPaths (one folder per class under the test split)
testRoot = '/content/gdrive/Shareddrives/CAPSTONE/DeepLearning/Data/deep/test/'
bklPath = testRoot + 'bkl/'
melPath = testRoot + 'mel/'
nvPath = testRoot + 'nv/'

# Load Model: rebuild the architecture, then restore the saved weights
modelPath = '/content/gdrive/Shareddrives/CAPSTONE/DeepLearning/newModels/TransferLearning/ResNet50/e100_BS16_lr1e-05/newmodel.hdf5'
model_choice = 'ResNet50'
shape = (224, 224, 3)
model = get_model(model_choice=model_choice)
model.load_weights(modelPath)

# Make Predictions for every test folder
bklPredict = getPredictions(model, shape, bklPath)
melPredict = getPredictions(model, shape, melPath)
nvPredict = getPredictions(model, shape, nvPath)

# Create Actuals: one ground-truth label per prediction of that folder
bklActual = ['bkl'] * len(bklPredict)
melActual = ['mel'] * len(melPredict)
nvActual = ['nv'] * len(nvPredict)

# Concatenate Predictions and Actuals in the same class order (bkl, mel, nv)
Actual = [*bklActual, *melActual, *nvActual]
Predicted = bklPredict + melPredict + nvPredict


# Evaluate Models


In [None]:
# Choose Model: read the per-epoch log written by CSVLogger for the current ModelPath
# (alternative: '/content/gdrive/Shareddrives/CAPSTONE/DeepLearning/Models/TransferLearning/ResNet50/e150_BS16_lr5e-07/model.csv')
dataPath = ModelPath + '/model.csv'
data = pd.read_csv(dataPath)

# Plot the loss and accuracy curves
plotLoss(data)
plotAccuracy(data)

# Display max/min: best accuracies first, then lowest losses
for pick, column in (
    (max, 'accuracy'),
    (max, 'val_accuracy'),
    (min, 'loss'),
    (min, 'val_loss'),
) :
  print(pick(data[column]))



**Confusion matrix**

In [None]:
# Confusion Matrices
# Build the confusion matrix with pandas: rows = actual class, columns = predicted class
act_y = pd.Series(Actual, name = 'Actual')
pred_y = pd.Series(Predicted, name = 'Predicted')
df_conf = pd.crosstab(act_y, pred_y)
print(df_conf)
print()

# Same table with row/column totals added (margins=True adds the 'All' row and column)
df_conf1 = pd.crosstab(act_y, pred_y, rownames=['Actual'], colnames=['Predicted'], margins=True)
print(df_conf1)
print()

# Row-normalised table: every row is divided by its own total, so each row
# sums to 1. axis=0 is required here - a plain `/` would align the row sums
# with the column labels and divide the wrong cells.
df_conf_norm = df_conf.div(df_conf.sum(axis=1), axis=0)
print(df_conf_norm)
print()

# now for plotting
def plot_confusion_matrix(df_conf, title='Confusion matrix', cmap=plt.cm.gray_r):
    """Draw a confusion-matrix DataFrame as a colour-mapped grid.

    Args:
        df_conf: DataFrame of counts (e.g. from pd.crosstab); its index
            labels the rows and its columns label the x axis.
        title: accepted for compatibility with existing calls; it is not
            drawn on the figure.
        cmap: matplotlib colormap used for the cells.
    """
    plt.matshow(df_conf, cmap=cmap)
    plt.colorbar()
    # Size the tick positions per axis: the table is not square when some
    # class is never predicted, so rows and columns can differ in number.
    plt.xticks(np.arange(len(df_conf.columns)), df_conf.columns, rotation=45)
    plt.yticks(np.arange(len(df_conf.index)), df_conf.index)
    plt.ylabel(df_conf.index.name)
    plt.xlabel(df_conf.columns.name)

# Plot the raw-count confusion matrix with the default title and colormap
plot_confusion_matrix(df_conf)

In [None]:
# NOTE(review): printConfMatrix is not defined in this notebook - presumably it
# comes from the star-imports of Utils/models; confirm what it prints.
printConfMatrix()