# HW8.1: Segmented Skin Cancer Image Classification using DL models & techniques with Keras
---
###### Name: Devson Butani
###### ID: 000732711
###### LTU Honor Code: "I pledge that on all academic work that I submit, I will neither give nor receive unauthorized aid, nor will I present another person's work as my own."

# Get dataset from `Google Drive`

In [None]:
# # Mount Google Drive
# from google.colab import drive
# drive.mount('/content/drive')

# # Change directory to a drive folder of choice - Runs and models will be saved here
# %cd '/content/drive/MyDrive/DL_data'
%pwd # Verify

In [None]:
import os
import shutil

# Root folder of each dataset split (relative to the current working directory).
train_dir = "skin_cancer/train"
vali_dir = "skin_cancer/validation"
test_dir = "skin_cancer/test"

# One sub-folder per class inside every split.
train_benign_dir = "skin_cancer/train/benign"
train_malignant_dir = "skin_cancer/train/malignant"
vali_benign_dir = "skin_cancer/validation/benign"
vali_malignant_dir = "skin_cancer/validation/malignant"
test_benign_dir = "skin_cancer/test/benign"
test_malignant_dir = "skin_cancer/test/malignant"

# Report how many entries each class folder holds, to eyeball the class balance.
for label, folder in (
    ("total training benign images:", train_benign_dir),
    ("total training malignant images:", train_malignant_dir),
    ("total validation benign images:", vali_benign_dir),
    ("total validation malignant images:", vali_malignant_dir),
    ("total test benign images:", test_benign_dir),
    ("total test malignant images:", test_malignant_dir),
):
    print(label, len(os.listdir(folder)))

The dataset appears to be mostly balanced, with a slight bias towards the benign class.

# Install Dependencies

In [None]:
# %pip install comet_ml --quiet
# %pip install pillow
# %pip install matplotlib

# Initialize `Comet` for data logging

In [None]:
import os

import comet_ml

# SECURITY: never hard-code the Comet API key in the notebook - anyone who can read
# the file can then log to (or read from) the account. Supply it through the
# COMET_API_KEY environment variable instead; any key that was ever committed to
# source control should be revoked and regenerated in the Comet account settings.
_comet_api_key = os.environ.get("COMET_API_KEY")
if _comet_api_key:
    # Persist the key to the local Comet config file, as the notebook did before.
    comet_ml.config.save(api_key=_comet_api_key)
# When no key is supplied via the environment, init() falls back to whatever Comet
# configuration already exists on this machine.
comet_ml.init(project_name="HW8_Skin_Cancer_Classification")

# Import Dependencies

In [None]:
import numpy as np
from tensorflow import keras
import tensorflow as tf
from keras import layers
from keras import models
from keras import optimizers

In [None]:
# List the GPUs TensorFlow can see; Keras can only use a GPU that shows up here.
gpu_devices = tf.config.list_physical_devices("GPU")
print("Num GPUs Available: ", len(gpu_devices))

# Setup Data Generator and Augmentation

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Size every image is resized to by the generators below.
# Open question from earlier runs: 224x224 gave lower accuracy (maybe layer sizes?).
img_size = (60, 60)

# Options common to all three directory iterators: identical target size, fixed
# (un-shuffled) file order, and binary labels for the benign/malignant problem.
_flow_options = dict(target_size=img_size, shuffle=False, class_mode="binary")

# Training pipeline: rescale pixels from [0, 255] to [0, 1] and randomly flip
# images horizontally. Rotation, shift, shear and zoom augmentation were tried
# and are currently disabled.
# Author's notes: augmentation improves results by less than 5% but helps curb
# overfitting for 10-15 more epochs; accuracy was better without the flip.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    horizontal_flip=True,
    fill_mode="nearest",
)
# Batch size 18 (instead of the default 32) worked better on the author's GPU.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    batch_size=18,
    **_flow_options,
)

# Validation pipeline: rescaling only, no augmentation.
vali_datagen = ImageDataGenerator(rescale=1.0 / 255)
validation_generator = vali_datagen.flow_from_directory(
    vali_dir,
    batch_size=10,
    **_flow_options,
)

# Test pipeline: rescaling only; a batch size of 267 delivers all test images
# in a single batch.
test_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_generator = test_datagen.flow_from_directory(
    test_dir,
    batch_size=267,
    **_flow_options,
)

# Build CNN

In [None]:
def build_model(input_shape=(60, 60, 3)):
    """Build and compile the small CNN used for the benign/malignant classifier.

    The network is two Conv2D + MaxPooling2D stages followed by a dense head
    ending in a single sigmoid unit, compiled with binary cross-entropy loss,
    RMSprop (learning rate 1e-4) and the "acc" metric.

    Args:
        input_shape: (height, width, channels) of the images fed to the first
            convolution. Defaults to (60, 60, 3); it has to agree with the
            ``target_size`` the data generators resize images to.

    Returns:
        The compiled ``Sequential`` model, ready for ``fit``.
    """
    model = models.Sequential(
        [
            # Convolutional feature extractor.
            layers.Conv2D(3, (3, 3), activation="relu", input_shape=input_shape),
            layers.MaxPooling2D((3, 3)),
            layers.Conv2D(3, (3, 3), activation="relu"),
            layers.MaxPooling2D((3, 3)),
            layers.Flatten(),
            # Dense classifier head with light dropout after the widest layer.
            layers.Dense(896, activation="relu"),
            layers.Dropout(0.1),
            layers.Dense(147, activation="relu"),
            # Single sigmoid output: one probability per image.
            layers.Dense(1, activation="sigmoid"),
        ]
    )

    model.compile(
        loss="binary_crossentropy",
        optimizer=optimizers.RMSprop(learning_rate=1e-4),
        metrics=["acc"],  # history keys are therefore "acc" / "val_acc"
    )
    return model

In [None]:
# Open a Comet experiment so the run below is logged. The optional histogram
# auto-logging switches (weights, gradients, activations) are left at their defaults.
experiment = comet_ml.Experiment()

# Build a fresh model and print its layer summary.
model_DA = build_model()
model_DA.summary()

# Train for 50 epochs of 100 batches each, validating on 50 batches per epoch;
# verbose=2 prints one line per epoch.
_fit_options = dict(
    steps_per_epoch=100,
    epochs=50,
    validation_data=validation_generator,
    validation_steps=50,
    verbose=2,
)
history = model_DA.fit(train_generator, **_fit_options)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch metrics recorded by model.fit().
acc2 = history.history["acc"]
val_acc2 = history.history["val_acc"]
loss2 = history.history["loss"]
val_loss2 = history.history["val_loss"]

epochs = range(len(acc2))


def _plot_history_pair(train_series, vali_series, train_label, vali_label, title):
    """Draw a training curve (blue) and a validation curve (red) on the current figure."""
    plt.plot(epochs, train_series, "b", label=train_label)
    plt.plot(epochs, vali_series, "r", label=vali_label)
    plt.title(title)
    plt.legend()


# First figure: accuracy curves.
_plot_history_pair(
    acc2, val_acc2, "Training acc", "Validation acc", "Training and validation accuracy"
)

# Second figure: loss curves.
plt.figure()
_plot_history_pair(
    loss2, val_loss2, "Training loss", "Validation loss", "Training and validation loss"
)

plt.show()

In [None]:
import os.path

# Folder (relative to the working directory) where trained models are archived.
save_path = "history_files/"
name_of_file = "model_"

# Make sure the folder exists; otherwise os.walk() yields nothing and next()
# raises StopIteration on a fresh checkout.
os.makedirs(save_path, exist_ok=True)

# Count the files already in the *same* folder we save into (previously a
# machine-specific absolute path was counted instead), and use that count as
# the index of the new model file: model_0.h5, model_1.h5, ...
path, dirs, files = next(os.walk(save_path))
file_count = len(files)
completeName = os.path.join(save_path, name_of_file + str(file_count) + ".h5")
print(completeName)
model_DA.save(completeName)

In [None]:
# • Use any (as many) techniques covered in class
# • At least, for each model introduced
#    CHECK - Validation graphs (accuracy and loss)
#    CHECK - model.evaluate(…) to test (140+127=267) test images using the trained model. Must display test accuracy
#    CHECK - batch of all 267 test images ONCE. 
#    CHECK - Must use m.evaluate(test_generator, steps = 1), only ONCE. 
#    CHECK - Shuffle for DA must be false. https://keras.io/api/preprocessing/image/
#    Display some images that failed in the classification with proper labels (e.g. false positive or false negative)
# • Compare and summarize your results. 
# • Best test accuracy
# • Upload hw8_yourname.ipynb file on Canvas

In [None]:
# Score the trained model on the test set. The generator supplies both images and
# labels, and steps=1 consumes exactly one batch (the whole test set at batch size 267).
evaluation = model_DA.evaluate(test_generator, steps=1)
test_loss, test_acc = evaluation
print(f"test acc: {test_acc*100.0:.2f}%")

In [None]:
# Plot a grid of test images titled "actual::confidence", tagging misclassified
# ones as false positive (F+ve) or false negative (F-ve).
import matplotlib.pyplot as plt

# First image shown and number of images shown. The test set is not shuffled, so
# the offset picks a fixed window of it.
# NOTE(review): 134 presumably straddles the benign/malignant boundary (140 benign
# images listed first) - confirm against test_generator.classes.
SAMPLE_OFFSET = 134
SAMPLE_COUNT = 12
DECISION_THRESHOLD = 0.5  # same cut-off Keras' binary accuracy applies (pred > 0.5 -> class 1)

# Load the single all-images batch once and predict on exactly those arrays, so
# image, label and prediction at a given index are guaranteed to line up.
images, labels = test_generator[0]
results = model_DA.predict(images)

plt.figure(figsize=(6, 6))
last_index = min(SAMPLE_OFFSET + SAMPLE_COUNT, len(images))  # never index past the batch
for slot, idx in enumerate(range(SAMPLE_OFFSET, last_index)):
    plt.subplot(4, 3, slot + 1)
    plt.imshow(images[idx])

    actual = int(labels[idx])
    confidence = float(results[idx][0])
    # Decide the class from the raw probability; rounding is for display only, so
    # a borderline value such as 0.5004 is still recognised as a wrong prediction.
    predicted_class = int(confidence > DECISION_THRESHOLD)
    predicted = round(confidence, 3)

    title = f"{actual}::{predicted}"
    if predicted_class != actual:
        # Assumes label 1 is the positive class (Keras assigns class indices
        # alphabetically by folder name) - verify with test_generator.class_indices.
        if predicted_class == 1:
            title = f"{actual}::{predicted}::F+ve"
        else:
            title = f"{actual}::{predicted}::F-ve"
    plt.title(title)
    plt.xticks([])
    plt.yticks([])

# Lay out the grid once, after every subplot exists.
plt.tight_layout()

In [None]:
# Close the Comet experiment that was opened before training.
experiment.end()  # CometML stop logging