# Melanoma Detection

This project is an experiment in image processing with a neural network: it builds a classification model using the topics covered in the Machine Learning Specialization on Coursera. It uses a dataset from Kaggle, available here:
https://www.kaggle.com/datasets/wanderdust/skin-lesion-analysis-toward-melanoma-detection

This model will use greyscale images of different types of skin blemishes to train on.

We first must import the required libraries

In [2]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image
import os
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there are readable from this runtime.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


### Data Collection

The next portion processes the images into a training feature set. Each image is labeled according to the directory it comes from. The labeling scheme could be changed later, but for now an image's class is determined by the name of the directory that contains it.

In [3]:
def import_data(dir, classifier, dimension):
  """Load every image below ``dir`` as a flattened greyscale pixel list.

  ``dir`` is expected to hold one sub-directory per class. Every file inside
  a sub-directory is opened as an image, converted to 8-bit greyscale,
  resized to ``dimension`` and labeled with ``classifier[<sub-directory name>]``.
  Entries of ``dir`` that are not directories are skipped.

  Args:
    dir: Root directory containing one sub-directory per class.
    classifier: Mapping from sub-directory name to class label.
    dimension: Target size passed to ``Image.resize``.

  Returns:
    A tuple ``(Xarr, Yarr)``: a list with one pixel list per image and the
    parallel list of labels.

  Raises:
    KeyError: If an image is found in a sub-directory whose name is not a
      key of ``classifier``.
  """
  Xarr = []
  Yarr = []
  print(f"*** Processing: {dir} ***")
  for path in os.listdir(dir):
      class_dir = os.path.join(dir, path)

      # Stray files at the top level are not class folders; listing them
      # would raise NotADirectoryError.
      if not os.path.isdir(class_dir):
          continue

      print(f"*** Processing: {path} ***")

      for filename in os.listdir(class_dir):
          print(f"Processing {path}-{filename}")

          # Open Image and resize. The context manager closes the file even
          # when decoding or resizing raises.
          with Image.open(os.path.join(class_dir, filename)) as im:
              resized_im = im.convert("L").resize(dimension)

          # Append values to arrays
          Xarr.append(list(resized_im.getdata()))
          Yarr.append(classifier[path])

  return Xarr, Yarr

In [4]:
# Image geometry: each image is resized to this size, one feature per pixel
image_dimension = (200,200)
features = int(np.prod(image_dimension))

# Dataset locations (one folder per split)
data_dir = "./drive/MyDrive/ML/Melanoma/skin-lesions"
train_dir, val_dir, test_dir = (
    f"{data_dir}/{split}" for split in ("train", "valid", "test")
)

# Class folder name -> integer label
class_vals = dict(nevus=0, seborrheic_keratosis=1, melanoma=2)


# Load the training and validation splits
X_train, Y_train = import_data(train_dir, class_vals, image_dimension)
X_val, Y_val = import_data(val_dir, class_vals, image_dimension)

# Turn the Python lists into 2-D arrays: one row per image, labels as a column
X_train = np.array(X_train).reshape(-1, features)
Y_train = np.array(Y_train).reshape(-1, 1)
X_val = np.array(X_val).reshape(-1, features)
Y_val = np.array(Y_val).reshape(-1, 1)

# Report the resulting shapes (training first, then validation)
print(f"X shape: {X_train.shape}")
print(f"Y shape: {Y_train.shape}")
print(f"X shape: {X_val.shape}")
print(f"Y shape: {Y_val.shape}")

Processing melanoma-ISIC_0011305.jpg
Processing melanoma-ISIC_0011166.jpg
Processing melanoma-ISIC_0011121.jpg
Processing melanoma-ISIC_0011140.jpg
Processing melanoma-ISIC_0011348.jpg
Processing melanoma-ISIC_0011349.jpg
Processing melanoma-ISIC_0011137.jpg
Processing melanoma-ISIC_0012290.jpg
Processing melanoma-ISIC_0012678.jpg
Processing melanoma-ISIC_0012382.jpg
Processing melanoma-ISIC_0012435.jpg
Processing melanoma-ISIC_0012391.jpg
Processing melanoma-ISIC_0012512.jpg
Processing melanoma-ISIC_0012450.jpg
Processing melanoma-ISIC_0012715.jpg
Processing melanoma-ISIC_0012749.jpg
Processing melanoma-ISIC_0012887.jpg
Processing melanoma-ISIC_0012988.jpg
Processing melanoma-ISIC_0012756.jpg
Processing melanoma-ISIC_0012701.jpg
Processing melanoma-ISIC_0012930.jpg
Processing melanoma-ISIC_0013141.jpg
Processing melanoma-ISIC_0013198.jpg
Processing melanoma-ISIC_0013079.jpg
Processing melanoma-ISIC_0013184.jpg
Processing melanoma-ISIC_0012990.jpg
Processing melanoma-ISIC_0013258.jpg
P

We also need to normalize the data; we will use z-score normalization.

In [5]:
def zScoreNormilization(X):
    """Z-score normalize ``X`` along axis 0.

    Each column has its mean subtracted and is divided by its standard
    deviation. Columns whose standard deviation is 0 (constant columns) are
    divided by 1 instead, so they become all zeros rather than NaN/inf.

    Args:
        X: Array-like of numbers; statistics are computed along axis 0.

    Returns:
        A tuple ``(normalized_x, mean, deviation)``. ``deviation`` already has
        zeros replaced by 1, so it can be reused to scale other data with the
        same statistics without dividing by zero.
    """
    mean = np.mean(X, axis = 0)
    deviation = np.std(X, axis = 0)

    # A constant column has zero deviation; dividing by it would yield NaN.
    deviation = np.where(deviation == 0, 1.0, deviation)

    normalized_x = (X - mean)/deviation

    return normalized_x, mean, deviation

# Fit the normalization statistics on the training set only
X_train, train_mean, train_deviation = zScoreNormilization(X_train)
# Scale the validation set with the training statistics
X_val = np.divide(X_val - train_mean, train_deviation)

We also need to shuffle the data so that the model sees different labels throughout its training

In [6]:
# One random ordering applied to both arrays keeps each image paired with its label
p = np.random.permutation(len(X_train))
X_train, Y_train = X_train[p], Y_train[p]

In [None]:
# Training hyper-parameters
epochs = 2000
batch_size = 50
reg_lambda = 0.00000  # L2 penalty factor; 0 means no regularization is applied

# Fully connected network: four ReLU hidden layers, then 3 linear outputs (one per class)
model = tf.keras.Sequential([
    tf.keras.Input(shape = (features,),),
    tf.keras.layers.Dense(50, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg_lambda)),
    tf.keras.layers.Dense(25, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg_lambda)),
    tf.keras.layers.Dense(10, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg_lambda)),
    tf.keras.layers.Dense(7, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(reg_lambda)),
    tf.keras.layers.Dense(units = 3, activation="linear")
])

model.summary()

model.compile(
    # The output layer is linear, so the loss is told it receives raw logits
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer = tf.keras.optimizers.Adam(learning_rate = 0.01),
    # Name the metric explicitly so the training history is keyed by
    # "accuracy" / "val_accuracy", which is what the plotting code reads.
    # The default name would be "sparse_categorical_accuracy".
    metrics = [tf.keras.metrics.SparseCategoricalAccuracy(name="accuracy")],
)

history = model.fit(
    X_train,
    Y_train,
    epochs=epochs,
    validation_data=(X_val,Y_val),
    batch_size = batch_size)




Model: "sequential_15"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_87 (Dense)            (None, 100)               4000100   
                                                                 
 dense_88 (Dense)            (None, 30)                3030      
                                                                 


# Plotting the Training Accuracy VS the Validation Accuracy

In [None]:
def setup_plot(ax, title, x_lim, y_lim):
    """Apply a title, axis limits and a legend to one subplot.

    Args:
        ax: The Axes-like object to configure.
        title: Title text for the subplot.
        x_lim: Limits passed to ``ax.set_xlim``.
        y_lim: Limits passed to ``ax.set_ylim``.
    """
    # Axes objects expose setters (set_title / set_xlim / set_ylim); the
    # bare title / xlim / ylim names are pyplot module functions, not
    # callable Axes methods.
    ax.set_title(title)
    ax.set_xlim(x_lim)
    ax.set_ylim(y_lim)
    ax.legend()

def plot_metrics(axs, history, epochs):
  """Draw the accuracy curves on ``axs[0]`` and the loss curves on ``axs[1]``.

  Args:
    axs: Indexable pair of Axes-like objects.
    history: Object whose ``history`` attribute maps metric names to
      per-epoch value sequences (as returned by ``model.fit``).
    epochs: Number of epochs; the x values plotted are ``range(epochs)``.

  Raises:
    KeyError: If the loss entries or neither accuracy entry is present.
  """
  metrics = history.history
  # The accuracy entry is keyed by the metric's name: "accuracy" when it was
  # named that way, "sparse_categorical_accuracy" for a default-named
  # SparseCategoricalAccuracy metric.
  acc_key = "accuracy" if "accuracy" in metrics else "sparse_categorical_accuracy"
  x = range(epochs)

  axs[0].plot(x, metrics[acc_key], label="Training Accuracy", color='blue')
  axs[0].plot(x, metrics[f"val_{acc_key}"], label="Validation Accuracy", color='red')
  axs[1].plot(x, metrics["loss"], label="Training Loss", color='blue')
  axs[1].plot(x, metrics["val_loss"], label="Validation Loss", color='red')



In [None]:
# Two side-by-side panels: accuracy on the left, loss on the right
fig, axs = plt.subplots(1, 2)
fig.set_size_inches((15,8))
plot_metrics(axs, history, epochs)

# Both panels share the epoch range on x; only the title and y range differ
for panel, panel_title, panel_y_lim in zip(axs, ("Accuracy", "Loss"), ([0,1], [0,3])):
    setup_plot(panel, panel_title, [0,epochs], panel_y_lim)

plt.show()

# Testing the model

In [86]:
# Load the held-out test split and shape it like the training data
X_test, Y_test = import_data(test_dir, class_vals, image_dimension)
X_test = np.array(X_test).reshape(-1, features)
Y_test = np.array(Y_test).reshape(-1, 1)
# Scale with the statistics computed on the training set
X_test = np.divide(X_test - train_mean, train_deviation)

*** Processing: ./drive/MyDrive/ML/Melanoma/skin-lesions/test ***
*** Processing: nevus ***
Processing nevus-ISIC_0012092.jpg
Processing nevus-ISIC_0012095.jpg
Processing nevus-ISIC_0012147.jpg
Processing nevus-ISIC_0012149.jpg
Processing nevus-ISIC_0012152.jpg
Processing nevus-ISIC_0012357.jpg
Processing nevus-ISIC_0012216.jpg
Processing nevus-ISIC_0012484.jpg
Processing nevus-ISIC_0012551.jpg
Processing nevus-ISIC_0012493.jpg
Processing nevus-ISIC_0012654.jpg
Processing nevus-ISIC_0012656.jpg
Processing nevus-ISIC_0012837.jpg
Processing nevus-ISIC_0012708.jpg
Processing nevus-ISIC_0012836.jpg
Processing nevus-ISIC_0012722.jpg
Processing nevus-ISIC_0012803.jpg
Processing nevus-ISIC_0012904.jpg
Processing nevus-ISIC_0012967.jpg
Processing nevus-ISIC_0012941.jpg
Processing nevus-ISIC_0012903.jpg
Processing nevus-ISIC_0013159.jpg
Processing nevus-ISIC_0013164.jpg
Processing nevus-ISIC_0013070.jpg
Processing nevus-ISIC_0013045.jpg
Processing nevus-ISIC_0013109.jpg
Processing nevus-ISIC_00

In [87]:

# Raw model outputs (logits) for every test image
predict = model.predict(X_test)

# Convert logits to probabilities, then take the most likely class per row
probabilities = tf.nn.softmax(predict)
final = np.argmax(probabilities, axis=1)

# Print the true label next to the logits and the predicted class
for actual, logits, predicted_class in zip(Y_test, predict, final):
  print(f"Actual: {actual}, Predicted: {logits} softmax: {predicted_class}")

Actual: [0], Predicted: [-26.65909   20.242018  10.220254] softmax: 1
Actual: [0], Predicted: [-15.911117   11.963892    6.1419873] softmax: 1
Actual: [0], Predicted: [-48.990288  37.441586  18.693718] softmax: 1
Actual: [0], Predicted: [-80.93556   62.045948  30.815203] softmax: 1
Actual: [0], Predicted: [-41.622543  31.76692   15.898066] softmax: 1
Actual: [0], Predicted: [-15.244495  11.450459   5.889041] softmax: 1
Actual: [0], Predicted: [-18.572937  14.014036   7.152002] softmax: 1
Actual: [0], Predicted: [-16.195324   12.182788    6.2498274] softmax: 1
Actual: [0], Predicted: [-23.15671   17.544472   8.891293] softmax: 1
Actual: [0], Predicted: [-60.739414  46.4908    23.151869] softmax: 1
Actual: [0], Predicted: [-16.170288  12.163507   6.240329] softmax: 1
Actual: [0], Predicted: [ 0.46471518 -1.3374559  -0.25267324] softmax: 0
Actual: [0], Predicted: [-68.43768   52.420036  26.072939] softmax: 1
Actual: [0], Predicted: [-4.85777    3.33083    1.9337976] softmax: 1
Actual: [0]