# CNN Model

This Convolutional Neural Network will identify features in an image and will pass these features onto the XGBoost model to determine the nature of the skin lesion.

## Step 1: Import Libraries

We will be importing the necessary libraries needed in order to develop this CNN.

In [72]:
# Import Necessary Libraries
import numpy as np
import tensorflow as tf
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import kagglehub

## Step 2: Initializing Data

Process the data so that the model will be able to understand it. We will be using Kaggle's Skin Cancer Database.

In [73]:
# Step 2.1: Load Dataset
dataset_path = kagglehub.dataset_download("farjanakabirsamanta/skin-cancer-dataset") # Get Data from Kaggle Database
print("Data set path: ", dataset_path + "/HAM10000_metadata.csv")

df = pd.read_csv(dataset_path + "/HAM10000_metadata.csv")
df.head() # See data in csv form before processing

Data set path:  C:\Users\leyan\.cache\kagglehub\datasets\farjanakabirsamanta\skin-cancer-dataset\versions\1/HAM10000_metadata.csv


Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


In [97]:
# Step 2.2: Processing Data
# We will be using Pillow to load in the data as an array
from PIL import Image

# Receive Notification after data is fully loaded into this array :)
from notifypy import Notify

# @CAN CHANGE:
NUMBER_IMAGES = 500
send_notif = True

if (send_notif):
    notification = Notify()
    notification.title = "Started Loading Data"
    notification.message = "Currently loading: " + str(NUMBER_IMAGES) + " Images"
    notification.send()

images = np.array([np.asarray(Image.open(dataset_path + "/Skin Cancer/Skin Cancer/" + id + ".jpg")) for id in df.loc[0:NUMBER_IMAGES-1, "image_id"]])
print(images[0][100, 100])

if(send_notif):
    notification = Notify()
    notification.title = "Data Finished Loading"
    notification.message = "Successfully loaded: " + str(NUMBER_IMAGES) + " Images"
    notification.send()

# Create Labels
from sklearn.preprocessing import LabelEncoder
encoder = LabelEncoder()
df["dx"] = encoder.fit_transform(df["dx"])
df.head()


[215 170 199]


Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,2,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,2,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,2,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,2,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,2,histo,75.0,male,ear


## Step 3: Train Model

In this step, we will create the CNN models and train them to identify structures in an image.

In [None]:
# Main Model
model = Sequential([
    Conv2D(32, (5, 5), activation="relu", input_shape=(450, 600, 3), use_bias=True),
    MaxPooling2D((2, 2)),
    #Conv2D(64, (3, 3), activation="relu", use_bias=True),
    #MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation="relu"),
    Dense(1, activation="sigmoid")
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

print(images.shape)
model.fit(images, df.loc[0:NUMBER_IMAGES-1, "dx"], epochs=5)

if(send_notif):
    notification = Notify()
    notification.title("Model Finished Training")
    notification.message("The CNN model has finished training.")
    notification.send()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


(500, 450, 600, 3)
Epoch 1/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 4s/step - accuracy: 0.0000e+00 - loss: -1902030.6250
Epoch 2/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 4s/step - accuracy: 0.0000e+00 - loss: -82047008.0000
Epoch 3/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 4s/step - accuracy: 0.0000e+00 - loss: -869117888.0000
Epoch 4/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 5s/step - accuracy: 0.0000e+00 - loss: -4865954816.0000
Epoch 5/5
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m73s[0m 4s/step - accuracy: 0.0000e+00 - loss: -18413463552.0000


TypeError: 'str' object is not callable

## Step 4: Testing the Model

In this step, we will test the model for its accuracy.

In [85]:
import tkinter

# CAN CHANGE
TEST_IMAGES = 100
test_images = np.array([np.asarray(Image.open(dataset_path + "/Skin Cancer/Skin Cancer/" + id + ".jpg")) for id in df.loc[NUMBER_IMAGES:TEST_IMAGES-1, "image_id"]])

predictions = model.predict(test_images)

resultsdf = pd.DataFrame({
    "Image":    test_images,
    "Result":   predictions,
    "Actual":   df.loc[NUMBER_IMAGES, TEST_IMAGES-1, "dx"]
})



ValueError: math domain error