In [2]:
#import all libraries necessary

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from glob import glob
import seaborn as sns
from PIL import Image

np.random.seed(42)
from sklearn.metrics import confusion_matrix

import tensorflow as tf
from tensorflow.keras.utils import to_categorical # used for converting labels to one-hot-encoding
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from tensorflow.keras import regularizers
from sklearn.model_selection import train_test_split
from scipy import stats
from sklearn.preprocessing import LabelEncoder
SIZE = 32

In [3]:
skin_df = pd.read_csv('HAM10000_metadata.csv')
#load in the data

In [4]:
le = LabelEncoder()
le.fit(skin_df['dx'])
LabelEncoder()
print(list(le.classes_))
 
skin_df['label'] = le.transform(skin_df["dx"]) 
print(skin_df.sample(10))

#assign labels (0-6) to string labels like akiec, bcc, and more

['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']
        lesion_id      image_id   dx    dx_type   age     sex  \
1617  HAM_0007180  ISIC_0033272  mel      histo  65.0    male   
8128  HAM_0007195  ISIC_0031923   nv      histo  40.0  female   
2168  HAM_0001835  ISIC_0026652  mel      histo  65.0    male   
1090  HAM_0000465  ISIC_0030583  bkl  consensus  35.0  female   
7754  HAM_0001720  ISIC_0034010   nv      histo  45.0    male   
8071  HAM_0006333  ISIC_0024424   nv      histo  35.0    male   
7423  HAM_0004548  ISIC_0032832   nv      histo  45.0  female   
8984  HAM_0006526  ISIC_0026671   nv      histo  55.0    male   
2310  HAM_0003102  ISIC_0032389  mel      histo  65.0    male   
7256  HAM_0004260  ISIC_0025525   nv      histo  65.0    male   

         localization  label  
1617             face      4  
8128  lower extremity      5  
2168             back      4  
1090            trunk      2  
7754          abdomen      5  
8071            trunk      5  
7423  upper extr

In [5]:
from sklearn.utils import resample
print(skin_df['label'].value_counts())

#finds how much of each skin cancer type is in the data

label
5    6705
4    1113
2    1099
1     514
0     327
6     142
3     115
Name: count, dtype: int64


In [6]:
df_0 = skin_df[skin_df['label'] == 0]
df_1 = skin_df[skin_df['label'] == 1]
df_2 = skin_df[skin_df['label'] == 2]
df_3 = skin_df[skin_df['label'] == 3]
df_4 = skin_df[skin_df['label'] == 4]
df_5 = skin_df[skin_df['label'] == 5]
df_6 = skin_df[skin_df['label'] == 6]

In [7]:
n_samples=500
df_0_balanced = resample(df_0, replace=True, n_samples=n_samples, random_state=42) 
df_1_balanced = resample(df_1, replace=True, n_samples=n_samples, random_state=42) 
df_2_balanced = resample(df_2, replace=True, n_samples=n_samples, random_state=42)
df_3_balanced = resample(df_3, replace=True, n_samples=n_samples, random_state=42)
df_4_balanced = resample(df_4, replace=True, n_samples=n_samples, random_state=42)
df_5_balanced = resample(df_5, replace=True, n_samples=n_samples, random_state=42)
df_6_balanced = resample(df_6, replace=True, n_samples=n_samples, random_state=42)

In [8]:
skin_df_balanced = pd.concat([df_0_balanced, df_1_balanced, 
                              df_2_balanced, df_3_balanced, 
                              df_4_balanced, df_5_balanced, df_6_balanced])

# Data augmentation: balance the data, now every skin cancer type has 500 samples 

In [9]:
skin_df_balanced.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,label
9789,HAM_0003136,ISIC_0026645,akiec,histo,65.0,male,back,0
9957,HAM_0006587,ISIC_0025780,akiec,histo,70.0,male,face,0
9793,HAM_0005505,ISIC_0024450,akiec,histo,50.0,male,upper extremity,0
9758,HAM_0003455,ISIC_0027896,akiec,histo,75.0,male,hand,0
9875,HAM_0005459,ISIC_0029268,akiec,histo,85.0,male,upper extremity,0


In [10]:
print(skin_df_balanced['label'].value_counts())

label
0    500
1    500
2    500
3    500
4    500
5    500
6    500
Name: count, dtype: int64


In [11]:
image_path = {os.path.splitext(os.path.basename(x))[0]: x
                     for x in glob(os.path.join('HAM10000/', '*', '*.jpg'))}

In [16]:
#Define the path and add as a new column
skin_df_balanced['path'] = skin_df['image_id'].map(image_path.get)
#Use the path to read images.
skin_df_balanced['image'] = skin_df_balanced['path'].map(lambda x: np.asarray(Image.open(x).resize((SIZE,SIZE))))

AttributeError: 'NoneType' object has no attribute 'read'

In [133]:
#convert dataframe column of images into numpy array
X = np.asarray(skin_df_balanced['image'].tolist())
X = X/255.  # scale values to 0-1. Using standardscaler or other scaling methods.
Y=skin_df_balanced['label']  #assign label values to Y
Y_cat = to_categorical(Y, num_classes=7) #Convert to categorical as this is a multiclass classification problem
#Split to training and testing
x_train, x_test, y_train, y_test = train_test_split(X, Y_cat, test_size=0.2, random_state=42)

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.25, random_state=42) # 0.25 x 0.8 = 0.2

#splitting data into test, validation, and train

In [134]:
num_classes = 7

model = Sequential()
model.add(Conv2D(256, (3, 3), activation="relu", input_shape=(SIZE, SIZE, 3)))
model.add(MaxPool2D(pool_size=(2, 2)))  
model.add(Dropout(0.3))

model.add(Conv2D(128, (3, 3),activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))  
model.add(Dropout(0.3))

model.add(Conv2D(64, (3, 3),activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))  
model.add(Dropout(0.3))
model.add(Flatten())

model.add(Dense(32))
model.add(Dense(7, activation='softmax'))
model.summary()

#creating the model

  super().__init__(


In [135]:
model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['acc'])

#assign loss function and optimizers to improve preformance

In [141]:
batch_size = 32
epochs = 75

#assigns values for how the model trains and how many iterations it trains for

In [143]:
history = model.fit(
    x_train, y_train,
    epochs=epochs,
    batch_size = batch_size,
    validation_data=(x_val, y_val),
    verbose=2)
#trains the model

Epoch 1/75
66/66 - 4s - 59ms/step - acc: 0.8386 - loss: 0.4430 - val_acc: 0.7529 - val_loss: 0.8132
Epoch 2/75
66/66 - 4s - 62ms/step - acc: 0.8243 - loss: 0.4637 - val_acc: 0.7529 - val_loss: 0.8231
Epoch 3/75
66/66 - 4s - 61ms/step - acc: 0.8300 - loss: 0.4668 - val_acc: 0.7414 - val_loss: 0.8185
Epoch 4/75
66/66 - 4s - 63ms/step - acc: 0.8210 - loss: 0.4938 - val_acc: 0.7429 - val_loss: 0.8402
Epoch 5/75
66/66 - 4s - 61ms/step - acc: 0.8305 - loss: 0.4500 - val_acc: 0.7457 - val_loss: 0.8147
Epoch 6/75
66/66 - 4s - 64ms/step - acc: 0.8286 - loss: 0.4732 - val_acc: 0.7386 - val_loss: 0.8204
Epoch 7/75
66/66 - 4s - 62ms/step - acc: 0.8333 - loss: 0.4497 - val_acc: 0.7457 - val_loss: 0.8183
Epoch 8/75
66/66 - 4s - 61ms/step - acc: 0.8229 - loss: 0.4695 - val_acc: 0.7286 - val_loss: 0.8145
Epoch 9/75
66/66 - 4s - 61ms/step - acc: 0.8305 - loss: 0.4776 - val_acc: 0.7414 - val_loss: 0.8257
Epoch 10/75
66/66 - 4s - 66ms/step - acc: 0.8300 - loss: 0.4548 - val_acc: 0.7500 - val_loss: 0.8699

In [144]:
model.evaluate(x_test, y_test)

[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - acc: 0.7684 - loss: 0.8684


[0.8559561371803284, 0.7714285850524902]

In [147]:
model.save('skin_cancer_model.keras')

In [155]:
import gradio as gr
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from PIL import Image

# Load the trained model
model = load_model('skin_cancer_model.h5')

# Define class labels
class_labels = [
    'Actinic keratoses (akiec)', 'Basal cell carcinoma (bcc)', 
    'Benign keratosis-like lesions (bkl)', 'Dermatofibroma (df)', 
    'Melanoma (mel)', 'Melanocytic nevi (nv)', 'Vascular lesions (vasc)'
]

# Define the prediction function
def predict_skin_cancer(img):
    img = img.resize((32, 32))  # Resize image to match the input size of the model
    img = np.array(img) / 255.0  # Normalize the image
    img = np.expand_dims(img, axis=0)  # Add batch dimension

    prediction = model.predict(img)
    predicted_class = class_labels[np.argmax(prediction)]
    confidence = np.max(prediction)

    return predicted_class, confidence

# Create Gradio interface
iface = gr.Interface(
    fn=predict_skin_cancer,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Textbox(label="Predicted Class"), gr.Textbox(label="Confidence")],
    title="SPOT - Medical Image Diagnosis",
    description="Upload an image of a skin lesion and the model will predict the type of skin cancer."
)

# Launch the interface
iface.launch()




Running on local URL:  http://127.0.0.1:7870

To create a public link, set `share=True` in `launch()`.




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
