In [15]:
# Cell 1: Install Required Libraries
!pip install tensorflow kagglehub lime gradio -q

In [16]:
# Cell 2: Imports and GPU Check
import pandas as pd
import numpy as np
import os
from glob import glob
from PIL import Image
import matplotlib.pyplot as plt
import gradio as gr

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# LIME imports
from lime import lime_image
from skimage.segmentation import mark_boundaries

# kagglehub import
import kagglehub

# Check for GPU
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  print('GPU device not found. Please ensure you have selected a GPU runtime.')
else:
  print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [7]:
# Cell 3: Download and Prepare the Dataset

# --- Configuration ---
IMG_WIDTH, IMG_HEIGHT = 224, 224

# --- Download dataset using kagglehub ---
print("Downloading dataset from KaggleHub...")
dataset_path = kagglehub.dataset_download("kmader/skin-cancer-mnist-ham10000")
print(f"Dataset downloaded to: {dataset_path}")

# --- Load metadata and create file paths ---
metadata_path = os.path.join(dataset_path, 'HAM10000_metadata.csv')
df = pd.read_csv(metadata_path)

image_paths = glob(os.path.join(dataset_path, '*', '*.jpg'))
image_id_to_path = {os.path.splitext(os.path.basename(p))[0]: p for p in image_paths}
df['image_path'] = df['image_id'].map(image_id_to_path.get)

# --- Create a simplified binary target ---
# 1 for malignant (mel, bcc, akiec), 0 for benign
df['is_malignant'] = df['dx'].apply(lambda x: 1 if x in ['mel', 'bcc', 'akiec'] else 0)
df['is_malignant'] = df['is_malignant'].astype(str) # For flow_from_dataframe

df = df.dropna(subset=['image_path']) # Drop rows with missing images

# --- Split the data ---
train_df, val_df = train_test_split(
    df, test_size=0.2, random_state=42, stratify=df['is_malignant']
)

print(f"Training samples: {len(train_df)}")
print(f"Validation samples: {len(val_df)}")

Downloading dataset from KaggleHub...
Using Colab cache for faster access to the 'skin-cancer-mnist-ham10000' dataset.
Dataset downloaded to: /kaggle/input/skin-cancer-mnist-ham10000
Training samples: 8012
Validation samples: 2003


In [9]:
# Cell 4: Create Data Generators and Build the Model

# --- Data Augmentation ---
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

val_datagen = ImageDataGenerator(rescale=1./255)

# --- Create Generators ---
BATCH_SIZE = 32
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col='image_path',
    y_col='is_malignant',
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode='binary'
)

validation_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    x_col='image_path',
    y_col='is_malignant',
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode='binary',
    shuffle=False
)

# --- Model Building (Transfer Learning) ---
def build_model():
    base_model = MobileNetV2(
        weights='imagenet',
        include_top=False,
        input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)
    )
    base_model.trainable = False # Freeze base layers

    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.5)(x)
    predictions = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=base_model.input, outputs=predictions)

    model.compile(
        optimizer=Adam(learning_rate=0.0001),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

model = build_model()
model.summary()

Found 8012 validated image filenames belonging to 2 classes.
Found 2003 validated image filenames belonging to 2 classes.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


In [10]:
# Cell 5: Train the Model

EPOCHS = 10 # You can increase this if you have time

history = model.fit(
    train_generator,
    steps_per_epoch=train_generator.samples // BATCH_SIZE,
    validation_data=validation_generator,
    validation_steps=validation_generator.samples // BATCH_SIZE,
    epochs=EPOCHS
)

print("--- Model Training Finished ---")

  self._warn_if_super_not_called()


Epoch 1/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m225s[0m 828ms/step - accuracy: 0.7698 - loss: 0.5379 - val_accuracy: 0.8130 - val_loss: 0.4002
Epoch 2/10
[1m  1/250[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10s[0m 41ms/step - accuracy: 0.7188 - loss: 0.5444



[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 53ms/step - accuracy: 0.7188 - loss: 0.5444 - val_accuracy: 0.8140 - val_loss: 0.3999
Epoch 3/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 565ms/step - accuracy: 0.8147 - loss: 0.4089 - val_accuracy: 0.8065 - val_loss: 0.3841
Epoch 4/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 55ms/step - accuracy: 0.8438 - loss: 0.4288 - val_accuracy: 0.8070 - val_loss: 0.3844
Epoch 5/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 565ms/step - accuracy: 0.8296 - loss: 0.3791 - val_accuracy: 0.8135 - val_loss: 0.3745
Epoch 6/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 53ms/step - accuracy: 0.8438 - loss: 0.3410 - val_accuracy: 0.8125 - val_loss: 0.3744
Epoch 7/10
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m202s[0m 810ms/step - accuracy: 0.8342 -

In [17]:
# Cell 6: Create the Prediction Function for the App

def preprocess_for_prediction(image):
    """Preprocesses a single image for the model."""
    image = image.resize((224, 224))
    image_array = tf.keras.preprocessing.image.img_to_array(image)
    image_array = np.expand_dims(image_array, axis=0)
    image_array /= 255.0
    return image_array

def get_lime_explanation(image_array, model):
    """Generates a LIME explanation."""
    explainer = lime_image.LimeImageExplainer()

    def lime_prediction_fn(images):
        preds = model.predict(images)
        return np.hstack((1 - preds, preds))

    explanation = explainer.explain_instance(
        image_array[0], lime_prediction_fn, top_labels=1, hide_color=0, num_samples=1000
    )
    temp, mask = explanation.get_image_and_mask(
        explanation.top_labels[0], positive_only=True, num_features=5, hide_rest=False
    )
    return mark_boundaries(temp / 2 + 0.5, mask)

# This is the master function for Gradio
def predict_and_explain(input_image):
    """Takes an image, gets a prediction and explanation, and returns them."""

    # Preprocess, predict, and get confidence
    preprocessed_image = preprocess_for_prediction(input_image)
    prediction = model.predict(preprocessed_image)[0][0]

    # Create the prediction label
    confidence_malignant = prediction
    confidence_benign = 1 - prediction

    if confidence_malignant > 0.5:
        risk_level = "High Risk"
        advice = "⚠️ High Risk Detected: Strongly recommend consulting a dermatologist for a professional evaluation."
    else:
        risk_level = "Low Risk"
        advice = "✅ Low Risk Detected: Continue with regular skin checks and consult a doctor if you notice any changes."

    # Format the confidence scores for display
    confidences = {
        'Malignant': float(confidence_malignant),
        'Benign': float(confidence_benign)
    }

    # Get LIME explanation
    explanation_image = get_lime_explanation(preprocessed_image, model)

    return confidences, explanation_image, advice

In [18]:
# Cell 7: Launch the DermaDetectAI App!

# Define the user interface
with gr.Blocks(theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🔬 DermaDetectAI: Skin Lesion Analyzer")
    gr.Markdown("Upload a skin lesion image to get a risk assessment and an explanation of the prediction. **This is not a medical diagnosis.**")

    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Image")
        with gr.Column():
            confidence_output = gr.Label(label="Risk Confidence")
            explanation_output = gr.Image(label="Prediction Explanation (LIME)")

    advice_output = gr.Markdown(label="Suggested Action Plan")

    # Connect the UI to the prediction function
    image_input.change(
        predict_and_explain,
        inputs=image_input,
        outputs=[confidence_output, explanation_output, advice_output]
    )

# Launch the app
app.launch(debug=True, share=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://45cc7df23c94ddeac5.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step


  0%|          | 0/1000 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70

Traceback (most recent call last):
  File "/usr/local/lib/python3.12/dist-packages/gradio/queueing.py", line 759, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/gradio/route_utils.py", line 354, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/gradio/blocks.py", line 2116, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/gradio/blocks.py", line 1623, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
           ^^^^^

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step


  0%|          | 0/1000 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58

