In [None]:
# ==============================
# Module 1: Install Dependencies
# ==============================
!pip install tensorflow --quiet


In [None]:
# ==============================
# Module 2: Import Libraries
# ==============================
import os
import zipfile
import requests
import shutil
import random
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.applications import DenseNet169
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from IPython.display import display
from ipywidgets import FileUpload
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
import numpy as np
from PIL import Image
import io



In [None]:
# ==============================
# Module 3: Download & Extract Dataset
# ==============================
# Remote location of the dataset archive fetched by download_dataset().
DATASET_URL = "https://prod-dcd-datasets-cache-zipfiles.s3.eu-west-1.amazonaws.com/mhjyrn35p4-2.zip"
# Local filename the archive is written to and later extracted from.
ZIP_FILE = "oral_images.zip"
# Root directory of the train/test folder tree built from the extracted images.
DATASET_DIR = "dataset"

def download_dataset():
    """Download the dataset archive to ``ZIP_FILE`` unless it already exists.

    The body is streamed into a temporary ``.part`` file that is renamed to
    ``ZIP_FILE`` only after the transfer finished, so a failed or interrupted
    download is never mistaken for a complete archive by the existence check
    on a later run.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    if os.path.exists(ZIP_FILE):
        print("ZIP file already exists, skipping download.")
        return

    print("Downloading dataset...")
    partial_path = ZIP_FILE + ".part"
    try:
        # Using the response as a context manager releases the connection
        # even when an error is raised part-way through.
        with requests.get(DATASET_URL, stream=True, timeout=60) as r:
            # Fail loudly instead of saving an error page as the ZIP file.
            r.raise_for_status()
            with open(partial_path, "wb") as f:
                # iter_content applies content decoding, which r.raw skips.
                for chunk in r.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        os.replace(partial_path, ZIP_FILE)
    finally:
        # After a successful rename the partial file no longer exists, so this
        # only cleans up after a failure.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    print("Download complete.")

def extract_dataset():
    """Unpack ``ZIP_FILE`` into ``oral_images`` unless that path already exists."""
    if os.path.exists("oral_images"):
        print(" Dataset already extracted.")
        return

    print("Extracting dataset...")
    with zipfile.ZipFile(ZIP_FILE, mode='r') as archive:
        archive.extractall(path="oral_images")
    print("Extraction complete.")

# Fetch the archive first; extraction reads the ZIP from disk.
for step in (download_dataset, extract_dataset):
    step()


In [None]:
# ==============================
# Module 4: Organize Dataset
# ==============================
def create_folders():
    """Create the train/test by Cancer/Non-Cancer directory tree under ``DATASET_DIR``.

    Directories that already exist are left untouched.
    """
    targets = [
        os.path.join(DATASET_DIR, split, cls)
        for split in ("train", "test")
        for cls in ("Cancer", "Non-Cancer")
    ]
    for target in targets:
        os.makedirs(target, exist_ok=True)

def move_images(src_dir, train_dir, test_dir, split_ratio=0.8, seed=42):
    """Copy the images found directly in ``src_dir`` into a train and a test folder.

    Despite the name, files are copied, not moved; ``src_dir`` is left intact.
    The split is deterministic for a given ``seed``, so re-running the notebook
    reproduces the same partition instead of copying a different shuffle on top
    of the previous one (which would put the same image in both folders).

    Args:
        src_dir: Directory whose ``.jpg``/``.jpeg``/``.png`` files are split.
        train_dir: Destination for the first ``split_ratio`` share of images.
        test_dir: Destination for the remaining images.
        split_ratio: Fraction of images assigned to ``train_dir``.
        seed: Seed for the shuffle. Pass ``None`` for a different random split
            on every call.
    """
    image_exts = (".jpg", ".png", ".jpeg")
    # os.listdir returns entries in arbitrary order; sorting makes the shuffle
    # depend on the seed alone.
    images = sorted(f for f in os.listdir(src_dir) if f.lower().endswith(image_exts))
    # A private generator keeps the split independent of the global random state.
    random.Random(seed).shuffle(images)
    split_idx = int(len(images) * split_ratio)

    for dest_dir, subset in ((train_dir, images[:split_idx]), (test_dir, images[split_idx:])):
        os.makedirs(dest_dir, exist_ok=True)
        for img in subset:
            shutil.copy(os.path.join(src_dir, img), os.path.join(dest_dir, img))

def split_dataset():
    """Locate the benign and malignant image folders and split each into train/test.

    Walks ``oral_images`` and remembers the last directory whose name contains
    "Benign"/"benign" and the last one containing "Malignant"/"malignant"
    (a name matching both counts as benign). Benign images are copied to the
    "Non-Cancer" class folders and malignant images to the "Cancer" ones.

    Raises:
        Exception: if either folder could not be found.
    """
    print(" Organizing dataset into train/test...")

    located = {"benign": None, "malignant": None}
    for parent, subdirs, _ in os.walk("oral_images"):
        for sub in subdirs:
            if any(tag in sub for tag in ("Benign", "benign")):
                located["benign"] = os.path.join(parent, sub)
            elif any(tag in sub for tag in ("Malignant", "malignant")):
                located["malignant"] = os.path.join(parent, sub)

    if not (located["benign"] and located["malignant"]):
        raise Exception("Could not find Benign/Malignant folders in dataset.")

    # Benign is processed first, then malignant.
    for source_key, class_name in (("benign", "Non-Cancer"), ("malignant", "Cancer")):
        move_images(
            located[source_key],
            os.path.join(DATASET_DIR, "train", class_name),
            os.path.join(DATASET_DIR, "test", class_name),
        )
    print(" Dataset organized successfully.")

# Build the folder tree first, then populate it with the split images.
for stage in (create_folders, split_dataset):
    stage()


In [None]:
# ==============================
# Module 5: Train DenseNet169 Model
# ==============================
# Hyper-parameters
IMG_SIZE = (128, 128)
BATCH_SIZE = 32
EPOCHS = 10

# Random augmentation is applied to the training images only; the test images
# are just rescaled to [0, 1].
augmentation = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation)
test_datagen = ImageDataGenerator(rescale=1./255)

# Both generators read class sub-folders with identical sizing and batching.
flow_options = dict(target_size=IMG_SIZE, batch_size=BATCH_SIZE, class_mode='binary')
train_generator = train_datagen.flow_from_directory(
    os.path.join(DATASET_DIR, "train"), **flow_options
)
test_generator = test_datagen.flow_from_directory(
    os.path.join(DATASET_DIR, "test"), **flow_options
)

# Base model: ImageNet-pretrained DenseNet169 without its classification head.
# The input shape is derived from IMG_SIZE so the network always matches the
# size the data generators produce.
base_model = DenseNet169(weights='imagenet', include_top=False, input_shape=IMG_SIZE + (3,))
# Freeze the pretrained layers; only the custom head below is trained.
for layer in base_model.layers:
    layer.trainable = False

# Custom layers: a small dense head ending in a single sigmoid unit for the
# binary classification task.
x = Flatten()(base_model.output)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks
os.makedirs("model", exist_ok=True)
# Keeps on disk the weights of the epoch with the highest validation accuracy.
checkpoint = ModelCheckpoint("model/densenet169_binary_classifier.h5", monitor='val_accuracy', save_best_only=True, mode='max')
# Stop after 5 epochs without val_loss improvement. restore_best_weights rolls
# the in-memory model back to its best epoch, so whatever is saved from `model`
# afterwards is not up to `patience` epochs past its best.
# NOTE(review): some older Keras releases restore only when training actually
# stopped early -- confirm against the installed version.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, mode='min', restore_best_weights=True)

# Training
# NOTE(review): the test split doubles as validation data here, so the reported
# validation metrics also drive checkpointing and early stopping.
history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=test_generator,
    callbacks=[checkpoint, early_stopping]
)


In [None]:
# ==============================
# Module 6: Save & Download Model
# ==============================
model.save("final_densenet169_model.h5")
print("Model saved as final_densenet169_model.h5")

# The browser download helper only exists on Google Colab. Elsewhere the model
# is already on disk, so skip the download instead of failing the cell.
try:
    from google.colab import files
except ImportError:
    print("google.colab is not available; skipping browser download.")
else:
    files.download("final_densenet169_model.h5")


In [None]:


# Load model once
model = load_model("final_densenet169_model.h5")

# Get expected input shape (e.g., 128x128x3)
input_shape = model.input_shape[1:3]  # (height, width)

# Upload widget
upload = FileUpload(accept='image/*', multiple=False)
display(upload)

def _uploaded_files(value):
    """Return the uploaded file records held in a FileUpload ``value``.

    ipywidgets 7 exposes a dict mapping filename to record, while ipywidgets 8
    exposes a tuple of records; both are normalised to a list of records.
    """
    if isinstance(value, dict):
        return list(value.values())
    return list(value)

def handle_upload(change):
    """Classify every uploaded image and print its label and confidence.

    Args:
        change: Change notification passed by the widget's observe mechanism;
            the current files are read from ``upload.value``.
    """
    height, width = input_shape
    for file_info in _uploaded_files(upload.value):
        # Force 3 channels (uploads may be grayscale, RGBA or palette images)
        # and resize to the model input; PIL expects (width, height).
        img = Image.open(io.BytesIO(file_info['content'])).convert("RGB").resize((width, height))
        img_array = image.img_to_array(img)
        img_array = np.expand_dims(img_array, axis=0)
        img_array /= 255.0  # same rescaling as used for the training data

        # Predict
        prediction = float(model.predict(img_array)[0][0])  # plain Python float

        # Decode label: the sigmoid output is the score for class index 1.
        # Assumes the training folders "Cancer"/"Non-Cancer" were indexed in
        # alphanumeric order (Cancer=0, Non-Cancer=1) -- confirm with
        # train_generator.class_indices.
        label = "NonCancerous" if prediction >= 0.5 else "Cancerous"
        confidence = round(prediction * 100, 2) if prediction >= 0.5 else round((1 - prediction) * 100, 2)

        # Output
        print(f"Prediction: {label}")
        print(f"Confidence: {confidence}%")

# Trigger prediction when file is uploaded
upload.observe(handle_upload, names='value')