In [4]:
import pandas as pd

# === Step 1: read the full stage-1 table from disk ===
df = pd.read_csv("dataset/milk10k_stage1_full.csv")

# === Step 2: names of the per-class indicator columns ===
class_cols = "AKIEC BCC BEN_OTH BKL DF INF MAL_OTH MEL NV SCCKA VASC".split()

# === Step 3: collapse the indicator columns into one label column ===
# For every row, idxmax returns the name of the column holding the largest
# value, i.e. the class whose indicator is set.
one_hot = df.loc[:, class_cols]
df["target"] = one_hot.idxmax(axis="columns")

# === Step 4: the indicator columns are redundant once `target` exists ===
df_clean = df.drop(class_cols, axis="columns")

# === Step 5 (optional): persist the cleaned table for the later cells ===
df_clean.to_csv("milk10k_stage1_clean.csv", index=False)

# Show the first rows as the cell output
df_clean.head()


Unnamed: 0,lesion_id,image_type,isic_id,attribution,copyright_license,image_manipulation,age_approx,sex,skin_tone_class,site,...,MONET_vasculature_vessels,MONET_erythema,MONET_pigmented,MONET_gel_water_drop_fluid_dermoscopy_liquid,MONET_skin_markings_pen_ink_purple_pen,diagnosis,label,image_path,metadata_vector,target
0,IL_0000652,dermoscopic,ISIC_4671410,MILK study team,CC-BY-NC,instrument only,70.0,1,1,3,...,0.016397,0.032357,0.847014,0.138121,0.148776,BCC,1,dataset/MILK10k_Training_Input\IL_0000652\ISIC...,"[70.0, 1, 1, 3]",BCC
1,IL_0003176,dermoscopic,ISIC_5371928,MILK study team,CC-BY-NC,instrument only,45.0,0,5,3,...,0.367882,0.645776,0.122108,0.719937,0.329812,BCC,1,dataset/MILK10k_Training_Input\IL_0003176\ISIC...,"[45.0, 0, 5, 3]",BCC
2,IL_0004688,dermoscopic,ISIC_3624913,MILK study team,CC-BY-NC,instrument only,50.0,1,3,4,...,0.196453,0.519808,0.058424,0.319812,0.262883,BCC,1,dataset/MILK10k_Training_Input\IL_0004688\ISIC...,"[50.0, 1, 3, 4]",BCC
3,IL_0005081,dermoscopic,ISIC_5186409,MILK study team,CC-BY-NC,instrument only,45.0,1,3,3,...,0.444257,0.272836,0.25983,0.384243,0.220156,SCCKA,9,dataset/MILK10k_Training_Input\IL_0005081\ISIC...,"[45.0, 1, 3, 3]",SCCKA
4,IL_0006177,dermoscopic,ISIC_1048297,MILK study team,CC-BY-NC,instrument only,75.0,1,3,7,...,0.391223,0.260695,0.276002,0.283951,0.661556,BCC,1,dataset/MILK10k_Training_Input\IL_0006177\ISIC...,"[75.0, 1, 3, 7]",BCC


In [5]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Distinct class labels (np.unique returns them sorted); computed once and
# reused for both the weight computation and the displayed mapping.
class_names = np.unique(df["target"])

# "balanced" weighting as implemented by scikit-learn's compute_class_weight
weights = compute_class_weight(class_weight="balanced", classes=class_names, y=df["target"])

# Cell output: class name -> weight
{name: weight for name, weight in zip(class_names, weights)}


{'AKIEC': np.float64(1.5721572157215722),
 'BCC': np.float64(0.18888328166678683),
 'BEN_OTH': np.float64(10.826446280991735),
 'BKL': np.float64(0.8756684491978609),
 'DF': np.float64(9.160839160839162),
 'INF': np.float64(9.527272727272727),
 'MAL_OTH': np.float64(52.92929292929293),
 'MEL': np.float64(1.0585858585858585),
 'NV': np.float64(0.6385571533024617),
 'SCCKA': np.float64(1.0071112819527195),
 'VASC': np.float64(10.135396518375241)}

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.utils import resample

# Reload the table written by the cleaning cell
df = pd.read_csv("milk10k_stage1_clean.csv")

# ------------------------------
# 1. Hold out 20% for validation, stratified on the label
# ------------------------------
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df["target"], random_state=42)

# ------------------------------
# 2. Pad small classes (training split only) by sampling with replacement
# ------------------------------
TARGET_SAMPLES = 300      # minimum number of training rows wanted per class


def _pad_to_target(class_rows):
    """Return `class_rows` unchanged if it already has TARGET_SAMPLES rows,
    otherwise a with-replacement resample of exactly TARGET_SAMPLES rows."""
    if len(class_rows) >= TARGET_SAMPLES:
        return class_rows
    return resample(class_rows, replace=True, n_samples=TARGET_SAMPLES, random_state=42)


# One frame per class, in order of first appearance in the training split
balanced_list = [
    _pad_to_target(train_df[train_df["target"] == cls])
    for cls in train_df["target"].unique()
]

# Stack the per-class frames, shuffle the rows, and renumber the index
shuffled = pd.concat(balanced_list).sample(frac=1, random_state=42)
train_balanced = shuffled.reset_index(drop=True)

# ------------------------------
# 3. Report the resulting class counts and a preview
# ------------------------------
print(train_balanced["target"].value_counts())
print(train_balanced.head())

# ------------------------------
# 4. Write both splits to disk
# ------------------------------
train_balanced.to_csv("train_balanced.csv", index=False)
val_df.to_csv("val.csv", index=False)

print("Saved: train_balanced.csv and val.csv")


target
BCC        2018
NV          597
BKL         435
SCCKA       378
MEL         360
INF         300
DF          300
VASC        300
BEN_OTH     300
MAL_OTH     300
AKIEC       300
Name: count, dtype: int64
    lesion_id   image_type       isic_id      attribution copyright_license  \
0  IL_0972770  dermoscopic  ISIC_5214703  MILK study team          CC-BY-NC   
1  IL_3673925  dermoscopic  ISIC_6898183  MILK study team          CC-BY-NC   
2  IL_0375356  dermoscopic  ISIC_4117748  MILK study team          CC-BY-NC   
3  IL_9439639  dermoscopic  ISIC_8126703  MILK study team          CC-BY-NC   
4  IL_1366322  dermoscopic  ISIC_1209022  MILK study team          CC-BY-NC   

  image_manipulation  age_approx  sex  skin_tone_class  site  ...  \
0    instrument only        25.0    1                4     6  ...   
1    instrument only        55.0    0                2     7  ...   
2    instrument only        75.0    1                2     3  ...   
3    instrument only        60.0    0   

In [9]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import numpy as np

class LesionMultimodalDataset(Dataset):
    """Dataset yielding an image plus tabular features for one DataFrame row.

    Each item is a dict with keys ``image``, ``monet``, ``meta``,
    ``embedding`` and ``label``.
    """

    # Class name -> integer index. Built once instead of on every item.
    LABEL_MAP = {
        "AKIEC": 0, "BCC": 1, "BEN_OTH": 2, "BKL": 3, "DF": 4,
        "INF": 5, "MAL_OTH": 6, "MEL": 7, "NV": 8, "SCCKA": 9, "VASC": 10,
    }

    def __init__(self, df, transform=None, use_embeddings=True):
        """
        df: DataFrame with ``image_path``, ``metadata_vector``, ``target``
            and any number of ``MONET_*`` columns
        transform: callable applied to the loaded PIL image (optional)
        use_embeddings: whether to read columns embedding_0 ... embedding_4
        """
        self.df = df.reset_index(drop=True)
        self.transform = transform
        self.use_embeddings = use_embeddings

        # Every column whose name starts with "MONET_" is a MONET feature
        self.monet_cols = [col for col in df.columns if col.startswith("MONET_")]

        # Always define the attribute so it exists even when embeddings are off
        self.embed_cols = [f"embedding_{i}" for i in range(5)] if use_embeddings else []

    def __len__(self):
        return len(self.df)

    @staticmethod
    def _row_to_tensor(row, columns):
        """Convert the selected entries of a row to a float32 tensor.

        A row taken from a mixed-dtype DataFrame has dtype ``object``;
        ``torch.tensor`` cannot convert object arrays, so the values are cast
        to float32 with NumPy first.
        """
        return torch.tensor(row[columns].to_numpy(dtype=np.float32))

    @staticmethod
    def _parse_vector(raw):
        """Turn a stored ``"[a, b, c]"`` string into a Python list.

        ``ast.literal_eval`` only accepts literals, so text coming from the
        CSV can never execute code (the previous ``eval`` could). Values that
        are already sequences are returned unchanged.
        """
        if isinstance(raw, str):
            import ast  # local import: keeps this class self-contained
            return ast.literal_eval(raw)
        return raw

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # --------------------------
        # 1. Load Image
        # --------------------------
        image = Image.open(row["image_path"]).convert("RGB")

        if self.transform is not None:
            image = self.transform(image)
        else:
            # Fallback: HWC uint8 -> CHW float scaled to [0, 1]
            image = torch.tensor(np.array(image)).permute(2, 0, 1) / 255.0

        # --------------------------
        # 2. MONET features
        # --------------------------
        monet_features = self._row_to_tensor(row, self.monet_cols)

        # --------------------------
        # 3. metadata_vector (stored as text such as "[70.0, 1, 1, 3]")
        # --------------------------
        metadata_vec = torch.tensor(
            self._parse_vector(row["metadata_vector"]), dtype=torch.float32
        )

        # --------------------------
        # 4. Embeddings (optional)
        # --------------------------
        if self.use_embeddings:
            embed_vals = self._row_to_tensor(row, self.embed_cols)
        else:
            embed_vals = torch.zeros(5)  # placeholder with the same length

        # --------------------------
        # 5. Target label (class name -> integer index)
        # --------------------------
        label = torch.tensor(self.LABEL_MAP[row["target"]], dtype=torch.long)

        return {
            "image": image,
            "monet": monet_features,
            "meta": metadata_vec,
            "embedding": embed_vals,
            "label": label
        }


In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.models import efficientnet_b3, EfficientNet_B3_Weights
import torch.optim as optim
import pandas as pd
import numpy as np
from PIL import Image

# ==========================================================
# ⚡ FORCE CUDA
# ==========================================================
# This cell is GPU-only: stop immediately when no CUDA device is visible.
cuda_ready = torch.cuda.is_available()
assert cuda_ready, "CUDA GPU NOT FOUND!"

device = torch.device("cuda")
gpu_name = torch.cuda.get_device_name(0)  # name of CUDA device index 0
print("Using GPU:", gpu_name)


# ==========================================================
# 📌 DATASET CLASS (Multimodal)
# ==========================================================
class LesionMultimodalDataset(Dataset):
    """Dataset yielding an image, MONET features, metadata and label per row.

    Each item is a dict with keys ``image``, ``monet``, ``meta`` and ``label``.
    """

    # Class name -> integer index. Built once instead of on every item.
    LABEL_MAP = {
        "AKIEC": 0, "BCC": 1, "BEN_OTH": 2, "BKL": 3, "DF": 4,
        "INF": 5, "MAL_OTH": 6, "MEL": 7, "NV": 8, "SCCKA": 9, "VASC": 10,
    }

    def __init__(self, df, transform=None):
        """
        df: DataFrame with ``image_path``, ``metadata_vector``, ``target``
            and any number of ``MONET_*`` columns
        transform: callable applied to the loaded PIL image (optional)
        """
        self.df = df.reset_index(drop=True)
        self.transform = transform
        self.monet_cols = [c for c in df.columns if c.startswith("MONET_")]

    def __len__(self):
        return len(self.df)

    @staticmethod
    def _parse_vector(raw):
        """Turn a stored ``"[a, b, c]"`` string into a Python list.

        ``ast.literal_eval`` only accepts literals, so text coming from the
        CSV can never execute code (the previous ``eval`` could). Values that
        are already sequences are returned unchanged.
        """
        if isinstance(raw, str):
            import ast  # local import: keeps this class self-contained
            return ast.literal_eval(raw)
        return raw

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # ---------------------
        # Image
        # ---------------------
        img = Image.open(row["image_path"]).convert("RGB")
        if self.transform is not None:
            img = self.transform(img)
        else:
            # `transform` defaults to None, so calling it unconditionally
            # raised TypeError. Fallback: HWC uint8 -> CHW float in [0, 1].
            img = torch.tensor(np.array(img)).permute(2, 0, 1) / 255.0

        # ---------------------
        # MONET features
        # ---------------------
        # A row of a mixed-dtype DataFrame has dtype object, which
        # torch.tensor cannot convert; cast to float32 with NumPy first.
        monet = torch.tensor(row[self.monet_cols].to_numpy(dtype=np.float32))

        # ---------------------
        # Metadata (stored as text such as "[70.0, 1, 1, 3]")
        # ---------------------
        meta = torch.tensor(self._parse_vector(row["metadata_vector"]), dtype=torch.float32)

        # ---------------------
        # Label
        # ---------------------
        label = torch.tensor(self.LABEL_MAP[row["target"]], dtype=torch.long)

        return {"image": img, "monet": monet, "meta": meta, "label": label}


# ==========================================================
# 📌 MULTIMODAL MODEL (EfficientNet-B3)
# ==========================================================
class MultimodalEfficientNetB3(nn.Module):
    """Three-branch classifier: image backbone, MONET MLP and metadata MLP,
    concatenated and passed through a fusion MLP that outputs class logits."""

    def __init__(self, monet_dim, meta_dim=4, num_classes=11):
        super().__init__()

        # Image branch: EfficientNet-B3 with its classifier replaced by an
        # identity, so the backbone returns its pooled feature vector.
        backbone = efficientnet_b3(weights=EfficientNet_B3_Weights.IMAGENET1K_V1)
        backbone_width = backbone.classifier[1].in_features
        backbone.classifier = nn.Identity()
        self.image_backbone = backbone
        self.image_proj = nn.Sequential(*self._dense_layers(backbone_width, 512, dropout=0.3))

        # Tabular branches
        self.monet_mlp = nn.Sequential(*self._dense_layers(monet_dim, 128))
        self.meta_mlp = nn.Sequential(*self._dense_layers(meta_dim, 32))

        # Fusion head over the concatenated branch outputs
        fused_width = 512 + 128 + 32
        self.fusion = nn.Sequential(
            *self._dense_layers(fused_width, 512, dropout=0.4),
            nn.Linear(512, num_classes),
        )

    @staticmethod
    def _dense_layers(in_features, out_features, dropout=None):
        """Linear -> ReLU -> BatchNorm1d, plus Dropout when `dropout` is given."""
        layers = [
            nn.Linear(in_features, out_features),
            nn.ReLU(),
            nn.BatchNorm1d(out_features),
        ]
        if dropout is not None:
            layers.append(nn.Dropout(dropout))
        return layers

    def forward(self, image, monet, meta):
        """Return logits of shape (batch, num_classes)."""
        branches = (
            self.image_proj(self.image_backbone(image)),
            self.monet_mlp(monet),
            self.meta_mlp(meta),
        )
        return self.fusion(torch.cat(branches, dim=1))


# ==========================================================
# 📌 LOAD DATA
# ==========================================================
# Read the two splits written by the split/oversample cell
train_df, val_df = (pd.read_csv(path) for path in ("train_balanced.csv", "val.csv"))

# The width of the MONET branch is the number of columns prefixed "MONET_"
monet_cols = list(filter(lambda name: name.startswith("MONET_"), train_df.columns))
monet_dim = len(monet_cols)
print("MONET feature count:", monet_dim)


# ==========================================================
# 📌 TRANSFORMS
# ==========================================================
_INPUT_SIZE = (300, 300)
# Per-channel mean/std; presumably the standard ImageNet statistics that go
# with the IMAGENET1K_V1 backbone weights (confirm against torchvision docs).
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def _tensor_and_normalize():
    """Fresh ToTensor + Normalize steps shared by both pipelines."""
    return [transforms.ToTensor(), transforms.Normalize(_NORM_MEAN, _NORM_STD)]


# Training: resize, light random augmentation, then tensor + normalize
train_tf = transforms.Compose([
    transforms.Resize(_INPUT_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(0.1, 0.1, 0.1, 0.05),
    *_tensor_and_normalize(),
])

# Validation: deterministic, no augmentation
val_tf = transforms.Compose([
    transforms.Resize(_INPUT_SIZE),
    *_tensor_and_normalize(),
])


# ==========================================================
# 📌 DATALOADERS
# ==========================================================
import sys

train_ds = LesionMultimodalDataset(train_df, train_tf)
val_ds   = LesionMultimodalDataset(val_df, val_tf)

# On Windows, DataLoader workers are started with "spawn" and must unpickle
# the dataset in a fresh interpreter. A Dataset class defined in a notebook
# cell is not importable there, so num_workers > 0 fails or hangs.
# Load in the main process on Windows and keep 4 workers elsewhere.
NUM_WORKERS = 0 if sys.platform.startswith("win") else 4

# drop_last=True on the training loader: a trailing batch with a single
# sample would make the model's BatchNorm1d layers raise in training mode.
train_loader = DataLoader(
    train_ds, batch_size=16, shuffle=True,
    num_workers=NUM_WORKERS, pin_memory=True, drop_last=True,
)
val_loader = DataLoader(
    val_ds, batch_size=16, shuffle=False,
    num_workers=NUM_WORKERS, pin_memory=True,
)


# ==========================================================
# 📌 MODEL + LOSS + OPTIMIZER
# ==========================================================
model = MultimodalEfficientNetB3(monet_dim).to(device)

# Index order of the classes; must match the label mapping used by the dataset
class_order = ["AKIEC","BCC","BEN_OTH","BKL","DF","INF","MAL_OTH","MEL","NV","SCCKA","VASC"]

# Class weights are derived from the frame that is actually trained on.
# The previous hard-coded values were computed on the distribution *before*
# oversampling; combined with the oversampled train_balanced.csv they
# compensated the rare classes twice (e.g. MAL_OTH was both padded to 300
# rows and weighted ~53x).
class_counts = train_df["target"].value_counts().reindex(class_order, fill_value=0)
absent = class_counts[class_counts == 0].index.tolist()
if absent:
    raise ValueError(f"No training rows for classes: {absent}")

# "balanced" heuristic: n_samples / (n_classes * class_count)
balanced = len(train_df) / (len(class_order) * class_counts)
weights_dict = balanced.to_dict()
w = torch.tensor([weights_dict[c] for c in class_order], dtype=torch.float32).to(device)

criterion = nn.CrossEntropyLoss(weight=w)
optimizer = optim.AdamW(model.parameters(), lr=2e-4)


# ==========================================================
# 📌 TRAINING + SAVE BEST MODEL
# ==========================================================
def _run_epoch(loader, train):
    """Run one pass over `loader` and return the accuracy.

    Uses the module-level `model`, `criterion`, `optimizer` and `device`.
    When `train` is True the model is put in training mode and the optimizer
    is stepped on every batch; otherwise the model is put in eval mode and
    gradients are disabled. Returns 0.0 for an empty loader instead of
    dividing by zero.
    """
    model.train(train)  # train(False) is equivalent to eval()
    seen, hits = 0, 0

    with torch.set_grad_enabled(train):
        for batch in loader:
            # Move to the configured device instead of a hard-coded .cuda()
            img  = batch["image"].to(device, non_blocking=True)
            mon  = batch["monet"].to(device, non_blocking=True)
            meta = batch["meta"].to(device, non_blocking=True)
            lbl  = batch["label"].to(device, non_blocking=True)

            if train:
                optimizer.zero_grad()

            logits = model(img, mon, meta)

            if train:
                loss = criterion(logits, lbl)
                loss.backward()
                optimizer.step()

            hits += (logits.argmax(1) == lbl).sum().item()
            seen += lbl.size(0)

    return hits / seen if seen else 0.0


best_acc = 0

for epoch in range(10):
    train_acc = _run_epoch(train_loader, train=True)
    print(f"[Epoch {epoch+1}] Train Acc: {train_acc:.4f}")

    # ----------------- Validation -----------------
    val_acc = _run_epoch(val_loader, train=False)
    print(f"[Epoch {epoch+1}] Val Acc: {val_acc:.4f}")

    # SAVE BEST MODEL (selected on validation accuracy)
    if val_acc > best_acc:
        best_acc = val_acc
        torch.save({
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
            "epoch": epoch,
            "val_acc": val_acc,  # extra key: records the score this checkpoint was chosen on
        }, "best_multimodal_effb3.pth")

        print("ðŸ”¥ Saved best model -> best_multimodal_effb3.pth")


print("Training complete! Best Val Acc:", best_acc)


Using GPU: NVIDIA GeForce RTX 4060 Laptop GPU
MONET feature count: 7


In [1]:
import torch
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import cv2
from ipywidgets import FileUpload, VBox, Button
from IPython.display import display

# ========================================
# LOAD TRAINED MODEL
# ========================================
from train import MultimodalEfficientNetB3   # your model definition

CHECKPOINT = "best_multimodal_effb3.pth"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

checkpoint = torch.load(CHECKPOINT, map_location=DEVICE)
state_dict = checkpoint["model_state_dict"]

# Read the MONET input width from the checkpoint instead of hard-coding it.
# The previous constant (20) did not match the trained model (the training
# cell reports 7 MONET features), which makes load_state_dict fail with a
# size mismatch. "monet_mlp.0.weight" is the first Linear of the MONET branch
# in the notebook's model definition; its shape is (out_features, monet_dim).
# NOTE(review): assumes train.MultimodalEfficientNetB3 uses the same layout.
MONET_KEY = "monet_mlp.0.weight"
if MONET_KEY not in state_dict:
    raise KeyError(f"{MONET_KEY!r} not found in checkpoint {CHECKPOINT!r}")
MONET_DIM = int(state_dict[MONET_KEY].shape[1])

model = MultimodalEfficientNetB3(monet_dim=MONET_DIM).to(DEVICE)
model.load_state_dict(state_dict)
model.eval()

print("Model Loaded âœ”")


# ========================================
# GRAD-CAM IMPLEMENTATION
# ========================================
class GradCAM:
    """Grad-CAM for a model called as ``model(image, monet, meta)``.

    Hooks are attached only for the duration of ``generate`` and always
    removed afterwards, so creating many instances for the same layer does
    not leave stale hooks (and captured tensors) on the model.
    """

    def __init__(self, model, target_layer):
        """
        model: module returning logits of shape (batch, num_classes)
        target_layer: sub-module whose output feature map is visualised
        """
        self.model = model
        self.target_layer = target_layer
        self.gradients = None
        self.activations = None

    def save_activation(self, module, input, output):
        """Forward hook: keep the layer output and capture its gradient.

        The gradient is captured with a tensor hook on the output rather than
        the deprecated ``Module.register_backward_hook``.
        """
        self.activations = output
        if output.requires_grad:
            output.register_hook(self._store_gradient)

    def save_gradient(self, module, grad_input, grad_output):
        """Module-backward-hook style setter, kept for backward compatibility.

        No longer registered by this class; see ``save_activation``.
        """
        self.gradients = grad_output[0]

    def _store_gradient(self, grad):
        """Tensor hook: gradient of the score w.r.t. the target layer output."""
        self.gradients = grad

    def generate(self, img_tensor, monet, meta):
        """Compute the Grad-CAM map for the predicted class of one sample.

        Returns ``(cam, pred_class)`` where ``cam`` is a 2-D float32 NumPy
        array scaled to [0, 1] and ``pred_class`` is the argmax class index
        of the first (only) sample in the batch.

        Raises RuntimeError if the target layer produced no activation or
        received no gradient.
        """
        self.gradients = None
        self.activations = None

        handle = self.target_layer.register_forward_hook(self.save_activation)
        try:
            # Gradients are required even if the caller is inside no_grad()
            with torch.enable_grad():
                logits = self.model(img_tensor, monet, meta)
                pred_class = int(logits.argmax(dim=1).item())

                self.model.zero_grad()
                logits[0, pred_class].backward()
        finally:
            handle.remove()

        if self.activations is None or self.gradients is None:
            raise RuntimeError(
                "Grad-CAM could not capture activations/gradients for the target layer"
            )

        gradients = self.gradients[0].detach().float()       # [C,H,W]
        activations = self.activations[0].detach().float()   # [C,H,W]

        weights = gradients.mean(dim=(1, 2))  # global average pooling per channel

        # Channel-weighted sum of the activation maps, on the tensors' own device
        cam = torch.relu((weights[:, None, None] * activations).sum(dim=0))
        cam = cam.cpu().numpy()

        # Min-max scale: subtract the minimum first, then divide by the new
        # maximum (the previous code divided by the un-shifted maximum).
        cam = cam - cam.min()
        cam = cam / (cam.max() + 1e-8)

        return cam, pred_class


# ========================================
# IMAGE TRANSFORM
# ========================================
# Resize, convert to tensor, then normalize per channel
_preprocess_steps = [
    transforms.Resize((300, 300)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]
transform = transforms.Compose(_preprocess_steps)


# ========================================
# UPLOAD WIDGET
# ========================================
# Single-file picker restricted to the listed image extensions
uploader = FileUpload(accept='.jpg,.png,.jpeg', multiple=False)

def on_upload_change(change):
    """Widget callback: decode the uploaded file, show it, and run Grad-CAM.

    Does nothing while the uploader holds no file. `change` is the traitlets
    change notification; the current value is read from `uploader` directly.
    """
    # BytesIO was used but never imported in this cell (NameError on upload).
    from io import BytesIO

    uploaded = uploader.value
    if not uploaded:
        return

    # ipywidgets 7 exposes a dict keyed by filename; ipywidgets 8 exposes a
    # tuple of per-file dicts. Support both layouts.
    entries = list(uploaded.values()) if isinstance(uploaded, dict) else list(uploaded)
    file = entries[0]

    # The content may be bytes or a memoryview; bytes() normalises both.
    img_bytes = bytes(file['content'])
    img = Image.open(BytesIO(img_bytes)).convert("RGB")

    # Display uploaded image
    plt.figure(figsize=(4,4))
    plt.imshow(img)
    plt.title("Uploaded Image")
    plt.axis("off")
    plt.show()

    run_gradcam_on_image(img)


def run_gradcam_on_image(img):
    """Run the model on a PIL image and plot original, heatmap and overlay.

    MONET features and metadata are placeholders (zeros and a fixed vector),
    so the prediction reflects the image branch with dummy tabular inputs.
    Prints the predicted class index.
    """
    orig = np.array(img)  # RGB, HxWx3
    img_tensor = transform(img).unsqueeze(0).to(DEVICE)

    # Dummy MONET + metadata for GradCAM
    monet = torch.zeros((1, MONET_DIM)).float().to(DEVICE)
    meta  = torch.tensor([[30,1,3,5]], dtype=torch.float32).to(DEVICE)

    # GradCAM target: last block of the backbone's feature extractor
    target_layer = model.image_backbone.features[-1]
    cam_gen = GradCAM(model, target_layer)

    cam, pred_class = cam_gen.generate(img_tensor, monet, meta)

    # cv2.resize takes (width, height)
    cam_resized = cv2.resize(cam, (orig.shape[1], orig.shape[0]))

    # applyColorMap returns BGR; convert to RGB before blending with the RGB
    # image and showing with matplotlib, otherwise hot regions appear blue.
    heatmap_bgr = cv2.applyColorMap(np.uint8(cam_resized * 255), cv2.COLORMAP_JET)
    heatmap = cv2.cvtColor(heatmap_bgr, cv2.COLOR_BGR2RGB)
    overlay = (orig * 0.6 + heatmap * 0.4).astype(np.uint8)

    # Display results
    plt.figure(figsize=(15,5))

    plt.subplot(1,3,1)
    plt.imshow(orig)
    plt.title("Original")
    plt.axis("off")

    plt.subplot(1,3,2)
    plt.imshow(cam_resized, cmap="jet")
    plt.title("Grad-CAM Heatmap")
    plt.axis("off")

    plt.subplot(1,3,3)
    plt.imshow(overlay)
    plt.title("Overlay")
    plt.axis("off")

    plt.show()

    print(f"Predicted Class Index: {pred_class}")


# Call on_upload_change whenever the uploader's `value` trait changes
uploader.observe(on_upload_change, names='value')

# Render the uploader inside a vertical container
upload_panel = VBox([uploader])
display(upload_panel)


ModuleNotFoundError: No module named 'ipywidgets'