In [None]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

# Select the compute device: CUDA when a GPU is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

Using device: cuda


In [13]:
# Load the Camelyon17 metadata CSV; the path is relative to the notebook's working directory.
df = pd.read_csv("../data/camelyon17_v1.0/metadata.csv")

In [14]:
# Summarise the columns, non-null counts and dtypes of the metadata table.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 455954 entries, 0 to 455953
Data columns (total 9 columns):
 #   Column      Non-Null Count   Dtype
---  ------      --------------   -----
 0   Unnamed: 0  455954 non-null  int64
 1   patient     455954 non-null  int64
 2   node        455954 non-null  int64
 3   x_coord     455954 non-null  int64
 4   y_coord     455954 non-null  int64
 5   tumor       455954 non-null  int64
 6   slide       455954 non-null  int64
 7   center      455954 non-null  int64
 8   split       455954 non-null  int64
dtypes: int64(9)
memory usage: 31.3 MB


In [None]:
def make_path(row):
    """Build the relative path of the PNG patch described by one metadata row.

    Args:
        row: Object exposing integer ``patient``, ``node``, ``x_coord`` and
            ``y_coord`` attributes (e.g. a row passed by ``df.apply(..., axis=1)``).

    Returns:
        str: ``../data/camelyon17_v1.0/patches/patient_PPP_node_N/``
        ``patch_patient_PPP_node_N_x_X_y_Y.png`` where ``PPP`` is the patient
        id zero-padded to three digits.
    """
    p = f"{row.patient:03d}"
    return (
        # "../data/" (with the slash): same data root the metadata CSV is read
        # from. The previous "..data/" pointed at a directory literally named
        # "..data", so no patch file could be found.
        f"../data/camelyon17_v1.0/patches/"
        f"patient_{p}_node_{row.node}/"
        f"patch_patient_{p}_node_{row.node}_x_{row.x_coord}_y_{row.y_coord}.png"
    )

# Derive one image path per metadata row (row-wise apply), then preview the first rows.
df["img_path"] = df.apply(make_path, axis=1)
df.head()

Unnamed: 0.1,Unnamed: 0,patient,node,x_coord,y_coord,tumor,slide,center,split,img_path
0,0,4,4,3328,21792,1,0,0,0,data/camelyon17_v1.0/patches/patient_004_node_...
1,1,4,4,3200,22272,1,0,0,0,data/camelyon17_v1.0/patches/patient_004_node_...
2,2,4,4,3168,22272,1,0,0,0,data/camelyon17_v1.0/patches/patient_004_node_...
3,3,4,4,3328,21760,1,0,0,0,data/camelyon17_v1.0/patches/patient_004_node_...
4,4,4,4,3232,22240,1,0,0,0,data/camelyon17_v1.0/patches/patient_004_node_...


In [10]:
cat_cols = ["patient", "node", "slide", "center"]
cont_cols = ["x_coord", "y_coord"]

# Each categorical column gets its own LabelEncoder, which maps the raw ids
# onto consecutive 0-based integers.
from sklearn.preprocessing import LabelEncoder

label_encoders = {}
cat_cols_encoded = []
for col in cat_cols:
    encoded_col = f"{col}_encoded"
    encoder = LabelEncoder()
    df[encoded_col] = encoder.fit_transform(df[col])
    label_encoders[col] = encoder
    # Collect the names of the encoded columns in the same order as cat_cols.
    cat_cols_encoded.append(encoded_col)
    print(f"{col}: {df[col].nunique()} unique values -> encoded to 0-{len(encoder.classes_)-1}")

# IMPORTANT: the split is taken AFTER encoding so that both subsets carry the
# *_encoded columns.
is_train = df["split"] == 0
is_val = df["split"] == 1
train_df = df[is_train].copy()
val_df = df[is_val].copy()

print("\nTrain set size:", len(train_df))
print("Validation set size:", len(val_df))

# Standardise the continuous features with statistics fitted on the training
# rows only, then reuse them for the validation rows.
scaler = StandardScaler()
train_cont = scaler.fit_transform(train_df[cont_cols])
val_cont = scaler.transform(val_df[cont_cols])

patient: 43 unique values -> encoded to 0-42
node: 5 unique values -> encoded to 0-4
slide: 50 unique values -> encoded to 0-49
center: 5 unique values -> encoded to 0-4

Train set size: 410359
Validation set size: 45595


In [11]:
class CamelyonDataset(Dataset):
    """Dataset yielding ``(image, categorical codes, continuous features, label)``.

    Args:
        df: Metadata frame. ``__getitem__`` reads its ``img_path`` and ``tumor``
            columns plus every column named in the notebook-level
            ``cat_cols_encoded`` list.
        cont_array: Array of continuous features, indexed with the same
            position as the rows of ``df``; stored as float32.
        transform: Optional callable applied to the loaded PIL image.
    """

    def __init__(self, df, cont_array, transform=None):
        self.transform = transform
        self.cont = cont_array.astype(np.float32)
        # Drop the original index so rows are labelled 0..len-1.
        self.df = df.reset_index(drop=True)

    def __len__(self):
        """Return the number of rows in the metadata frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(img, cat, cont, y)``."""
        record = self.df.iloc[idx]

        image = Image.open(record.img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)

        # Encoded categorical columns, in the order given by cat_cols_encoded.
        codes = [int(record[name]) for name in cat_cols_encoded]
        cat = torch.tensor(codes, dtype=torch.long)
        cont = torch.tensor(self.cont[idx], dtype=torch.float32)
        label = torch.tensor(record.tumor, dtype=torch.float32)
        return image, cat, cont, label

In [12]:
# Every patch is resized to 224x224 and converted to a tensor.
transform = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)

train_ds = CamelyonDataset(df=train_df, cont_array=train_cont, transform=transform)
val_ds = CamelyonDataset(df=val_df, cont_array=val_cont, transform=transform)

# On Windows, num_workers must be 0 to avoid multiprocessing issues in notebooks
loader_kwargs = {"batch_size": 64, "num_workers": 0}
# Only the training loader shuffles its samples.
train_loader = DataLoader(train_ds, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_ds, shuffle=False, **loader_kwargs)

In [13]:
class MultiModalModel(nn.Module):
    """Fuse ResNet-18 image features with embedded metadata into one logit per sample.

    Args:
        cat_sizes: Number of embedding rows for each categorical input, in
            the order the categorical columns appear in ``cat``.
        cont_dim: Number of continuous metadata features.
        emb_dim: Width of every categorical embedding.
        hidden: Output width of the metadata MLP.
    """

    def __init__(self, cat_sizes, cont_dim, emb_dim=8, hidden=128):
        super().__init__()

        # Image branch: ResNet-18 built with weights=None; its fc layer is
        # replaced by an identity so the backbone output feeds the fusion head.
        backbone = models.resnet18(weights=None)
        backbone.fc = nn.Identity()
        self.cnn = backbone
        cnn_out = 512

        # Categorical embeddings: one table per categorical input.
        embedding_layers = [nn.Embedding(n_codes, emb_dim) for n_codes in cat_sizes]
        self.embeddings = nn.ModuleList(embedding_layers)
        emb_out = len(cat_sizes) * emb_dim

        # Metadata MLP over the concatenated embeddings and continuous features.
        meta_in = emb_out + cont_dim
        self.meta_mlp = nn.Sequential(
            nn.Linear(meta_in, hidden),
            nn.ReLU(),
            nn.Dropout(0.2),
        )

        # Final head: a single linear layer over the fused features.
        self.classifier = nn.Sequential(
            nn.Linear(cnn_out + hidden, 1)
        )

    def forward(self, img, cat, cont):
        """Return one logit per sample from the image, categorical codes and continuous features."""
        img_feat = self.cnn(img)

        # Column i of `cat` is looked up in the i-th embedding table.
        embedded = []
        for idx, layer in enumerate(self.embeddings):
            embedded.append(layer(cat[:, idx]))

        meta_input = torch.cat([*embedded, cont], dim=1)
        meta_feat = self.meta_mlp(meta_input)

        fused = torch.cat([img_feat, meta_feat], dim=1)
        # Drop the trailing size-1 dimension produced by the linear head.
        return self.classifier(fused).squeeze(1)


In [14]:
# One embedding table per encoded categorical column, sized by its number of
# distinct codes.
cat_sizes = df[cat_cols_encoded].nunique().tolist()
print("Embedding sizes:", cat_sizes)

model = MultiModalModel(cat_sizes, cont_dim=len(cont_cols))
model = model.to(device)

# The model emits raw logits, so the loss applies the sigmoid itself.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

Embedding sizes:

 [43, 5, 50, 5]


In [15]:
def run_epoch(loader, train=True):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Uses the notebook-level ``model``, ``criterion``, ``optimizer`` and
    ``device`` objects.

    Args:
        loader: Iterable yielding ``(imgs, cats, conts, y)`` batches.
        train: If True, put the model in training mode and take one optimizer
            step per batch. If False, put it in eval mode and run the forward
            pass with gradient tracking disabled.

    Returns:
        tuple[float, float]: sample-weighted mean loss, and the fraction of
        samples whose thresholded prediction (sigmoid > 0.5) equals the label.
    """
    if train:
        model.train()
    else:
        model.eval()

    total_loss = 0.0
    correct = 0
    total = 0

    mode = "Train" if train else "Val"
    pbar = tqdm(loader, desc=f"{mode}", leave=True)

    for imgs, cats, conts, y in pbar:
        imgs, cats, conts, y = imgs.to(device), cats.to(device), conts.to(device), y.to(device)

        # Only record the autograd graph when a backward pass will follow;
        # during validation this avoids building a graph that is never used.
        with torch.set_grad_enabled(train):
            logits = model(imgs, cats, conts)
            loss = criterion(logits, y)

        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Weight the batch loss by batch size so the final mean is per-sample.
        total_loss += loss.item() * y.size(0)
        preds = (torch.sigmoid(logits.detach()) > 0.5).float()
        correct += (preds == y).sum().item()
        total += y.size(0)

        # Update progress bar with current metrics
        pbar.set_postfix({
            'loss': f'{total_loss/total:.4f}',
            'acc': f'{correct/total:.4f}'
        })

    return total_loss / total, correct / total

# Epoch budget defined once so the loop bound and the progress messages cannot
# drift apart.
NUM_EPOCHS = 5

print("Starting training...")
for epoch in range(NUM_EPOCHS):
    print(f"\nEpoch {epoch+1}/{NUM_EPOCHS}")
    # One training pass followed by one validation pass per epoch.
    tr_loss, tr_acc = run_epoch(train_loader, train=True)
    va_loss, va_acc = run_epoch(val_loader, train=False)
    print(f"Epoch {epoch+1} Summary | Train Loss: {tr_loss:.4f} Acc: {tr_acc:.4f} | Val Loss: {va_loss:.4f} Acc: {va_acc:.4f}")

print("\nTraining completed!")

Starting training...

Epoch 1/5


Train: 100%|██████████| 6412/6412 [1:03:17<00:00,  1.69it/s, loss=0.0926, acc=0.9660]
Val: 100%|██████████| 713/713 [07:10<00:00,  1.66it/s, loss=0.0718, acc=0.9740]


Epoch 1 Summary | Train Loss: 0.0926 Acc: 0.9660 | Val Loss: 0.0718 Acc: 0.9740

Epoch 2/5


Train: 100%|██████████| 6412/6412 [56:38<00:00,  1.89it/s, loss=0.0499, acc=0.9824]
Val: 100%|██████████| 713/713 [07:27<00:00,  1.59it/s, loss=0.0397, acc=0.9861]


Epoch 2 Summary | Train Loss: 0.0499 Acc: 0.9824 | Val Loss: 0.0397 Acc: 0.9861

Epoch 3/5


Train: 100%|██████████| 6412/6412 [1:04:30<00:00,  1.66it/s, loss=0.0384, acc=0.9865]
Val: 100%|██████████| 713/713 [07:12<00:00,  1.65it/s, loss=0.0382, acc=0.9862]


Epoch 3 Summary | Train Loss: 0.0384 Acc: 0.9865 | Val Loss: 0.0382 Acc: 0.9862

Epoch 4/5


Train: 100%|██████████| 6412/6412 [1:00:55<00:00,  1.75it/s, loss=0.0314, acc=0.9891]
Val: 100%|██████████| 713/713 [06:48<00:00,  1.74it/s, loss=0.0586, acc=0.9784]


Epoch 4 Summary | Train Loss: 0.0314 Acc: 0.9891 | Val Loss: 0.0586 Acc: 0.9784

Epoch 5/5


Train: 100%|██████████| 6412/6412 [1:01:52<00:00,  1.73it/s, loss=0.0254, acc=0.9910]
Val: 100%|██████████| 713/713 [07:49<00:00,  1.52it/s, loss=0.0324, acc=0.9882]

Epoch 5 Summary | Train Loss: 0.0254 Acc: 0.9910 | Val Loss: 0.0324 Acc: 0.9882

Training completed!





In [16]:
# Save only the model's state_dict to a file in the notebook's working directory.
torch.save(model.state_dict(), "multimodal_camelyon_model.pth")