In [1]:
import os
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
import torchvision.models as models

In [2]:
class CXRDataset(Dataset):
    """Dataset yielding ``(image, clinical_features, label)`` triples.

    Rows come from a CSV file that must contain an ``image_id`` column (file
    name relative to ``image_dir``), the requested clinical columns, and the
    label column. Rows are dropped when a clinical value is missing or
    non-numeric, when the label is missing, or when the image file does not
    exist on disk.

    Args:
        csv_path: Path of the metadata CSV.
        image_dir: Directory that ``image_id`` values are joined onto.
        transform: Optional callable applied to the loaded RGB PIL image.
        clinical_cols: Iterable of column names used as tabular features.
            ``None`` (the default) means "no clinical features".
        label_col: Name of the column holding the class label.
    """

    def __init__(self, csv_path, image_dir, transform=None, clinical_cols=None, label_col="label"):
        self.image_dir = image_dir
        self.transform = transform
        # Normalise to a list: the declared default (None) used to crash in the
        # loop below, and a tuple used to crash on ``clinical_cols + [label_col]``.
        self.clinical_cols = list(clinical_cols) if clinical_cols is not None else []
        self.label_col = label_col

        df = pd.read_csv(csv_path)

        # Coerce clinical columns to numbers; anything unparsable becomes NaN ...
        for col in self.clinical_cols:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        # ... and is removed here together with rows lacking a label.
        df = df.dropna(subset=self.clinical_cols + [label_col])

        # Resolve every image_id to a full path and keep only rows whose file exists.
        df = df.assign(
            image_path=df["image_id"].map(lambda name: os.path.join(image_dir, name))
        )
        # astype(bool) keeps this a boolean mask even when the frame is empty.
        exists_mask = df["image_path"].map(os.path.exists).astype(bool)
        df = df[exists_mask].reset_index(drop=True)

        # Cleaned frame that __len__/__getitem__ read from.
        self.df = df

    def __len__(self):
        """Number of usable rows after cleaning."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, clinical, label)`` for row ``idx``.

        ``image`` is the RGB image after ``transform`` (if any), ``clinical``
        is a float tensor with one entry per clinical column, and ``label`` is
        a scalar long tensor.
        """
        row = self.df.iloc[idx]

        # ---- Load Image ----
        # The context manager closes the underlying file as soon as the pixel
        # data has been copied by convert(), instead of waiting for GC.
        with Image.open(row["image_path"]) as handle:
            img = handle.convert("RGB")
        if self.transform:
            img = self.transform(img)

        # ---- Clinical features ----
        # A mixed-dtype row comes back as object dtype, so convert explicitly.
        clinical_vals = (
            pd.to_numeric(row[self.clinical_cols], errors="coerce")
            .fillna(0)
            .to_numpy(dtype="float32")
        )
        clinical = torch.tensor(clinical_vals, dtype=torch.float)

        # ---- Label ----
        label = torch.tensor(int(row[self.label_col]), dtype=torch.long)

        return img, clinical, label

In [3]:
# -------------------------------
# 2️⃣ Model: CNN + Clinical MLP + Fusion
# -------------------------------
class CXRClinicalFusionModel(nn.Module):
    """Two-branch classifier that fuses image features with tabular features.

    The image branch is an ImageNet-pretrained ResNet-18 with its final ``fc``
    layer removed; the tabular branch is a small MLP. Both embeddings are
    concatenated and passed through a classification head.

    Args:
        num_tabular_features: Width of the clinical feature vector.
        num_classes: Number of output logits.
    """

    def __init__(self, num_tabular_features, num_classes):
        super().__init__()

        # --- Image branch -------------------------------------------------
        # Every child of the pretrained network except the last one (fc).
        resnet = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        backbone_layers = list(resnet.children())
        self.cnn_encoder = nn.Sequential(*backbone_layers[:-1])
        # Input width of the discarded fc layer == size of the pooled feature vector.
        self.cnn_dim = resnet.fc.in_features

        # --- Tabular branch -----------------------------------------------
        tabular_layers = [
            nn.Linear(num_tabular_features, 64),
            nn.ReLU(),
            nn.BatchNorm1d(64),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        self.mlp = nn.Sequential(*tabular_layers)

        # --- Fusion head --------------------------------------------------
        fused_width = self.cnn_dim + 32
        head_layers = [
            nn.Linear(fused_width, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, image, clinical):
        """Return class logits for a batch of images and their clinical vectors."""
        batch = image.size(0)
        pooled = self.cnn_encoder(image)
        # Collapse the trailing dimensions so each sample is one flat vector.
        visual = pooled.view(batch, -1)
        tabular = self.mlp(clinical)
        return self.classifier(torch.cat([visual, tabular], dim=1))


In [4]:
# -------------------------------
# 3️⃣ Image Transforms
# -------------------------------
# Shared preprocessing constants. The mean/std values are the ImageNet channel
# statistics that torchvision's pretrained weights were trained with.
_INPUT_SIZE = (224, 224)
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, light augmentation (flip + small rotation), normalise.
_train_steps = [
    T.Resize(_INPUT_SIZE),
    T.RandomHorizontalFlip(),
    T.RandomRotation(5),
    T.ToTensor(),
    T.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
train_tf = T.Compose(_train_steps)

# Validation pipeline: same as above without the random augmentation.
_val_steps = [
    T.Resize(_INPUT_SIZE),
    T.ToTensor(),
    T.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
val_tf = T.Compose(_val_steps)


In [5]:
# -------------------------------
# 4️⃣ Dataset and Dataloader
# -------------------------------
clinical_cols = ["age", "gender", "rotation", "exposure"]  # example columns
label_col = "label"

# Single root for all data files. Override with the CXR_DATA_ROOT environment
# variable to run on another machine; the default keeps the original location.
DATA_ROOT = os.environ.get("CXR_DATA_ROOT", r"C:\Users\sureb\my_data")
BATCH_SIZE = 16

img_dir = os.path.join(DATA_ROOT, "images", "primary_data")
img_dir1 = os.path.join(DATA_ROOT, "images", "secondary_data")

train_data = CXRDataset(
    csv_path=os.path.join(DATA_ROOT, "primary_data.csv"),
    image_dir=img_dir,
    transform=train_tf,
    clinical_cols=clinical_cols,
    label_col=label_col,
)
val_data = CXRDataset(
    csv_path=os.path.join(DATA_ROOT, "secondary_data.csv"),
    image_dir=img_dir1,
    transform=val_tf,
    clinical_cols=clinical_cols,
    label_col=label_col,
)

# CXRDataset silently drops rows whose image file is missing, so a wrong path
# yields an empty dataset. Fail here with a readable message instead of later
# inside the DataLoader or the training loop.
for _split_name, _split in (("train", train_data), ("val", val_data)):
    if len(_split) == 0:
        raise ValueError(
            f"{_split_name} dataset is empty - check DATA_ROOT ({DATA_ROOT!r}), "
            "the CSV contents and the image directory"
        )

# The model contains BatchNorm1d, which raises in training mode on a batch of
# one sample. Drop the trailing batch only in the case where it would hold
# exactly one sample; otherwise every sample is still used.
train_loader = DataLoader(
    train_data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=(len(train_data) % BATCH_SIZE == 1),
)
val_loader = DataLoader(val_data, batch_size=BATCH_SIZE, shuffle=False)

In [6]:
# -------------------------------
# 5️⃣ Training Setup
# -------------------------------
# Tunable settings for this run.
SEED = 42
NUM_CLASSES = 3
LEARNING_RATE = 1e-4

# Seed torch's global RNG before the model is built so that weight
# initialisation of the new layers, DataLoader shuffling and the random
# augmentations are repeatable across Restart & Run All.
torch.manual_seed(SEED)

device = "cuda" if torch.cuda.is_available() else "cpu"

model = CXRClinicalFusionModel(
    num_tabular_features=len(clinical_cols),
    num_classes=NUM_CLASSES,
).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [7]:
def train_one_epoch(model, loader, optimizer, criterion):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Module called as ``model(images, clinical)`` and returning logits.
        loader: Iterable of ``(images, clinical, labels)`` batches.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Loss called as ``criterion(logits, labels)``; its result is
            treated as a per-batch mean when averaging.

    Returns:
        ``(mean_loss, accuracy)`` over every sample seen in the epoch.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    # Move batches to wherever the model lives rather than reading a
    # notebook-global ``device``; this keeps the function usable on its own.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    total_loss, correct, total = 0.0, 0, 0
    for images, clinical, labels in loader:
        images = images.to(target)
        clinical = clinical.to(target)
        labels = labels.to(target)

        optimizer.zero_grad()              # gradients accumulate by default, so reset per batch
        outputs = model(images, clinical)
        loss = criterion(outputs, labels)
        loss.backward()                    # gradients of the loss w.r.t. the parameters
        optimizer.step()                   # apply the update

        batch_size = labels.size(0)
        # Weight the batch loss by its size so a short last batch is averaged correctly.
        total_loss += loss.item() * batch_size
        _, preds = outputs.max(1)
        correct += preds.eq(labels).sum().item()
        total += batch_size

    if total == 0:
        raise ValueError("train_one_epoch: loader yielded no samples")
    return total_loss / total, correct / total

In [8]:
def evaluate(model, loader, criterion):
    """Compute mean loss and accuracy of ``model`` over ``loader`` without training.

    Args:
        model: Module called as ``model(images, clinical)`` and returning logits.
        loader: Iterable of ``(images, clinical, labels)`` batches.
        criterion: Loss called as ``criterion(logits, labels)``; its result is
            treated as a per-batch mean when averaging.

    Returns:
        ``(mean_loss, accuracy)`` over every sample in ``loader``.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    # Move batches to wherever the model lives rather than reading a
    # notebook-global ``device``; this keeps the function usable on its own.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()  # switches dropout / batch-norm to inference behaviour
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for images, clinical, labels in loader:
            images = images.to(target)
            clinical = clinical.to(target)
            labels = labels.to(target)

            outputs = model(images, clinical)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            # Weight the batch loss by its size so a short last batch is averaged correctly.
            total_loss += loss.item() * batch_size
            _, preds = outputs.max(1)
            correct += preds.eq(labels).sum().item()
            total += batch_size

    if total == 0:
        raise ValueError("evaluate: loader yielded no samples")
    return total_loss / total, correct / total


In [9]:
num_epochs = 30
for epoch in range(num_epochs):
    # One optimisation pass, then a no-grad pass over the validation loader.
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
    val_loss, val_acc = evaluate(model, val_loader, criterion)

    # Assemble the per-epoch report from its " | "-separated fields.
    report_fields = [
        f"Epoch {epoch+1}/{num_epochs}",
        f"Train Loss: {train_loss:.4f}",
        f"Train Acc: {train_acc:.4f}",
        f"Val Loss: {val_loss:.4f}",
        f"Val Acc: {val_acc:.4f}",
    ]
    print(" | ".join(report_fields))


Epoch 1/30 | Train Loss: 0.9497 | Train Acc: 0.5095 | Val Loss: 1.0992 | Val Acc: 0.4480
Epoch 2/30 | Train Loss: 0.7395 | Train Acc: 0.7095 | Val Loss: 0.9796 | Val Acc: 0.4842
Epoch 3/30 | Train Loss: 0.5242 | Train Acc: 0.8238 | Val Loss: 1.0410 | Val Acc: 0.4796
Epoch 4/30 | Train Loss: 0.4020 | Train Acc: 0.8476 | Val Loss: 1.3450 | Val Acc: 0.4253
Epoch 5/30 | Train Loss: 0.3705 | Train Acc: 0.9048 | Val Loss: 1.1616 | Val Acc: 0.4751
Epoch 6/30 | Train Loss: 0.2635 | Train Acc: 0.9000 | Val Loss: 1.4017 | Val Acc: 0.3665
Epoch 7/30 | Train Loss: 0.2066 | Train Acc: 0.9238 | Val Loss: 1.9573 | Val Acc: 0.2670
Epoch 8/30 | Train Loss: 0.2197 | Train Acc: 0.9143 | Val Loss: 1.6093 | Val Acc: 0.2896
Epoch 9/30 | Train Loss: 0.1455 | Train Acc: 0.9571 | Val Loss: 1.3904 | Val Acc: 0.3575
Epoch 10/30 | Train Loss: 0.1801 | Train Acc: 0.9381 | Val Loss: 1.5583 | Val Acc: 0.4570
Epoch 11/30 | Train Loss: 0.1962 | Train Acc: 0.9286 | Val Loss: 1.4623 | Val Acc: 0.4525
Epoch 12/30 | Train