In [1]:
!pip install torch torchvision SimpleITK pydicom

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.models.video import r3d_18
import SimpleITK as sitk
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# Dataset root
DATA_ROOT = "/kaggle/input/medical-deepfakes-lung-cancer"
LABELS_CSV = os.path.join(DATA_ROOT, "labels_exp1.csv")
SCANS_DIR = os.path.join(DATA_ROOT, "CT_Scans", "EXP1_blind")

# Cube size
CUBE_SHAPE = (32, 32, 32)

# Training configuration
BATCH_SIZE = 4
EPOCHS = 10
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", DEVICE)


Device: cuda


In [3]:
df = pd.read_csv(LABELS_CSV)

def create_label(t):
    return 1 if "F" in t else 0   # Fake = 1, Real = 0

df["label"] = df["type"].apply(create_label)

print("Total samples:", len(df))


Total samples: 133


In [4]:
class CTDataset(Dataset):
    def __init__(self, df, scans_dir, cube_shape=(32,32,32)):
        self.df = df.reset_index(drop=True)
        self.scans_dir = scans_dir
        self.cube_shape = cube_shape
        self.cache = {}

    def load_scan(self, uuid):
        if uuid in self.cache:
            return self.cache[uuid]

        folder = os.path.join(self.scans_dir, uuid)
        reader = sitk.ImageSeriesReader()
        dicom_files = reader.GetGDCMSeriesFileNames(folder)
        reader.SetFileNames(dicom_files)
        img = sitk.GetArrayFromImage(reader.Execute())  # shape: (Z, Y, X)

        self.cache[uuid] = img
        return img

    def extract_cube(self, scan, cz, cy, cx):
        dz, dy, dx = self.cube_shape
        z1, z2 = cz - dz//2, cz + dz//2
        y1, y2 = cy - dy//2, cy + dy//2
        x1, x2 = cx - dx//2, cx + dx//2

        # Pad if out of range
        pad = ((dz, dz), (dy, dy), (dx, dx))
        padded = np.pad(scan, pad, mode='constant', constant_values=-1000)

        # Shift to padded coordinates
        z1 += dz; z2 += dz
        y1 += dy; y2 += dy
        x1 += dx; x2 += dx

        cube = padded[z1:z2, y1:y2, x1:x2]

        # Normalize HU to [0,1]
        cube = (cube - (-1000)) / (400 - (-1000))
        cube = np.clip(cube, 0, 1)

        return cube.astype(np.float32)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        uuid = str(row["uuid"])
    
        scan = self.load_scan(uuid)
    
        cube = self.extract_cube(
            scan,
            int(row["slice"]),
            int(row["y"]),
            int(row["x"])
        )
    
        cube = torch.tensor(cube).unsqueeze(0)  # (1,32,32,32)
    
        # FIX → expand channel dimension from 1 → 3
        cube = cube.repeat(3, 1, 1, 1)  # (3,32,32,32)
    
        label = torch.tensor(row["label"]).float()
        return cube, label


In [5]:
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

train_dataset = CTDataset(train_df, SCANS_DIR)
val_dataset = CTDataset(val_df, SCANS_DIR)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

print("Train size:", len(train_dataset))
print("Val size:", len(val_dataset))


Train size: 106
Val size: 27


In [6]:
model = r3d_18(pretrained=True)
model.fc = nn.Linear(512, 1)   # Binary output

model = model.to(DEVICE)

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)


Downloading: "https://download.pytorch.org/models/r3d_18-b3b3357e.pth" to /root/.cache/torch/hub/checkpoints/r3d_18-b3b3357e.pth
100%|██████████| 127M/127M [00:00<00:00, 224MB/s] 


In [7]:
for epoch in range(EPOCHS):
    model.train()
    train_loss = []
    train_acc = []

    for cubes, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        cubes = cubes.to(DEVICE)
        labels = labels.to(DEVICE).unsqueeze(1)

        logits = model(cubes)
        loss = criterion(logits, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        preds = (torch.sigmoid(logits) > 0.5).float()
        acc = (preds == labels).float().mean().item()

        train_loss.append(loss.item())
        train_acc.append(acc)

    print(f"Train Loss: {np.mean(train_loss):.4f} | Train Acc: {np.mean(train_acc):.4f}")

    # Validation
    model.eval()
    val_acc = []
    val_loss = []

    with torch.no_grad():
        for cubes, labels in val_loader:
            cubes = cubes.to(DEVICE)
            labels = labels.to(DEVICE).unsqueeze(1)

            logits = model(cubes)
            loss = criterion(logits, labels)

            preds = (torch.sigmoid(logits) > 0.5).float()
            acc = (preds == labels).float().mean().item()

            val_loss.append(loss.item())
            val_acc.append(acc)

    print(f"Val Loss: {np.mean(val_loss):.4f} | Val Acc: {np.mean(val_acc):.4f}")


Epoch 1/10: 100%|██████████| 27/27 [05:57<00:00, 13.23s/it]


Train Loss: 0.5225 | Train Acc: 0.7130
Val Loss: 0.3020 | Val Acc: 0.8452


Epoch 2/10: 100%|██████████| 27/27 [00:09<00:00,  2.92it/s]


Train Loss: 0.3018 | Train Acc: 0.8241
Val Loss: 0.2967 | Val Acc: 0.8214


Epoch 3/10: 100%|██████████| 27/27 [00:09<00:00,  2.95it/s]


Train Loss: 0.1908 | Train Acc: 0.9444
Val Loss: 0.3694 | Val Acc: 0.7857


Epoch 4/10: 100%|██████████| 27/27 [00:09<00:00,  2.94it/s]


Train Loss: 0.3177 | Train Acc: 0.8704
Val Loss: 0.4306 | Val Acc: 0.7738


Epoch 5/10: 100%|██████████| 27/27 [00:09<00:00,  2.96it/s]


Train Loss: 0.2167 | Train Acc: 0.9259
Val Loss: 0.3048 | Val Acc: 0.8214


Epoch 6/10: 100%|██████████| 27/27 [00:09<00:00,  2.96it/s]


Train Loss: 0.1777 | Train Acc: 0.9167
Val Loss: 0.2589 | Val Acc: 0.8571


Epoch 7/10: 100%|██████████| 27/27 [00:09<00:00,  2.95it/s]


Train Loss: 0.2533 | Train Acc: 0.9074
Val Loss: 0.3373 | Val Acc: 0.9286


Epoch 8/10: 100%|██████████| 27/27 [00:09<00:00,  2.94it/s]


Train Loss: 0.3282 | Train Acc: 0.8704
Val Loss: 0.4357 | Val Acc: 0.8929


Epoch 9/10: 100%|██████████| 27/27 [00:09<00:00,  2.96it/s]


Train Loss: 0.2141 | Train Acc: 0.9444
Val Loss: 0.2155 | Val Acc: 0.8929


Epoch 10/10: 100%|██████████| 27/27 [00:09<00:00,  2.96it/s]


Train Loss: 0.1301 | Train Acc: 0.9444
Val Loss: 0.1898 | Val Acc: 0.8929


In [9]:
# Save the trained model
MODEL_PATH = "rsmodel.pth"
torch.save(model.state_dict(), MODEL_PATH)
print("Model saved to:", MODEL_PATH)

Model saved to: rsmodel.pth
