In [None]:
!python -V
!pip -V
!python -c "import sys, pkgutil; print('numpy', pkgutil.find_loader('numpy') is not None); print('torch', pkgutil.find_loader('torch') is not None)"
!pip install --upgrade --no-deps timm pylibjpeg pylibjpeg-libjpeg pylibjpeg-openjpeg
!pip install --upgrade --no-deps pylibjpeg==2.1.0 pylibjpeg-libjpeg==2.3.0 pylibjpeg-openjpeg==2.5.0 || true
!pip check || true

Python 3.11.13
pip 24.1.2 from /usr/local/lib/python3.11/dist-packages/pip (python 3.11)
numpy True
torch True
Collecting timm
  Downloading timm-1.0.20-py3-none-any.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.7/61.7 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pylibjpeg
  Downloading pylibjpeg-2.1.0-py3-none-any.whl.metadata (7.9 kB)
Collecting pylibjpeg-libjpeg
  Downloading pylibjpeg_libjpeg-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.8 kB)
Collecting pylibjpeg-openjpeg
  Downloading pylibjpeg_openjpeg-2.5.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (5.8 kB)
Downloading timm-1.0.20-py3-none-any.whl (2.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m80.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pylibjpeg-2.1.0-py3-none-any.whl (25 kB)
Downloading pylibjpeg_libjpeg-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x

In [None]:
import os, sys
os.kill(os.getpid(), 9)

In [27]:
import os
import numpy as np
import pandas as pd
import pydicom
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import transforms
from torchvision.models import vit_b_16, ViT_B_16_Weights
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm

In [29]:
DATA_PATH = "/kaggle/input/rsna-2022-cervical-spine-fracture-detection"
TRAIN_IMG_DIR = os.path.join(DATA_PATH, "train_images")
train_df = pd.read_csv(os.path.join(DATA_PATH, "train.csv"))
study_ids = train_df["StudyInstanceUID"].unique()
np.random.seed(33)
np.random.shuffle(study_ids)
split_idx = int(len(study_ids) * 0.8)
train_studies = study_ids[:split_idx]
val_studies = study_ids[split_idx:]
train_df_split = train_df[train_df["StudyInstanceUID"].isin(train_studies)]
val_df_split = train_df[train_df["StudyInstanceUID"].isin(val_studies)]

weights = ViT_B_16_Weights.IMAGENET1K_V1
mean = weights.meta.get("mean", [0.485, 0.456, 0.406])
std = weights.meta.get("std", [0.229, 0.224, 0.225])

## Modelo ViT

In [30]:
def find_optimal_thresholds(model, loader, device):
    model.eval()
    all_probs = []
    all_labels = []
    
    with torch.no_grad():
        for imgs, labels in loader:
            imgs = imgs.to(device)
            out = model(imgs)
            probs = torch.sigmoid(out)
            all_probs.append(probs.cpu())
            all_labels.append(labels.cpu())
    
    all_probs = torch.cat(all_probs).numpy()
    all_labels = torch.cat(all_labels).numpy()
    
    optimal_thresholds = []
    for i in range(all_labels.shape[1]):
        best_f1 = 0
        best_thresh = 0.5
        for thresh in np.arange(0.3, 0.7, 0.05):
            preds = (all_probs[:, i] > thresh).astype(int)
            f1 = f1_score(all_labels[:, i], preds, zero_division=0)
            if f1 > best_f1:
                best_f1 = f1
                best_thresh = thresh
        optimal_thresholds.append(best_thresh)
    
    return optimal_thresholds

class EarlyStopping:
    def __init__(self, patience=5, min_delta=0.001, mode='max'):
        self.patience = patience
        self.min_delta = min_delta
        self.mode = mode
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        
    def __call__(self, score):
        if self.best_score is None:
            self.best_score = score
        elif self.mode == 'max':
            if score < self.best_score + self.min_delta:
                self.counter += 1
                if self.counter >= self.patience:
                    self.early_stop = True
            else:
                self.best_score = score
                self.counter = 0
        return self.early_stop

class CervicalSliceDataset(Dataset):
    def __init__(self, df, root, transform=None, num_slices=5):
        self.df = df
        self.root = root
        self.transform = transform
        self.num_slices = num_slices
        self.study_ids = df["StudyInstanceUID"].unique().tolist()
    def __len__(self):
        return len(self.study_ids)
    def __getitem__(self, idx):
        study = self.study_ids[idx]
        folder = os.path.join(self.root, study)
        files = sorted([f for f in os.listdir(folder) if f.endswith(".dcm")])
        if len(files) == 0:
            raise RuntimeError(folder)
        mid = len(files) // 2
        idxs = []
        if self.num_slices >= 3:
            idxs = [max(0, mid-1), mid, min(len(files)-1, mid+1)]
        else:
            idxs = [mid, mid, mid]
        slices = []
        for i in idxs:
            path = os.path.join(folder, files[i])
            ds = pydicom.dcmread(path)
            slope = float(getattr(ds, "RescaleSlope", 1.0))
            intercept = float(getattr(ds, "RescaleIntercept", 0.0))
            arr = ds.pixel_array.astype(np.float32) * slope + intercept
            arr = arr - arr.min()
            if arr.max() > 0:
                arr = arr / arr.max()
            arr = (arr * 255).astype(np.uint8)
            img = Image.fromarray(arr).convert("L")
            if self.transform:
                img = self.transform(img.convert("RGB"))
            slices.append(img)
        img_tensor = torch.stack(slices)
        img_tensor = torch.mean(img_tensor, dim=0)
        row = self.df[self.df["StudyInstanceUID"]==study].iloc[0]
        labels = torch.zeros(8, dtype=torch.float32)
        labels[0] = row["patient_overall"]
        for i in range(1,8):
            labels[i] = row[f"C{i}"]
        return img_tensor, labels

train_transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.RandomRotation(12),
    transforms.RandomHorizontalFlip(0.2),
    transforms.ColorJitter(brightness=0.15, contrast=0.15),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])


train_ds = CervicalSliceDataset(train_df_split, TRAIN_IMG_DIR, transform=train_transforms, num_slices=5)
val_ds = CervicalSliceDataset(val_df_split, TRAIN_IMG_DIR, transform=val_transforms, num_slices=5)

train_loader = DataLoader(train_ds, batch_size=128, shuffle=True, num_workers=0, pin_memory=False)
val_loader = DataLoader(val_ds, batch_size=128, shuffle=False, num_workers=0, pin_memory=False)


In [31]:
pos_counts = train_df_split.iloc[:, 1:9].sum().values
pos_counts = np.clip(pos_counts, 1, None)
neg_counts = len(train_df_split) - pos_counts
pos_weight = torch.tensor((neg_counts / pos_counts).astype(np.float32))
pos_weight = torch.clamp(pos_weight, max=100.0)

labels_for_sampler = train_df_split.iloc[:, 1:9].sum(axis=1).values
sample_weights = 1.0 / (labels_for_sampler + 1.0)
sample_weights = sample_weights / sample_weights.sum()
sampler = None
try:
    class_counts = train_df_split.iloc[:, 1:9].sum(axis=0).values
    sample_weights_per_study = []
    for s in train_ds.study_ids:
        row = train_df_split[train_df_split["StudyInstanceUID"]==s].iloc[0]
        c = row.iloc[1:9].sum()
        sample_weights_per_study.append(1.0/(c+1.0))
    sampler = WeightedRandomSampler(sample_weights_per_study, num_samples=len(sample_weights_per_study), replacement=True)
except Exception:
    sampler = None

batch_size = 64
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=(sampler is None), sampler=sampler, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = vit_b_16(weights=weights)
in_features = model.heads.head.in_features if hasattr(model, "heads") else model.head.in_features
try:
    model.heads.head = torch.nn.Linear(in_features, 8)
except Exception:
    model.head = torch.nn.Linear(in_features, 8)
model = model.to(device)

for param in model.parameters():
    param.requires_grad = True

def freeze_backbone(m):
    for n,p in m.named_parameters():
        if "head" not in n and "heads" not in n:
            p.requires_grad = False

freeze_backbone(model)

In [32]:
criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weight.to(device))
optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=2e-4, weight_decay=0.01)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=10, eta_min=1e-6)
scaler = GradScaler() if torch.cuda.is_available() else None
accum_steps = 1

def train_epoch(model, loader, optimizer, criterion, device, scaler=None, accum_steps=1):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    optimizer.zero_grad()
    for step, (imgs, labels) in enumerate(tqdm(loader, desc="Training")):
        imgs = imgs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        if scaler is not None:
            with autocast():
                out = model(imgs)
                loss = criterion(out, labels) / accum_steps
            scaler.scale(loss).backward()
            if (step + 1) % accum_steps == 0:
                scaler.step(optimizer)
                scaler.update()
                optimizer.zero_grad()
        else:
            out = model(imgs)
            loss = criterion(out, labels) / accum_steps
            loss.backward()
            if (step + 1) % accum_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
        running_loss += loss.item() * imgs.size(0) * accum_steps
        preds = (torch.sigmoid(out) > 0.5).float()
        correct += (preds == labels).sum().item()
        total += labels.numel()
    return running_loss / len(loader.dataset), correct / total

def validate(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for imgs, labels in tqdm(loader, desc="Validation"):
            imgs = imgs.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
            with autocast(enabled=(scaler is not None)):
                out = model(imgs)
                loss = criterion(out, labels)
            running_loss += loss.item() * imgs.size(0)
            preds = (torch.sigmoid(out) > 0.5).float()
            correct += (preds == labels).sum().item()
            total += labels.numel()
    return running_loss / len(loader.dataset), correct / total

results = []
best_val_acc = 0.0
unfreeze_epoch = 3
for epoch in range(1, 16):
    if epoch == unfreeze_epoch:
        for p in model.parameters():
            p.requires_grad = True
        optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5, weight_decay=0.01)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=12, eta_min=1e-6)
    train_loss, train_acc = train_epoch(model, train_loader, optimizer, criterion, device, scaler, accum_steps)
    val_loss, val_acc = validate(model, val_loader, criterion, device)
    scheduler.step()
    results.append({'epoch': epoch, 'train_loss': train_loss, 'train_accuracy': train_acc, 'val_loss': val_loss, 'val_accuracy': val_acc, 'lr': optimizer.param_groups[0]['lr']})
    print(f"Epoch {epoch}: train_loss={train_loss:.4f}, train_acc={train_acc:.4f}, val_loss={val_loss:.4f}, val_acc={val_acc:.4f}, lr={optimizer.param_groups[0]['lr']:.2e}")
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), 'best_vit_model_pre.pth')

pd.DataFrame(results).to_csv('training_results_vit.csv', index=False)
print(best_val_acc)

  scaler = GradScaler() if torch.cuda.is_available() else None
  with autocast():
Training: 100%|██████████| 26/26 [00:40<00:00,  1.55s/it]
  with autocast(enabled=(scaler is not None)):
Validation: 100%|██████████| 7/7 [00:12<00:00,  1.83s/it]


Epoch 1: train_loss=0.7961, train_acc=0.8429, val_loss=1.4476, val_acc=0.8308, lr=1.95e-04


Training: 100%|██████████| 26/26 [00:34<00:00,  1.31s/it]
Validation: 100%|██████████| 7/7 [00:07<00:00,  1.08s/it]


Epoch 2: train_loss=0.7747, train_acc=0.9318, val_loss=1.4919, val_acc=0.8363, lr=1.81e-04


Training: 100%|██████████| 26/26 [00:35<00:00,  1.36s/it]
Validation: 100%|██████████| 7/7 [00:07<00:00,  1.06s/it]


Epoch 3: train_loss=0.7290, train_acc=0.9338, val_loss=1.6160, val_acc=0.8441, lr=9.85e-06


Training: 100%|██████████| 26/26 [00:35<00:00,  1.37s/it]
Validation: 100%|██████████| 7/7 [00:07<00:00,  1.08s/it]


Epoch 4: train_loss=0.7596, train_acc=0.9352, val_loss=1.5337, val_acc=0.8416, lr=9.40e-06


Training: 100%|██████████| 26/26 [00:36<00:00,  1.40s/it]
Validation: 100%|██████████| 7/7 [00:07<00:00,  1.07s/it]


Epoch 5: train_loss=0.7523, train_acc=0.9318, val_loss=1.5165, val_acc=0.8431, lr=8.68e-06


Training: 100%|██████████| 26/26 [00:35<00:00,  1.37s/it]
Validation: 100%|██████████| 7/7 [00:07<00:00,  1.06s/it]


Epoch 6: train_loss=0.7005, train_acc=0.9347, val_loss=1.6251, val_acc=0.8431, lr=7.75e-06


Training: 100%|██████████| 26/26 [00:35<00:00,  1.35s/it]
Validation: 100%|██████████| 7/7 [00:07<00:00,  1.06s/it]


Epoch 7: train_loss=0.6939, train_acc=0.9358, val_loss=1.5928, val_acc=0.8444, lr=6.66e-06


Training: 100%|██████████| 26/26 [00:35<00:00,  1.36s/it]
Validation: 100%|██████████| 7/7 [00:07<00:00,  1.05s/it]


Epoch 8: train_loss=0.6867, train_acc=0.9359, val_loss=1.5408, val_acc=0.8441, lr=5.50e-06


Training: 100%|██████████| 26/26 [00:34<00:00,  1.32s/it]
Validation: 100%|██████████| 7/7 [00:07<00:00,  1.03s/it]


Epoch 9: train_loss=0.6777, train_acc=0.9302, val_loss=1.5703, val_acc=0.8428, lr=4.34e-06


Training: 100%|██████████| 26/26 [00:36<00:00,  1.39s/it]
Validation: 100%|██████████| 7/7 [00:07<00:00,  1.05s/it]


Epoch 10: train_loss=0.6447, train_acc=0.9386, val_loss=1.6017, val_acc=0.8431, lr=3.25e-06


Training: 100%|██████████| 26/26 [00:35<00:00,  1.35s/it]
Validation: 100%|██████████| 7/7 [00:07<00:00,  1.06s/it]


Epoch 11: train_loss=0.6113, train_acc=0.9381, val_loss=1.6483, val_acc=0.8434, lr=2.32e-06


Training: 100%|██████████| 26/26 [00:35<00:00,  1.37s/it]
Validation: 100%|██████████| 7/7 [00:07<00:00,  1.05s/it]


Epoch 12: train_loss=0.6418, train_acc=0.9322, val_loss=1.6165, val_acc=0.8428, lr=1.60e-06


Training: 100%|██████████| 26/26 [00:36<00:00,  1.42s/it]
Validation: 100%|██████████| 7/7 [00:07<00:00,  1.07s/it]


Epoch 13: train_loss=0.6049, train_acc=0.9368, val_loss=1.6518, val_acc=0.8431, lr=1.15e-06


Training: 100%|██████████| 26/26 [00:35<00:00,  1.38s/it]
Validation: 100%|██████████| 7/7 [00:07<00:00,  1.06s/it]


Epoch 14: train_loss=0.6326, train_acc=0.9330, val_loss=1.6314, val_acc=0.8422, lr=1.00e-06


Training: 100%|██████████| 26/26 [00:36<00:00,  1.41s/it]
Validation: 100%|██████████| 7/7 [00:07<00:00,  1.03s/it]

Epoch 15: train_loss=0.6804, train_acc=0.9278, val_loss=1.5871, val_acc=0.8425, lr=1.15e-06
0.8443688118811881





In [33]:
import pandas as pd

df = pd.read_csv('/kaggle/working/training_results_vit.csv')
df.head(15)


Unnamed: 0,epoch,train_loss,train_accuracy,val_loss,val_accuracy,lr
0,1,0.7961,0.842879,1.447615,0.830755,0.000195
1,2,0.774688,0.931811,1.491851,0.836324,0.000181
2,3,0.729016,0.933824,1.615965,0.844059,1e-05
3,4,0.759563,0.935217,1.533693,0.841584,9e-06
4,5,0.752276,0.931811,1.516464,0.843131,9e-06
5,6,0.700549,0.934675,1.625093,0.843131,8e-06
6,7,0.693887,0.935836,1.592806,0.844369,7e-06
7,8,0.686654,0.935913,1.540804,0.844059,6e-06
8,9,0.677727,0.930186,1.570296,0.842822,4e-06
9,10,0.64472,0.938622,1.601733,0.843131,3e-06
