# Step 5: EfficientNet-B4 Backbone Upgrade
## Ablation C — Backbone Capacity Contribution

**Step 4 lesson:** Temporal modeling on weak B0 features makes things worse (Celeb-DF AUC 0.6135 → 0.5524).  
The GRU amplifies noise when spatial features are not rich enough.  
**Conclusion: need strong spatial features FIRST, temporal module SECOND.**

**This notebook:** Same architecture as Step 3 (frame-level, no temporal module),  
but upgrading from EfficientNet-B0 to EfficientNet-B4.

| Model | Params | ImageNet Top-1 | Feature Dim |
|-------|--------|---------------|-------------|
| EfficientNet-B0 | 5.3M | 77.7% | 1280 |
| EfficientNet-B4 | 19.3M | 83.4% | 1792 |

B4 has about 3.6x as many parameters and 5.7 percentage points higher ImageNet top-1 accuracy.  
Richer features = better face identity representation = clearer temporal signal for Step 6.

**Expected:** Celeb-DF AUC 0.72-0.78 (vs 0.6135 with B0)  
**Paper role:** Ablation C — proves backbone capacity is a key contributor

In [None]:
# 1. Check for ECC errors and retired pages
!nvidia-smi -q -d ECC,PAGE_RETIREMENT

# 2. Check current GPU status
!nvidia-smi

In [None]:
import torch

def test_gpu_health():
    """Smoke-test the CUDA device with one large matrix multiplication.

    Prints a success line (device name plus currently allocated VRAM) when
    the multiplication completes, or an error line when CUDA is unavailable
    or the operation raises. Nothing is returned.
    """
    if torch.cuda.is_available():
        try:
            gpu = torch.device("cuda")
            matrix = torch.randn(2048, 2048, device=gpu)
            # Keep the product bound so it still counts towards the VRAM figure below.
            product = torch.matmul(matrix, matrix)
            torch.cuda.synchronize()  # block until the queued kernels have finished
            device_name = torch.cuda.get_device_name(0)
            print(f"‚úÖ SUCCESS: {device_name} is healthy.")
            allocated_gb = torch.cuda.memory_allocated() / 1e9
            print(f"VRAM Allocated: {allocated_gb:.2f} GB")
        except Exception as err:
            print(f"‚ùå HARDWARE TEST FAILED: {err}")
    else:
        print("‚ùå ERROR: GPU not detected by PyTorch.")

test_gpu_health()

## Section 1 — Setup

In [None]:
import os, json, random, time, warnings, sys
from pathlib import Path
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.metrics import roc_auc_score, roc_curve
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
warnings.filterwarnings('ignore')

# One fixed seed for Python, NumPy and PyTorch (CPU and every visible GPU).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# Run on the GPU whenever PyTorch can see one, otherwise fall back to CPU.
_has_cuda = torch.cuda.is_available()
DEVICE = torch.device('cuda') if _has_cuda else torch.device('cpu')
print(f"Device : {DEVICE}")
if _has_cuda:
    print(f"GPU    : {torch.cuda.get_device_name(0)}")
    _gpu_props = torch.cuda.get_device_properties(0)
    print(f"VRAM   : {_gpu_props.total_memory/1e9:.1f} GB")

# Output tree for this step: checkpoints and plots live under OUTPUT_DIR.
OUTPUT_DIR = Path('/kaggle/working/step5')
CKPT_DIR = OUTPUT_DIR.joinpath('checkpoints')
PLOTS_DIR = OUTPUT_DIR.joinpath('plots')
for d in (OUTPUT_DIR, CKPT_DIR, PLOTS_DIR):
    d.mkdir(parents=True, exist_ok=True)
print(f"Outputs ‚Üí {OUTPUT_DIR}")


In [None]:
# Experiment configuration for the B4 run.
CFG = dict(
    # Data: same settings as Step 3
    img_size=224,
    n_frames=4,
    n_train_real=600,
    n_train_fake=600,
    n_val_each=50,

    # Model: B4 upgrade
    backbone='efficientnet_b4',
    dropout=0.4,

    # Training: LR raised from 5e-5 (too low); the head uses this value and
    # the backbone one tenth of it (2e-4 / 2e-5).
    epochs=25,
    batch_size=24,
    lr=2e-4,
    weight_decay=1e-4,
    warmup_epochs=5,        # was 3; longer warmup to stabilise B4 at the higher LR
    label_smoothing=0.0,
)

# FaceForensics++ manipulation methods used as the fake class.
TRAIN_METHODS = ['Deepfakes', 'Face2Face', 'FaceSwap', 'NeuralTextures']

print("Config:")
for name, value in CFG.items():
    print(f"  {name:22s}: {value}")
print(f"Train methods: {TRAIN_METHODS}")

## Section 2 — Dataset Paths & ID-Based Splits

In [None]:
KAGGLE_INPUT = Path('/kaggle/input')

def locate_ff_root(base):
    """Find the FaceForensics++ root directory underneath *base*.

    The known Kaggle mount location is tried first. Otherwise every entry
    below *base* is visited in sorted order and the first directory that
    contains at least two of Deepfakes / Face2Face / FaceSwap is returned.
    Returns None when nothing matches.
    """
    preferred = base.joinpath('datasets', 'xdxd003', 'ff-c23', 'FaceForensics++_C23')
    if preferred.exists():
        return preferred

    markers = ('Deepfakes', 'Face2Face', 'FaceSwap')
    for candidate in sorted(base.rglob('*')):
        if not candidate.is_dir():
            continue
        present = [name for name in markers if (candidate / name).exists()]
        if len(present) >= 2:
            return candidate
    return None

def locate_celeb_root(base):
    """Find the Celeb-DF root directory underneath *base*.

    The known Kaggle mount location is tried first. Otherwise every entry
    below *base* is visited in sorted order and the first directory that
    contains a 'Celeb-real' entry is returned. Returns None when nothing
    matches.
    """
    preferred = base.joinpath('datasets', 'reubensuju', 'celeb-df-v2')
    if preferred.exists():
        return preferred

    for candidate in sorted(base.rglob('*')):
        if not candidate.is_dir():
            continue
        if (candidate / 'Celeb-real').exists():
            return candidate
    return None

# Resolve both dataset roots (either may be None when the dataset is not mounted).
FF_ROOT = locate_ff_root(KAGGLE_INPUT)
CELEB_ROOT = locate_celeb_root(KAGGLE_INPUT)
for tag, root in (("FF++    ", FF_ROOT), ("Celeb-DF", CELEB_ROOT)):
    print(f"{tag}: {root}")

# Real FF++ videos: prefer an 'original*' folder, otherwise fall back to any
# .mp4 whose path mentions 'original'.
FF_REAL = []
if FF_ROOT:
    FF_REAL = sorted(FF_ROOT.rglob('original*/*.mp4'))
    if not FF_REAL:
        FF_REAL = sorted(v for v in FF_ROOT.rglob('*.mp4')
                         if 'original' in str(v).lower())

# Fake FF++ videos, grouped by manipulation method (empty list when missing).
FF_FAKE_BY_METHOD = {}
for method in TRAIN_METHODS:
    paths = []
    if FF_ROOT and (FF_ROOT / method).exists():
        paths = sorted((FF_ROOT / method).glob('*.mp4'))
    FF_FAKE_BY_METHOD[method] = paths
    print(f"  FF++/{method:20s}: {len(paths)} videos")
print(f"  FF++/{'real':20s}: {len(FF_REAL)} videos")

# Celeb-DF: real clips come from two folders, fakes from 'Celeb-synthesis'.
CDF_REAL, CDF_FAKE = [], []
if CELEB_ROOT:
    celeb_real = sorted((CELEB_ROOT / 'Celeb-real').glob('*.mp4'))
    youtube_real = sorted((CELEB_ROOT / 'YouTube-real').glob('*.mp4'))
    CDF_REAL = celeb_real + youtube_real
    CDF_FAKE = sorted((CELEB_ROOT / 'Celeb-synthesis').glob('*.mp4'))
    print(f"  Celeb-DF real: {len(CDF_REAL)} | fake: {len(CDF_FAKE)}")


In [None]:
# ── ID-based split — same logic as Step 3 (proven clean) ────────────────────
def get_video_id(path):
    """Return the part of the file stem that precedes the first underscore.

    A stem without an underscore is returned whole.
    """
    stem = Path(path).stem
    video_id, _, _ = stem.partition('_')
    return video_id

# Build train / val / cross-dataset test lists.
# A dedicated Random instance is used, and every shuffle/sample below draws
# from it in sequence, so the resulting lists depend on the order of these
# statements as well as on SEED.
rng = random.Random(SEED)
# Unique IDs of the real videos, sorted before shuffling so the split does not
# depend on filesystem listing order.
all_ids = sorted(set(get_video_id(p) for p in FF_REAL))
rng.shuffle(all_ids)
# 75% of the IDs go to training, the remainder to validation.
n_train_ids = int(len(all_ids) * 0.75)
train_ids   = set(all_ids[:n_train_ids])
val_ids     = set(all_ids[n_train_ids:])
print(f"Video IDs ‚Äî train: {len(train_ids)}, val: {len(val_ids)} (zero overlap)")

# Fake budget is divided evenly across the manipulation methods (integer division).
n_per_method = CFG['n_train_fake'] // len(TRAIN_METHODS)

# Training set: label 0 = real, label 1 = fake.
train_real_pool = [p for p in FF_REAL if get_video_id(p) in train_ids]
train_real      = rng.sample(train_real_pool, min(CFG['n_train_real'], len(train_real_pool)))
TRAIN_DATA      = [(p, 0) for p in train_real]
for method in TRAIN_METHODS:
    # NOTE(review): get_video_id keeps only the text before the first
    # underscore, so a fake whose stem carries two IDs is assigned to a split
    # by its first ID only — confirm the second ID cannot leak across splits.
    pool   = [p for p in FF_FAKE_BY_METHOD[method] if get_video_id(p) in train_ids]
    picked = rng.sample(pool, min(n_per_method, len(pool)))
    TRAIN_DATA += [(p, 1) for p in picked]
rng.shuffle(TRAIN_DATA)

# Validation set: built the same way from the held-out IDs. The per-method
# fake count uses integer division, so the fake total can be slightly below
# n_val_each.
val_real_pool = [p for p in FF_REAL if get_video_id(p) in val_ids]
val_real      = rng.sample(val_real_pool, min(CFG['n_val_each'], len(val_real_pool)))
VAL_DATA      = [(p, 0) for p in val_real]
for method in TRAIN_METHODS:
    pool   = [p for p in FF_FAKE_BY_METHOD[method] if get_video_id(p) in val_ids]
    picked = rng.sample(pool, min(CFG['n_val_each']//len(TRAIN_METHODS), len(pool)))
    VAL_DATA += [(p, 1) for p in picked]
rng.shuffle(VAL_DATA)

# Cross-dataset test set: a balanced Celeb-DF sample, capped at 200 per class
# (0 when either class list is empty). Not shuffled: reals first, then fakes.
n_cdf    = min(200, len(CDF_REAL), len(CDF_FAKE))
CDF_TEST = ([(p,0) for p in rng.sample(CDF_REAL, n_cdf)] +
            [(p,1) for p in rng.sample(CDF_FAKE,  n_cdf)])

# Summary of class counts per split.
print(f"Train: {sum(1 for _,l in TRAIN_DATA if l==0)} real + "
      f"{sum(1 for _,l in TRAIN_DATA if l==1)} fake = {len(TRAIN_DATA)}")
print(f"Val  : {sum(1 for _,l in VAL_DATA   if l==0)} real + "
      f"{sum(1 for _,l in VAL_DATA   if l==1)} fake = {len(VAL_DATA)}")
print(f"CDF  : {n_cdf} real + {n_cdf} fake = {len(CDF_TEST)}")


## Section 3 — Dataset (same as Step 3, frame-level)

In [None]:
# Channel statistics passed to Normalize in both pipelines.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]

# Training pipeline: PIL-space augmentations, then tensor-space erasing,
# then normalisation.
_train_steps = [
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.25, contrast=0.25, saturation=0.15, hue=0.05),
    transforms.RandomGrayscale(p=0.05),
    transforms.ToTensor(),
    # RandomErasing operates on tensors, so it must come after ToTensor.
    transforms.RandomErasing(p=0.1, scale=(0.02, 0.1)),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
]
train_tf = transforms.Compose(_train_steps)

# Evaluation pipeline: conversion and normalisation only, no augmentation.
_val_steps = [
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
]
val_tf = transforms.Compose(_val_steps)

# ── Frame cache — extract once, save to disk, reload instantly ──────────────
# Directory holding one compressed .npz archive of extracted frames per video.
CACHE_DIR = Path('/kaggle/working/frame_cache')
# parents=True matches how the output directories are created and avoids a
# FileNotFoundError when the parent folder does not exist yet.
CACHE_DIR.mkdir(parents=True, exist_ok=True)

def get_cache_path(video_path, n_frames, img_size, cache_dir=None):
    """Return the cache file path for one video and extraction setting.

    The file name combines the video's stem, a short digest of its full
    path, the frame count and the image size. The digest is what keeps two
    videos with the same file name in different folders (for example the
    same stem under several manipulation-method directories) from sharing,
    and therefore overwriting or misreading, one cache entry.

    Args:
        video_path: Path (str or Path) of the source video.
        n_frames: Number of frames extracted per video.
        img_size: Side length the frames are resized to.
        cache_dir: Directory for the cache file; defaults to CACHE_DIR.

    Returns:
        A Path inside the cache directory, ending in '.npz'.

    Note:
        Archives written under the previous stem-only naming scheme are not
        matched by this key, so those videos are extracted again once.
    """
    import hashlib  # local import: the cell's import list is left untouched

    video_path = Path(video_path)
    root = CACHE_DIR if cache_dir is None else Path(cache_dir)
    digest = hashlib.sha1(str(video_path).encode('utf-8')).hexdigest()[:12]
    key = f"{video_path.stem}_{digest}_{n_frames}_{img_size}.npz"
    return root / key

def load_frames_cached(video_path, n_frames, img_size):
    """Return n_frames RGB frames for a video, using the on-disk cache.

    On a cache hit the frames are read back from the .npz archive. On a miss,
    frames are sampled at evenly spaced positions across the video, converted
    to RGB, cropped (5% off top/bottom, 10% off left/right), resized to
    img_size x img_size, and written to the cache.

    Args:
        video_path: Path of the video file.
        n_frames: Number of frames to return.
        img_size: Output side length in pixels.

    Returns:
        A list of n_frames arrays, or None when the video cannot be opened,
        reports no frames, or yields no readable frame.
    """
    cache_path = get_cache_path(video_path, n_frames, img_size)

    # Cache hit: the context manager closes the archive's file handle once
    # the arrays have been read, instead of leaving one open per video.
    if cache_path.exists():
        with np.load(cache_path) as data:
            return [data[f'f{i}'] for i in range(len(data.files))]

    # Cache miss: decode from the video. try/finally guarantees the capture
    # is released even if decoding raises part-way through.
    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            return None
        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if total < 1:
            return None
        positions = np.linspace(0, total-1, n_frames, dtype=int)
        frames = []
        for pos in positions:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(pos))
            ret, frame = cap.read()
            if not ret:
                continue  # unreadable position; padded from the last good frame below
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            h, w = frame.shape[:2]
            frame = frame[int(h*0.05):int(h*0.95), int(w*0.10):int(w*0.90)]
            frame = cv2.resize(frame, (img_size, img_size))
            frames.append(frame)
    finally:
        cap.release()

    if not frames:
        return None
    # Pad by repeating the last decoded frame so callers always get n_frames.
    while len(frames) < n_frames:
        frames.append(frames[-1])
    frames = frames[:n_frames]

    # Write to a temporary file and rename it into place, so an interrupted
    # run cannot leave a truncated archive that a later run would try to load.
    tmp_path = cache_path.with_suffix('.tmp')
    with open(tmp_path, 'wb') as fh:
        np.savez_compressed(fh, **{f'f{i}': f for i, f in enumerate(frames)})
    tmp_path.replace(cache_path)
    return frames


class DeepfakeDataset(Dataset):
    """Frame-level dataset: each loaded frame of each video is one sample.

    All frames are loaded up front via load_frames_cached and kept in memory
    as (frame, label) pairs; the transform is applied on access.
    """

    def __init__(self, video_label_pairs, transform, n_frames, img_size):
        self.transform = transform
        self.items = []
        n_failed = 0
        progress = tqdm(video_label_pairs, ncols=80, desc='Loading')
        for video, target in progress:
            video_frames = load_frames_cached(str(video), n_frames, img_size)
            if video_frames is None:
                # Videos that could not be loaded are counted and left out.
                n_failed += 1
            else:
                self.items.extend((frame, target) for frame in video_frames)
        print(f"  {len(self.items)} frames ready ({n_failed} failed)")

    def __len__(self):
        return len(self.items)

    def __getitem__(self, idx):
        image, target = self.items[idx]
        sample = self.transform(image)
        return sample, torch.tensor(target, dtype=torch.long)


print("Loading frames (first run: ~15 min, subsequent runs: ~30 sec)...")
t0 = time.time()
# Same frame count and image size for all three splits; only the training
# split is augmented.
_frame_args = (CFG['n_frames'], CFG['img_size'])
train_ds = DeepfakeDataset(TRAIN_DATA, train_tf, *_frame_args)
val_ds   = DeepfakeDataset(VAL_DATA,   val_tf,   *_frame_args)
cdf_ds   = DeepfakeDataset(CDF_TEST,   val_tf,   *_frame_args)
print(f"Done in {time.time()-t0:.1f}s")

# Loader settings shared by all splits; only the training loader shuffles.
_loader_kwargs = dict(batch_size=CFG['batch_size'], num_workers=0, pin_memory=False)
train_loader = DataLoader(train_ds, shuffle=True,  **_loader_kwargs)
val_loader   = DataLoader(val_ds,   shuffle=False, **_loader_kwargs)
cdf_loader   = DataLoader(cdf_ds,   shuffle=False, **_loader_kwargs)

# Report cache size and dataset sizes, then pull one batch as a shape check.
cache_count = sum(1 for _ in CACHE_DIR.glob('*.npz'))
print(f"Cache: {cache_count} files in {CACHE_DIR}")
print(f"Train frames: {len(train_ds)} | Val: {len(val_ds)} | CDF: {len(cdf_ds)}")
x, y = next(iter(train_loader))
print(f"Batch: x={x.shape}, labels={y.unique().tolist()}")

## Section 4 — Model: EfficientNet-B4

EfficientNet-B4 pretrained on ImageNet-1K.  
Feature dim: 1792 (vs 1280 for B0).  
19.3M params (vs 5.3M for B0).  

Two-layer head with stronger regularization for the larger backbone.

In [None]:
class DeepfakeDetectorB4(nn.Module):
    """EfficientNet-B4 whose classifier is replaced by a two-layer, 2-class head.

    Feature-extractor and head parameters are kept in separate lists so that
    get_param_groups can give them different learning rates.
    """

    def __init__(self, dropout=CFG['dropout']):
        super().__init__()

        # torchvision EfficientNet-B4 with ImageNet-1K (V1) weights.
        pretrained = models.EfficientNet_B4_Weights.IMAGENET1K_V1
        net = models.efficientnet_b4(weights=pretrained)
        feat_dim = net.classifier[1].in_features  # 1792

        # New head: dropout -> 512-unit hidden layer -> GELU -> lighter dropout -> 2 logits.
        head_layers = [
            nn.Dropout(p=dropout),
            nn.Linear(feat_dim, 512),
            nn.GELU(),
            nn.Dropout(p=dropout * 0.5),
            nn.Linear(512, 2),
        ]
        net.classifier = nn.Sequential(*head_layers)
        self.model = net

        # Parameter lists used to build the optimizer's two groups.
        self.backbone_params = list(net.features.parameters())
        self.head_params = list(net.classifier.parameters())

        n_backbone = sum(p.numel() for p in self.backbone_params)
        n_head = sum(p.numel() for p in self.head_params)
        print(f"Backbone (B4 features): {n_backbone/1e6:.2f}M params")
        print(f"Head:                   {n_head/1e6:.2f}M params")

    def forward(self, x):
        return self.model(x)

    def get_param_groups(self, base_lr):
        """Return optimizer groups: backbone at base_lr / 10, head at base_lr."""
        backbone_group = dict(params=self.backbone_params, lr=base_lr / 10)
        head_group = dict(params=self.head_params, lr=base_lr)
        return [backbone_group, head_group]


# Build the detector and move it to the selected device.
model = DeepfakeDetectorB4().to(DEVICE)
total = sum(p.numel() for p in model.parameters())
print(f"Total: {total/1e6:.2f}M params on {DEVICE}")

# Shape sanity check on random input.
# The model is put in eval mode for this pass: torch.no_grad() disables
# gradient tracking but does NOT stop BatchNorm layers from updating their
# running statistics in train mode, which would blend random-noise statistics
# into the pretrained backbone. Train mode is restored afterwards.
model.eval()
with torch.no_grad():
    out = model(torch.randn(2, 3, 224, 224).to(DEVICE))
    print(f"Forward: (2,3,224,224) ‚Üí {out.shape} ‚úì")
model.train()

# VRAM check
if torch.cuda.is_available():
    allocated = torch.cuda.memory_allocated() / 1e9
    print(f"VRAM after model init: {allocated:.2f} GB")


## Section 5 — Training

In [None]:
# Loss and optimizer. The parameter groups carry their own learning rates
# (backbone and head differ); weight decay is shared.
criterion = nn.CrossEntropyLoss(label_smoothing=CFG['label_smoothing'])
_param_groups = model.get_param_groups(CFG['lr'])
optimizer = torch.optim.AdamW(_param_groups, weight_decay=CFG['weight_decay'])

def lr_lambda(epoch):
    """Return the LR multiplier for a 0-based epoch index.

    Linear warmup over the first CFG['warmup_epochs'] epochs (reaching 1.0 on
    the last warmup epoch), followed by half-cosine decay over the remaining
    epochs.
    """
    warmup, n_epochs = CFG['warmup_epochs'], CFG['epochs']
    if epoch >= warmup:
        progress = (epoch - warmup) / max(1, n_epochs - warmup)
        return 0.5 * (1 + np.cos(np.pi * progress))
    return (epoch + 1) / warmup

scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_lambda)


def train_epoch(model, loader):
    """Run one training pass over *loader*.

    Uses the module-level optimizer, criterion and DEVICE, and clips the
    gradient norm to 1.0 before every optimizer step.

    Returns:
        (mean loss per batch, accuracy over all samples seen).
    """
    model.train()
    running_loss = 0.0
    n_correct = n_seen = 0
    for inputs, targets in loader:
        inputs = inputs.to(DEVICE)
        targets = targets.to(DEVICE)
        optimizer.zero_grad()
        outputs = model(inputs)
        batch_loss = criterion(outputs, targets)
        batch_loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        running_loss += batch_loss.item()
        predicted = outputs.detach().argmax(1)
        n_correct += (predicted == targets).sum().item()
        n_seen += targets.size(0)
    mean_loss = running_loss / len(loader)
    return mean_loss, n_correct / n_seen


def evaluate(model, loader):
    """Score *model* on *loader* without gradient tracking.

    The softmax probability of class 1 is used as the per-sample score.

    Returns:
        dict with 'auc' (0.5 when fewer than two classes are present),
        'acc' (scores thresholded at 0.5), 'loss' (mean loss per batch),
        and the raw 'labels' and 'probs' arrays.
    """
    model.eval()
    loss_sum, n_batches = 0.0, 0
    y_true, y_score = [], []
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(DEVICE)
            targets = targets.to(DEVICE)
            outputs = model(inputs)
            loss_sum += criterion(outputs, targets).item()
            fake_prob = F.softmax(outputs, dim=1)[:, 1]
            y_score.extend(fake_prob.cpu().numpy())
            y_true.extend(targets.cpu().numpy())
            n_batches += 1

    labels = np.array(y_true)
    probs = np.array(y_score)
    # AUC is undefined with a single class; fall back to the chance value.
    if len(np.unique(labels)) > 1:
        auc = roc_auc_score(labels, probs)
    else:
        auc = 0.5
    predictions = (probs > 0.5).astype(int)
    acc = (predictions == labels).mean()
    return {'auc': auc, 'acc': acc, 'loss': loss_sum / max(n_batches, 1),
            'labels': labels, 'probs': probs}

print(f"‚úÖ Ready ‚Äî {len(train_loader)} steps/epoch, LR={CFG['lr']:.1e}")


In [None]:
import gc, math

# ‚îÄ‚îÄ DataParallel for dual T4 ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ‚îÄ
# When more than one CUDA device is visible, wrap the model in nn.DataParallel.
# `model` is rebound to the wrapper here; the original network is then
# reachable as `model.module` (the checkpoint code below relies on that).
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs with DataParallel")
    model = nn.DataParallel(model)

def train_epoch(model, loader):
    """Run one training pass over *loader*, skipping batches that fail at runtime.

    Uses the module-level optimizer, criterion and DEVICE, and clips the
    gradient norm to 1.0 before every optimizer step. A batch that raises a
    RuntimeError (the type PyTorch uses for CUDA failures and out-of-memory)
    is skipped and counted; the first such error is reported at the end so the
    cause is visible. Other exception types are not caught, so programming
    errors surface instead of being reported as GPU problems.

    Returns:
        (mean loss over the batches that completed, accuracy over the samples
        seen), or (nan, 0.0) when no batch completed.
    """
    model.train()
    total_loss, correct, total, skipped = 0.0, 0, 0, 0
    first_error = None
    pbar = tqdm(loader, ncols=80, desc='  train', leave=False)
    for x, y in pbar:
        try:
            x, y = x.to(DEVICE), y.to(DEVICE)
            optimizer.zero_grad()
            logits = model(x)
            loss   = criterion(logits, y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            loss_value = loss.item()  # read once; reused for the total and the progress bar
            total_loss += loss_value
            correct    += (logits.detach().argmax(1) == y).sum().item()
            total      += y.size(0)
            pbar.set_postfix(loss=f'{loss_value:.4f}',
                             acc=f'{correct/total:.3f}')
        except RuntimeError as err:
            skipped += 1
            if first_error is None:
                first_error = f"{type(err).__name__}: {err}"
            gc.collect()
            continue
    if skipped:
        print(f"  ‚ö†Ô∏è  {skipped} batches skipped (GPU error)")
        print(f"     first error: {first_error}")
    if total == 0:
        return float('nan'), 0.0
    return total_loss / max(len(loader) - skipped, 1), correct / total


# Per-epoch metrics, appended once per completed epoch.
history = {'train_loss':[], 'train_acc':[], 'val_loss':[], 'val_auc':[], 'lr':[]}
best_val_auc, best_epoch = 0.0, 0
start_time = time.time()

print("="*68)
print(f"{'Ep':>3} {'TrLoss':>8} {'TrAcc':>7} {'VaLoss':>8} "
      f"{'VaAUC':>7} {'VaAcc':>7} {'LR':>9} {'t':>5}")
print("="*68)

for epoch in range(CFG['epochs']):
    t0 = time.time()
    print(f"Epoch {epoch+1}/{CFG['epochs']}")
    # Read the head learning rate (param group 1) BEFORE stepping the
    # scheduler, so the value logged for this epoch is the one it was actually
    # trained with. Reading it after scheduler.step() reports the next epoch's
    # rate, which shifts the whole column by one epoch.
    lr = optimizer.param_groups[1]['lr']
    tr_loss, tr_acc = train_epoch(model, train_loader)
    val_m           = evaluate(model, val_loader)
    scheduler.step()

    history['train_loss'].append(tr_loss)
    history['train_acc'].append(tr_acc)
    history['val_loss'].append(val_m['loss'])
    history['val_auc'].append(val_m['auc'])
    history['lr'].append(lr)

    # A trailing mark flags epochs that set a new best validation AUC.
    flag = ' ‚úì' if val_m['auc'] > best_val_auc else ''
    print(f"{epoch+1:>3} {tr_loss:>8.4f} {tr_acc:>7.3f} {val_m['loss']:>8.4f} "
          f"{val_m['auc']:>7.4f} {val_m['acc']:>7.3f} {lr:>9.2e} "
          f"{time.time()-t0:>4.0f}s{flag}")
    sys.stdout.flush()

    if val_m['auc'] > best_val_auc:
        best_val_auc = val_m['auc']
        best_epoch   = epoch + 1
        # DataParallel wraps the model, so save the inner module's weights to
        # keep checkpoint keys free of the 'module.' prefix.
        state = model.module.state_dict() if hasattr(model, 'module') else model.state_dict()
        torch.save({'epoch': epoch, 'model_state': state,
                    'val_auc': best_val_auc, 'cfg': CFG},
                   CKPT_DIR / 'best.pth')

    # train_epoch returns NaN when every batch was skipped; stop in that case.
    if math.isnan(tr_loss):
        print("‚ùå NaN loss ‚Äî GPU fully dead, stopping.")
        break

total_time = time.time() - start_time
print("="*68)
print(f"Best val AUC : {best_val_auc:.4f} at epoch {best_epoch}")
print(f"Total time   : {total_time/60:.1f} min")

## Section 6 — Evaluation & Full Ablation Table

In [None]:
# Reload the best checkpoint written during training.
ckpt = torch.load(CKPT_DIR / 'best.pth', map_location=DEVICE, weights_only=False)

# The checkpoint stores un-prefixed keys; a DataParallel wrapper expects every
# key to start with 'module.', so add the prefix in that case.
state_dict = ckpt['model_state']
if not hasattr(model, 'module'):
    model.load_state_dict(state_dict)
else:
    from collections import OrderedDict
    new_state = OrderedDict(('module.' + k, v) for k, v in state_dict.items())
    model.load_state_dict(new_state)

print(f"Loaded best model ‚Äî epoch {ckpt['epoch']+1}, val AUC={ckpt['val_auc']:.4f}")

# In-domain (FF++ validation) and cross-dataset (Celeb-DF) scores.
ff_m  = evaluate(model, val_loader)
cdf_m = evaluate(model, cdf_loader)

# Step 3 and Step 4 rows are fixed numbers from the earlier notebooks; only the
# Step 5 row is measured here.
ABLATION = {
    'Step 3 ‚Äî B0, frame-level':   {'ff': 0.6850, 'cdf': 0.6135},
    'Step 4 ‚Äî B0 + temporal GRU': {'ff': 0.5954, 'cdf': 0.5524},
    'Step 5 ‚Äî B4, frame-level':   {'ff': ff_m['auc'], 'cdf': cdf_m['auc']},
}

_rule = "="*65
print("\n" + _rule)
print("ABLATION TABLE ‚Äî ALL STEPS")
print(_rule)
print(f"{'Model':<35} {'FF++ Val':>10} {'Celeb-DF':>10} {'Delta vs S3':>12}")
print("-"*65)
for name, vals in ABLATION.items():
    # Delta is relative to the Step 3 Celeb-DF AUC.
    delta = vals['cdf'] - 0.6135
    print(f"{name:<35} {vals['ff']:>10.4f} {vals['cdf']:>10.4f} {delta:>+12.4f}")
print(_rule)

# Verdict thresholds on the Celeb-DF gain over the Step 3 baseline, checked
# from highest to lowest; anything below the lowest keeps the default message.
b4_gain = cdf_m['auc'] - 0.6135
verdict = "üü° MODEST B4 ‚Äî Consider more epochs or larger training set"
for _threshold, _message in (
    (0.08, "üü¢ STRONG B4 ‚Äî Ready to add temporal module in Step 6"),
    (0.04, "üü° GOOD B4 ‚Äî Solid improvement. Step 6 temporal should push further"),
):
    if b4_gain >= _threshold:
        verdict = _message
        break
print(f"\n{verdict}")
print(f"\nB4 backbone contribution: {b4_gain:+.4f} vs B0 frame-level baseline")

In [None]:
# Three panels: loss curves, validation AUC with reference lines, ablation bars.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
fig.suptitle('Step 5: EfficientNet-B4 Backbone ‚Äî Training Curves & Ablation',
             fontsize=14, fontweight='bold')
ax_loss, ax_auc, ax_bar = axes

# 1-based epoch numbers for the x axis.
x = range(1, len(history['train_loss'])+1)

# Loss panel; the dotted line at 0.693 is labelled 'Random'.
for _series, _colour, _tag in ((history['train_loss'], '#3498db', 'Train'),
                               (history['val_loss'],   '#e74c3c', 'Val')):
    ax_loss.plot(x, _series, color=_colour, linewidth=2, label=_tag)
ax_loss.axhline(0.693, color='gray', linestyle=':', alpha=0.5, label='Random')
ax_loss.set_title('Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.legend()
ax_loss.grid(True, alpha=0.3)

# AUC panel with the best-validation, Celeb-DF and Step 3 reference lines.
ax_auc.plot(x, history['val_auc'], color='#2ecc71', linewidth=2.5, label='B4 Val AUC')
ax_auc.axhline(best_val_auc, color='#2ecc71', linestyle='--', alpha=0.5,
               label=f'B4 best={best_val_auc:.4f}')
ax_auc.axhline(cdf_m['auc'], color='#e74c3c', linestyle='--', alpha=0.8,
               label=f'B4 CDF={cdf_m["auc"]:.4f}')
ax_auc.axhline(0.6135, color='gray', linestyle=':', alpha=0.6,
               label='B0 CDF=0.6135 (Step3)')
ax_auc.set_title('Val AUC')
ax_auc.set_xlabel('Epoch')
ax_auc.set_ylim(0.40, 1.0)
ax_auc.legend(fontsize=8)
ax_auc.grid(True, alpha=0.3)

# Ablation bar chart of Celeb-DF AUC per step, each bar annotated with its value.
names = ['B0\nframe\n(Step3)', 'B0+GRU\n(Step4)', 'B4\nframe\n(Step5)']
cdf_scores = [0.6135, 0.5524, cdf_m['auc']]
colors = ['#95a5a6', '#e74c3c', '#2ecc71']
bars = ax_bar.bar(names, cdf_scores, color=colors, alpha=0.8, edgecolor='black', linewidth=0.8)
ax_bar.axhline(0.6135, color='gray', linestyle='--', alpha=0.5)
for _rect, _score in zip(bars, cdf_scores):
    _x_mid = _rect.get_x() + _rect.get_width()/2
    ax_bar.text(_x_mid, _rect.get_height() + 0.005, f'{_score:.4f}',
                ha='center', va='bottom', fontsize=11, fontweight='bold')
ax_bar.set_title('Ablation: Celeb-DF AUC')
ax_bar.set_ylabel('AUC')
ax_bar.set_ylim(0.40, 0.90)
ax_bar.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig(PLOTS_DIR / 'step5_results.png', dpi=150, bbox_inches='tight')
plt.show()
print("‚úÖ step5_results.png")


## Section 7 — Save Results

In [None]:
# Rounded headline numbers, reused in several places of the results record.
_ff_auc = round(ff_m['auc'], 4)
_cdf_auc = round(cdf_m['auc'], 4)

# Results record for this step; Step 3 / Step 4 entries are fixed reference values.
results = {
    'model':           'EfficientNet-B4 ImageNet pretrained, frame-level',
    'backbone':        'efficientnet_b4',
    'n_frames':        CFG['n_frames'],
    'train_methods':   TRAIN_METHODS,
    'best_epoch':      best_epoch,
    'ff_val':          {'auc': _ff_auc,  'acc': round(ff_m['acc'],  4)},
    'celeb_df':        {'auc': _cdf_auc, 'acc': round(cdf_m['acc'], 4)},
    'b0_baseline_cdf': 0.6135,
    'b4_improvement':  round(cdf_m['auc'] - 0.6135, 4),
    'training_minutes': round(total_time/60, 1),
    'ablation': {
        'step3_b0_frame':   {'cdf': 0.6135, 'ff': 0.6850},
        'step4_b0_gru':     {'cdf': 0.5524, 'ff': 0.5954},
        'step5_b4_frame':   {'cdf': _cdf_auc, 'ff': _ff_auc},
    }
}

_results_path = OUTPUT_DIR / 'step5_results.json'
with open(_results_path, 'w') as f:
    json.dump(results, f, indent=2)

# Final console summary.
_banner = "="*60
_summary = [
    _banner,
    "STEP 5 COMPLETE",
    _banner,
    f"  B0 frame-level (Step 3): CDF AUC = 0.6135",
    f"  B0 + GRU       (Step 4): CDF AUC = 0.5524",
    f"  B4 frame-level (Step 5): CDF AUC = {cdf_m['auc']:.4f}",
    f"  B4 backbone gain        : {results['b4_improvement']:+.4f}",
    "",
    "Next: Step 6 ‚Äî Add temporal Mamba module on top of B4",
    "      Expected: +5-10% additional improvement",
    f"\n‚úÖ Results ‚Üí {OUTPUT_DIR / 'step5_results.json'}",
]
for _line in _summary:
    print(_line)
