In [21]:
# Install dependencies (run in Colab)
# Upgrade pip first, then install the Python packages this notebook relies on.
!pip install --upgrade pip
!pip install timm pytesseract transformers sentence-transformers scikit-image python-magic
# pytesseract only wraps the tesseract command-line tool; the binary itself is installed via apt.
# NOTE(review): `|| true` forces a zero exit status, so a failed tesseract install is not surfaced here.
!apt-get install -y tesseract-ocr || true

Collecting pip
  Downloading pip-25.2-py3-none-any.whl.metadata (4.7 kB)
Downloading pip-25.2-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m34.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-25.2
Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Collecting python-magic
  Downloading python_magic-0.4.27-py2.py3-none-any.whl.metadata (5.8 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Downloading python_magic-0.4.27-py2.py3-none-any.whl (13 kB)
Installing collected packages: python-magic, pytesseract
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [pytesseract]
[1A[2KSuccessfully installed pytesseract-0.3.13 python-magic-0.4.27
Reading package lists... Done
Bu

In [22]:
# Imports
import os
import time
import random
import json
from glob import glob
from pathlib import Path
from tqdm import tqdm


import numpy as np
import matplotlib.pyplot as plt


import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.transforms import InterpolationMode
from PIL import Image, ImageFilter


import timm
from sklearn.metrics import roc_curve, auc, classification_report, confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler


import pytesseract

In [2]:
# Mount Google Drive at /content/drive so the dataset stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

# Source folder on the mounted Drive and destination folder for the local copy.
drive_dataset_path = '/content/drive/MyDrive/DocuForge/dataset'
local_dataset_path = '/content/dataset'

# Function to copy dataset with progress
def copy_dataset(src, dst):
    """Recursively copy the directory tree ``src`` into ``dst`` with a progress bar.

    The directory structure below ``src`` is recreated under ``dst``. Files that
    already exist at the destination are skipped (not overwritten), so the
    function can be re-run to resume a copy.

    Args:
        src: Path of the directory to copy from.
        dst: Path of the directory to copy into; created if missing.
    """
    # Local import: the notebook's import cell does not import shutil, and the
    # original code raised NameError on the first file it tried to copy.
    import shutil

    os.makedirs(dst, exist_ok=True)

    for root, _dirs, files in os.walk(src):
        # Recreate directory structure
        rel_path = os.path.relpath(root, src)
        dest_dir = os.path.join(dst, rel_path)
        os.makedirs(dest_dir, exist_ok=True)

        # Copy files with progress bar (one bar per source directory)
        for file in tqdm(files, desc=f"Copying {rel_path}", unit="file"):
            src_file = os.path.join(root, file)
            dest_file = os.path.join(dest_dir, file)
            if not os.path.exists(dest_file):
                shutil.copy2(src_file, dest_file)

# Run it
# Copies the Drive dataset folder into the local dataset folder defined above.
copy_dataset(drive_dataset_path, local_dataset_path)

print("✅ Dataset copied successfully!")

Mounted at /content/drive


Copying .: 0file [00:00, ?file/s]
Copying test: 0file [00:00, ?file/s]
Copying test/authentic: 100%|██████████| 300/300 [00:06<00:00, 45.92file/s] 
Copying test/forged: 100%|██████████| 300/300 [00:08<00:00, 34.38file/s] 
Copying train: 0file [00:00, ?file/s]
Copying train/forged: 100%|██████████| 1400/1400 [02:02<00:00, 11.46file/s]
Copying train/authentic: 100%|██████████| 1400/1400 [01:28<00:00, 15.85file/s]
Copying val: 0file [00:00, ?file/s]
Copying val/authentic: 100%|██████████| 300/300 [00:05<00:00, 58.11file/s] 
Copying val/forged: 100%|██████████| 300/300 [00:11<00:00, 25.42file/s]

✅ Dataset copied successfully!





In [23]:
data_path = "/content/dataset"

# Configuration
# Seed Python, NumPy and PyTorch random number generators.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', DEVICE)

# Paths (adjust for your Colab/Drive setup)
DRIVE_DATA_PATH = '/content/drive/MyDrive/DocuForge/dataset' # if mounted
LOCAL_DATA_PATH = '/content/dataset'

# Split directories used by the OCR-vectorizer and dataset cells further down.
# They were previously never defined in the notebook, so those cells only ran
# when the names happened to be left over in the kernel. The layout
# (<root>/train, <root>/val, <root>/test) matches what the copy step produces.
train_dir = os.path.join(LOCAL_DATA_PATH, 'train')
val_dir = os.path.join(LOCAL_DATA_PATH, 'val')
test_dir = os.path.join(LOCAL_DATA_PATH, 'test')

IMG_SIZE = 384
BATCH_SIZE = 32
NUM_WORKERS = 4
# Directory for saved checkpoints and metrics (relative to the working directory).
SAVE_DIR = 'saved_models_hybrid'
os.makedirs(SAVE_DIR, exist_ok=True)

Device: cuda


In [24]:
# Utilities: edge channel and high-pass (noise residual) channel
from skimage import filters, util


def compute_edge_channel(pil_img):
    """Return a min-max normalised Sobel edge map of ``pil_img``.

    The image is converted to 8-bit grayscale, scaled to 0..1, passed through
    ``skimage.filters.sobel`` and rescaled so the result spans 0..1
    (a 1e-8 term guards against division by zero on flat images).

    Args:
        pil_img: PIL image (any mode that supports ``convert('L')``).

    Returns:
        float32 NumPy array of the filtered grayscale image.
    """
    luminance = np.array(pil_img.convert('L')).astype(np.float32) / 255.0
    sobel_map = filters.sobel(luminance)
    lowest = sobel_map.min()
    normalised = (sobel_map - lowest) / (sobel_map.max() - lowest + 1e-8)
    return normalised.astype(np.float32)


def compute_noise_residual(pil_img):
    """Return a 0..1 high-pass map of ``pil_img`` built with PIL's FIND_EDGES filter.

    The grayscale image is filtered with ``ImageFilter.FIND_EDGES``, scaled to
    0..1, standardised (zero mean, unit std) and finally min-max rescaled.

    Args:
        pil_img: PIL image (any mode that supports ``convert('L')``).

    Returns:
        float32 NumPy array of the filtered grayscale image.
    """
    high_pass = pil_img.convert('L').filter(ImageFilter.FIND_EDGES)
    values = np.array(high_pass).astype(np.float32) / 255.0
    # standardise first ...
    standardised = (values - values.mean()) / (values.std() + 1e-8)
    # ... then squeeze into 0..1
    lowest = standardised.min()
    rescaled = (standardised - lowest) / (standardised.max() - lowest + 1e-8)
    return rescaled.astype(np.float32)

In [25]:
# Dataset that returns a 4-channel tensor (RGB + edge) and an OCR TF-IDF vector
class DocuDataset(Dataset):
    """Image-folder style dataset yielding RGB+edge tensors and OCR TF-IDF vectors.

    Expected layout is ``root_dir/<class_name>/<image file>``. Class indices are
    assigned from the sorted class-directory names, and samples are listed in
    sorted path order so indexing is reproducible across runs.

    Each item is a tuple ``(four_ch, ocr_vec, label, path)``:
        four_ch: tensor with the transformed RGB image plus one edge channel
            computed from that same transformed image, so the edge map stays
            aligned with the RGB channels under random crops/rotations/flips.
        ocr_vec: float32 tensor of length ``self.ocr_dim`` holding the TF-IDF
            vector of the OCR text (all zeros when OCR is disabled).
        label: scalar ``torch.long`` tensor with the class index.
        path: file path of the image as a string.

    Args:
        root_dir: Directory containing one sub-directory per class.
        transform: Callable mapping a PIL RGB image to a CxHxW tensor. When
            ``None``, the image is resized to ``IMG_SIZE`` and converted with
            ``ToTensor``.
        ocr_enabled: Whether to run tesseract OCR on every image.
        tsvectorizer: Fitted TF-IDF vectorizer. If ``None`` and OCR is enabled,
            one is fitted on the OCR text of this dataset's images (slow).
        max_tfidf_features: ``max_features`` for a vectorizer built here, and
            the fallback OCR vector length when the vectorizer does not
            specify one.

    Raises:
        pytesseract.TesseractNotFoundError: if OCR is enabled but the tesseract
            binary is missing. Pass ``ocr_enabled=False`` to run without OCR.
    """

    IMAGE_SUFFIXES = ('.jpg', '.jpeg', '.png', '.tif', '.tiff', '.bmp')

    def __init__(self, root_dir, transform=None, ocr_enabled=True, tsvectorizer=None, max_tfidf_features=512):
        self.root_dir = Path(root_dir)
        self.transform = transform
        self.ocr_enabled = ocr_enabled
        self.tsvectorizer = tsvectorizer

        # gather samples using ImageFolder-like layout
        self.classes = sorted(d.name for d in self.root_dir.iterdir() if d.is_dir())
        self.class_to_idx = {c: i for i, c in enumerate(self.classes)}
        self.samples = [
            (str(p), self.class_to_idx[cls])
            for cls in self.classes
            for p in sorted((self.root_dir / cls).glob('*'))
            if p.suffix.lower() in self.IMAGE_SUFFIXES
        ]

        # If OCR is requested but no vectorizer was supplied, fit one on all texts
        if self.ocr_enabled and self.tsvectorizer is None:
            print('Building OCR TF-IDF vectorizer from dataset (this may take time)...')
            texts = [self._ocr_text(p) for p, _ in tqdm(self.samples)]
            self.tsvectorizer = TfidfVectorizer(max_features=max_tfidf_features)
            self.tsvectorizer.fit(texts)

        # Fixed length of the OCR vector. Falls back to max_tfidf_features when
        # there is no vectorizer or it was created with max_features=None.
        vec_features = getattr(self.tsvectorizer, 'max_features', None)
        self.ocr_dim = int(vec_features or max_tfidf_features)

    def __len__(self):
        return len(self.samples)

    @staticmethod
    def _ocr_text(source):
        """Return OCR text for a PIL image or an image path ('' on per-image failure)."""
        try:
            if isinstance(source, (str, Path)):
                # Context manager closes the file handle once OCR is done.
                with Image.open(source) as img:
                    return pytesseract.image_to_string(img)
            return pytesseract.image_to_string(source)
        except pytesseract.TesseractNotFoundError:
            # A missing binary affects every sample; swallowing it would
            # silently feed all-zero OCR vectors to the model.
            raise
        except Exception:
            return ''

    @staticmethod
    def _edge_channel(rgb):
        """Return a 1xHxW Sobel edge map (0..1) computed from a 3xHxW tensor."""
        # Weighted channel sum as a grayscale proxy; the min-max rescale below
        # makes the result insensitive to the overall scale of ``rgb``.
        gray = (0.299 * rgb[0] + 0.587 * rgb[1] + 0.114 * rgb[2]).detach().cpu().numpy()
        edges = filters.sobel(gray)
        edges = (edges - edges.min()) / (edges.max() - edges.min() + 1e-8)
        return torch.from_numpy(edges.astype(np.float32)).unsqueeze(0)

    def __getitem__(self, idx):
        path, label = self.samples[idx]
        with Image.open(path) as raw:
            img = raw.convert('RGB')

        # base transform for image (resize + crop handled by transform)
        if self.transform is not None:
            rgb = self.transform(img)  # tensor CxHxW for RGB
        else:
            default_t = transforms.Compose([transforms.Resize((IMG_SIZE, IMG_SIZE)), transforms.ToTensor()])
            rgb = default_t(img)

        # 4th channel: edges of the *transformed* image, so they share the RGB
        # geometry and size without any extra resize.
        four_ch = torch.cat([rgb, self._edge_channel(rgb)], dim=0)

        # OCR text -> TF-IDF vector, zero-padded/truncated to ocr_dim
        ocr_vec = np.zeros(self.ocr_dim, dtype=np.float32)
        if self.ocr_enabled and self.tsvectorizer is not None:
            txt = self._ocr_text(img)
            vec = self.tsvectorizer.transform([txt]).toarray()[0].astype(np.float32)
            n = min(vec.shape[0], self.ocr_dim)
            ocr_vec[:n] = vec[:n]
        ocr_vec = torch.from_numpy(ocr_vec)

        return four_ch, ocr_vec, torch.tensor(label, dtype=torch.long), path

In [26]:
# Transforms. Mixup is not part of these pipelines; it is applied in the training loop.
# Training: resize slightly larger than the target, then random crop / rotation /
# colour jitter / horizontal flip before tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.Resize(size=(IMG_SIZE+32, IMG_SIZE+32)),
    transforms.RandomResizedCrop(
        size=IMG_SIZE,
        scale=(0.7, 1.0),
        ratio=(0.9, 1.1),
        interpolation=InterpolationMode.BILINEAR,
    ),
    transforms.RandomRotation(degrees=10, interpolation=InterpolationMode.BILINEAR),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2, hue=0.05),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Validation / test: deterministic resize with the same normalisation as training.
val_transform = transforms.Compose([
    transforms.Resize(size=(IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [None]:
# Fit the TF-IDF vectorizer on OCR text extracted from the training images only.
# OCR takes seconds per image, so extracted texts are cached on disk (keyed by
# image path) and reused when the cell is re-run.
TF_MAX_FEATURES = 256
OCR_CACHE_PATH = os.path.join(SAVE_DIR, 'train_ocr_texts.json')

# Sorted for a reproducible order; directories matched by the glob are skipped.
train_image_paths = sorted(p for p in glob(os.path.join(train_dir, '*', '*')) if os.path.isfile(p))
if not train_image_paths:
    # Fitting on an empty corpus would only fail later with an opaque
    # "empty vocabulary" error, so report the real problem here.
    raise RuntimeError(f'No training files found under {train_dir!r}; cannot fit the TF-IDF vectorizer.')

ocr_cache = {}
if os.path.exists(OCR_CACHE_PATH):
    with open(OCR_CACHE_PATH, 'r', encoding='utf-8') as f:
        ocr_cache = json.load(f)

print('Building simple OCR feature vector (fast) from train OCR texts...')
train_texts = []
for p in tqdm(train_image_paths):
    txt = ocr_cache.get(p)
    if txt is None:
        try:
            with Image.open(p) as img:
                txt = pytesseract.image_to_string(img)
            ocr_cache[p] = txt
        except pytesseract.TesseractNotFoundError:
            # Missing binary: every image would fail, so stop instead of
            # silently fitting on empty strings.
            raise
        except Exception:
            # Best effort for unreadable images; failures are not cached so
            # they are retried on the next run.
            txt = ''
    train_texts.append(txt)

with open(OCR_CACHE_PATH, 'w', encoding='utf-8') as f:
    json.dump(ocr_cache, f)

# TfidfVectorizer is already imported in the notebook's import cell.
tfv = TfidfVectorizer(max_features=TF_MAX_FEATURES)
tfv.fit(train_texts)

Building simple OCR feature vector (fast) from train OCR texts...


  3%|▎         | 85/2800 [03:26<1:39:37,  2.20s/it]

In [10]:
# Build the three splits. All of them share the TF-IDF vectorizer `tfv`;
# only the training split uses the augmenting transform.
train_ds = DocuDataset(train_dir, transform=train_transform, ocr_enabled=True, tsvectorizer=tfv, max_tfidf_features=TF_MAX_FEATURES)
val_ds = DocuDataset(val_dir, transform=val_transform, ocr_enabled=True, tsvectorizer=tfv, max_tfidf_features=TF_MAX_FEATURES)
test_ds = DocuDataset(test_dir, transform=val_transform, ocr_enabled=True, tsvectorizer=tfv, max_tfidf_features=TF_MAX_FEATURES)

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS, pin_memory=True)

# Sanity check: class names (taken from the training split) and split sizes.
print('Classes:', train_ds.classes)
print('Sizes -> Train:', len(train_ds), 'Val:', len(val_ds), 'Test:', len(test_ds))

[Phase1] Trainable parameters: 524801/24032833


In [11]:
# Model: ConvNeXt-B or Swin-T backbone with modified input channels
class HybridModel(nn.Module):
    """Two-branch classifier fusing image features with an OCR text vector.

    Image branch: a timm backbone created with ``in_chans=4`` and
    ``num_classes=0``. Text branch: a small MLP mapping the OCR vector to a
    128-dim embedding. Both are concatenated and passed to an MLP head.

    Args:
        backbone_name: timm model name for the image branch.
        pretrained: Passed through to ``timm.create_model``.
        ocr_vec_size: Length of the OCR vector fed to the text branch.
        hidden_dim: Width of the hidden layer in the fusion head.
        num_classes: Number of outputs of the fusion head.
    """

    def __init__(self, backbone_name='convnext_base', pretrained=True, ocr_vec_size=TF_MAX_FEATURES, hidden_dim=256, num_classes=1):
        super().__init__()
        self.backbone_name = backbone_name

        # image branch: timm backbone adapted for 4-channel input
        self.backbone = timm.create_model(backbone_name, pretrained=pretrained, num_classes=0, in_chans=4)
        image_dim = self.backbone.num_features

        # text branch: OCR vector -> 128-dim embedding
        text_layers = [
            nn.Linear(ocr_vec_size, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, 128),
            nn.ReLU(),
        ]
        self.ocr_proj = nn.Sequential(*text_layers)

        # fusion head over the concatenated image + text features
        head_layers = [
            nn.Linear(image_dim + 128, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x4, ocr_vec):
        """Return head outputs for 4-channel images ``x4`` and OCR vectors ``ocr_vec``.

        Dimension 1 of the head output is squeezed, which removes it only when
        it has size 1 (the default ``num_classes=1``).
        """
        image_feats = self.backbone(x4)       # B x feat_dim
        text_feats = self.ocr_proj(ocr_vec)   # B x 128
        logits = self.classifier(torch.cat((image_feats, text_feats), dim=1))
        return logits.squeeze(1)

In [12]:
# Instantiate model (choose 'convnext_base' or 'swin_tiny_patch4_window7_224')
# NOTE(review): the Swin option's name ends in 224 while IMG_SIZE is 384 — confirm
# that backbone accepts this input size before switching to it.
BACKBONE = 'convnext_base'  # options: 'convnext_base', 'swin_tiny_patch4_window7_224'
model = HybridModel(backbone_name=BACKBONE, pretrained=True, ocr_vec_size=TF_MAX_FEATURES)
model = model.to(DEVICE)

# Print trainable params
# (parameters with requires_grad=True versus all parameters)
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f'Trainable params: {trainable}/{total}')

In [13]:
# Loss with label smoothing
class SmoothBCEWithLogits(nn.Module):
    """Binary cross-entropy on logits with symmetric label smoothing.

    Hard targets ``t`` in {0, 1} are replaced by
    ``t * (1 - smoothing) + 0.5 * smoothing`` before the loss is computed.

    Args:
        smoothing: Amount of smoothing; 0 disables it.
    """

    def __init__(self, smoothing=0.1):
        super().__init__()
        self.smoothing = smoothing

    def forward(self, logits, targets):
        """Return the mean smoothed BCE loss.

        ``logits`` and ``targets`` must hold the same number of elements; both
        are flattened, so (B,) and (B, 1) layouts are accepted for either one.
        The previous version unsqueezed only the targets, which made their
        shape (B, 1) disagree with the (B,) logits the model returns and
        triggered a size-mismatch error in the underlying loss.
        """
        logits = logits.reshape(-1)
        # targets: 0/1
        targets = targets.reshape(-1).to(dtype=logits.dtype)
        smoothed = targets * (1 - self.smoothing) + 0.5 * self.smoothing
        return nn.functional.binary_cross_entropy_with_logits(logits, smoothed)

# Label-smoothed BCE loss instance used by the training and evaluation calls.
criterion = SmoothBCEWithLogits(smoothing=0.05)

# AdamW over all model parameters as a single group: one shared learning rate
# (1e-4) and weight decay (1e-4). No per-layer / differential learning rates are set here.
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
# ReduceLROnPlateau in 'min' mode: the value passed to scheduler.step() is treated
# as something to minimise; configured with patience=2 and factor=0.5.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=2, factor=0.5)

In [14]:
# Mixup implementation
def mixup_data(x, ocr, y, alpha=0.4):
    """Blend each sample of a batch with a randomly chosen partner from the same batch.

    Args:
        x: Batch of image tensors (batch dimension first).
        ocr: Batch of OCR vectors aligned with ``x``.
        y: Batch of labels aligned with ``x``.
        alpha: Beta-distribution parameter; ``alpha <= 0`` disables mixup.

    Returns:
        ``(mixed_x, mixed_ocr, y_a, y_b, lam)`` where ``lam`` is drawn from
        ``Beta(alpha, alpha)``, ``y_a`` is ``y`` and ``y_b`` holds the partner
        labels. With mixup disabled the inputs are returned unchanged together
        with ``None`` and ``1.0``.
    """
    if alpha <= 0:
        return x, ocr, y, None, 1.0

    lam = np.random.beta(alpha, alpha)
    # one random partner per sample, shared by images, OCR vectors and labels
    partner = torch.randperm(x.size(0)).to(x.device)
    partner_weight = 1 - lam

    blended_x = lam * x + partner_weight * x[partner, :]
    blended_ocr = lam * ocr + partner_weight * ocr[partner, :]
    return blended_x, blended_ocr, y, y[partner], lam

# Mixup criterion
def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Return the ``lam``-weighted combination of the losses against both label sets.

    Computes ``lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)``.
    """
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

=== Phase 1: training head-only ===


Train: 100%|██████████| 44/44 [01:17<00:00,  1.76s/it]
Eval: 100%|██████████| 10/10 [00:08<00:00,  1.11it/s]


Head Epoch 1/5 | Train Acc=0.690 | Val Acc=0.777 | Train Loss=0.157 | Val Loss=0.139 | time=86.4s
🏆 Best head model updated (Val Acc=0.777)


Train: 100%|██████████| 44/44 [01:14<00:00,  1.69s/it]
Eval: 100%|██████████| 10/10 [00:09<00:00,  1.05it/s]


Head Epoch 2/5 | Train Acc=0.760 | Val Acc=0.822 | Train Loss=0.136 | Val Loss=0.116 | time=83.8s
🏆 Best head model updated (Val Acc=0.822)


Train: 100%|██████████| 44/44 [01:15<00:00,  1.71s/it]
Eval: 100%|██████████| 10/10 [00:10<00:00,  1.05s/it]


Head Epoch 3/5 | Train Acc=0.775 | Val Acc=0.850 | Train Loss=0.125 | Val Loss=0.106 | time=85.9s
🏆 Best head model updated (Val Acc=0.850)


Train: 100%|██████████| 44/44 [01:12<00:00,  1.66s/it]
Eval: 100%|██████████| 10/10 [00:10<00:00,  1.06s/it]


Head Epoch 4/5 | Train Acc=0.770 | Val Acc=0.853 | Train Loss=0.123 | Val Loss=0.102 | time=83.6s
🏆 Best head model updated (Val Acc=0.853)


Train: 100%|██████████| 44/44 [01:15<00:00,  1.71s/it]
Eval: 100%|██████████| 10/10 [00:09<00:00,  1.04it/s]


Head Epoch 5/5 | Train Acc=0.779 | Val Acc=0.848 | Train Loss=0.119 | Val Loss=0.099 | time=84.9s
=== Phase 1 complete ===
Best val acc so far: 0.853


In [15]:
# Training + validation loops with TTA support (for eval)
from copy import deepcopy

def train_one_epoch(model, loader, optimizer, criterion, device, mixup_alpha=0.4):
    """Train ``model`` for one pass over ``loader``.

    Args:
        model: Module called as ``model(x4, ocr)`` and returning logits.
        loader: Iterable of ``(x4, ocr, labels, paths)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss called as ``criterion(logits, labels)``.
        device: Device the batch tensors are moved to.
        mixup_alpha: Beta parameter for mixup; ``<= 0`` trains on clean batches.

    Returns:
        ``(avg_loss, acc)``: sample-weighted mean training loss and accuracy
        at a 0.5 probability threshold. With mixup the accuracy comes from an
        extra forward pass on the un-mixed batch, so it is only an estimate.
        Both values are 0.0 when the loader yields no samples.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for x4, ocr, labels, _ in tqdm(loader, desc='Train'):
        x4 = x4.to(device)
        ocr = ocr.to(device)
        labels = labels.to(device)

        if mixup_alpha > 0:
            mixed_x, mixed_ocr, y_a, y_b, lam = mixup_data(x4, ocr, labels, alpha=mixup_alpha)
            outputs = model(mixed_x, mixed_ocr)
            loss = mixup_criterion(criterion, outputs, y_a, y_b, lam)
            # Accuracy is measured on the clean batch. This pass is for
            # monitoring only, so keep it out of the autograd graph instead of
            # building (and holding) a second graph per step.
            with torch.no_grad():
                clean_logits = model(x4, ocr)
        else:
            outputs = model(x4, ocr)
            loss = criterion(outputs, labels)
            clean_logits = outputs.detach()

        preds = (torch.sigmoid(clean_logits) > 0.5).long().reshape(-1)
        correct += (preds == labels.reshape(-1)).sum().item()
        total += labels.size(0)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * x4.size(0)

    if total == 0:
        # Empty loader: nothing was trained; avoid dividing by zero.
        return 0.0, 0.0
    return running_loss / total, correct / total

# TTA inference: perform several augmentations and average probabilities
def tta_predict(model, x4, ocr_vec, tta_transforms):
    """Average sigmoid probabilities over test-time augmentations of a batch.

    Args:
        model: Module called as ``model(x4, ocr_vec)`` and returning logits.
            It is switched to eval mode.
        x4: Batch of image tensors, indexed as (batch, channel, height, width).
        ocr_vec: Batch of OCR vectors; passed unchanged for every augmentation.
        tta_transforms: Iterable of augmentation names. Supported: ``'orig'``
            (no change), ``'hflip'`` (flip dim 3), ``'vflip'`` (flip dim 2).

    Returns:
        NumPy array with the mean probability per sample.

    Raises:
        ValueError: on an unknown augmentation name (previously such names
            were silently treated as ``'orig'``, skewing the average).
    """
    model.eval()

    # Run on the device the model lives on rather than a notebook-level global,
    # so the function works wherever the model was placed.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else x4.device
    x4 = x4.to(device)
    ocr_vec = ocr_vec.to(device)

    # augmentation name -> dims to flip (None = leave the batch as is)
    flip_dims = {'orig': None, 'hflip': [3], 'vflip': [2]}

    probs = []
    with torch.no_grad():
        for t in tta_transforms:
            if t not in flip_dims:
                raise ValueError(f'Unknown TTA transform {t!r}; expected one of {sorted(flip_dims)}')
            dims = flip_dims[t]
            x_aug = x4 if dims is None else torch.flip(x4, dims=dims)
            out = model(x_aug, ocr_vec)
            probs.append(torch.sigmoid(out).cpu().numpy())
    return np.stack(probs, axis=0).mean(axis=0)


def evaluate(model, loader, criterion, device, tta=True):
    """Collect labels, probabilities and 0.5-threshold predictions over ``loader``.

    Args:
        model: Module called as ``model(x4, ocr)``; switched to eval mode.
        loader: Iterable of ``(x4, ocr, labels, paths)`` batches.
        criterion: Accepted for call compatibility; not used here.
        device: Device used for the plain (non-TTA) forward pass.
        tta: If true, probabilities come from ``tta_predict`` with the
            ``'orig'`` and ``'hflip'`` views; otherwise from a single pass.

    Returns:
        ``(labels, probs, preds)`` as NumPy arrays.
    """
    model.eval()
    collected_labels, collected_probs, collected_preds = [], [], []

    for x4, ocr, labels, _ in tqdm(loader, desc='Eval'):
        if not tta:
            with torch.no_grad():
                logits = model(x4.to(device), ocr.to(device))
                batch_probs = torch.sigmoid(logits).cpu().numpy()
        else:
            batch_probs = tta_predict(model, x4, ocr, tta_transforms=['orig','hflip'])

        batch_preds = (batch_probs > 0.5).astype(int)
        collected_probs += batch_probs.tolist()
        collected_preds += batch_preds.tolist()
        collected_labels += labels.numpy().tolist()

    return np.array(collected_labels), np.array(collected_probs), np.array(collected_preds)

=== Phase 2: unfreezing layer2, layer3, layer4 and fine-tuning ===
[Phase2] Trainable parameters: 23807489/24032833


In [16]:
# Training loop
# Each epoch: one mixup training pass, a TTA evaluation on the validation set,
# checkpointing on best validation AUC, and an LR-scheduler step.
EPOCHS = 8
best_auc = 0.0
for epoch in range(EPOCHS):
    t0 = time.time()
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion, DEVICE, mixup_alpha=0.4)
    labels_val, probs_val, preds_val = evaluate(model, val_loader, criterion, DEVICE, tta=True)
    # ROC AUC from the validation probabilities (the thresholds `thr` are not used further here).
    fpr, tpr, thr = roc_curve(labels_val, probs_val)
    val_auc = auc(fpr, tpr)
    print(f'Epoch {epoch+1}/{EPOCHS} | Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Val AUC: {val_auc:.4f} | time: {time.time()-t0:.1f}s')

    # Save best by AUC
    if val_auc > best_auc:
        best_auc = val_auc
        torch.save(model.state_dict(), os.path.join(SAVE_DIR, 'best_hybrid.pth'))
        print('Saved best model')

    # LR scheduler step based on val loss ~ use 1 - AUC as proxy
    # (no validation loss is computed in this loop, hence the proxy)
    scheduler.step(1 - val_auc)

print('Training complete. Best val AUC:', best_auc)

In [17]:
# Threshold optimization: grid-search the decision threshold that maximises F1 on the validation set
from sklearn.metrics import f1_score

# Tune the threshold on the same weights the test cell evaluates (the best-AUC
# checkpoint), not on whatever the model holds after the last training epoch.
best_ckpt_path = os.path.join(SAVE_DIR, 'best_hybrid.pth')
if os.path.exists(best_ckpt_path):
    model.load_state_dict(torch.load(best_ckpt_path, map_location=DEVICE))

best_thresh = 0.5
best_f1 = 0.0
labels_val, probs_val, _ = evaluate(model, val_loader, criterion, DEVICE, tta=True)
# 81 candidate thresholds from 0.10 to 0.90 in steps of 0.01
for thresh in np.linspace(0.1, 0.9, 81):
    preds = (probs_val > thresh).astype(int)
    # zero_division=0: a threshold that predicts no positives scores 0 without a warning
    f1 = f1_score(labels_val, preds, zero_division=0)
    if f1 > best_f1:
        best_f1 = f1
        best_thresh = thresh
print('Best threshold by F1 on val:', best_thresh, 'F1:', best_f1)

Train: 100%|██████████| 44/44 [01:16<00:00,  1.75s/it]
Eval: 100%|██████████| 10/10 [00:09<00:00,  1.01it/s]


FT Epoch 1/10 | Train Acc=0.786 | Val Acc=0.862 | Train Loss=0.119 | Val Loss=0.095 | time=86.8s
🏆 Best model updated (Val Acc=0.862)


Train: 100%|██████████| 44/44 [01:17<00:00,  1.76s/it]
Eval: 100%|██████████| 10/10 [00:09<00:00,  1.02it/s]


FT Epoch 2/10 | Train Acc=0.798 | Val Acc=0.868 | Train Loss=0.112 | Val Loss=0.092 | time=87.4s
🏆 Best model updated (Val Acc=0.868)


Train: 100%|██████████| 44/44 [01:19<00:00,  1.82s/it]
Eval: 100%|██████████| 10/10 [00:09<00:00,  1.08it/s]


FT Epoch 3/10 | Train Acc=0.800 | Val Acc=0.872 | Train Loss=0.113 | Val Loss=0.090 | time=89.2s
🏆 Best model updated (Val Acc=0.872)


Train: 100%|██████████| 44/44 [01:18<00:00,  1.79s/it]
Eval: 100%|██████████| 10/10 [00:08<00:00,  1.13it/s]


FT Epoch 4/10 | Train Acc=0.799 | Val Acc=0.872 | Train Loss=0.110 | Val Loss=0.090 | time=87.5s


Train: 100%|██████████| 44/44 [01:20<00:00,  1.83s/it]
Eval: 100%|██████████| 10/10 [00:09<00:00,  1.02it/s]


FT Epoch 5/10 | Train Acc=0.802 | Val Acc=0.873 | Train Loss=0.110 | Val Loss=0.089 | time=90.5s
🏆 Best model updated (Val Acc=0.873)


Train: 100%|██████████| 44/44 [01:17<00:00,  1.76s/it]
Eval: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


FT Epoch 6/10 | Train Acc=0.797 | Val Acc=0.878 | Train Loss=0.107 | Val Loss=0.087 | time=87.8s
🏆 Best model updated (Val Acc=0.878)


Train: 100%|██████████| 44/44 [01:16<00:00,  1.73s/it]
Eval: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


FT Epoch 7/10 | Train Acc=0.804 | Val Acc=0.878 | Train Loss=0.107 | Val Loss=0.085 | time=86.6s


Train: 100%|██████████| 44/44 [01:17<00:00,  1.77s/it]
Eval: 100%|██████████| 10/10 [00:10<00:00,  1.04s/it]


FT Epoch 8/10 | Train Acc=0.817 | Val Acc=0.882 | Train Loss=0.104 | Val Loss=0.083 | time=88.2s
🏆 Best model updated (Val Acc=0.882)


Train: 100%|██████████| 44/44 [01:17<00:00,  1.77s/it]
Eval: 100%|██████████| 10/10 [00:08<00:00,  1.12it/s]


FT Epoch 9/10 | Train Acc=0.816 | Val Acc=0.880 | Train Loss=0.104 | Val Loss=0.085 | time=86.8s


Train: 100%|██████████| 44/44 [01:21<00:00,  1.86s/it]
Eval: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


FT Epoch 10/10 | Train Acc=0.810 | Val Acc=0.885 | Train Loss=0.103 | Val Loss=0.084 | time=92.1s
🏆 Best model updated (Val Acc=0.885)
✅ Two-phase training complete.
Final best val acc: 0.885


In [18]:
# Final evaluation on test set
# Restore the checkpoint saved at the best validation AUC before scoring the test set.
model.load_state_dict(torch.load(os.path.join(SAVE_DIR, 'best_hybrid.pth'), map_location=DEVICE))
# NOTE(review): labels_test and probs_test are reassigned on the next line;
# preds_test is not used again in this cell.
labels_test, probs_test, preds_test = [], [], []
labels_test, probs_test, _ = evaluate(model, test_loader, criterion, DEVICE, tta=True)
# Apply the threshold chosen on the validation set instead of the default 0.5.
final_preds = (probs_test > best_thresh).astype(int)

print('Test classification report:')
print(classification_report(labels_test, final_preds, target_names=train_ds.classes))
cm = confusion_matrix(labels_test, final_preds)
print('Confusion matrix:\n', cm)

# Save metrics
from sklearn.metrics import roc_auc_score, precision_recall_fscore_support
roc_auc = roc_auc_score(labels_test, probs_test)
# average=None yields one precision/recall/F1 value per class.
precision, recall, f1, _ = precision_recall_fscore_support(labels_test, final_preds, average=None, zero_division=0)
# Converted to plain floats/lists so the dictionary can be written as JSON.
metrics = {
    'roc_auc': float(roc_auc),
    'threshold': float(best_thresh),
    'precision_per_class': precision.tolist(),
    'recall_per_class': recall.tolist(),
    'f1_per_class': f1.tolist()
}
with open(os.path.join(SAVE_DIR, 'final_metrics.json'), 'w') as f:
    json.dump(metrics, f, indent=2)
print('Saved final metrics to', os.path.join(SAVE_DIR, 'final_metrics.json'))