In [1]:
# %pip install torch torchvision torchaudio
# %pip install pandas scikit-learn
# %pip install pillow tqdm

In [2]:
#!/usr/bin/env python
import pathlib
import pandas as pd
from sklearn.model_selection import train_test_split

# ---- 1. Config -------------------------------------------------------------
ROOT = pathlib.Path("images")
OUT  = ROOT / "manifest10k.csv"
SEED = 42

# ---- 2. Gather all image paths & labels -----------------------------------
rows = []
for folder, label in [
    ("contents",     0),   # real originals
    ("styles",       0),   # real style-only
    ("stylizations", 1),   # stylized (fake)
]:
    for p in (ROOT / folder).glob("*.jpg"):
        rows.append((str(p), label))

df = pd.DataFrame(rows, columns=["path","label"])
print(f"Found {len(df)} images: {df.label.value_counts().to_dict()}")

# ---- 3. Down-sample to at most 5k per class ------------------------------
max_per_label = 5000
balanced_parts = []
for lbl in (0, 1):
    part = df[df.label == lbl]
    n    = min(len(part), max_per_label)
    balanced_parts.append(part.sample(n=n, random_state=SEED))

balanced = pd.concat(balanced_parts).sample(frac=1, random_state=SEED).reset_index(drop=True)
print(f"After balancing: {balanced.label.value_counts().to_dict()}")

# ---- 4. Stratified train/test split (80/20) -------------------------------
train_df, test_df = train_test_split(
    balanced,
    test_size=0.2,
    random_state=SEED,
    stratify=balanced["label"]
)

train_df["split"] = "train"
test_df ["split"] = "test"

result = pd.concat([train_df, test_df]).reset_index(drop=True)
print(f"Train/test split: {result.split.value_counts().to_dict()}")
print(f"Final class counts: {result.label.value_counts().to_dict()}")

# ---- 5. Save manifest.csv -------------------------------------------------
result.to_csv(OUT, index=False)
print(f"Wrote {len(result)} rows to {OUT}")

Found 10000 images: {0: 5000, 1: 5000}
After balancing: {1: 5000, 0: 5000}
Train/test split: {'train': 8000, 'test': 2000}
Final class counts: {0: 5000, 1: 5000}
Wrote 10000 rows to images\manifest10k.csv


In [3]:
#!/usr/bin/env python
import pandas as pd
from sklearn.model_selection import train_test_split

# ---- 1. Config -------------------------------------------------------------
MANIFEST = "images\\manifest10k.csv"  # or use forward‐slashes: "images/manifest10k.csv"
SEED     = 42

# ---- 2. Load ----------------------------------------------------------------
df = pd.read_csv(MANIFEST)

# ---- 3. Stratified train/test split (80/20) ---------------------------------
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=SEED,
    stratify=df["label"]
)

train_df["split"] = "train"
test_df ["split"] = "test"

# ---- 4. Save ---------------------------------------------------------------
out = pd.concat([train_df, test_df]).reset_index(drop=True)
out.to_csv(MANIFEST, index=False)

# ---- 5. Summary ------------------------------------------------------------
print(out["split"].value_counts())


split
train    8000
test     2000
Name: count, dtype: int64


In [4]:
#!/usr/bin/env python
import torch, torchvision
from torch import nn
from torch.utils.data import DataLoader, WeightedRandomSampler
from tqdm import tqdm

from dataset import StyleDetectDataset, train_tf, val_tf

CSV = "images\\manifest10k.csv"

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ---- 4.1  Datasets & loaders ---------------------------------------------
train_ds = StyleDetectDataset(str(CSV), split="train", transforms=train_tf)
val_ds   = StyleDetectDataset(str(CSV), split="test",  transforms=val_tf)

# Imbalance handling: weight inversely to class freq
labels = [y for _, y in train_ds]
class_count = torch.bincount(torch.tensor(labels))
class_weight = 1.0 / class_count.float()
weights = class_weight[labels]
sampler = WeightedRandomSampler(weights, num_samples=len(train_ds), replacement=True)

train_loader = DataLoader(train_ds, batch_size=64, sampler=sampler,
                          num_workers=4, pin_memory=True)
val_loader   = DataLoader(val_ds, batch_size=64, shuffle=False,
                          num_workers=4, pin_memory=True)

# ---- 4.2  Model -----------------------------------------------------------
model = torchvision.models.resnet50(weights="IMAGENET1K_V2")  # pre-trained
model.fc = nn.Linear(model.fc.in_features, 2)                 # 2-class head
model.to(DEVICE)

# ---- 4.3  Optimiser, loss, sched -----------------------------------------
criterion = nn.CrossEntropyLoss(weight=class_weight.to(DEVICE))
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=20)

# ---- 4.4  Train loop ------------------------------------------------------
EPOCHS = 5
for epoch in range(1, EPOCHS+1):
    model.train(); running_loss = 0.0
    for x, y in tqdm(train_loader, desc=f"Train {epoch}"):
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * x.size(0)
    scheduler.step()

    # ---- validation ----
    model.eval(); correct = total = 0
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            preds = model(x).argmax(1)
            correct += (preds == y).sum().item()
            total   += y.size(0)
    acc = correct/total*100
    print(f"Epoch {epoch:02d} | train_loss={running_loss/len(train_ds):.4f} "
          f"| val_acc={acc:.2f}%")


Train 1: 100%|██████████| 125/125 [10:18<00:00,  4.95s/it]


Epoch 01 | train_loss=0.4723 | val_acc=80.40%


Train 2: 100%|██████████| 125/125 [10:22<00:00,  4.98s/it]


Epoch 02 | train_loss=0.3217 | val_acc=80.85%


Train 3: 100%|██████████| 125/125 [10:10<00:00,  4.88s/it]


Epoch 03 | train_loss=0.2396 | val_acc=76.95%


Train 4: 100%|██████████| 125/125 [10:02<00:00,  4.82s/it]


Epoch 04 | train_loss=0.1899 | val_acc=80.30%


Train 5: 100%|██████████| 125/125 [10:19<00:00,  4.95s/it]


Epoch 05 | train_loss=0.1398 | val_acc=78.75%


In [5]:
# after the final epoch finishes
torch.save(model, "resnet_style.pt")   # saves the entire model object
print("✔ model saved → resnet_style.pt")

✔ model saved → resnet_style.pt


In [6]:
# import torch, torchvision.transforms as T
# import torch.serialization
# import torchvision.models.resnet
# from PIL import Image, ImageTk
# import tkinter as tk
# from tkinter import Label


# # ---- 2.1  Pick a device ---------------------------------------------------
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print("Using device:", device)

# # ---- 2.2  Load the saved model on that device -----------------------------
# with torch.serialization.safe_globals([torchvision.models.resnet.ResNet]):
#     model = torch.load("resnet_style.pt", map_location=device, weights_only=False)
# model.eval()                       # set to inference mode
# model.to(device)                   # make sure weights live on the right GPU/CPU

# # ---- 2.3  Re-use the same val transform ----------------------------------
# tf = T.Compose([
#     T.Resize(256), T.CenterCrop(224),
#     T.ToTensor(),
#     T.Normalize([0.485, 0.456, 0.406],
#                 [0.229, 0.224, 0.225]),
# ])

# # ---- 2.4  Load two images *with safe paths* -------------------------------
# imgA = Image.open(r"images\\contents\\content_1.jpg")
# imgB = Image.open(r"images\\stylizations\\content_1___style_16___700.jpg")

# # ---- 2.5  Apply transformations (only for model input) -------------------
# imgA_tensor = tf(imgA.convert("RGB")).unsqueeze(0).to(device)  # (1, 3, 224, 224)
# imgB_tensor = tf(imgB.convert("RGB")).unsqueeze(0).to(device)  # (1, 3, 224, 224)
# batch = torch.cat([imgA_tensor, imgB_tensor], dim=0).to(device)

# # ---- 2.6  Predict ---------------------------------------------------------
# with torch.no_grad():
#     preds = model(batch).argmax(1).cpu().tolist()  # 0 = real, 1 = stylized

# print("Predictions:", preds)  # e.g. [1, 0]

# # ---- 2.7  Create the UI to display images side by side -------------------

# # Set up the Tkinter window
# root = tk.Tk()
# root.title("Image Comparison")

# # Resize images before converting to Tkinter-compatible format (only for display)
# imgA_resized = imgA.resize((224, 224))  # Resize image to fit in window
# imgB_resized = imgB.resize((224, 224))  # Resize image to fit in window

# # Convert the resized images to Tkinter-compatible format
# imgA_tk = ImageTk.PhotoImage(imgA_resized)
# imgB_tk = ImageTk.PhotoImage(imgB_resized)

# # Store image references globally to prevent garbage collection
# global imgA_tk_ref, imgB_tk_ref
# imgA_tk_ref = imgA_tk  # Store the reference to image A
# imgB_tk_ref = imgB_tk  # Store the reference to image B

# # Create labels to display the images
# labelA = Label(root, image=imgA_tk)
# labelA.grid(row=0, column=0, padx=10, pady=10)

# labelB = Label(root, image=imgB_tk)
# labelB.grid(row=0, column=1, padx=10, pady=10)

# # Add labels for predictions
# pred_labelA = Label(root, text=f"Prediction: {preds[0]}")
# pred_labelA.grid(row=1, column=0)

# pred_labelB = Label(root, text=f"Prediction: {preds[1]}")
# pred_labelB.grid(row=1, column=1)

# # Run the Tkinter main loop
# root.mainloop()


In [7]:
#!/usr/bin/env python

"""
Comprehensive evaluation for ResNet model
Add this cell after your ResNet training is complete
"""

import torch
from torch.utils.data import DataLoader, WeightedRandomSampler
from dataset import StyleDetectDataset, train_tf, val_tf
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, precision_score, recall_score, f1_score
import numpy as np


CSV = "images\\manifest10k.csv"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

val_ds   = StyleDetectDataset(CSV, split="test",  transforms=val_tf)

val_loader   = DataLoader(val_ds, batch_size=32, shuffle=False,
                          num_workers=4, pin_memory=True)

print("="*60)
print("RESNET COMPREHENSIVE EVALUATION")
print("="*60)

# Collect all predictions and true labels
all_preds, all_true, all_probs = [], [], []

model.eval()
with torch.no_grad():
    for x, y in val_loader:
        x, y = x.to(DEVICE), y.to(DEVICE)
        logits = model(x)
        probs = torch.softmax(logits, dim=1)[:, 1]  # Probability of class 1 (stylized)
        preds = logits.argmax(1)
        
        all_preds.extend(preds.cpu().numpy())
        all_true.extend(y.cpu().numpy())
        all_probs.extend(probs.cpu().numpy())

all_preds = np.array(all_preds)
all_true = np.array(all_true)
all_probs = np.array(all_probs)

# Calculate and display metrics
print("\n1. CONFUSION MATRIX:")
print("   Predicted")
print("Actual  0    1")
cm = confusion_matrix(all_true, all_preds)
print("  0    {}  {}".format(cm[0, 0], cm[0, 1]))
print("  1    {}  {}".format(cm[1, 0], cm[1, 1]))

print("\n2. CLASSIFICATION REPORT:")
print(classification_report(all_true, all_preds, digits=3, target_names=['Real', 'Stylized']))

print(f"\n3. ROC-AUC SCORE: {roc_auc_score(all_true, all_probs):.3f}")

# Calculate additional metrics
precision = precision_score(all_true, all_preds)
recall = recall_score(all_true, all_preds)
f1 = f1_score(all_true, all_preds)
accuracy = (all_preds == all_true).mean()

print(f"\n4. ADDITIONAL METRICS:")
print(f"   Precision: {precision:.3f}")
print(f"   Recall:    {recall:.3f}")
print(f"   F1-Score:  {f1:.3f}")
print(f"   Accuracy:  {accuracy:.3f}")

# Class distribution
print(f"\n5. CLASS DISTRIBUTION:")
print(f"   Real images:     {np.sum(all_true == 0)}")
print(f"   Stylized images: {np.sum(all_true == 1)}")

print("\n" + "="*60)
print("Evaluation complete!")
print("="*60)

RESNET COMPREHENSIVE EVALUATION

1. CONFUSION MATRIX:
   Predicted
Actual  0    1
  0    804  196
  1    229  771

2. CLASSIFICATION REPORT:
              precision    recall  f1-score   support

        Real      0.778     0.804     0.791      1000
    Stylized      0.797     0.771     0.784      1000

    accuracy                          0.787      2000
   macro avg      0.788     0.788     0.787      2000
weighted avg      0.788     0.787     0.787      2000


3. ROC-AUC SCORE: 0.876

4. ADDITIONAL METRICS:
   Precision: 0.797
   Recall:    0.771
   F1-Score:  0.784
   Accuracy:  0.787

5. CLASS DISTRIBUTION:
   Real images:     1000
   Stylized images: 1000

Evaluation complete!


In [13]:
#!/usr/bin/env python
# --- YOUR IMPORTS -----------------------------------------------------------
import torch, torchvision
import torch_directml                              # (kept)
from torch import nn
from torch.utils.data import DataLoader, WeightedRandomSampler
from tqdm import tqdm
from dataset import StyleDetectDataset, train_tf, val_tf

from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import numpy as np
import contextlib

# --- CONFIG (kept / small tweaks) ------------------------------------------
CSV = "images\\manifest10k.csv"
# NEW: robust device pick (CUDA -> DirectML -> CPU)
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    import torch_directml
    DEVICE = torch_directml.device(0)  # DirectML GPU 0, avoids index bug

print("Using device:", DEVICE)

# ---- Data loaders (unchanged) ----------------------------------------------
train_ds = StyleDetectDataset(CSV, split="train", transforms=train_tf)
val_ds   = StyleDetectDataset(CSV, split="test",  transforms=val_tf)

labels = [y for _, y in train_ds]
class_count = torch.bincount(torch.tensor(labels))
class_weight = 1.0 / class_count.float()
weights = class_weight[labels]
sampler = WeightedRandomSampler(weights, num_samples=len(train_ds), replacement=True)

train_loader = DataLoader(train_ds, batch_size=32, sampler=sampler,
                          num_workers=4, pin_memory=True)
val_loader   = DataLoader(val_ds, batch_size=32, shuffle=False,
                          num_workers=4, pin_memory=True)

# ---- ViT backbone (mostly unchanged) ---------------------------------------
model = torchvision.models.vit_b_16(weights="IMAGENET1K_V1")
model.heads.head = nn.Linear(model.heads.head.in_features, 2)
model.to(DEVICE)

# ---- Loss (NEW: label smoothing) -------------------------------------------
criterion = nn.CrossEntropyLoss(label_smoothing=0.05)

# ---- AMP helpers (NEW: CUDA AMP; best-effort DML; fallback no-AMP) ---------
def get_amp_context_and_scaler(device):
    if isinstance(device, torch.device) and device.type == "cuda":
        scaler = torch.cuda.amp.GradScaler(enabled=True)
        autocast_ctx = lambda: torch.cuda.amp.autocast(dtype=torch.float16)
        return autocast_ctx, scaler

    # DirectML path — disable AMP entirely for stability
    print("DirectML detected — training in fp32 (no AMP).")
    return contextlib.nullcontext, None

autocast, scaler = get_amp_context_and_scaler(DEVICE)

# ---- Param groups & stage helpers (NEW) ------------------------------------
def make_param_groups(model, lr_head, lr_backbone, wd=0.05):
    groups = [
        {"params": list(model.heads.parameters()), "lr": lr_head, "weight_decay": wd},
        {"params": [], "lr": lr_backbone, "weight_decay": wd},
    ]
    for n, p in model.named_parameters():
        if p.requires_grad and not n.startswith("heads"):
            groups[1]["params"].append(p)
    return groups

def freeze_all(model):
    for p in model.parameters():
        p.requires_grad = False

def unfreeze_head(model):
    for p in model.heads.parameters():
        p.requires_grad = True

def unfreeze_last_blocks(model, k=4):
    for p in model.encoder.layers[-k:].parameters():
        p.requires_grad = True

def unfreeze_all(model):
    for p in model.parameters():
        p.requires_grad = True

# ---- Scheduler builder (NEW: warmup -> cosine, step-per-batch) -------------
def build_scheduler(optimizer, steps_total, warmup_ratio=0.05):
    warmup_steps = max(1, int(steps_total * warmup_ratio))
    cosine_steps = max(1, steps_total - warmup_steps)
    warmup = torch.optim.lr_scheduler.LinearLR(
        optimizer, start_factor=0.01, total_iters=warmup_steps
    )
    cosine = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=cosine_steps
    )
    scheduler = torch.optim.lr_scheduler.SequentialLR(
        optimizer, schedulers=[warmup, cosine], milestones=[warmup_steps]
    )
    return scheduler

# ---- Train/eval loops (NEW: AMP + per-step scheduler) ----------------------
def train_one_epoch(model, loader, optimizer, scheduler, device, scaler, desc):
    model.train()
    running = 0.0
    pbar = tqdm(loader, desc=desc, leave=False)
    for x, y in pbar:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad(set_to_none=True)
        with autocast():  # will be nullcontext on DirectML
            logits = model(x)
            loss = criterion(logits, y)

        if scaler is not None:  # CUDA path
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:  # DirectML path
            loss.backward()
            optimizer.step()

        running += loss.item() * x.size(0)
        scheduler.step()
    return running / len(loader.dataset)


@torch.no_grad()
def evaluate(model, loader, device):
    model.eval()
    correct = total = 0
    all_probs, all_true = [], []
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        logits = model(x)
        probs = torch.softmax(logits, dim=1)[:, 1]
        preds = logits.argmax(1)
        correct += (preds == y).sum().item()
        total   += y.size(0)
        all_probs.extend(probs.cpu().numpy())
        all_true.extend(y.cpu().numpy())
    acc = correct / total if total > 0 else 0.0
    return acc, np.array(all_true), np.array(all_probs)

# ---- STAGED FINE-TUNING (NEW) ----------------------------------------------
# stage_cfgs = [
#     {"name": "Stage A (head only)",
#      "setup": lambda m: (freeze_all(m), unfreeze_head(m)),
#      "lr_head": 1e-3, "lr_backbone": 0.0, "epochs": 3},

#     {"name": "Stage B (last 4 blocks + head)",
#      "setup": lambda m: (freeze_all(m), unfreeze_head(m), unfreeze_last_blocks(m, k=4)),
#      "lr_head": 1e-3, "lr_backbone": 1e-4, "epochs": 5},

#     {"name": "Stage C (full model)",
#      "setup": lambda m: unfreeze_all(m),
#      "lr_head": 3e-5, "lr_backbone": 3e-5, "epochs": 8},
# ]

# ---- FAST RUN CONFIG (shorter epochs) ----
stage_cfgs = [
    {"name": "Stage A (head only)",
     "setup": lambda m: (freeze_all(m), unfreeze_head(m)),
     "lr_head": 1e-3, "lr_backbone": 0.0, "epochs": 1},  # was 3

    {"name": "Stage B (last 4 blocks + head)",
     "setup": lambda m: (freeze_all(m), unfreeze_head(m), unfreeze_last_blocks(m, k=4)),
     "lr_head": 1e-3, "lr_backbone": 1e-4, "epochs": 2},  # was 5

    {"name": "Stage C (full model)",
     "setup": lambda m: unfreeze_all(m),
     "lr_head": 3e-5, "lr_backbone": 3e-5, "epochs": 3},  # was 8
]



best_val = 0.0
for stage in stage_cfgs:
    print("\n==>", stage["name"])
    stage["setup"](model)

    # build optimizer with discriminative LRs
    # (backbone group will be empty in Stage A because requires_grad=False)
    optimizer = torch.optim.AdamW(
        make_param_groups(model, lr_head=stage["lr_head"], lr_backbone=stage["lr_backbone"], wd=0.05)
    )

    # per-step scheduler
    steps_per_epoch = max(1, len(train_loader))
    total_steps = steps_per_epoch * stage["epochs"]
    scheduler = build_scheduler(optimizer, total_steps, warmup_ratio=0.05)

    # run epochs
    for epoch in range(1, stage["epochs"] + 1):
        train_loss = train_one_epoch(
            model, train_loader, optimizer, scheduler, DEVICE, scaler,
            desc=f"{stage['name']} | Train {epoch}/{stage['epochs']}"
        )
        val_acc, y_true, y_prob = evaluate(model, val_loader, DEVICE)
        print(f"{stage['name']} | Epoch {epoch:02d} | loss={train_loss:.4f} | val_acc={val_acc*100:.2f}%")
        if val_acc > best_val:
            best_val = val_acc
            torch.save(model.state_dict(), "vit_model_best.pt")
            print("  ↳ Saved best to vit_model_best.pt")

# ---- Final evaluation & reports (minor tweaks) -----------------------------
print("\nEvaluating best checkpoint...")
model.load_state_dict(torch.load("vit_model_best.pt", map_location=DEVICE))
model.to(DEVICE)
val_acc, all_true, all_probs = evaluate(model, val_loader, DEVICE)
print(f"Best Val Acc: {val_acc*100:.2f}%")

# threshold at 0.5 (you can tune threshold via ROC later)
pred_bin = (all_probs > 0.5).astype(np.int64)
print(confusion_matrix(all_true, pred_bin))
print(classification_report(all_true, pred_bin, digits=3))
print("AUC =", roc_auc_score(all_true, all_probs))


Using device: privateuseone:0
DirectML detected — training in fp32 (no AMP).

==> Stage A (head only)


                                                                        

RuntimeError: 0 <= device.index() && device.index() < static_cast<c10::DeviceIndex>(device_ready_queues_.size()) INTERNAL ASSERT FAILED at "C:\\actions-runner\\_work\\pytorch\\pytorch\\builder\\windows\\pytorch\\torch\\csrc\\autograd\\engine.cpp":1451, please report a bug to PyTorch. 

In [11]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Evaluate a saved ViT (or any binary model) and pick the best threshold
according to F1-score on the *test* split.
-----------------------------------------------------------------------
Usage:
    python eval_with_threshold.py vit_epoch02.pt
"""

import sys, torch, numpy as np
import torchvision.transforms as T
import torch.serialization
import torchvision.models.resnet
from sklearn.metrics import (
    precision_recall_curve,
    classification_report,
    confusion_matrix,
    roc_auc_score,
)
from dataset import StyleDetectDataset, val_tf   # same transform as training

# ---------------------------------------------------------------------
# 1) CONFIG
MODEL_PATH = r"vit_model.pt"            # e.g. "vit_epoch02.pt"
CSV        = r"images\\manifest10k.csv"
DEVICE     = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 64                         # bigger is fine for inference

# ---------------------------------------------------------------------
# 2) DATA LOADER  (test split only)
test_ds     = StyleDetectDataset(CSV, split="test", transforms=val_tf)
test_loader = torch.utils.data.DataLoader(
    test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True
)

print(f"Test samples: {len(test_ds)} (label distribution: "
      f"{np.bincount([y for _,y in test_ds])})")

# ---------------------------------------------------------------------
# 3) LOAD MODEL
model = torch.load(MODEL_PATH, map_location=DEVICE, weights_only = False)
model.eval().to(DEVICE)

# ---------------------------------------------------------------------
# 4) COLLECT PROBABILITIES & LABELS
all_probs, all_true = [], []

with torch.no_grad():
    for x, y in test_loader:
        logits = model(x.to(DEVICE))
        probs  = torch.softmax(logits, dim=1)[:, 1]        # P(class==1)
        all_probs.extend(probs.cpu().numpy())
        all_true.extend(y.cpu().numpy())

all_probs = np.array(all_probs)
all_true  = np.array(all_true)

# ---------------------------------------------------------------------
# 5) FIND THRESHOLD THAT MAXIMISES F1
prec, rec, thresh = precision_recall_curve(all_true, all_probs)
f1 = 2 * prec * rec / (prec + rec + 1e-8)
best_idx   = np.argmax(f1)
best_thr   = thresh[best_idx]
best_f1    = f1[best_idx]

print(f"\n>>>  Best F1 = {best_f1:.3f} at threshold {best_thr:.3f}")

# ---------------------------------------------------------------------
# 6) FINAL METRICS WITH THE NEW THRESHOLD
final_pred = (all_probs >= best_thr).astype(int)

print("\nConfusion matrix:")
print(confusion_matrix(all_true, final_pred))

print("\nClassification report (macro-averaged):")
print(classification_report(all_true, final_pred, digits=3))

print(f"AUC  = {roc_auc_score(all_true, all_probs):.3f}")


Test samples: 2000 (label distribution: [1000 1000])

>>>  Best F1 = 0.776 at threshold 0.459

Confusion matrix:
[[611 389]
 [120 880]]

Classification report (macro-averaged):
              precision    recall  f1-score   support

           0      0.836     0.611     0.706      1000
           1      0.693     0.880     0.776      1000

    accuracy                          0.746      2000
   macro avg      0.765     0.746     0.741      2000
weighted avg      0.765     0.746     0.741      2000

AUC  = 0.806


In [10]:
# #!/usr/bin/env python
# import torch, torchvision.transforms as T
# from PIL import Image
# import sys

# MODEL_PATH = sys.argv[1]    # path to .pt or .pth weights (ResNet or ViT)
# IMG_A      = sys.argv[2]    # first image path
# IMG_B      = sys.argv[3]    # second image path

# # ---- 6.1  Load model (auto-detect backbone type) --------------------------
# model = torch.load(MODEL_PATH)
# model.eval().cuda()

# # ---- 6.2  Same val transforms as training --------------------------------
# tf = T.Compose([
#     T.Resize(256), T.CenterCrop(224),
#     T.ToTensor(), T.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
# ])

# # ---- 6.3  Prepare batch of two -------------------------------------------
# batch = torch.stack([tf(Image.open(p).convert("RGB")) for p in (IMG_A, IMG_B)])
# batch = batch.cuda()

# with torch.no_grad():
#     preds = model(batch).argmax(1).cpu().tolist()  # 0=real,1=fake

# print(f"Prediction: {preds[0]} {preds[1]}  (0 = real, 1 = style-transfer)")