In [None]:
# %pip install torch torchvision torchaudio
# %pip install pandas scikit-learn
# %pip install pillow tqdm

In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Scan Victor Kitov's dataset and create manifest.csv with four columns:
path,label,content_id,style_id
"""
import re, csv, pathlib

# ---- 1.1  Configure root directory & output ------------------------------
ROOT = pathlib.Path("images")  # <- change if needed
OUT  = ROOT / "manifest.csv"

# ---- 1.2  Regex helpers to grab IDs from filenames -----------------------
CONTENT_RE = re.compile(r"content_(\d+)")
STYLE_RE   = re.compile(r"style_(\d+)")
STYL_RE    = re.compile(r"content_(\d+)___style_(\d+)___\d+")

# ---- 1.3  Collect rows ----------------------------------------------------
def collect_rows(root):
    """Scan the dataset under ``root`` and return the manifest rows.

    Parameters
    ----------
    root : str or pathlib.Path
        Directory expected to contain the sub-folders ``contents``,
        ``styles`` and ``stylizations``.

    Returns
    -------
    list of (path, label, content_id, style_id) tuples
        ``label`` is 0 for files in ``contents`` / ``styles`` and 1 for files
        in ``stylizations``.  An ID of -1 means "none".  Paths are written
        with forward slashes so the manifest can be read on any OS.

    Entries that are not regular files, or whose name does not match the
    pattern for their folder (e.g. ``Thumbs.db``), are skipped and reported
    instead of aborting the scan.  Files are visited in sorted order so the
    manifest is reproducible from run to run.
    """
    root = pathlib.Path(root)
    found, skipped = [], []

    # (sub-folder, filename pattern, label, match -> (content_id, style_id))
    plan = [
        ("contents",     CONTENT_RE, 0, lambda m: (int(m[1]), -1)),
        ("styles",       STYLE_RE,   0, lambda m: (-1, int(m[1]))),
        ("stylizations", STYL_RE,    1, lambda m: (int(m[1]), int(m[2]))),
    ]
    for folder, pattern, label, ids_of in plan:
        directory = root / folder
        if not directory.is_dir():
            print(f"Warning: folder not found, nothing collected from {directory}")
            continue
        for p in sorted(directory.glob("*")):
            m = pattern.search(p.name) if p.is_file() else None
            if m is None:
                skipped.append(p)
                continue
            cid, sid = ids_of(m)
            found.append((p.as_posix(), label, cid, sid))

    if skipped:
        print(f"Skipped {len(skipped)} entries with unexpected names, "
              f"e.g. {skipped[0]}")
    return found


rows = collect_rows(ROOT)

# ---- 1.4  Write CSV -------------------------------------------------------
# The encoding is pinned to UTF-8: pandas.read_csv decodes UTF-8 by default,
# whereas open() would otherwise fall back to the platform locale and could
# write non-ASCII paths in a form the reader cannot decode.
# newline="" is what the csv module requires to control line endings itself.
with OUT.open("w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["path", "label", "content_id", "style_id"])  # header
    writer.writerows(rows)

print(f"Manifest written → {OUT}, total rows = {len(rows)}")


In [None]:
#!/usr/bin/env python
import pandas as pd, numpy as np
from sklearn.model_selection import train_test_split

# Forward slashes resolve on Windows as well as on Linux/macOS; a back-slash
# literal would only be understood as a directory separator on Windows.
MANIFEST = "images/manifest.csv"
SEED     = 42   # fixed seed so the hold-out selection is reproducible

df = pd.read_csv(MANIFEST)

# ---- 2.1  Unique IDs ------------------------------------------------------
# -1 is the "none" placeholder and is not a real ID, so it is excluded.
content_ids = df.loc[df.content_id != -1, "content_id"].unique()
style_ids   = df.loc[df.style_id   != -1, "style_id"  ].unique()

# ---- 2.2  20 % hold-out ---------------------------------------------------
# The split is done on IDs, not on rows: 20 % of the content IDs and 20 % of
# the style IDs are held out.
c_train, c_test = train_test_split(content_ids, test_size=0.2, random_state=SEED)
s_train, s_test = train_test_split(style_ids,   test_size=0.2, random_state=SEED)

# ---- 2.3  Boolean masks ---------------------------------------------------
def in_train(row):
    """Tell whether a manifest row belongs to the training split.

    ``row`` must expose ``content_id`` and ``style_id``.  An ID of -1 means
    "not applicable" and never excludes a row; any other ID has to be present
    in the matching train collection (``c_train`` for content IDs,
    ``s_train`` for style IDs).  A row with either ID held out is therefore
    not a training row.
    """
    content_held_out = (row.content_id != -1) and (row.content_id not in c_train)
    if content_held_out:
        return False
    # Content side is fine, so the style side decides.
    return (row.style_id == -1) or (row.style_id in s_train)

# Row-wise membership test -> boolean Series -> "train" / "test" labels.
train_mask = df.apply(in_train, axis=1)
df["split"] = np.where(train_mask, "train", "test")

# ---- 2.4  Save ------------------------------------------------------------
# The manifest is rewritten in place, now carrying the extra "split" column.
df.to_csv(MANIFEST, index=False)
split_counts = df["split"].value_counts()
print(split_counts)

In [None]:
#!/usr/bin/env python
import torch, torchvision
from torch import nn
from torch.utils.data import DataLoader, WeightedRandomSampler
from tqdm import tqdm
from dataset import StyleDetectDataset, train_tf, val_tf

CSV = "images/manifest.csv"   # forward slashes resolve on Windows and POSIX alike
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ---- 4.1  Datasets & loaders ---------------------------------------------
train_ds = StyleDetectDataset(CSV, split="train", transforms=train_tf)
val_ds   = StyleDetectDataset(CSV, split="test",  transforms=val_tf)

# Imbalance handling: weight inversely to class freq
# NOTE(review): iterating the dataset calls __getitem__ for every sample just
# to read its label, which presumably loads and transforms every image. If
# StyleDetectDataset exposes its labels directly, read them from there instead
# - TODO confirm against dataset.py.
labels = [int(y) for _, y in train_ds]
# minlength=2 keeps one entry per class even if a class is absent from the
# split; clamp avoids a division by zero (infinite weight) in that case.
class_count = torch.bincount(torch.tensor(labels), minlength=2)
class_weight = 1.0 / class_count.clamp(min=1).float()
weights = class_weight[labels]          # one sampling weight per training sample
sampler = WeightedRandomSampler(weights, num_samples=len(train_ds), replacement=True)

# Pinned host memory only helps host->GPU copies, so it is requested only
# when a GPU is actually in use.
PIN_MEMORY = DEVICE == "cuda"
train_loader = DataLoader(train_ds, batch_size=64, sampler=sampler,
                          num_workers=4, pin_memory=PIN_MEMORY)
val_loader   = DataLoader(val_ds, batch_size=64, shuffle=False,
                          num_workers=4, pin_memory=PIN_MEMORY)

# ---- 4.2  Model -----------------------------------------------------------
# Start from ResNet-50 with the "IMAGENET1K_V2" weights and replace its final
# fully-connected layer with a fresh linear layer that has two outputs.
model = torchvision.models.resnet50(weights="IMAGENET1K_V2")
feature_dim = model.fc.in_features
model.fc = nn.Linear(feature_dim, 2)
model.to(DEVICE)

# ---- 4.3  Optimiser, loss, sched -----------------------------------------
EPOCHS = 3

# The WeightedRandomSampler above already rebalances the classes when batches
# are drawn, so the loss is left unweighted; weighting it by inverse class
# frequency as well would compensate for the imbalance twice.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
# The scheduler is stepped once per epoch, so the cosine schedule is spread
# over exactly the number of epochs that are run.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# ---- 4.4  Train loop ------------------------------------------------------
for epoch in range(1, EPOCHS + 1):
    model.train()
    running_loss = 0.0
    for x, y in tqdm(train_loader, desc=f"Train {epoch}"):
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        optimizer.step()
        # loss is the batch mean; scale by batch size to accumulate a sum
        running_loss += loss.item() * x.size(0)
    scheduler.step()

    # ---- validation ----
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            preds = model(x).argmax(1)
            correct += (preds == y).sum().item()
            total   += y.size(0)
    # An empty validation split yields NaN instead of a ZeroDivisionError.
    acc = correct / total * 100 if total else float("nan")
    print(f"Epoch {epoch:02d} | train_loss={running_loss/len(train_ds):.4f} "
          f"| val_acc={acc:.2f}%")


In [None]:
# Persist the trained network once training has completed.
checkpoint_path = "resnet_style.pt"
torch.save(model, checkpoint_path)   # pickles the whole model object, not only its weights
print("✔ model saved → resnet_style.pt")

In [3]:
import torch, torchvision.transforms as T
import torch.serialization
import torchvision.models.resnet
from PIL import Image, ImageTk
import tkinter as tk
from tkinter import Label


# ---- 2.1  Pick a device ---------------------------------------------------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

# ---- 2.2  Load the saved model on that device -----------------------------
# resnet_style.pt holds a fully pickled model object (it is written with
# torch.save(model, ...)), so it must be loaded with weights_only=False.
# SECURITY: unpickling can execute arbitrary code - only load checkpoints you
# created yourself.
# No safe_globals() context is used: that allow-list is only consulted by the
# weights_only=True unpickler and has no effect on a full pickle load.
model = torch.load("resnet_style.pt", map_location=device, weights_only=False)
model.eval()                       # set to inference mode
model.to(device)                   # make sure weights live on the right GPU/CPU

# ---- 2.3  Inference transform ---------------------------------------------
# Resize -> centre crop to 224x224 -> tensor -> per-channel normalisation.
# NOTE(review): presumably mirrors `val_tf` from dataset.py - TODO confirm.
tf = T.Compose([
    T.Resize(256), T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406],
                [0.229, 0.224, 0.225]),
])

# ---- 2.4  Load two images --------------------------------------------------
# Forward slashes resolve on every OS. Back-slash literals only work on
# Windows, and doubling them inside a raw string puts two literal back-slashes
# into the path.
imgA = Image.open("images/contents/content_1.jpg")
imgB = Image.open("images/stylizations/content_1___style_16___700.jpg")

# ---- 2.5  Apply transformations (only for model input) -------------------
imgA_tensor = tf(imgA.convert("RGB")).unsqueeze(0).to(device)  # (1, 3, 224, 224)
imgB_tensor = tf(imgB.convert("RGB")).unsqueeze(0).to(device)  # (1, 3, 224, 224)
batch = torch.cat([imgA_tensor, imgB_tensor], dim=0)           # already on `device`

# ---- 2.6  Predict ---------------------------------------------------------
with torch.no_grad():
    preds = model(batch).argmax(1).cpu().tolist()  # 0 = real, 1 = stylized

print("Predictions:", preds)  # e.g. [0, 1]

# ---- 2.7  Create the UI to display images side by side -------------------

# Set up the Tkinter window
root = tk.Tk()
root.title("Image Comparison")

# Resize images before converting to Tkinter-compatible format (only for display)
imgA_resized = imgA.resize((224, 224))  # Resize image to fit in window
imgB_resized = imgB.resize((224, 224))  # Resize image to fit in window

# Convert the resized images to Tkinter-compatible format
imgA_tk = ImageTk.PhotoImage(imgA_resized)
imgB_tk = ImageTk.PhotoImage(imgB_resized)

# Extra module-level names bound to the PhotoImage objects so they are not
# garbage-collected while the window is open. No `global` statement is
# needed: this code already runs at module level, where it has no effect.
imgA_tk_ref = imgA_tk
imgB_tk_ref = imgB_tk

# Create labels to display the images
labelA = Label(root, image=imgA_tk)
labelA.grid(row=0, column=0, padx=10, pady=10)

labelB = Label(root, image=imgB_tk)
labelB.grid(row=0, column=1, padx=10, pady=10)

# Add labels for predictions
pred_labelA = Label(root, text=f"Prediction: {preds[0]}")
pred_labelA.grid(row=1, column=0)

pred_labelB = Label(root, text=f"Prediction: {preds[1]}")
pred_labelB.grid(row=1, column=1)

# Run the Tkinter main loop (blocks until the window is closed)
root.mainloop()


Using device: cpu
Predictions: [0, 1]


In [None]:
import pandas as pd

# Label counts within the rows whose split is "test".
df = pd.read_csv("images/manifest.csv")
is_test = df["split"] == "test"
print(df.loc[is_test, "label"].value_counts())

In [None]:
#!/usr/bin/env python
import torch, torchvision
from torch import nn
from torch.utils.data import DataLoader, WeightedRandomSampler
from tqdm import tqdm
from dataset import StyleDetectDataset, train_tf, val_tf

CSV = "images/manifest.csv"   # forward slashes resolve on Windows and POSIX alike
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# ---- 5.1  Data loaders (same as ResNet script) ----------------------------
train_ds = StyleDetectDataset(CSV, split="train", transforms=train_tf)
val_ds   = StyleDetectDataset(CSV, split="test",  transforms=val_tf)

# NOTE(review): iterating the dataset calls __getitem__ for every sample just
# to read its label, which presumably loads and transforms every image. If
# StyleDetectDataset exposes its labels directly, read them from there instead
# - TODO confirm against dataset.py.
labels = [int(y) for _, y in train_ds]
# minlength=2 keeps one entry per class even if a class is absent from the
# split; clamp avoids a division by zero (infinite weight) in that case.
class_count = torch.bincount(torch.tensor(labels), minlength=2)
class_weight = 1.0 / class_count.clamp(min=1).float()
weights = class_weight[labels]          # one sampling weight per training sample
sampler = WeightedRandomSampler(weights, num_samples=len(train_ds), replacement=True)

# Pinned host memory only helps host->GPU copies, so it is requested only
# when a GPU is actually in use.
PIN_MEMORY = DEVICE == "cuda"
train_loader = DataLoader(train_ds, batch_size=32, sampler=sampler,  # ViT uses smaller batch
                          num_workers=4, pin_memory=PIN_MEMORY)
val_loader   = DataLoader(val_ds, batch_size=32, shuffle=False,
                          num_workers=4, pin_memory=PIN_MEMORY)

# ---- 5.2  ViT backbone ----------------------------------------------------
# ViT-B/16 with the "IMAGENET1K_V1" weights; its classification head is
# replaced by a fresh linear layer with two outputs.
model = torchvision.models.vit_b_16(weights="IMAGENET1K_V1")
head_in = model.heads.head.in_features
model.heads.head = nn.Linear(head_in, 2)
model.to(DEVICE)

# ---- 5.3  Optim + sched ---------------------------------------------------
EPOCHS = 3

# The loss is unweighted on purpose: the WeightedRandomSampler above already
# rebalances the classes when batches are drawn.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)   # higher LR for ViT
# The scheduler is stepped once per epoch, so the cosine schedule is spread
# over exactly the number of epochs that are run.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# ---- 5.4  Training loop ---------------------------------------------------
for epoch in range(1, EPOCHS + 1):
    model.train()
    running = 0
    for x, y in tqdm(train_loader, desc=f"Train {epoch}"):
        x, y = x.to(DEVICE), y.to(DEVICE)
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        optimizer.step()
        # loss is the batch mean; scale by batch size to accumulate a sum
        running += loss.item() * x.size(0)
    scheduler.step()

    # validation
    model.eval()
    correct = total = 0
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(DEVICE), y.to(DEVICE)
            preds = model(x).argmax(1)
            correct += (preds == y).sum().item()
            total   += y.size(0)
    # An empty validation split yields NaN instead of a ZeroDivisionError.
    val_acc = correct / total * 100 if total else float("nan")
    print(f"Epoch {epoch:02d} | loss={running/len(train_ds):.4f} "
          f"| val_acc={val_acc:.2f}%")

from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import numpy as np

# Final evaluation on the validation loader.
# eval() is set explicitly so this section does not depend on the mode the
# training loop happened to leave the model in.
model.eval()

all_preds, all_true = [], []   # all_preds collects P(class=1), not hard labels

with torch.no_grad():
    for x, y in val_loader:
        logits = model(x.to(DEVICE))
        probs  = torch.softmax(logits, 1)[:, 1]           # P(class=1)
        all_preds.extend(probs.cpu().numpy())
        all_true.extend(y.numpy())

# Pickles the whole model object (not only its weights).
torch.save(model, "vit_model.pt")
print("✔ model saved to vit_model.pt")

# Hard decisions at the default 0.5 cut-off, computed once and reused.
hard_preds = np.array(all_preds) > 0.5
print(confusion_matrix(all_true, hard_preds))
print(classification_report(all_true, hard_preds, digits=3))
print("AUC =", roc_auc_score(all_true, all_preds))   # AUC uses the raw probabilities

In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Evaluate a saved ViT (or any binary model) and pick the best threshold
according to F1-score on the *test* split.
-----------------------------------------------------------------------
Usage:
    Set MODEL_PATH in the CONFIG section below, then run this cell.
    (Command-line arguments are not read.)
"""

import sys, torch, numpy as np
import torchvision.transforms as T
import torch.serialization
import torchvision.models.resnet
from sklearn.metrics import (
    precision_recall_curve,
    classification_report,
    confusion_matrix,
    roc_auc_score,
)
from dataset import StyleDetectDataset, val_tf   # same transform as training

# ---------------------------------------------------------------------
# 1) CONFIG
# MODEL_PATH points at the checkpoint that the ViT training cell writes with
# torch.save(model, "vit_model.pt"); change it to evaluate another checkpoint.
MODEL_PATH = "vit_model.pt"
CSV        = r"images/manifest.csv"
DEVICE     = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 64                         # bigger is fine for inference

# ---------------------------------------------------------------------
# 2) DATA LOADER  (test split only)
test_ds     = StyleDetectDataset(CSV, split="test", transforms=val_tf)
test_loader = torch.utils.data.DataLoader(
    test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=4,
    # pinned host memory only helps host->GPU copies
    pin_memory=(DEVICE.type == "cuda"),
)

# ---------------------------------------------------------------------
# 3) LOAD MODEL
# The checkpoint is a fully pickled model object, hence weights_only=False.
# SECURITY: unpickling can execute arbitrary code - only load checkpoints you
# created yourself.
# No safe_globals() context is used: that allow-list is only consulted by the
# weights_only=True unpickler and has no effect on a full pickle load.
model = torch.load(MODEL_PATH, map_location=DEVICE, weights_only=False)
model.eval().to(DEVICE)

# ---------------------------------------------------------------------
# 4) COLLECT PROBABILITIES & LABELS
all_probs, all_true = [], []

with torch.no_grad():
    for x, y in test_loader:
        logits = model(x.to(DEVICE))
        probs  = torch.softmax(logits, dim=1)[:, 1]        # P(class==1)
        all_probs.extend(probs.cpu().numpy())
        all_true.extend(y.cpu().numpy())

all_probs = np.array(all_probs)
all_true  = np.array(all_true)

# The label distribution is taken from the labels gathered during inference,
# which avoids a second full pass over the dataset just to count them.
print(f"Test samples: {len(test_ds)} (label distribution: "
      f"{np.bincount(all_true.astype(np.int64))})")

# ---------------------------------------------------------------------
# 5) FIND THRESHOLD THAT MAXIMISES F1
# NOTE(review): the threshold is selected on the same split on which the final
# metrics are reported below, so those metrics are optimistic; a separate
# validation split would give an unbiased estimate.
prec, rec, thresh = precision_recall_curve(all_true, all_probs)
# precision_recall_curve returns one more (precision, recall) point than
# thresholds: the final point (precision=1, recall=0) has no threshold.
# It is dropped so that every F1 value lines up with an entry of `thresh`
# and the argmax can never index past the end of `thresh`.
f1 = 2 * prec[:-1] * rec[:-1] / (prec[:-1] + rec[:-1] + 1e-8)   # 1e-8 guards against 0/0
best_idx   = int(np.argmax(f1))
best_thr   = thresh[best_idx]
best_f1    = f1[best_idx]

print(f"\n>>>  Best F1 = {best_f1:.3f} at threshold {best_thr:.3f}")

# ---------------------------------------------------------------------
# 6) FINAL METRICS WITH THE NEW THRESHOLD
# Probabilities at or above the selected threshold become class 1.
above_threshold = all_probs >= best_thr
final_pred = above_threshold.astype(int)

print("\nConfusion matrix:")
cm = confusion_matrix(all_true, final_pred)
print(cm)

print("\nClassification report (macro-averaged):")
report = classification_report(all_true, final_pred, digits=3)
print(report)

# AUC is threshold-free, so it is computed from the raw probabilities.
auc = roc_auc_score(all_true, all_probs)
print(f"AUC  = {auc:.3f}")


In [None]:
#!/usr/bin/env python
import torch, torchvision.transforms as T
from PIL import Image
import sys

# Expected arguments: MODEL_PATH IMG_A IMG_B. Missing arguments produce a
# usage message instead of an IndexError.
if len(sys.argv) < 4:
    raise SystemExit(f"usage: {sys.argv[0]} MODEL_PATH IMG_A IMG_B")

MODEL_PATH = sys.argv[1]    # path to .pt or .pth weights (ResNet or ViT)
IMG_A      = sys.argv[2]    # first image path
IMG_B      = sys.argv[3]    # second image path

# Fall back to the CPU when no GPU is present instead of failing on .cuda().
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---- 6.1  Load model ------------------------------------------------------
# weights_only=False is passed explicitly because the model is used directly
# as a callable module below, i.e. the checkpoint is expected to be a whole
# pickled model object rather than a state_dict.
# SECURITY: unpickling can execute arbitrary code - only load checkpoints you
# trust.
model = torch.load(MODEL_PATH, map_location=device, weights_only=False)
model.eval().to(device)

# ---- 6.2  Inference transform ---------------------------------------------
# NOTE(review): presumably mirrors the validation transform used for
# training - TODO confirm against dataset.py.
tf = T.Compose([
    T.Resize(256), T.CenterCrop(224),
    T.ToTensor(), T.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]),
])

# ---- 6.3  Prepare batch of two -------------------------------------------
tensors = []
for p in (IMG_A, IMG_B):
    with Image.open(p) as img:          # close the file once it is decoded
        tensors.append(tf(img.convert("RGB")))
batch = torch.stack(tensors).to(device)

with torch.no_grad():
    preds = model(batch).argmax(1).cpu().tolist()  # 0=real,1=fake

print(f"Prediction: {preds[0]} {preds[1]}  (0 = real, 1 = style-transfer)")