In [1]:
from PIL import Image, ImageDraw
from src.embed_clip import embed_dir
import numpy as np
import joblib
import os
import json

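# Note: only lightweight helpers are imported here; heavy model downloads are deferred until first use.
# `src` is imported in this cell before the sys.path setup cell below has run, so this cell
# assumes `src` is already importable (e.g. the kernel was started in the repository root).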

In [2]:
# Ensure the repository root is on sys.path so `import src` works
# This searches upward from the current working directory for a folder
# containing `src` and inserts that root into sys.path. This is
# robust to nbconvert running from different CWDs (common in CI).
import sys, os
from pathlib import Path

def add_repo_root_to_path(max_levels: int = 10) -> None:
    """Put the repository root at the front of ``sys.path`` so ``import src`` works.

    Starting at the current working directory, inspects that directory and
    then its ancestors (``max_levels`` directories in total) looking for a
    ``src`` sub-directory. The first directory that has one is treated as the
    repository root.

    The function is safe to call repeatedly: the root ends up at
    ``sys.path[0]`` exactly once, so re-running the notebook cell does not
    stack duplicate entries.

    Args:
        max_levels: Number of directories to inspect, counting the current
            working directory itself. Defaults to 10.

    Returns:
        None. The outcome is reported with ``print`` and through the
        ``sys.path`` side effect.
    """

    def _promote(root: Path) -> None:
        # Remove any earlier copies of the entry, then place it first so the
        # repo's `src` still wins over same-named packages installed elsewhere.
        entry = str(root)
        sys.path[:] = [entry] + [item for item in sys.path if item != entry]

    candidate = Path.cwd()
    for _ in range(max_levels):
        if (candidate / 'src').is_dir():
            _promote(candidate)
            print('Added repo root to sys.path:', candidate)
            return
        if candidate.parent == candidate:
            # A path that is its own parent is the filesystem root: stop walking.
            break
        candidate = candidate.parent

    # fallback: ensure relative src exists from cwd
    # NOTE(review): the walk above already tested cwd/src with is_dir(), so this
    # branch appears reachable only when `src` exists but is not a directory
    # (or max_levels is 0) — confirm it is still wanted.
    src_rel = Path('src').resolve()
    if src_rel.exists():
        _promote(src_rel.parent)
        print('Added relative repo root to sys.path:', src_rel.parent)
    else:
        print('Warning: could not find src/ in parent tree; imports may fail')


add_repo_root_to_path()


Added repo root to sys.path: /workspaces/synthetic-detector


In [3]:
# Make sure both class folders exist before anything is written into them.
for class_dir in ("data/real", "data/ai"):
    os.makedirs(class_dir, exist_ok=True)

# Placeholder picture: a white 256x256 canvas with a black square outline.
img = Image.new("RGB", (256, 256), "white")
ImageDraw.Draw(img).rectangle((50, 50, 200, 200), outline="black", width=5)

# The very same image is written once into each class folder.
for target in ("data/real/dummy.jpg", "data/ai/dummy.jpg"):
    img.save(target)
print("✅ Dummy images created.")

✅ Dummy images created.


In [4]:
# Generate CLIP embeddings for the dummy dataset
from src.embed_clip import embed_dir
embed_dir("data", "embeddings_train.npz")
print("✅ Embeddings saved to embeddings_train.npz")

# check shape
# np.load on an .npz archive returns a lazy NpzFile that keeps the file open;
# the `with` block closes it so a later re-run can overwrite the archive.
# NOTE(review): the recorded output shows 3 rows although the dummy-image cell
# writes only two files — data/ probably held an extra image at run time; confirm.
with np.load("embeddings_train.npz") as D:
    print("X:", D["X"].shape, "y:", D["y"].shape)


  from .autonotebook import tqdm as notebook_tqdm




✅ Embeddings saved to embeddings_train.npz
X: (3, 512) y: (3,)


In [5]:
# Train the tiny MLP on the generated embeddings
from src.train_mlp import train

# Single source of truth for the output path, so the call and the message cannot drift apart.
MODEL_PATH = "models/mlp_aesthetic.joblib"

# Create the output folder up front so a fresh checkout does not depend on models/ existing.
# NOTE(review): train() may already create it — exist_ok=True makes this harmless either way.
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)

train("embeddings_train.npz", MODEL_PATH)
print(f"✅ Trained MLP saved to {MODEL_PATH}")

Saved models/mlp_aesthetic.joblib
✅ Trained MLP saved to models/mlp_aesthetic.joblib


In [6]:
# Score one of the generated sample images and pretty-print the result as JSON
from src.infer import score_image
import json

sample_image = "data/ai/dummy.jpg"
scored = score_image(sample_image)
pretty = json.dumps(scored, indent=2)
print(pretty)

{
  "aesthetic_score": 4.982,
  "label": "Real"
}
