In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
from PIL import Image

import config  # ✅ will work if notebook is in same folder as config.py
from data.dataset import UnifiedImageDataset

In [None]:
# Read the CSV manifest found at config.CSV_PATH.
df = pd.read_csv(config.CSV_PATH)

# Build the dataset from a renamed view of the manifest: the "local_path"
# column is exposed as "file_path" (the original `df` is left untouched).
dataset = UnifiedImageDataset(
    df.rename({"local_path": "file_path"}, axis="columns"),
    mode="file_df",
    size=config.IMG_SIZE,
)

print(df.shape)

In [None]:
# --- sanity preview: first 10 images + saved edge maps ---

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import config

# Load the saved edge maps as a read-only memory map (works if dtype is uint8 or float32).
edges = np.load(config.EDGE_MAPS_PATH, mmap_mode="r")   # shape: [N, H*W]
assert len(dataset) == edges.shape[0], \
    f"length mismatch: dataset={len(dataset)} vs edges={edges.shape[0]}"

# Figure out H, W. Prefer config.IMG_SIZE; if it no longer matches the saved
# row length, fall back to a square map -- but only when the row length really
# is a perfect square, otherwise later reshapes would fail with an obscure error.
H, W = config.IMG_SIZE
if edges.shape[1] != H * W:
    side = int(round(np.sqrt(edges.shape[1])))
    if side * side != edges.shape[1]:
        raise ValueError(
            f"cannot infer edge-map shape: row length {edges.shape[1]} matches neither "
            f"config.IMG_SIZE={tuple(config.IMG_SIZE)} nor a square map"
        )
    H = W = side

def _normalize_edge_row(row: np.ndarray) -> np.ndarray:
    """Return a 2D edge map in [0,1] no matter the saved dtype."""
    e = row.reshape(H, W)
    if e.dtype == np.uint8:
        # typically 0/1 when you saved after (Canny/255.0) into uint8
        vmax = 255 if e.max() > 1 else 1
        return (e.astype(np.float32) / vmax)
    return e.astype(np.float32)

In [None]:
# --- predicted outlines: load model + visualize GT vs Pred ---
import config

import torch
import numpy as np
import matplotlib.pyplot as plt

from models.edge_heads import EdgeHead

# where you saved the edge head
CKPT_PATH = "best_edge_head.pth"   # change if needed

# embeddings (expected to be aligned 1:1 with the CSV / edge maps);
# EMBEDDINGS_TARGET_PATH wins, EMBEDDINGS_PATH is the fallback
EMB_PATH = getattr(config, "EMBEDDINGS_TARGET_PATH",
           getattr(config, "EMBEDDINGS_PATH", None))


assert EMB_PATH is not None, "Please set EMBEDDINGS_TARGET_PATH in config.py"
emb = np.load(EMB_PATH, mmap_mode="r")  # shape: [N, clip_dim]
edges = np.load(config.EDGE_MAPS_PATH, mmap_mode="r")

# Row i of `emb` is later paired with row i of `edges`; a length mismatch would
# otherwise show up only as an IndexError or as silently mispaired samples.
assert emb.shape[0] == edges.shape[0], \
    f"length mismatch: emb={emb.shape[0]} vs edges={edges.shape[0]}"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# build & load the model; input/output widths are taken from the saved arrays
clip_dim = emb.shape[1]
edge_dim = edges.shape[1]
edge_head = EdgeHead(clip_dim=clip_dim, edge_dim=edge_dim).to(device).eval()
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust
# (consider weights_only=True if the installed torch version supports it).
edge_head.load_state_dict(torch.load(CKPT_PATH, map_location=device))
# --- load the SAME dataset ordering used when you generated the .npy files ---


def bce(a, b, eps=1e-7):
    """Binary cross-entropy between two [0,1] maps (diagnostic only)."""
    a = np.clip(a, eps, 1 - eps)
    b = np.clip(b, eps, 1 - eps)
    return np.mean(-(a * np.log(b) + (1 - a) * np.log(1 - b)))


In [None]:
# --- shapes ---
# Prefer config.IMG_SIZE; if it no longer matches the saved row length, fall
# back to a square map, but only when the row length is a perfect square.
H, W = tuple(config.IMG_SIZE)
if edges.shape[1] != H * W:          # safeguard if config changed
    side = int(round(np.sqrt(edges.shape[1])))
    if side * side != edges.shape[1]:
        raise ValueError(
            f"cannot infer edge-map shape: row length {edges.shape[1]} matches neither "
            f"config.IMG_SIZE={tuple(config.IMG_SIZE)} nor a square map"
        )
    H = W = side

# --- predict one prob-map ---
def _predict_edge_map(i: int) -> np.ndarray:
    """Run the edge head on embedding row `i` and return its map as an (H, W) float32 array.

    Reads the notebook-level globals `emb`, `edge_head`, `device`, `H` and `W`.
    The second of the three values returned by `edge_head` is used as the
    flattened prediction.
    """
    # `emb` is a read-only memory map; copy the row into a writable float32
    # array first, since torch.from_numpy warns about (and does not support
    # writes through) non-writable NumPy buffers.
    row = np.array(emb[i], dtype=np.float32)
    z = torch.from_numpy(row).unsqueeze(0).to(device)
    with torch.no_grad():
        # pred_flat: [1, edge_dim]; presumably already in [0,1] -- TODO confirm against EdgeHead
        _, pred_flat, _ = edge_head(z)
    return pred_flat.squeeze(0).cpu().numpy().reshape(H, W).astype(np.float32)



In [None]:
import numpy as np
import matplotlib.pyplot as plt
from skimage.measure import find_contours
from skimage.morphology import binary_opening, remove_small_objects, disk, skeletonize

# --- adaptive threshold helpers ---------------------------------------------

def topk_threshold(prob: np.ndarray, keep_ratio: float = 0.02) -> float:
    """
    Pick a threshold so that ~keep_ratio of pixels are kept (top-k by prob).
    keep_ratio=0.01..0.05 works well for outlines.

    Args:
        prob: Array of scores; any shape is accepted (it is flattened).
        keep_ratio: Fraction of elements that should be >= the returned
            threshold. The resulting count is clamped to [1, prob.size], so
            ratios <= 0 keep one element and ratios >= 1 keep all of them.

    Returns:
        The k-th largest value of `prob` as a Python float.

    Raises:
        ValueError: If `prob` is empty.
    """
    flat = np.asarray(prob).ravel()
    if flat.size == 0:
        raise ValueError("topk_threshold: `prob` is empty")
    # Clamp k: without the upper bound, keep_ratio > 1 made `flat.size - k`
    # negative, which indexes from the end and returns an unrelated value.
    k = min(max(1, int(keep_ratio * flat.size)), flat.size)
    cut = flat.size - k
    # threshold = kth largest value
    return float(np.partition(flat, cut)[cut])

def clean_and_thin(mask: np.ndarray, min_size: int = 64) -> np.ndarray:
    """Clean a binary mask morphologically, then reduce it to its skeleton.

    Steps, in order: binary opening with a `disk(1)` footprint, removal of
    objects below `min_size` via `remove_small_objects`, and `skeletonize`
    (intended to thin the remaining structures to roughly 1px lines).
    """
    opened = binary_opening(mask, footprint=disk(1))
    without_specks = remove_small_objects(opened, min_size=min_size)
    return skeletonize(without_specks)

def draw_contour(ax, mask: np.ndarray, color='yellow', lw=2):
    """Plot the 0.5-level contours of `mask` on `ax` and hide the axes.

    Each contour returned by `find_contours` is an array of (row, col) points;
    column values go on the x axis and row values on the y axis.
    """
    as_float = mask.astype(float)
    for outline in find_contours(as_float, level=0.5):
        rows, cols = outline[:, 0], outline[:, 1]
        ax.plot(cols, rows, color=color, lw=lw)
    ax.set_axis_off()

# --- viewer (uses your existing _predict_edge_map, _normalize_edge_row, dataset) ----

def show_edges_with_pred(n=10, start=0, fixed_thr=None, keep_ratio=0.02,
                         outline_color='yellow', min_size=64):
    """
    Show image, GT edge map, predicted heatmap and predicted outline for a
    range of dataset samples, one five-panel figure per sample.

    Args:
        n: Maximum number of samples to show.
        start: Index of the first sample; the range is capped at len(dataset).
        fixed_thr: Binarization threshold. If None, an adaptive threshold is
            computed that keeps ~keep_ratio of the most confident pixels.
        keep_ratio: Fraction of pixels kept by the adaptive threshold; ignored
            when `fixed_thr` is given.
        outline_color: Colour of the contour drawn on the overlay panel.
        min_size: Passed to `clean_and_thin` as the small-object cutoff.

    Relies on the notebook-level `dataset`, `edges`, `_normalize_edge_row`,
    `_predict_edge_map`, `bce`, `topk_threshold`, `clean_and_thin` and
    `draw_contour`.
    """
    end = min(start + n, len(dataset))
    for i in range(start, end):
        # image: channel-first tensor -> channel-last array for imshow
        img_t, _ = dataset[i]
        img_np = img_t.permute(1, 2, 0).cpu().numpy()

        # GT + predicted
        gt   = _normalize_edge_row(edges[i])   # (H,W) in [0,1]
        pred = _predict_edge_map(i)            # (H,W)

        # choose threshold and describe it, so the title reports the value
        # actually applied instead of a keep ratio that may not have been used
        if fixed_thr is None:
            thr = topk_threshold(pred, keep_ratio=keep_ratio)
            thr_label = f"thr={thr:.3f} (auto, keep={keep_ratio*100:.1f}%)"
        else:
            thr = fixed_thr
            thr_label = f"thr={thr:.3f} (fixed)"

        # binarize + cleanup
        pred_bin = (pred >= thr)
        outline_mask = clean_and_thin(pred_bin, min_size=min_size)

        # diagnostics
        mse = float(np.mean((pred - gt) ** 2))
        bce_val = float(bce(gt, pred))
        path = dataset.df.iloc[i]["file_path"]

        # plot
        fig, ax = plt.subplots(1, 5, figsize=(16, 3.2))
        fig.suptitle(
            f"idx={i} | {path}\nMSE={mse:.5f}  BCE={bce_val:.5f}  {thr_label}",
            fontsize=10
        )

        ax[0].imshow(img_np)
        ax[0].set_title("Image")
        ax[0].axis("off")

        ax[1].imshow(gt, cmap="gray", vmin=0, vmax=1)
        ax[1].set_title("GT edge")
        ax[1].axis("off")

        ax[2].imshow(pred, cmap="gray", vmin=0, vmax=1)
        ax[2].set_title("Pred heatmap")
        ax[2].axis("off")

        ax[3].imshow(img_np)
        ax[3].imshow(pred, cmap="magma", vmin=0, vmax=1, alpha=0.30)
        draw_contour(ax[3], outline_mask, color=outline_color, lw=2)
        ax[3].set_title("Pred overlay")
        ax[3].axis("off")

        ax[4].imshow(img_np)
        # visualize the binary mask softly so the image is visible below
        ax[4].imshow(outline_mask, cmap="autumn", vmin=0, vmax=1, alpha=0.45)
        ax[4].set_title("Pred outline")
        ax[4].axis("off")

        plt.tight_layout()
        plt.show()
        # release the figure so long previews do not accumulate open figures
        plt.close(fig)

In [None]:
# Preview the first 20 samples using the adaptive (top-k) threshold.
# NOTE(review): keep_ratio=0.50 keeps about half of all pixels, far above the
# 0.01..0.05 range suggested in topk_threshold's docstring -- confirm this is intended.
show_edges_with_pred(
    n=20,
    start=0,
    fixed_thr=None,
    keep_ratio=0.50,
)

# Generation

In [None]:
import os
# Root directory for all downloaded model artefacts; created if missing.
HF_CACHE = "/data/hf-cache"
os.makedirs(HF_CACHE, exist_ok=True)

# Point each library's cache variable at HF_CACHE or one of its sub-folders.
os.environ.update({
    "HF_HOME": HF_CACHE,
    "HUGGINGFACE_HUB_CACHE": os.path.join(HF_CACHE, "hub"),
    "TRANSFORMERS_CACHE": os.path.join(HF_CACHE, "transformers"),
    "DIFFUSERS_CACHE": os.path.join(HF_CACHE, "diffusers"),
    "TORCH_HOME": os.path.join(HF_CACHE, "torch"),
})

In [None]:
# Put the edge head in evaluation mode before generation. As the last
# expression of the cell, the returned module is also echoed as cell output.
edge_head.eval()

In [None]:
from IPython.display import display
from torchvision import transforms
from ip_adapter import IPAdapter

# Reusable converter; applied below to dataset image tensors to obtain PIL images.
to_pil = transforms.ToPILImage()

def display_images(images):
    """Render every item of `images`, in order, with IPython's `display`."""
    for picture in images:
        display(picture)

@torch.no_grad()
def generate_from_dataset_id(i: int,
                             prompt: str = "a cat playing with a ball",
                             guidance_scale: float = 10.0,
                             steps: int = 50):
    """Generate images for dataset sample `i`, conditioned through the edge head.

    The sample's CLIP embedding is fed to `edge_head`; the third value it
    returns is handed to `ip_adapter.generate_from_embeddings` as
    `clip_image_embeds`, together with the text `prompt`. The source image is
    displayed first, then the generated images, which are also returned.

    Reads the notebook-level `dataset`, `emb`, `device`, `edge_head`,
    `ip_adapter` and `to_pil`; `clip_model` / `preprocess` are needed only if
    the precomputed embedding cannot be used.
    """
    # Source image, as a tensor and as a PIL image for display / fallback encoding.
    source_tensor, _ = dataset[i]
    source_pil = to_pil(source_tensor)

    # CLIP embedding: prefer the precomputed row, otherwise encode on the fly.
    try:
        clip_vec = torch.as_tensor(emb[i], dtype=torch.float32, device=device)
        clip_vec = clip_vec.unsqueeze(0)  # [1, D]
    except Exception:
        assert 'clip_model' in globals() and 'preprocess' in globals(), \
            "Need `clip_model` and `preprocess` to compute CLIP on the fly."
        pixel_batch = preprocess(source_pil).unsqueeze(0).to(device)
        clip_vec = clip_model.encode_image(pixel_batch).float()

    # Only the third edge-head output is used as the conditioning signal.
    _, _, conditioning = edge_head(clip_vec)

    # Note: the adapter receives the edge-head output, not the raw CLIP vector.
    generation_kwargs = dict(
        clip_image_embeds=conditioning,
        prompt=prompt,
        num_samples=1,
        guidance_scale=guidance_scale,
        num_inference_steps=steps,
    )
    images = ip_adapter.generate_from_embeddings(**generation_kwargs)

    # Show the original first, then whatever was generated.
    display(source_pil)
    display_images(images)
    return images


In [None]:
generate_from_dataset_id(i=1,
                         prompt="a cat",
                         guidance_scale=7.5,
                         steps=200)

In [None]:
from diffusers import StableDiffusionPipeline

# Initialize Stable Diffusion pipeline
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
    safety_checker=None,   # saves ~1.2 GB locally
    feature_extractor=None,
    cache_dir=HF_CACHE,
).to(device)

# Initialize IP-Adapter with custom embedding type
ip_adapter = IPAdapter(
    sd_pipe=pipe,
    image_encoder_path="laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
    ip_ckpt="/data/thesis/models/ip-adapter_sd15.bin",
    device=device,
    embedding_type='clip'
)