## Cell 1 — Imports, config, helpers

In [2]:
# ==== Data Synthesis Verification: Clean vs Degraded (EXTREME) ====
# Generates metrics + plots for the report (with a CLEAN baseline)
# Exports figure PNGs using the exact names referenced in your LaTeX.

import os, math, random
from pathlib import Path
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from skimage.metrics import structural_similarity as ssim, peak_signal_noise_ratio as psnr

# -----------------------
# CONFIG: EDIT THESE PATHS
# -----------------------
CLEAN_DIR = "/kaggle/input/claude-synthesis/clean"  # your clean base set
DEG_DIRS = {
    "fog":           "/kaggle/input/claude-synthesis/fog",
    "rain":          "/kaggle/input/claude-synthesis/rain",
    "lowlight":      "/kaggle/input/claude-synthesis/lowlight",
    "fog_rain":      "/kaggle/input/claude-synthesis/fog_rain",
    "fog_lowlight":  "/kaggle/input/claude-synthesis/fog_lowlight",
    "rain_lowlight": "/kaggle/input/claude-synthesis/rain_lowlight",
    "extreme":       "/kaggle/input/claude-synthesis/extreme",
}
OUT_DIR = "/kaggle/working/Verification"
FIGS_DIR = "/kaggle/working/Verification/figs"     # export with LaTeX-friendly filenames
SAMPLE_PLOTS_PER_DEG = 6
MAX_SIDE = 1280
IMG_EXTS = {".jpg",".jpeg",".png",".bmp",".tif",".tiff",".webp"}
SEED = 42

os.makedirs(OUT_DIR, exist_ok=True)
os.makedirs(FIGS_DIR, exist_ok=True)
random.seed(SEED); np.random.seed(SEED)

# ---------- I/O helpers ----------
def imread_rgb(path):
    img = cv2.imdecode(np.fromfile(path, dtype=np.uint8), cv2.IMREAD_UNCHANGED)
    if img is None:
        raise ValueError(f"Failed to read: {path}")
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    elif img.shape[2] == 4:
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
    if MAX_SIDE and max(img.shape[:2]) > MAX_SIDE:
        h, w = img.shape[:2]
        s = MAX_SIDE / max(h, w)
        img = cv2.resize(img, (int(w*s), int(h*s)), interpolation=cv2.INTER_AREA)
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

def imwrite_rgb(path, img_rgb):
    os.makedirs(os.path.dirname(path), exist_ok=True)
    bgr = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2BGR)
    cv2.imencode(".png", bgr)[1].tofile(path)

def list_images(root):
    root = Path(root)
    return sorted([str(p) for p in root.rglob("*") if p.suffix.lower() in IMG_EXTS])

def index_by_stem(paths):
    idx = {}
    for p in paths:
        stem = Path(p).stem
        if stem not in idx:  # first one wins
            idx[stem] = p
    return idx

# ---------- Analysis helpers ----------
def rgb_hist(img, bins=256):
    H = []
    for c in range(3):
        h, _ = np.histogram(img[...,c].ravel(), bins=bins, range=(0,255), density=True)
        H.append(h + 1e-12)
    return np.stack(H, 0)

def js_divergence(p, q):
    m = 0.5*(p+q)
    kl = lambda a,b: np.sum(a*np.log(a/b))
    return 0.5*kl(p,m) + 0.5*kl(q,m)

def js_distance_rgb(H1, H2):
    d = []
    for c in range(3):
        p = H1[c] / (H1[c].sum() + 1e-12)
        q = H2[c] / (H2[c].sum() + 1e-12)
        d.append(np.sqrt(js_divergence(p, q)))
    return float(np.mean(d))

def canny_edge_density(img_rgb):
    g = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
    edges = cv2.Canny(g, 100, 200)
    return float((edges > 0).mean()), edges

def radial_power_profile(img_rgb):
    g = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY).astype(np.float32)/255.0
    F = np.fft.fftshift(np.fft.fft2(g))
    mag = np.abs(F)
    h,w = g.shape
    cy,cx = h//2, w//2
    y,x = np.ogrid[:h,:w]
    r = np.hypot(x-cx, y-cy).astype(np.int32)
    prof = np.bincount(r.ravel(), mag.ravel()) / (np.bincount(r.ravel()) + 1e-9)
    prof = prof[:r.max()]
    prof = prof / (prof.sum() + 1e-9)
    return prof

def l2_prefix(a,b):
    n = min(len(a), len(b))
    return float(np.linalg.norm(a[:n] - b[:n]))


## Cell 2 — Index, pair clean↔degraded

In [3]:
# ---------- Indexing ----------
clean_files = list_images(CLEAN_DIR)
if not clean_files:
    raise SystemExit(f"No clean images found in {CLEAN_DIR}. Fix CLEAN_DIR.")

clean_idx = index_by_stem(clean_files)
print(f"[i] Clean images: {len(clean_idx)}")

deg_indices = {}
for name, ddir in DEG_DIRS.items():
    paths = list_images(ddir)
    if not paths:
        print(f"[WARN] No images found in {ddir} for '{name}'")
    deg_indices[name] = index_by_stem(paths)
    print(f"[i] Degraded[{name}]: {len(deg_indices[name])}")

# Create paired list: (stem, clean_path, {deg_name: deg_path})
pairs = []
for stem, cpath in clean_idx.items():
    avail = {name: dindex[stem] for name, dindex in deg_indices.items() if stem in dindex}
    if avail:
        pairs.append((stem, cpath, avail))
print(f"[i] Paired clean with ≥1 degraded: {len(pairs)}")

# consistent order for plots
DOMAIN_ORDER = [
    "clean", "fog", "rain", "lowlight",
    "fog_rain", "fog_lowlight", "rain_lowlight", "extreme"
]


[i] Clean images: 500
[i] Degraded[fog]: 500
[i] Degraded[rain]: 500
[i] Degraded[lowlight]: 500
[i] Degraded[fog_rain]: 500
[i] Degraded[fog_lowlight]: 500
[i] Degraded[rain_lowlight]: 500
[i] Degraded[extreme]: 500
[i] Paired clean with ≥1 degraded: 500


## Cell 3 — Compute metrics (SSIM, PSNR, Edge Δ, Hist-JS, FFT-L2)

In [None]:
rows = []
for stem, cpath, dmap in tqdm(pairs, desc="Computing metrics"):
    try:
        clean = imread_rgb(cpath)
    except Exception as e:
        print(f"[skip clean] {cpath}: {e}")
        continue

    Hc = rgb_hist(clean)
    ec_ratio, _ = canny_edge_density(clean)
    fc = radial_power_profile(clean)
    g_clean = cv2.cvtColor(clean, cv2.COLOR_RGB2GRAY)

    for dname, dpath in dmap.items():
        try:
            deg = imread_rgb(dpath)
        except Exception as e:
            print(f"[skip deg] {dpath}: {e}")
            continue

        g_deg = cv2.cvtColor(deg, cv2.COLOR_RGB2GRAY)
        ssim_val = ssim(g_clean, g_deg, data_range=255)
        psnr_val = psnr(g_clean, g_deg, data_range=255)

        ed_ratio, _ = canny_edge_density(deg)
        edge_delta = ed_ratio - ec_ratio

        Hd = rgb_hist(deg)
        hist_js = js_distance_rgb(Hc, Hd)

        fd = radial_power_profile(deg)
        fft_l2 = l2_prefix(fc, fd)

        rows.append({
            "image": stem,
            "degradation": dname,
            "ssim": round(float(ssim_val), 6),
            "psnr": round(float(psnr_val), 3),
            "edge_ratio_clean": round(float(ec_ratio), 6),
            "edge_ratio_degraded": round(float(ed_ratio), 6),
            "edge_ratio_delta": round(float(edge_delta), 6),
            "hist_js_rgb": round(float(hist_js), 6),
            "fft_radial_l2": round(float(fft_l2), 6),
        })

df = pd.DataFrame(rows)
csv_path = f"{OUT_DIR}/metrics.csv"
df.to_csv(csv_path, index=False)
print(f"[i] Wrote metrics: {csv_path}")

if df.empty:
    raise SystemExit("[WARN] No metric rows were produced. Check your paths/pairs.")


  return 10 * np.log10((data_range**2) / err)
Computing metrics:  91%|█████████ | 455/500 [08:10<00:46,  1.03s/it]

## Cell 4 — Build summary + add CLEAN baseline

In [5]:
summary = (
    df.groupby("degradation")
      .agg(ssim_mean=("ssim","mean"),
           psnr_mean=("psnr","mean"),
           edge_delta_mean=("edge_ratio_delta","mean"),
           hist_js_mean=("hist_js_rgb","mean"),
           fft_l2_mean=("fft_radial_l2","mean"),
           n=("image","count"))
)

# Insert "clean" baseline (identity)
baseline = pd.DataFrame({
    "degradation":    ["clean"],
    "ssim_mean":      [1.0],
    "psnr_mean":      [np.inf],   # true ∞, will handle in plots
    "edge_delta_mean":[0.0],
    "hist_js_mean":   [0.0],
    "fft_l2_mean":    [0.0],
    "n":              [len(clean_idx)],
}).set_index("degradation")

summary_with_clean = pd.concat([baseline, summary], axis=0)

# Apply nice order, keep only present domains
summary_with_clean = summary_with_clean.reindex(
    [d for d in DOMAIN_ORDER if d in summary_with_clean.index]
)

summary.to_csv(f"{OUT_DIR}/summary_by_degradation.csv")
summary_with_clean.to_csv(f"{OUT_DIR}/summary_by_degradation_with_clean.csv")
print("[i] Saved summaries (with and without clean baseline).")
summary_with_clean.round(4)


[i] Saved summaries (with and without clean baseline).


Unnamed: 0_level_0,ssim_mean,psnr_mean,edge_delta_mean,hist_js_mean,fft_l2_mean,n
degradation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
clean,1.0,inf,0.0,0.0,0.0,500
fog,0.6453,15.6223,-0.0525,0.3632,0.0776,500
rain,0.519,15.7486,0.0793,0.3039,0.0286,500
lowlight,0.1393,9.0793,-0.0214,0.7504,0.411,500
fog_rain,0.5997,18.0756,-0.0475,0.3171,0.0642,500
fog_lowlight,0.3535,11.6077,-0.0527,0.5915,0.1464,500
rain_lowlight,0.0765,8.1841,-0.0386,0.785,0.4106,500
extreme,0.2384,inf,0.1291,0.5244,0.1303,500


## Cell 5 — Export report-ready bar plots (SSIM, PSNR, Hist-JS)

In [6]:
plt_params = dict(figsize=(9,4))
metrics_for_bars = [
    ("ssim_mean", "Mean SSIM (lower → stronger shift)", "ssim_bar.png"),
    ("psnr_mean", "Mean PSNR (dB) (lower → stronger corruption)", "psnr_bar.png"),
    ("hist_js_mean", "Histogram JS Distance (RGB)", "hist_js_bar.png"),
]

for col, title, fname in metrics_for_bars:
    plt.figure(**plt_params)
    vals = summary_with_clean[col].replace(np.inf, np.nan)  # ∞ → NaN for plotting
    vals.plot(kind="bar")
    plt.title(title)
    plt.ylabel(col); plt.xticks(rotation=30, ha="right")
    plt.grid(axis="y", alpha=0.3)
    # Annotate ∞ on the clean bar for PSNR
    if col == "psnr_mean" and "clean" in summary_with_clean.index:
        idx = list(summary_with_clean.index).index("clean")
        ymax = np.nanmax(vals.to_numpy()) if np.isfinite(vals.to_numpy()).any() else 1.0
        plt.text(idx, (ymax*0.9 if np.isfinite(ymax) else 1.0), "∞", ha="center", va="center", fontsize=12)
    plt.tight_layout()
    path1 = os.path.join(OUT_DIR, fname)
    path2 = os.path.join(FIGS_DIR, fname)  # for Overleaf
    plt.savefig(path1, dpi=160, bbox_inches="tight"); plt.savefig(path2, dpi=160, bbox_inches="tight")
    plt.close()
    print(f"[i] Saved {fname} → {path1} & {path2}")


[i] Saved ssim_bar.png → /kaggle/working/Verification/ssim_bar.png & /kaggle/working/Verification/figs/ssim_bar.png
[i] Saved psnr_bar.png → /kaggle/working/Verification/psnr_bar.png & /kaggle/working/Verification/figs/psnr_bar.png
[i] Saved hist_js_bar.png → /kaggle/working/Verification/hist_js_bar.png & /kaggle/working/Verification/figs/hist_js_bar.png


## Cell 6 — Save preview grids for all 7 domains (clean|degraded)

In [7]:
def save_preview_grids(df, k_per_deg=8):
    for dname in df["degradation"].unique():
        sub = df[df["degradation"] == dname]
        if sub.empty: continue
        ex = sub.sample(n=min(k_per_deg, len(sub)), random_state=SEED)

        tiles, names = [], []
        for _, r in ex.iterrows():
            stem = r["image"]
            cpath = clean_idx.get(stem)
            dpath = deg_indices[dname].get(stem)
            if not cpath or not dpath:
                continue
            try:
                cimg = imread_rgb(cpath); dimg = imread_rgb(dpath)
            except:
                continue
            # height-match
            h = cimg.shape[0]
            if dimg.shape[0] != h:
                scale = h / dimg.shape[0]
                dimg = cv2.resize(dimg, (int(dimg.shape[1]*scale), h), interpolation=cv2.INTER_AREA)
            cat = np.concatenate([cimg, dimg], axis=1)  # [clean | degraded]
            tiles.append(cat); names.append(stem)

        if not tiles:
            continue
        cols = 2
        rows = int(np.ceil(len(tiles)/cols))
        plt.figure(figsize=(cols*6, rows*4))
        for i, img in enumerate(tiles, 1):
            plt.subplot(rows, cols, i); plt.imshow(img); plt.axis('off')
            plt.title(f"{names[i-1]} [clean | {dname}]")
        plt.suptitle(f"Preview – {dname}", fontsize=14, y=1.02)
        plt.tight_layout()
        p1 = f"{OUT_DIR}/preview_{dname}.png"
        p2 = f"{FIGS_DIR}/preview_{dname}.png"
        plt.savefig(p1, dpi=160, bbox_inches="tight"); plt.savefig(p2, dpi=160, bbox_inches="tight")
        plt.close()
        print(f"[i] Saved preview grid: {p1} & {p2}")

save_preview_grids(df, k_per_deg=6)


[i] Saved preview grid: /kaggle/working/Verification/preview_fog.png & /kaggle/working/Verification/figs/preview_fog.png
[i] Saved preview grid: /kaggle/working/Verification/preview_rain.png & /kaggle/working/Verification/figs/preview_rain.png
[i] Saved preview grid: /kaggle/working/Verification/preview_lowlight.png & /kaggle/working/Verification/figs/preview_lowlight.png
[i] Saved preview grid: /kaggle/working/Verification/preview_fog_rain.png & /kaggle/working/Verification/figs/preview_fog_rain.png
[i] Saved preview grid: /kaggle/working/Verification/preview_fog_lowlight.png & /kaggle/working/Verification/figs/preview_fog_lowlight.png
[i] Saved preview grid: /kaggle/working/Verification/preview_rain_lowlight.png & /kaggle/working/Verification/figs/preview_rain_lowlight.png
[i] Saved preview grid: /kaggle/working/Verification/preview_extreme.png & /kaggle/working/Verification/figs/preview_extreme.png


## Cell 7 — FFT and Edge examples (fog, rain, extreme)

In [8]:
def pick_example_for(dname):
    sub = df[df["degradation"] == dname]
    if sub.empty: return None
    r = sub.sample(n=1, random_state=SEED).iloc[0]
    stem = r["image"]
    cpath = clean_idx.get(stem); dpath = deg_indices[dname].get(stem)
    if not cpath or not dpath: return None
    try:
        clean = imread_rgb(cpath); deg = imread_rgb(dpath)
    except:
        return None
    return stem, clean, deg

# ----- FFT plots (fog & extreme) -----
for dname, outname in [("fog", "fft_fog.png"), ("extreme", "fft_extreme.png")]:
    picked = pick_example_for(dname)
    if picked is None:
        print(f"[WARN] No example for FFT: {dname}")
        continue
    stem, clean, deg = picked
    fc = radial_power_profile(clean); fd = radial_power_profile(deg)
    n = min(len(fc), len(fd)); x = np.arange(n)
    plt.figure(figsize=(8,4))
    plt.plot(x, fc[:n], label="Clean")
    plt.plot(x, fd[:n], label=dname, linestyle="--")
    plt.xlabel("Spatial Frequency (radial index)"); plt.ylabel("Normalized power")
    plt.title(f"FFT Radial Power: {stem} [{dname}]"); plt.legend(); plt.tight_layout()
    p1 = f"{OUT_DIR}/{outname}"; p2 = f"{FIGS_DIR}/{outname}"
    plt.savefig(p1, dpi=160, bbox_inches="tight"); plt.savefig(p2, dpi=160, bbox_inches="tight")
    plt.close()
    print(f"[i] Saved {outname} → {p1} & {p2}")

# ----- Edge maps (fog & rain) -----
for dname, outname in [("fog","edges_fog.png"), ("rain","edges_rain.png")]:
    picked = pick_example_for(dname)
    if picked is None:
        print(f"[WARN] No example for edges: {dname}")
        continue
    stem, clean, deg = picked
    ec_ratio, ec_map = canny_edge_density(clean)
    ed_ratio, ed_map = canny_edge_density(deg)
    plt.figure(figsize=(10,4))
    plt.subplot(1,2,1); plt.imshow(ec_map, cmap='gray'); plt.axis('off'); plt.title(f"Clean edges ({ec_ratio:.3f})")
    plt.subplot(1,2,2); plt.imshow(ed_map, cmap='gray'); plt.axis('off'); plt.title(f"{dname} edges ({ed_ratio:.3f})")
    plt.suptitle(f"Canny Edge Maps: {stem}", y=1.02)
    plt.tight_layout()
    p1 = f"{OUT_DIR}/{outname}"; p2 = f"{FIGS_DIR}/{outname}"
    plt.savefig(p1, dpi=160, bbox_inches="tight"); plt.savefig(p2, dpi=160, bbox_inches="tight")
    plt.close()
    print(f"[i] Saved {outname} → {p1} & {p2}")


[i] Saved fft_fog.png → /kaggle/working/Verification/fft_fog.png & /kaggle/working/Verification/figs/fft_fog.png
[i] Saved fft_extreme.png → /kaggle/working/Verification/fft_extreme.png & /kaggle/working/Verification/figs/fft_extreme.png
[i] Saved edges_fog.png → /kaggle/working/Verification/edges_fog.png & /kaggle/working/Verification/figs/edges_fog.png
[i] Saved edges_rain.png → /kaggle/working/Verification/edges_rain.png & /kaggle/working/Verification/figs/edges_rain.png


In [9]:
# ==== Final: Zip all outputs for submission or report ====
import shutil
from pathlib import Path

OUT_DIR = Path("/kaggle/working/Verification")
ZIP_PATH = Path("/kaggle/working/JayRaj_Data_Analysis_Results.zip")

# Clean previous zip if exists
if ZIP_PATH.exists():
    ZIP_PATH.unlink()

# Create zip recursively (includes figs/, metrics, CSVs, etc.)
shutil.make_archive(str(ZIP_PATH).replace(".zip", ""), 'zip', root_dir=OUT_DIR)

# Verify contents
print(f"✅ Created zip archive: {ZIP_PATH}")
print("Contents:")
for p in sorted(OUT_DIR.rglob("*")):
    print(" -", p.relative_to(OUT_DIR))


✅ Created zip archive: /kaggle/working/JayRaj_Data_Analysis_Results.zip
Contents:
 - edges_fog.png
 - edges_rain.png
 - fft_extreme.png
 - fft_fog.png
 - figs
 - figs/edges_fog.png
 - figs/edges_rain.png
 - figs/fft_extreme.png
 - figs/fft_fog.png
 - figs/hist_js_bar.png
 - figs/preview_extreme.png
 - figs/preview_fog.png
 - figs/preview_fog_lowlight.png
 - figs/preview_fog_rain.png
 - figs/preview_lowlight.png
 - figs/preview_rain.png
 - figs/preview_rain_lowlight.png
 - figs/psnr_bar.png
 - figs/ssim_bar.png
 - hist_js_bar.png
 - metrics.csv
 - preview_extreme.png
 - preview_fog.png
 - preview_fog_lowlight.png
 - preview_fog_rain.png
 - preview_lowlight.png
 - preview_rain.png
 - preview_rain_lowlight.png
 - psnr_bar.png
 - ssim_bar.png
 - summary_by_degradation.csv
 - summary_by_degradation_with_clean.csv
