In [4]:
# shape_likert_generator.py
import numpy as np
import pandas as pd
from pathlib import Path

# -----------------------------
# CONFIG
# -----------------------------
INPUT_DIR = Path("outputs")
REGION_CSV = r"F:\New Dissertation - Image Generation\POC\outputs\shape_region_features.csv"
IMAGE_CSV  = r"F:\New Dissertation - Image Generation\POC\outputs\shape_image_indices.csv"
OUT_CSV    = r"F:\New Dissertation - Image Generation\POC\outputs\shape_likert_simulated.csv"

SEED = 42
NOISE = 0.35          # overall Likert noise (lower = more deterministic)
CLIP_MIN, CLIP_MAX = 1, 5

rng = np.random.default_rng(SEED)


# -----------------------------
# Helpers
# -----------------------------
def robust_norm01(x, lo=5, hi=95):
    """Robust normalize to [0,1] using percentiles to avoid outliers."""
    x = np.asarray(x, dtype=float)
    if np.all(~np.isfinite(x)):
        return np.full_like(x, np.nan, dtype=float)
    a = np.nanpercentile(x, lo)
    b = np.nanpercentile(x, hi)
    if not np.isfinite(a) or not np.isfinite(b) or abs(b - a) < 1e-12:
        return np.clip(x, 0, 1)
    y = (x - a) / (b - a)
    return np.clip(y, 0.0, 1.0)

def to_likert_1to5(score01, noise=NOISE):
    """
    Map score in [0,1] -> Likert 1..5 with noise.
    """
    s = np.asarray(score01, dtype=float)
    s = np.clip(s, 0.0, 1.0)
    # map to 1..5
    y = 1.0 + 4.0 * s
    # add small gaussian noise
    y = y + rng.normal(0.0, noise, size=y.shape)
    y = np.clip(y, CLIP_MIN, CLIP_MAX)
    return np.rint(y).astype(int)

def weighted_mean(x, w):
    x = np.asarray(x, dtype=float)
    w = np.asarray(w, dtype=float)
    m = np.isfinite(x) & np.isfinite(w) & (w > 0)
    if m.sum() == 0:
        return np.nan
    return float(np.sum(x[m] * w[m]) / (np.sum(w[m]) + 1e-12))

def safe_log1p(x):
    x = np.asarray(x, dtype=float)
    x = np.where(np.isfinite(x) & (x > 0), x, np.nan)
    return np.log1p(x)


# -----------------------------
# Load CSVs
# -----------------------------
df_r = pd.read_csv(REGION_CSV)
df_i = pd.read_csv(IMAGE_CSV)

# -----------------------------
# Build extra image-level aggregates from region table
# -----------------------------
# weights: saliency if present else area_ratio
w = df_r["saliency"].astype(float)
w = np.where(np.isfinite(w) & (w > 0), w, np.nan)
df_r["_w"] = w
# fallback when all NaN weights per image will be handled groupwise
df_r["_w_area"] = np.where(np.isfinite(df_r["area_ratio"]), df_r["area_ratio"].astype(float), 0.0)

df_r["_axis_strength"] = safe_log1p(df_r["pca_eig_ratio"].astype(float))

def agg_group(g):
    ww = g["_w"].to_numpy(dtype=float)
    if not np.isfinite(ww).any():
        ww = g["_w_area"].to_numpy(dtype=float)
    ww = np.where(np.isfinite(ww) & (ww > 0), ww, 0.0)

    return pd.Series({
        "axis_strength_wm": weighted_mean(g["_axis_strength"].to_numpy(dtype=float), ww),
        "symmetry_wm": weighted_mean(g["symmetry_pca"].to_numpy(dtype=float), ww),
        "smoothness_wm": weighted_mean(g["smoothness"].to_numpy(dtype=float), ww),
        "centeredness_wm": weighted_mean(g["centeredness"].to_numpy(dtype=float), ww),

        # stability fields: handle missing columns gracefully
        "stability_mean_wm": weighted_mean(g.get("stability_mask_mean", g.get("stability_iou")).to_numpy(dtype=float), ww)
        if ("stability_mask_mean" in g.columns or "stability_iou" in g.columns) else np.nan,

        "stability_range_wm": weighted_mean(g["stability_mask_range"].to_numpy(dtype=float), ww)
        if "stability_mask_range" in g.columns else np.nan,

        "n_regions": int(len(g)),
        "saliency_top1_share": float(
            (np.nanmax(np.where(np.isfinite(g["saliency"].to_numpy(dtype=float)), g["saliency"].to_numpy(dtype=float), 0.0)))
            / (np.nansum(np.where(np.isfinite(g["saliency"].to_numpy(dtype=float)), g["saliency"].to_numpy(dtype=float), 0.0)) + 1e-12)
        ) if np.isfinite(g["saliency"].to_numpy(dtype=float)).any() else np.nan,
    })

df_extra = df_r.groupby(["split", "image_id"], as_index=False).apply(agg_group).reset_index(drop=True)

# Merge onto image indices
df = df_i.merge(df_extra, on=["split", "image_id"], how="left")

# -----------------------------
# Build perceptual scores (0..1) for each Likert question
# using realistic proxies from computed metrics.
# -----------------------------
# Normalized components
axis01    = robust_norm01(df["axis_strength_index"].astype(float) if "axis_strength_index" in df.columns else df["axis_strength_wm"].astype(float))
sym01     = robust_norm01(df["symmetry_index"].astype(float) if "symmetry_index" in df.columns else df["symmetry_wm"].astype(float))
stab01    = robust_norm01(df["stability_index"].astype(float) if "stability_index" in df.columns else df["stability_mean_wm"].astype(float))
hier01    = robust_norm01(df["hierarchy_concentration_top1"].astype(float) if "hierarchy_concentration_top1" in df.columns else df["saliency_top1_share"].astype(float))
branch01  = robust_norm01(df["branching_complexity"].astype(float)) if "branching_complexity" in df.columns else np.full(len(df), np.nan)
smooth01  = robust_norm01(df["smoothness_wm"].astype(float))
cent01    = robust_norm01(df["centeredness_wm"].astype(float))

# A simple mechanical-ness penalty (very high symmetry + high branching tends to feel constructed)
mech01 = np.nan_to_num(0.6 * sym01 + 0.4 * branch01, nan=0.5)

# Q1 Shape Emergence: hierarchy + stability + mild axis support
q1_score = np.nan_to_num(0.45 * hier01 + 0.40 * stab01 + 0.15 * axis01, nan=0.5)

# Q2 Boundary Coherence: stability + smoothness (closure/continuity proxy)
q2_score = np.nan_to_num(0.55 * stab01 + 0.45 * smooth01, nan=0.5)

# Q3 Felt Structural Axis: axis strength (dominant) + low branchiness
q3_score = np.nan_to_num(0.75 * axis01 + 0.25 * (1.0 - np.nan_to_num(branch01, nan=0.5)), nan=0.5)

# Q4 Symmetry Quality: symmetry, but penalize "mechanical" feeling
q4_score = np.nan_to_num(0.75 * sym01 + 0.25 * (1.0 - mech01), nan=0.5)

# Q5 Hierarchical Organization: hierarchy concentration (dominant)
q5_score = np.nan_to_num(hier01, nan=0.5)

# Q6 Partâ€“Whole Integration: hierarchy + axis + centeredness (coherence proxy)
q6_score = np.nan_to_num(0.35 * hier01 + 0.35 * axis01 + 0.30 * cent01, nan=0.5)

# Q7 Perceptual Stability: stability + centeredness
q7_score = np.nan_to_num(0.70 * stab01 + 0.30 * cent01, nan=0.5)

# Q8 Organic Coherence: smoothness + stability, penalize mechanical-ness
q8_score = np.nan_to_num(0.50 * smooth01 + 0.35 * stab01 + 0.15 * (1.0 - mech01), nan=0.5)

# -----------------------------
# Convert to Likert 1..5
# -----------------------------
df_out = df[["split", "image_id"]].copy()

df_out["Q1_shape_emergence"]       = to_likert_1to5(q1_score)
df_out["Q2_boundary_coherence"]    = to_likert_1to5(q2_score)
df_out["Q3_felt_structural_axis"]  = to_likert_1to5(q3_score)
df_out["Q4_symmetry_quality"]      = to_likert_1to5(q4_score)
df_out["Q5_hierarchical_org"]      = to_likert_1to5(q5_score)
df_out["Q6_part_whole_integration"]= to_likert_1to5(q6_score)
df_out["Q7_perceptual_stability"]  = to_likert_1to5(q7_score)
df_out["Q8_organic_coherence"]     = to_likert_1to5(q8_score)

# composites (useful for reporting)
df_out["structural_coherence_mean"] = df_out[
    ["Q1_shape_emergence","Q2_boundary_coherence","Q3_felt_structural_axis","Q5_hierarchical_org","Q7_perceptual_stability"]
].mean(axis=1)

df_out["expressive_integrity_mean"] = df_out[
    ["Q4_symmetry_quality","Q6_part_whole_integration","Q8_organic_coherence"]
].mean(axis=1)

# include computed indices (optional but helpful for analysis joins)
keep_cols = [c for c in ["axis_strength_index","symmetry_index","stability_index","hierarchy_concentration_top1","branching_complexity"] if c in df.columns]
for c in keep_cols:
    df_out[c] = df[c].values

# write
OUT_CSV.parent.mkdir(parents=True, exist_ok=True)
df_out.to_csv(OUT_CSV, index=False)

print("Wrote:", OUT_CSV)
print("Rows:", len(df_out))
print(df_out.head(10))

Wrote: outputs\shape_likert_simulated.csv
Rows: 128
       split                      image_id  Q1_shape_emergence  \
0  generated  generated\ComfyUI_00082_.png                   2   
1  generated  generated\ComfyUI_00083_.png                   3   
2  generated  generated\ComfyUI_00084_.png                   4   
3  generated  generated\ComfyUI_00085_.png                   2   
4  generated  generated\ComfyUI_00086_.png                   2   
5  generated  generated\ComfyUI_00087_.png                   4   
6  generated  generated\ComfyUI_00088_.png                   3   
7  generated  generated\ComfyUI_00089_.png                   2   
8  generated  generated\ComfyUI_00090_.png                   3   
9  generated  generated\ComfyUI_00091_.png                   3   

   Q2_boundary_coherence  Q3_felt_structural_axis  Q4_symmetry_quality  \
0                      3                        4                    3   
1                      3                        2                    2   

  df_extra = df_r.groupby(["split", "image_id"], as_index=False).apply(agg_group).reset_index(drop=True)
