In [4]:
import pandas as pd
import numpy as np
from pathlib import Path

# -----------------------------
# Load computed features CSV
# -----------------------------
# Location of the computed-features CSV that the rest of this cell works on.
features_csv = r"F:\New Dissertation - Image Generation\POC\outputs\balance_features_final.csv"
df = pd.read_csv(features_csv)

# One seeded generator, used as the default noise source by the helpers below.
rng = np.random.default_rng(42)


# -----------------------------
# Helpers
# -----------------------------
def to_likert(x, noise=0.6, seed_rng=None):
    """
    Map a numeric signal to Likert 1–7 using z-score + Gaussian noise.

    The signal is standardised with its NaN-aware mean and standard
    deviation, Gaussian noise with standard deviation ``noise`` is added,
    and the result is centred on 4, rounded and clipped to [1, 7].

    Missing (NaN) entries are treated as sitting at the mean (z = 0), so
    they land around the neutral midpoint instead of being cast from NaN
    to an arbitrary integer.

    Parameters
    ----------
    x : array-like of numbers
        Signal to convert; higher values give higher ratings.
    noise : float
        Standard deviation of the Gaussian noise added in z-score units.
    seed_rng : numpy.random.Generator or None
        Noise source. When None, the module-level ``rng`` is used, so the
        order of calls affects the values drawn.

    Returns
    -------
    numpy.ndarray of int
        Ratings in 1..7, same shape as ``x``.
    """
    if seed_rng is None:
        seed_rng = rng

    x = np.asarray(x, dtype=float)
    missing = np.isnan(x)

    if missing.all():
        # Nothing to standardise (empty or all-NaN input): every entry is
        # neutral, and np.nanmean is not called on an empty slice.
        z = np.zeros_like(x)
    else:
        mu = np.nanmean(x)
        sd = np.nanstd(x) + 1e-8  # epsilon guards against a constant signal
        z = np.where(missing, 0.0, (x - mu) / sd)

    # One noise draw per element, including the missing ones, so the number
    # of values consumed from the generator depends only on the input size.
    z = z + seed_rng.normal(0, noise, size=z.shape)
    lik = np.clip(np.round(4 + z), 1, 7)
    return lik.astype(int)


def pull_direction(dx, dy, thresh=0.03):
    """
    Infer pull direction from dx/dy (normalized CoM offsets).

    Rules:
      * either offset is NaN                    -> 'none'
      * both |dx| and |dy| are below thresh     -> 'none'
      * both |dx| and |dy| are at/above thresh  -> diagonal, '<vert>_<horiz>'
      * otherwise                               -> the dominant cardinal axis

    Positive dx maps to 'right' and positive dy maps to 'down'.

    Parameters
    ----------
    dx, dy : float
        Horizontal and vertical offsets.
    thresh : float
        Minimum absolute offset for an axis to count as a pull.

    Returns
    -------
    str
        One of 'none', 'left', 'right', 'up', 'down', 'up_left',
        'up_right', 'down_left', 'down_right'.
    """
    # NaN compares False against everything, so without this guard a missing
    # offset would skip the 'none' test and be labelled 'up' by default.
    if np.isnan(dx) or np.isnan(dy):
        return "none"

    if abs(dx) < thresh and abs(dy) < thresh:
        return "none"

    horiz = "right" if dx > 0 else "left"
    vert = "down" if dy > 0 else "up"

    if abs(dx) >= thresh and abs(dy) >= thresh:
        return f"{vert}_{horiz}"

    # Exactly one axis cleared the threshold; report the larger one.
    return horiz if abs(dx) > abs(dy) else vert


def magnitude_to_likert(mag, noise=0.08, seed_rng=None):
    """
    More stable mapping for pull strength than z-scoring:
    min-max -> [1,7] + small noise.

    The magnitudes are rescaled to [0, 1] using the smallest and largest
    observed (non-NaN) values, Gaussian noise is added and clipped back to
    [0, 1], and the result is mapped linearly onto 1..7 and rounded.

    Missing (NaN) magnitudes are scored as 1 (weakest pull) instead of being
    cast from NaN to an arbitrary integer. Empty input returns an empty
    array.

    Parameters
    ----------
    mag : array-like of numbers
        Pull magnitudes.
    noise : float
        Standard deviation of the Gaussian noise added on the [0, 1] scale.
    seed_rng : numpy.random.Generator or None
        Noise source. When None, the module-level ``rng`` is used, so the
        order of calls affects the values drawn.

    Returns
    -------
    numpy.ndarray of int
        Ratings in 1..7, same shape as ``mag``.
    """
    if seed_rng is None:
        seed_rng = rng

    mag = np.asarray(mag, dtype=float)
    missing = np.isnan(mag)
    observed = mag[~missing]

    if observed.size:
        m0 = observed.min()
        m1 = observed.max()
    else:
        # Empty or all-NaN input: no range to scale by.
        m0 = m1 = 0.0

    # Missing entries are parked at the minimum so no NaN flows through the
    # arithmetic; their final score is overwritten below.
    filled = np.where(missing, m0, mag)
    mag_norm = (filled - m0) / (m1 - m0 + 1e-8)  # epsilon guards m1 == m0
    mag_norm = np.clip(
        mag_norm + seed_rng.normal(0, noise, size=mag_norm.shape), 0, 1
    )

    lik = np.clip(np.round(1 + 6 * mag_norm), 1, 7)
    lik = np.where(missing, 1, lik)
    return lik.astype(int)


# -----------------------------
# Simulated qualitative items (feature-informed)
# -----------------------------

# NOTE: the to_likert calls below draw from the shared generator, so their
# order is part of the result; keep Q1..Q6 in this sequence.

# Q1 — Weight stability: d is negated so that a smaller d scores higher
q1_signal = -df["d"]
df["q1_weight_stability_1to7"] = to_likert(q1_signal, noise=0.6)

# Q2 — Symmetry equilibrium: row-wise mean of the two symmetry columns
sym_signal = df.loc[:, ["sym_lr", "sym_tb"]].mean(axis="columns")
df["q2_symmetry_equilibrium_1to7"] = to_likert(sym_signal, noise=0.6)

# Q3 — Negative space support: missing values take the column mean
neg_pct = df["neg_space_pct"]
neg_signal = neg_pct.fillna(neg_pct.mean())
df["q3_negative_space_support_1to7"] = to_likert(neg_signal, noise=0.6)

# Q4 — Compositional placement (rule of thirds): average every column whose
# name contains both "rot_" and "mean"; fall back to a flat zero signal
rot_cols = [name for name in df.columns if "rot_" in name and "mean" in name]
if rot_cols:
    rot_signal = df[rot_cols].mean(axis=1)
else:
    rot_signal = np.zeros(len(df))
df["q4_compositional_placement_1to7"] = to_likert(rot_signal, noise=0.6)

# Q5 — Directional dynamics stability: ratio is negated so that a lower
# dynamic/static value scores higher
q5_signal = -df["lines_dynamic_to_static"]
df["q5_directional_dynamics_1to7"] = to_likert(q5_signal, noise=0.6)

# Q6 — Overall balance, with a larger noise level than the other items
q6_signal = df["BalanceIndex"]
df["q6_overall_balance_1to7"] = to_likert(q6_signal, noise=0.9)

# -----------------------------
# Pull direction + strength (cohesive semantics)
# -----------------------------
# Convert the offset columns to float once and reuse them for both outputs.
dx_vals = df["dx"].astype(float)
dy_vals = df["dy"].astype(float)

# Label every row from its (dx, dy) pair.
df["pull_direction"] = [
    pull_direction(off_x, off_y, thresh=0.03)
    for off_x, off_y in zip(dx_vals, dy_vals)
]

# Strength is driven by the Euclidean length of the offset.
mag = np.sqrt(dx_vals ** 2 + dy_vals ** 2)
df["pull_strength_1to7"] = magnitude_to_likert(mag, noise=0.08)

# Enforce: if direction is none -> strength must be 1
none_mask = df["pull_direction"] == "none"
df.loc[none_mask, "pull_strength_1to7"] = 1

# -----------------------------
# Metadata
# -----------------------------
# Constant columns: the same rater label and an empty note on every row.
df["rater_id"] = "simulated_v2_cohesive"
df["notes"] = ""

# -----------------------------
# Save qualitative CSV
# -----------------------------
# Output column order: identifiers, the six Likert items, pull fields, metadata.
id_cols = ["image", "set", "path"]
likert_cols = [
    "q1_weight_stability_1to7",
    "q2_symmetry_equilibrium_1to7",
    "q3_negative_space_support_1to7",
    "q4_compositional_placement_1to7",
    "q5_directional_dynamics_1to7",
    "q6_overall_balance_1to7",
]
pull_cols = ["pull_direction", "pull_strength_1to7"]
qual_cols = id_cols + likert_cols + pull_cols + ["notes", "rater_id"]

# Independent copy so later edits to qual_df do not touch df.
qual_df = df.loc[:, qual_cols].copy()

out_path = r"F:\New Dissertation - Image Generation\POC\outputs\balance_qualitative_simulated.csv"
qual_df.to_csv(out_path, index=False)

print(f"Wrote: {out_path}")
print(f"Rows: {len(qual_df)}")

# -----------------------------
# Cohesion check
# -----------------------------
# A row is inconsistent when it reports no pull direction but a strength above 1.
is_none = qual_df["pull_direction"] == "none"
too_strong = qual_df["pull_strength_1to7"] > 1
bad = qual_df[is_none & too_strong]

print("Inconsistent rows (should be 0):", len(bad))
if not bad.empty:
    # Show up to 20 offending rows for inspection.
    print(bad[["image", "pull_direction", "pull_strength_1to7"]].head(20))

# Last expression of the cell: preview of the first 20 output rows.
qual_df.head(20)


Wrote: F:\New Dissertation - Image Generation\POC\outputs\balance_qualitative_simulated.csv
Rows: 130
Inconsistent rows (should be 0): 0


Unnamed: 0,image,set,path,q1_weight_stability_1to7,q2_symmetry_equilibrium_1to7,q3_negative_space_support_1to7,q4_compositional_placement_1to7,q5_directional_dynamics_1to7,q6_overall_balance_1to7,pull_direction,pull_strength_1to7,notes,rater_id
0,10pm_feeding_around_the_clock.webp,real,data\real\images\10pm_feeding_around_the_clock...,3,1,4,4,4,5,down,3,,simulated_v2_cohesive
1,11pm.webp,real,data\real\images\11pm.webp,3,2,5,4,5,4,right,3,,simulated_v2_cohesive
2,12_4_7_10.webp,real,data\real\images\12_4_7_10.webp,5,5,4,4,5,6,none,1,,simulated_v2_cohesive
3,around_the_clock.webp,real,data\real\images\around_the_clock.webp,5,4,7,4,5,4,none,1,,simulated_v2_cohesive
4,around_the_clock_alizarin.webp,real,data\real\images\around_the_clock_alizarin.webp,2,3,5,5,6,5,up,3,,simulated_v2_cohesive
5,Belly_breast.webp,real,data\real\images\Belly_breast.webp,3,2,4,3,1,5,right,3,,simulated_v2_cohesive
6,Belly_breast_face_brain_placenta.webp,real,data\real\images\Belly_breast_face_brain_place...,4,4,4,3,3,2,up,3,,simulated_v2_cohesive
7,Bellyscape_and_plumb_bob.webp,real,data\real\images\Bellyscape_and_plumb_bob.webp,3,5,4,4,4,4,up,3,,simulated_v2_cohesive
8,Birth_perspective_from_above_and_below.webp,real,data\real\images\Birth_perspective_from_above_...,5,6,4,5,2,7,none,1,,simulated_v2_cohesive
9,Blue_0_with_red_halo.webp,real,data\real\images\Blue_0_with_red_halo.webp,4,7,4,4,6,4,none,1,,simulated_v2_cohesive
