In [None]:
import numpy as np
import pandas as pd
import random
from tqdm import tqdm

# --------------------------------------------------------
# 1) Read the table of pre-scored positions from disk
# --------------------------------------------------------
df = pd.read_parquet("positions.parquet")

# --------------------------------------------------------
# 2) Composite-score weights
#    (a lower total_score marks a better point)
# --------------------------------------------------------
w_path = 0.3
w_wall = 0.2
w_goal = 0.2
w_deg = 0.8
w_gate = 0.1

# --------------------------------------------------------
# 3) Composite score: penalties are added, bonuses are subtracted.
#    The terms are combined left to right in this exact order.
# --------------------------------------------------------
df["total_score"] = (
    w_path * df["path_distance"]            # penalty: long shortest path
    + w_goal * df["distance_to_goal"]       # penalty: far from goal
    - w_deg * df["degree"]                  # bonus: along the main corridor / progress
    - w_wall * df["distance_from_wall"]     # bonus: far from walls (room to maneuver)
    + w_gate * df["distance_to_gate"]       # penalty: far from the vertical gate
)

# Sanity check: show the three best-scored rows, then the three worst-scored
for pick_extreme in (df.nsmallest, df.nlargest):
    print(pick_extreme(3, "total_score")[["x", "y", "degree", "total_score"]])


def sample_pairs(
    df: pd.DataFrame,
    num_pairs: int       = 200_000,
    hard_ratio: float    = 0.7,
    thresh: float        = 0.05,
    min_delta_deg: float = 0.001,      # minimum absolute gap in 'degree'
    max_trials: int      = 50_000_000  # guard against infinite loops
) -> list:
    """
    Draw up to `num_pairs` unique preference pairs by rejection sampling.

    Each pair is a tuple (x_better, y_better, x_worse, y_worse, 1), where the
    'better' row is the one with the lower `total_score`.

    Parameters
      • df            → frame with columns 'x', 'y', 'degree', 'total_score'
      • num_pairs     → number of unique pairs requested
      • hard_ratio    → fraction of 'hard' pairs (score gap below `thresh`);
                        the hard quota is filled first, then 'easy' pairs
      • thresh        → boundary between easy/hard by total_score difference
      • min_delta_deg → minimum absolute difference on the 'degree' column
      • max_trials    → upper bound on sampling attempts

    Returns a list of tuples compatible with the training format. The list
    may hold fewer than `num_pairs` entries when `max_trials` is exhausted or
    when `df` has fewer than two rows; a warning is printed in both cases.
    """
    # Everything below is positional: rows are drawn as offsets 0..n-1 into
    # these arrays, so the result does not depend on what df.index contains
    # (filtered, shuffled or non-integer indexes all work).
    scores  = df["total_score"].to_numpy()
    degrees = df["degree"].to_numpy()
    xs      = df["x"].to_numpy()
    ys      = df["y"].to_numpy()
    n_rows  = len(scores)

    pairs  = set()
    n_hard = int(num_pairs * hard_ratio)

    if n_rows < 2:
        if num_pairs > 0:
            print(f"⚠️  need at least 2 rows to build a pair, got {n_rows}.")
        return []

    pbar = tqdm(total=num_pairs, desc="sampling pairs")
    trials = 0
    try:
        while len(pairs) < num_pairs and trials < max_trials:
            trials += 1

            # Two distinct row positions in O(1): draw j from the n-1 slots
            # that remain once i is taken, then shift it past i.
            i = np.random.randint(n_rows)
            j = np.random.randint(n_rows - 1)
            if j >= i:
                j += 1

            si, sj = scores[i], scores[j]
            if si < sj:            # lower score = better
                better, worse = i, j
            elif sj < si:
                better, worse = j, i
            else:
                # Tie, or a NaN score: no meaningful preference.
                continue

            # --- 1) easy vs hard constraint by score gap ---
            is_hard   = bool(abs(si - sj) < thresh)
            want_hard = len(pairs) < n_hard
            if is_hard != want_hard:
                continue

            # --- 2) enforce minimum 'degree' separation ---
            # Written as `not (gap >= min)` so a NaN gap is rejected too.
            if not abs(degrees[better] - degrees[worse]) >= min_delta_deg:
                continue

            # --- 3) add the pair (the set keeps pairs unique) ---
            pair = (xs[better], ys[better], xs[worse], ys[worse], 1)
            if pair not in pairs:
                pairs.add(pair)
                pbar.update(1)
    finally:
        pbar.close()

    if len(pairs) < num_pairs:
        print(f"⚠️  only {len(pairs)} pairs after {trials} attempts "
              f"(min_delta_deg may be too high).")

    return list(pairs)


def sample_same_row_pairs(
    df: pd.DataFrame,
    n_pairs: int,
    min_dx: float = 0.25,
    max_trials: int = 50_000_000,
) -> list:
    """
    Draw pairs from approximately the same y-band (row) but sufficiently far apart in x.
    Useful for 'apples-to-apples' comparisons along a corridor row.

    Rows are binned on 'y' into bands of width 0.1 over (0, 1]; rows whose y
    falls outside the bins are ignored. A band is picked uniformly among the
    bands holding at least two rows, then two distinct rows are picked
    uniformly inside it. The draw is rejected when the rows are closer than
    `min_dx` in x or when their `total_score` is tied (no preference).

    Parameters
      • df         → frame with columns 'x', 'y', 'total_score'
      • n_pairs    → number of pairs requested (duplicates are allowed)
      • min_dx     → minimum absolute gap in 'x' between the two rows
      • max_trials → upper bound on sampling attempts, so the call always
                     terminates even when no draw can be accepted

    Returns a list of tuples (x_better, y_better, x_worse, y_worse, 1) where
    'better' is the row with the lower `total_score`. The list may be shorter
    than `n_pairs` (a warning is printed) if `max_trials` is exhausted or no
    band holds two rows.
    """
    bands = pd.cut(df["y"], bins=np.arange(0, 1.01, 0.1))

    # Materialise the usable bands once, as plain Python lists, instead of
    # rebuilding the group list and sampling a DataFrame on every attempt.
    # observed=True skips empty categories (and the related FutureWarning).
    usable_bands = [
        (g["x"].tolist(), g["y"].tolist(), g["total_score"].tolist())
        for _, g in df.groupby(bands, observed=True)
        if len(g) >= 2
    ]

    pairs = []
    if not usable_bands:
        if n_pairs > 0:
            print("⚠️  no y-band holds at least 2 rows; no same-row pairs drawn.")
        return pairs

    trials = 0
    while len(pairs) < n_pairs and trials < max_trials:
        trials += 1

        xs, ys, scores = random.choice(usable_bands)
        a, b = random.sample(range(len(xs)), 2)

        # Written as `not (gap >= min)` so a NaN gap is rejected too.
        if not abs(xs[a] - xs[b]) >= min_dx:
            continue

        if scores[a] < scores[b]:      # lower score = better
            better, worse = a, b
        elif scores[b] < scores[a]:
            better, worse = b, a
        else:
            # Tie, or a NaN score: no meaningful preference.
            continue

        pairs.append((xs[better], ys[better], xs[worse], ys[worse], 1))

    if len(pairs) < n_pairs:
        print(f"⚠️  only {len(pairs)} same-row pairs after {trials} attempts "
              f"(min_dx may be too high).")

    return pairs


# --------------------------------------------------------
# 4) Assemble the dataset: 40% same-row pairs, the remainder mixed pairs
#    (the mixed sampler is asked for 70% hard pairs)
# --------------------------------------------------------
n_total    = 200_000
n_same_row = int(n_total * 0.4)

pairs_row = sample_same_row_pairs(df, n_same_row)
# The mixed sampler tops the dataset up to n_total
n_mixed   = n_total - len(pairs_row)
pairs_mix = sample_pairs(df, n_mixed, hard_ratio=0.7)

pairs = pairs_row + pairs_mix

# --------------------------------------------------------
# 5) Write the preference table to disk
# --------------------------------------------------------
preference_columns = ["x_better", "y_better", "x_worse", "y_worse", "preference"]
preferences = pd.DataFrame(pairs, columns=preference_columns)
preferences.to_parquet("preferences.parquet", index=False)

print("✓ preferences.parquet saved with", len(pairs), "pairs")


             x         y    degree  total_score
2120  0.930761  0.852933  0.972222    -0.928902
1848  0.942394  0.873502  0.972222    -0.913331
1086  0.950295  0.853441  0.972222    -0.910157
             x         y  degree  total_score
1612  0.003462  0.063943     0.0     0.588210
3106  0.012176  0.095758     0.0     0.580507
29    0.070106  0.004755     0.0     0.576571


  grouped = df.groupby(pd.cut(df.y, bins=np.arange(0,1.01,0.1)))
sampling pairs: 100%|██████████| 120000/120000 [01:26<00:00, 1387.08it/s] 


✓ preferences.parquet saved with 200000 pairs
