In [3]:
import numpy as np, pandas as pd, random
from tqdm import tqdm

# --------------------------------------------------------
# 1) carica
# --------------------------------------------------------
df = pd.read_parquet("positions.parquet")

# --------------------------------------------------------
# 3) pesi
# --------------------------------------------------------
w_path, w_wall, w_goal, w_deg = 0.6, 0.15, 0.15, 0.15

# --------------------------------------------------------
# 4) score (minore è migliore)
# --------------------------------------------------------
df["total_score"] = (
      w_path * df.path_distance        #  + penalità: cammino lungo
    + w_goal * df.distance_to_goal        #  + penalità: lontano dal goal
    - w_deg  * df.degree         #  + penalità: vicolo cieco
    - w_wall * df.distance_from_wall        #  – bonus: punti “larghi”, lontani dai muri
)

# quick sanity
print(df.nsmallest(3,"total_score")[["x","y","degree","total_score"]])
print(df.nlargest (3,"total_score")[["x","y","degree","total_score"]])

# --------------------------------------------------------
# 5) genera 200k coppie (50 % facili / 50 % difficili)
# --------------------------------------------------------
def sample_pairs(df, num_pairs=200_000, hard_ratio=0.7, thresh=0.05):
    idx     = df.index.to_numpy()
    scores  = df["total_score"].to_numpy()
    pairs   = set()
    n_hard  = int(num_pairs * hard_ratio)
    pbar    = tqdm(total=num_pairs, desc="sampling pairs")
    while len(pairs) < num_pairs:
        i, j = np.random.choice(idx, 2, replace=False)
        si, sj = scores[i], scores[j]
        if si == sj:          # pari → ignora
            continue
        better, worse = (i, j) if si < sj else (j, i)  # score minore è migliore
        delta = abs(si - sj)
        want_hard = len(pairs) < n_hard
        if (want_hard and delta < thresh) or (not want_hard and delta >= thresh):
            pair = (df.at[better,"x"], df.at[better,"y"],
                    df.at[worse,"x"],  df.at[worse,"y"], 1)
            if pair not in pairs:
                pairs.add(pair); pbar.update(1)
    pbar.close()
    return list(pairs)

pairs = sample_pairs(df, num_pairs=300_000, hard_ratio=0.3)

# --------------------------------------------------------
# 6) salva
# --------------------------------------------------------
pd.DataFrame(pairs,
    columns=["x_better","y_better","x_worse","y_worse","preference"]
).to_parquet("preferences.parquet", index=False)

print("✓ preferences.parquet salvato con", len(pairs), "coppie")


             x         y  degree  total_score
6383  0.945350  0.863834     1.0    -0.260993
377   0.919748  0.850561     1.0    -0.255454
1843  0.934974  0.860986     1.0    -0.254876
             x         y    degree  total_score
6413  0.016242  0.000223  0.333333     0.699365
4117  0.001055  0.053445  0.333333     0.694302
6183  0.021039  0.004672  0.333333     0.687447


sampling pairs: 100%|██████████| 300000/300000 [01:01<00:00, 4863.87it/s] 


✓ preferences.parquet salvato con 300000 coppie
