In [None]:
import numpy as np
import pandas as pd
import random

# Load the per-position table from positions.csv (path is relative to the
# current working directory).
# NOTE(review): the code further down reads the columns "is_dead_end",
# "distance_to_goal", "distance_from_wall", "x" and "y" -- confirm the CSV
# actually provides all of them.
df_positions = pd.read_csv("positions.csv")

# Score weights: the first two scale penalties, the last one scales a bonus.
weight_dead_end = 5  # multiplies the dead-end flag (largest weight)
weight_distance_to_goal = 3  # multiplies the distance to the goal
weight_distance_from_wall = 1  # multiplies the distance from the walls


# Function to evaluate the total score
def compute_total_score(row):
    """Return the weighted score of one position.

    ``row`` is any mapping-like object (a DataFrame row or a plain dict) that
    exposes ``"is_dead_end"``, ``"distance_to_goal"`` and
    ``"distance_from_wall"``. The dead-end flag and the distance to the goal
    lower the score; the distance from the walls raises it.
    """
    dead_end_term = -weight_dead_end * row["is_dead_end"]
    goal_term = -weight_distance_to_goal * row["distance_to_goal"]
    wall_term = weight_distance_from_wall * row["distance_from_wall"]
    # Summed left to right, in the same order as the three terms above.
    return dead_end_term + goal_term + wall_term

# Add a "total_score" column: axis=1 makes apply() call compute_total_score
# once per row, and the returned values become the new column.
df_positions["total_score"] = df_positions.apply(compute_total_score, axis=1)

# Generate preference pairs
def generate_preference_pairs(df, num_pairs=50000, random_state=None):
    """Sample random pairs of rows and order each pair by ``total_score``.

    All pairs are drawn in one vectorized step instead of calling
    ``df.sample`` once per pair, which keeps generation fast for large
    ``num_pairs``.

    Args:
        df: DataFrame providing the columns ``"x"``, ``"y"`` and
            ``"total_score"``.
        num_pairs: Number of pairs to generate. Zero or a negative value
            yields an empty list.
        random_state: Optional seed (anything accepted by
            ``numpy.random.default_rng``) that makes the sampling
            reproducible. ``None`` draws fresh entropy on every call.

    Returns:
        A list of ``num_pairs`` tuples
        ``(x_better, y_better, x_worse, y_worse, 1)``. The trailing ``1``
        marks the first position of the tuple as the preferred one. The two
        rows of a pair are always distinct rows of ``df``. When their scores
        tie, the second sampled row is reported as the better one.

    Raises:
        ValueError: If pairs are requested but ``df`` has fewer than two rows.
    """
    if num_pairs <= 0:
        return []

    n_rows = len(df)
    if n_rows < 2:
        raise ValueError(
            "Need at least two rows to build a preference pair, got "
            f"{n_rows}"
        )

    rng = np.random.default_rng(random_state)

    # Draw two distinct row positions per pair: the second index is drawn
    # from n_rows - 1 slots and shifted past the first one, which gives a
    # uniform choice among the remaining rows.
    first = rng.integers(0, n_rows, size=num_pairs)
    second = rng.integers(0, n_rows - 1, size=num_pairs)
    second += second >= first

    scores = df["total_score"].to_numpy()
    first_wins = scores[first] > scores[second]
    better = np.where(first_wins, first, second)
    worse = np.where(first_wins, second, first)

    xs = df["x"].to_numpy()
    ys = df["y"].to_numpy()
    # tolist() converts NumPy scalars to plain Python values.
    columns = zip(
        xs[better].tolist(),
        ys[better].tolist(),
        xs[worse].tolist(),
        ys[worse].tolist(),
    )
    # 1 indicates the preference
    return [(bx, by, wx, wy, 1) for bx, by, wx, wy in columns]

# Build 100,000 preference pairs from the scored positions; the explicit
# num_pairs argument overrides the function's default.
preference_pairs = generate_preference_pairs(df_positions, num_pairs=100000)

# Save the dataset: one row per pair, with the preferred position first,
# written to preferences.csv without the DataFrame index.
df_preferences = pd.DataFrame(preference_pairs, columns=["x_better", "y_better", "x_worse", "y_worse", "preference"])
df_preferences.to_csv("preferences.csv", index=False)

# Report completion on stdout.
print("Generation of preferences dataset compleated")


Generazione del dataset di preferenze completata!
