In [2]:
import numpy as np
import pandas as pd
import random
from tqdm import tqdm 

# Read the positions dataset from disk
df_positions = pd.read_parquet("positions.parquet")

# Number of rows in the loaded dataset
points = df_positions.shape[0]

# Score weights used by compute_total_score
weight_path_distance = 7  # Largest weight: shortest path distance
weight_dead_end = 0  # Dead-end penalty weight; 0 turns the penalty off
weight_distance_to_goal = 3  # Weight on the distance to the goal
weight_distance_from_wall = 5  # Weight on the distance from walls


def compute_total_score(row):
    """Combine the features of ``row`` into a single weighted score.

    ``row`` is anything indexable by the keys ``"path_distance"``,
    ``"is_dead_end"``, ``"distance_to_goal"`` and ``"distance_from_wall"``.
    The first three features are subtracted (scaled by their weights) and
    the wall distance is added, so a higher result means a better position.
    """
    path_term = -weight_path_distance * row["path_distance"]
    dead_end_term = -weight_dead_end * row["is_dead_end"]
    goal_term = -weight_distance_to_goal * row["distance_to_goal"]
    wall_term = weight_distance_from_wall * row["distance_from_wall"]
    # Summed left to right in the same order as the individual terms above.
    return path_term + dead_end_term + goal_term + wall_term

# Adding column "total_score".
# compute_total_score only looks values up by column name and combines them
# arithmetically, so calling it once on the whole DataFrame produces the
# per-row scores column-wise instead of invoking Python once per row.
df_positions["total_score"] = compute_total_score(df_positions)

def _preference_pair(first, second):
    """Build the ``(x_better, y_better, x_worse, y_worse, 1)`` tuple for two records.

    ``first`` is preferred only when its ``total_score`` is strictly greater
    than that of ``second``; otherwise (including ties) ``second`` is preferred.
    """
    if first["total_score"] > second["total_score"]:
        better, worse = first, second
    else:
        better, worse = second, first
    return (better["x"], better["y"], worse["x"], worse["y"], 1)


def generate_preference_pairs(df, num_pairs=50000):
    """
    Generate preference pairs from a DataFrame.

    Each pair compares two distinct rows of ``df`` by their ``total_score``
    and is returned as ``(x_better, y_better, x_worse, y_worse, 1)``. When the
    scores are equal, the row that comes later in ``df`` is treated as the
    better one.

    If num_pairs equals the total number of possible unique pairs
    (N*(N-1)/2 for N = len(df)), ALL combinations are generated
    deterministically, in row order. Otherwise unordered row pairs are drawn
    at random (using NumPy's global random state) until num_pairs distinct
    row pairs have been collected; each row pair appears at most once.

    Args:
        df: DataFrame with the columns "x", "y" and "total_score".
        num_pairs: Number of preference pairs to produce. Values <= 0 yield
            an empty list.

    Returns:
        A list of ``(x_better, y_better, x_worse, y_worse, 1)`` tuples.

    Raises:
        ValueError: If num_pairs is larger than the number of unique row
            pairs that exist, which could never be satisfied.
    """
    points = len(df)
    # Integer arithmetic keeps the count exact regardless of the frame size.
    max_pairs = points * (points - 1) // 2

    # Without this guard the sampling loop below could never terminate.
    if num_pairs > max_pairs:
        raise ValueError(
            f"Cannot generate {num_pairs} unique pairs from {points} rows "
            f"(at most {max_pairs} unique pairs exist)"
        )

    # Convert the DataFrame to a list of dicts for faster repeated access
    records = df.to_dict(orient='records')

    # If we want to generate ALL possible pairs
    if num_pairs == max_pairs:
        pairs = []
        with tqdm(total=max_pairs, desc="Generating all preference pairs") as pbar:
            for i in range(points):
                first = records[i]
                for j in range(i + 1, points):
                    pairs.append(_preference_pair(first, records[j]))
                    pbar.update(1)
        return pairs

    # Otherwise, sample row-index pairs at random. Tracking the (low, high)
    # index pair, rather than the output tuple, guarantees that the same two
    # rows are never emitted twice, even when their scores are tied.
    seen = set()
    pairs = []
    with tqdm(total=num_pairs, desc="Generating preference pairs") as pbar:
        while len(pairs) < num_pairs:
            i, j = np.random.randint(points, size=2).tolist()
            if i == j:
                continue  # a row is never compared with itself
            key = (i, j) if i < j else (j, i)
            if key in seen:
                continue
            seen.add(key)
            pairs.append(_preference_pair(records[key[0]], records[key[1]]))
            pbar.update(1)

    return pairs


# Request every unordered pair of positions: N * (N - 1) / 2. Integer
# division keeps the count exact instead of round-tripping through a float.
num_pairs = points * (points - 1) // 2
# Generate preference pairs
preference_pairs = generate_preference_pairs(df_positions, num_pairs=num_pairs)

# Save the dataset
df_preferences = pd.DataFrame(
    preference_pairs,
    columns=["x_better", "y_better", "x_worse", "y_worse", "preference"],
)
df_preferences.to_parquet("preferences.parquet", index=False)

print("Generation of preferences dataset completed")


Generating all preference pairs: 100%|██████████| 1999000/1999000 [00:00<00:00, 4236433.09it/s]


Generation of preferences dataset completed
