# Eval Generation
---
This notebook takes our source dataset and generates the various eval datasets that we can later load.

Each eval dataset is saved as a parquet file in the `./processed_rejsampling/` folder.

In [1]:
import ast
import random
import pandas as pd

from board import get_piece_name_at_location, convert_board

In [2]:
# Load the raw data from CSV into a dataframe
file_path = './deepmind_data/train_20k.csv'
df = pd.read_csv(file_path)

# 'Move' and 'Win Probability' come back from the CSV as strings holding
# Python literals, so parse every entry back into a Python object
for literal_col in ('Move', 'Win Probability'):
    df[literal_col] = df[literal_col].apply(ast.literal_eval)

# Notation passed to convert_board when prompts are built
board_notation = "visual"    # {fen, spaced_fen, visual_simple, visual}

### List all Legal Moves for piece / board state
---
Given a piece and a board state, list all legal moves — 500 examples. Will measure:
- % of legal moves predicted ($\frac{\text{num legal moves predicted}}{\text{num total legal moves}}$)
- % of moves predicted that were legal ($\frac{\text{num legal moves predicted}}{\text{total num moves predicted}}$)
- % Error Rate (not following parse directions)

In [3]:
# Settings: a square is eligible only if at least `min_moves` moves start
# from it; generation stops once `desired_samples` prompts are collected
min_moves = 2
desired_samples = 500
boards, prompts, answers = [], [], []

# Work on a shuffled deep copy of the source dataframe
legalmoves_df = df.sample(frac=1, ignore_index=True).copy(deep=True)

# Walk the shuffled rows until enough samples have been gathered
for board, moveset in zip(legalmoves_df['FEN'], legalmoves_df['Move']):
    if len(prompts) >= desired_samples:
        break

    # Starting squares (first two characters of every move), de-duplicated in
    # order of first appearance and kept only when enough moves start there
    origins = [move[:2] for move in moveset]
    valid_pieces = [
        square for square in dict.fromkeys(origins)
        if origins.count(square) >= min_moves
    ]
    if len(valid_pieces) == 0:
        continue  # No square on this board has enough moves

    # Pick one eligible square at random and look up the piece standing on it
    piece = random.choice(valid_pieces)
    piece_name = get_piece_name_at_location(board, piece)
    if piece_name is None:
        print(f"Piece not found at {piece} in FEN: {board}")
        continue

    # Build the prompt text and record it
    prompt = f"""Please analyze the following board:
{convert_board(board, board_notation)}

Please give a list of all legal moves for the {piece_name} at {piece}.

Provide your answer within answer tags <answer> [list of moves] </answer> where you give a list of moves. 

The moves should be in the format of position notation starting with the current square (e.g., b2c3 which means you move from position b2 to position c3). 

You're welcome to think through it step by step but must return your response in the answer tags."""
    prompts.append(prompt)

    # Ground truth: every move in the moveset that starts from the chosen square
    answers.append([candidate for candidate in moveset if candidate.startswith(piece)])
    boards.append(board)  # Kept so the position can be visualized later

# Assemble the collected samples into a dataframe
df_new = pd.DataFrame({'board': boards, 'prompt': prompts, 'answer': answers})
print(f"Number of samples generated for the legal moves eval: {len(df_new)}")

# Write the samples to parquet; the filename encodes notation and sample count
legalmoves_path = f'./processed_rejsampling/legalmoves_{board_notation}_{len(df_new)}.parquet'
df_new.to_parquet(legalmoves_path, index=False)

Number of samples generated for the legal moves eval: 500


### Given a list of submoves, predict the worst move
---


In [4]:
# Settings: number of moves shown per prompt, number of samples to generate,
# and how far above the worst move's win probability every other shown move must be
provided_moves = 5
desired_samples = 500
worst_move_thresh = 0.3

boards, prompts, answers = [], [], []

# Work on a shuffled deep copy of the source dataframe
submoves_df = df.sample(frac=1, ignore_index=True).copy(deep=True)

# Walk the shuffled rows until enough samples have been gathered
for board, moveset, win_probs in zip(
    submoves_df['FEN'], submoves_df['Move'], submoves_df['Win Probability']
):
    if len(prompts) >= desired_samples:
        break

    # Pair every move with its win probability and find the lowest-rated pair
    move_prob_pairs = list(zip(moveset, win_probs))
    worst_move, worst_move_win_prob = min(move_prob_pairs, key=lambda pair: pair[1])

    # Keep only moves whose win probability exceeds the worst move's by more
    # than the threshold
    cutoff = worst_move_thresh + worst_move_win_prob
    filtered_moves = [move for move, prob in move_prob_pairs if prob > cutoff]

    # Skip boards that cannot supply enough alternative moves
    num_others = provided_moves - 1
    if len(filtered_moves) < num_others:
        continue

    # Draw the alternatives, add the worst move, then shuffle the order shown
    sampled_moves = random.sample(filtered_moves, num_others) + [worst_move]
    random.shuffle(sampled_moves)

    # Build the prompt text
    prompt = f"""Please analyze the following board:
{convert_board(board, board_notation)}

From the following moves: {sampled_moves}, choose the worst move and return it within answer tags such that your response is <answer> 'worst_move' </answer>. 

You may think through it step by step but must end with your response in the answer tags."""

    # Record the sample; the answer is the stringified (worst move, shown moves) tuple
    boards.append(board)
    prompts.append(prompt)
    answers.append(str((worst_move, sampled_moves)))

# Assemble the collected samples into a dataframe
df_new = pd.DataFrame({'board': boards, 'prompt': prompts, 'answer': answers})
print(f"Number of samples generated for the worst move eval: {len(df_new)}")

# Write the samples to parquet; the filename encodes notation and sample count
worstmove_path = f'./processed_rejsampling/worstmove_{board_notation}_{len(df_new)}.parquet'
df_new.to_parquet(worstmove_path, index=False)

Number of samples generated for the worst move eval: 500


### Given a list of submoves, predict the best move

In [5]:
# Settings: number of moves shown per prompt, number of samples to generate,
# and how far below the best move's win probability every other shown move must be
provided_moves = 5
desired_samples = 500
best_move_thresh = 0.3

boards, prompts, answers = [], [], []

# Work on a shuffled deep copy of the source dataframe
submoves_df = df.sample(frac=1, ignore_index=True).copy(deep=True)

# Walk the shuffled rows until enough samples have been gathered
for board, moveset, win_probs in zip(
    submoves_df['FEN'], submoves_df['Move'], submoves_df['Win Probability']
):
    if len(prompts) >= desired_samples:
        break

    # Pair every move with its win probability and find the highest-rated pair
    move_prob_pairs = list(zip(moveset, win_probs))
    best_move, best_move_win_prob = max(move_prob_pairs, key=lambda pair: pair[1])

    # Keep only moves whose win probability falls short of the best move's by
    # more than the threshold
    ceiling = best_move_win_prob - best_move_thresh
    filtered_moves = [move for move, prob in move_prob_pairs if prob < ceiling]

    # Skip boards that cannot supply enough alternative moves
    num_others = provided_moves - 1
    if len(filtered_moves) < num_others:
        continue

    # Draw the alternatives, add the best move, then shuffle the order shown
    sampled_moves = random.sample(filtered_moves, num_others) + [best_move]
    random.shuffle(sampled_moves)

    # Build the prompt text
    prompt = f"""Please analyze the following board:
{convert_board(board, board_notation)}

From the following moves: {sampled_moves}, choose the best move and return it within answer tags such that your response is <answer> 'best_move' </answer>. 

You may think through it step by step but must end with your response in the answer tags."""

    # Record the sample; the answer is the stringified (best move, shown moves) tuple
    boards.append(board)
    prompts.append(prompt)
    answers.append(str((best_move, sampled_moves)))

# Assemble the collected samples into a dataframe
df_new = pd.DataFrame({'board': boards, 'prompt': prompts, 'answer': answers})
print(f"Number of samples generated for the best move eval: {len(df_new)}")

# Write the samples to parquet; the filename encodes notation and sample count
bestmove_path = f'./processed_rejsampling/bestmove_{board_notation}_{len(df_new)}.parquet'
df_new.to_parquet(bestmove_path, index=False)

Number of samples generated for the best move eval: 500


### Given a board (no legal moves provided), give a move

In [6]:
# Define desired num samples and initial setup
desired_samples = 500
min_possible_moves = 3  # Boards with fewer moves than this are skipped

boards = []
prompts = []
answers = []

# Shuffle dataframe and make a deep copy of it
predmove_df = df.sample(frac=1, ignore_index=True).copy(deep=True)

# Loop through this cell's own shuffled copy and get eval samples.
# NOTE: this previously iterated `submoves_df`, a variable left over from an
# earlier cell, so `predmove_df` was never used and the cell could not run
# without that earlier cell having been executed first.
for index, row in predmove_df.iterrows():
    if len(prompts) >= desired_samples:
        break

    # Get board state, moveset, and win probabilities
    board = row['FEN']
    moveset = row['Move']
    win_probs = row['Win Probability']

    if len(moveset) < min_possible_moves:
        continue

    # Ground truth is the stringified dict mapping each move to its win probability
    move_prob_pairs = str(dict(zip(moveset, win_probs)))

    prompt = f"""Please analyze the following board:
{convert_board(board, board_notation)}

Choose the best move and provide it within answer tags such that your response is <answer> 'my_move' </answer>.

Your move should be in the format of position notation starting with the current square (e.g., b2c3 which means you move from position b2 to position c3).

You may think through it step by step but must end with your response in the answer tags."""

    prompts.append(prompt)
    answers.append(move_prob_pairs)
    boards.append(board)  # Store board so we can visualize

# Create a new dataframe with the prompts and answers
df_new = pd.DataFrame({
    'board': boards,
    'prompt': prompts,
    'answer': answers
})
print(f"Number of samples generated for the predict move eval: {len(df_new)}")

# Save the new dataframe to a parquet file
df_new.to_parquet(f'./processed_rejsampling/predictmove_{board_notation}_{len(df_new)}.parquet', index=False)

Number of samples generated for the predict move eval: 500
