# Eval Generation
---
This notebook takes our raw eval dataset and generates the various eval datasets that we can load in later.

These will be saved as parquet files in the `evals` folder.

In [1]:
import ast
import random
from pathlib import Path

import chess
import pandas as pd

In [2]:
# Load the raw eval CSV into a dataframe
file_path = './raw/evals_1k.csv'
df = pd.read_csv(file_path)

# These columns are read from the CSV as strings holding Python list literals;
# parse each one back into a real list
for list_column in ('Move', 'Win Probability'):
    df[list_column] = df[list_column].apply(ast.literal_eval)

### List all Legal Moves for piece / board state
---
Given a piece and a board state, list all legal moves for that piece (50 examples). We will measure:
- Recall: % of legal moves predicted ($\frac{\text{num legal moves predicted}}{\text{num total legal moves}}$)
- Precision: % of moves predicted that were legal ($\frac{\text{num legal moves predicted}}{\text{total num moves predicted}}$)
- % Error Rate (responses that do not follow the required answer format)

In [3]:
def get_piece_name_at_location(fen, location):
    """Describe the piece standing on a square as "<color> <piece name>".

    Args:
        fen: Board state, passed to ``chess.Board``.
        location: Square name, passed to ``chess.parse_square``.

    Returns:
        A string such as "white bishop", or None when the square is empty.
    """
    occupant = chess.Board(fen).piece_at(chess.parse_square(location))

    # Guard clause: nothing stands on the requested square
    if occupant is None:
        return None

    side = 'black' if occupant.color != chess.WHITE else 'white'
    # piece_type is an int (1-6); chess.piece_name maps it to a name like 'bishop'
    return f"{side} {chess.piece_name(occupant.piece_type)}"

In [None]:
# --- Configuration for the legal-moves eval ---
min_moves = 2          # a piece needs at least this many moves in the moveset to be eligible
desired_samples = 50   # stop once this many prompts have been collected
legalmoves_seed = 42   # fixed seed so re-running the notebook regenerates the same eval set
boards = []
prompts = []
answers = []

# Dedicated RNG instance: piece selection is reproducible without reseeding the
# global `random` state that other cells may rely on
legalmoves_rng = random.Random(legalmoves_seed)

# Shuffle dataframe (seeded, so the row order is reproducible) and make a deep copy of it
legalmoves_df = df.sample(
    frac=1, ignore_index=True, random_state=legalmoves_seed
).copy(deep=True)

# Iterate through and get samples
for _, row in legalmoves_df.iterrows():
    if len(prompts) >= desired_samples:
        break

    # Otherwise get possible moves from the board
    board = row['FEN']
    # NOTE(review): presumably row['Move'] lists every legal move for the position;
    # the stored answers are only complete if so -- verify against the raw dataset
    moveset = row['Move']

    # Count how many moves start from each origin square (first two characters of a move)
    piece_counts = {}
    for move in moveset:
        piece_pos = move[:2]
        piece_counts[piece_pos] = piece_counts.get(piece_pos, 0) + 1
    valid_pieces = [k for k, v in piece_counts.items() if v >= min_moves]
    if not valid_pieces:
        continue  # Skip if no valid pieces w/ enough moves

    # Sample a piece from the valid pieces using the seeded RNG
    piece = legalmoves_rng.choice(valid_pieces)
    piece_name = get_piece_name_at_location(board, piece)
    if piece_name is None:
        print(f"Piece not found at {piece} in FEN: {board}")
        continue
    prompt = f"Given the board state in FEN notation (starting with rank 8 decreasing to rank 1):\n{board}\n\nPlease give a list of all legal moves for the {piece_name} at {piece}.\n\nProvide your answer within answer tags <answer> [list of moves] </answer> where you give a list of moves. The moves should be in the format of position notation starting with the current square (e.g., b2c3 which means you move from position b2 to position c3). You're welcome to think through it step by step but must return your response in the answer tags."
    prompts.append(prompt)

    # The expected answer is every move in the moveset that starts on the chosen square
    legal_moves = [move for move in moveset if move.startswith(piece)]
    answers.append(legal_moves)
    boards.append(board) # Store board so we can visualize

# Create a new dataframe with the prompts and answers
df_new = pd.DataFrame({
    'board': boards,
    'prompt': prompts,
    'answer': answers
})
print(f"Number of samples generated for the legal moves eval: {len(df_new)}")

# Save the new dataframe to a parquet file, creating the output folder first so
# the write does not fail on a fresh checkout
output_dir = Path('./evals')
output_dir.mkdir(parents=True, exist_ok=True)
df_new.to_parquet(output_dir / f'legalmoves_{len(df_new)}.parquet', index=False)

Number of samples generated for the legal moves eval: 50


### Given a list of submoves, predict the worst move
---
