In [2]:
##IMPORTS##
import chess
import numpy as np
import pandas as pd

df = pd.read_csv('chessData.csv')

def gen_legal_moves(board,frac=1):
    legal_moves = board.legal_moves
    legal_moves = [str(move) for move in legal_moves]
    if frac == 1:
        return legal_moves
    else:
        return np.random.choice(legal_moves, math.floor(frac*len(legal_moves)), replace=False)

In [3]:
##SYNTETIC DATA GENERATION##
def generate_synthetic_data_dict(N,size,df,seed=1):
    if size == 1:
        return generate_synthetic_data_single(N,seed,df)
    data = []
    np.random.seed(seed)
    for i in range(N):
        dict = {}
        random_list = np.random.multinomial(size, np.ones(size)/size, size=1)[0]
        board = chess.Board(df['FEN'][i])
        legal_moves = gen_legal_moves(board)
        for j in range(size):
            if random_list[j] != 0:
                dict[np.random.choice(legal_moves)] = random_list[j]
        data.append(dict)

    return data

def generate_synthetic_data_single(N,seed,df):
    data = []
    np.random.seed(seed)
    for i in range(N):
        board = chess.Board(df['FEN'][i])
        legal_moves = gen_legal_moves(board)
        data.append(np.random.choice(legal_moves))
    return data

path=("stockfish-windows-x86-64\stockfish\stockfish-windows-x86-64.exe")


def evaluate_move(board, move, path):
    #Evaluates the move before move and after move return the difference
    board1 = board.copy()
    board1.push(chess.Move.from_uci(move))
    board2 = board.copy()
    #Return differnce of board1 and board2


def amount_legal_moves_taken(l , df):
    amount = 0
    for i in range(len(l)):
        board = chess.Board(df['FEN'][i])
        legal_moves = gen_legal_moves(board)
        if l[i] in legal_moves:
            amount += 1
    return amount

def amount_index_zero(l,df):
    amount = 0
    for i in range(len(l)):
        board = chess.Board(df['FEN'][i])
        legal_moves = gen_legal_moves(board)
        if l[i] == legal_moves[0]:
            amount += 1
    return amount

def amount_mean_taken_over_mu_moves():
    pass


amount = amount_index_zero(generate_synthetic_data_dict(10,1,df,1),df)
amount 

  path=("stockfish-windows-x86-64\stockfish\stockfish-windows-x86-64.exe")


1

In [4]:
import chess
import chess.engine
import pandas as pd
import numpy as np

def evaluate_move(fen,move):
    # Path to Stockfish executable
    stockfish_path = "stockfish-windows-x86-64\stockfish\stockfish-windows-x86-64.exe"
    
    with chess.engine.SimpleEngine.popen_uci(stockfish_path) as engine:
        engine.configure({"Threads": 8})  # Increased number of threads for stability
        # Create a chess board from FEN
        board = chess.Board(fen)
        # Determine which player is to move
        is_white_to_move = board.turn == chess.WHITE
        # Evaluate the initial position
        initial_info = engine.analyse(board, chess.engine.Limit(time=0.1))  # Increased time limit for stability
        initial_score = initial_info["score"].white().score(mate_score=10000)  # Get the POV score for White
        
        # If it's Black to move, invert the score
        if not is_white_to_move:
            initial_score = -initial_score
        
        board = chess.Board(fen)
        board.push(chess.Move.from_uci(move))
        info = engine.analyse(board, chess.engine.Limit(time=0.1))  # Increased time limit for stability
        move_score_uci = info["score"].white().score(mate_score=10000)  # Get the POV score for White
        # If it's Black to move, invert the score
        if not is_white_to_move:
            move_score_uci = -move_score_uci

        relative_move_difference = (move_score_uci - initial_score)/initial_score
        return relative_move_difference

def moves_dataframe(fen):   
    legal_moves = gen_legal_moves(chess.Board(fen))
        
    # Evaluate all legal moves
    move_differences = []
    for move in legal_moves:
        score = evaluate_move(fen, move)
        move_differences.append((move, score))
        
    
    # Convert to DataFrame
    moves_df = pd.DataFrame(move_differences, columns=['move', 'score_difference'])   
    return moves_df
        

def percentiles(moves_df):
        score_75th_percentile = np.percentile(moves_df['score_difference'], 75)
        score_25th_percentile = np.percentile(moves_df['score_difference'], 25)
        score_50th_percentile = np.percentile(moves_df['score_difference'], 50)
        # Return results
        result = {
            "score_75th_percentile": score_75th_percentile,
            "score_50th_percentile": score_50th_percentile,
            "score_25th_percentile": score_25th_percentile,}
        return result

def in_percentile(score,percentile):
    if score >= percentile["score_75th_percentile"]:
        return np.array([0,0,0,1])
    elif percentile["score_50th_percentile"] <= score < percentile["score_75th_percentile"]:
        return np.array([0,0,1,0])
    elif percentile["score_25th_percentile"] <= score < percentile["score_50th_percentile"]:
        return np.array([0,1,0,0])
    else:
        return np.array([1,0,0,0])
    
def percentile_distribution(moves,df):
    result = np.zeros(4)
    for i in range(len(moves)):
        moves_df = moves_dataframe(df['FEN'][i],moves[i],df)
        move_difference = evaluate_move(df['FEN'][i],moves[i])
        result += in_percentile(move_difference, percentiles(moves_df))
    return result

def stockfish_score_function(N, df):
    stockfish_score = []
    for i in range(N):
        fen = df['FEN'][i]
        moves_df = moves_dataframe(fen)
        stockfish_score.append(max(moves_df['score_difference']))
    return stockfish_score   


  stockfish_path = "stockfish-windows-x86-64\stockfish\stockfish-windows-x86-64.exe"


In [8]:
###WASERSTEIN DISTANCE###
from scipy.stats import wasserstein_distance


llm_moves = generate_synthetic_data_dict(10,1,df)
llm_moves = [evaluate_move(df['FEN'][i],llm_moves[i]) for i in range(len(llm_moves))]
stockfish_moves = stockfish_score_function(10,df)
wasserstein_distance(llm_moves,stockfish_moves)


  stockfish_path = "stockfish-windows-x86-64\stockfish\stockfish-windows-x86-64.exe"


KeyboardInterrupt: 

In [None]:
wasserstein_distance(stockfish_moves,llm_moves)

np.float64(2295.7000000000003)

In [9]:
import numpy as np
from scipy.stats import wasserstein_distance

def permutation_test(sample1, sample2, num_permutations=1000):
    combined = np.concatenate([sample1, sample2])
    observed_distance = wasserstein_distance(sample1, sample2)
    
    permuted_distances = []
    for _ in range(num_permutations):
        np.random.shuffle(combined)
        permuted_sample1 = combined[:len(sample1)]
        permuted_sample2 = combined[len(sample1):]
        permuted_distances.append(wasserstein_distance(permuted_sample1, permuted_sample2))
    
    p_value = np.mean(np.array(permuted_distances) >= observed_distance)
    return observed_distance, p_value

# Example usage
sample1 = llm_moves
sample2 = llm_moves
observed_distance, p_value = permutation_test(sample1, sample2)
print(f"Observed Distance: {observed_distance}, p-value: {p_value}")


Observed Distance: 0.0, p-value: 1.0


In [100]:
##DATA PREPROCESSING##


moves_played = []
#Loop through all dicts in moves, append the key with the highest value to moves_played
for dict in moves:
    max_key = max(dict, key=dict.get)
    moves_played.append(max_key)

for i in range(len(moves_played)):
    if moves_played[i] not in gen_legal_moves(chess.Board(df['FEN'][i])):
        print(i)


37


In [1]:
def get_ensemble_output_dict():
    moves = []
    with open("ensemble_output.txt", "r") as file:
        for line in file:
            #Remove the everything before the first : and the space after
            line = line[line.find(':')+1:]
            #Remove the newline character
            line = line[:-1]
            #Split the line at all the commas
            line = line.split(',')
            dict = {}
            for element in line:
                if element in dict:
                    dict[element] += 1
                else:
                    dict[element] = 1
            moves.append(dict)
    return moves

def get_ensemble_output():
    d_moves = get_ensemble_output_dict()
    moves = []
    #Loop through all dicts in moves, append the key with the highest value to moves_played
    for dict in d_moves:
        max_key = max(dict, key=dict.get)
        moves.append(max_key)
    return moves

get_ensemble_output()

['c4d5',
 'a7a8',
 'e3g5',
 'e6e8',
 'e7f8',
 'b5c6',
 'd7c5',
 'd5e6',
 'g5h7',
 'e4f5',
 'g4h6',
 'f1c4',
 'b3d5',
 'd4e6',
 'c6d7',
 'd8e7',
 'd1d6',
 'd3e2',
 'd5e6',
 'e4d5',
 'g5h7',
 'e3d4',
 'e2f4',
 'd2e4',
 'f8e7',
 'b2c3',
 'e7f8',
 'c5d7',
 'f8e8',
 'e2e8',
 'd4d8',
 'g8h8',
 'f5e4',
 'e7b7',
 'e1d1',
 'g4g5',
 'g6g5',
 'e6e8',
 'e6e5',
 'd6c8',
 'd4a4',
 'b6a8',
 'g6f4',
 'f6d7',
 'c1e3',
 'h8h7',
 'd3d4',
 'g6g5',
 'd8e7',
 'd6d5',
 'b4b5',
 'e6e5',
 'a6c8',
 'g2e4',
 'a3a4',
 'c7d7',
 'b7g2',
 'f6g4',
 'c3f6',
 'h3f4',
 'g6f7',
 'h6h5',
 'e6g6',
 'h7h5',
 'e6c8',
 'b5b3',
 'b2b4',
 'e2e1',
 'b6b2',
 'c1f4',
 'c8b7',
 'e2c3',
 'a7d4',
 'c8d7',
 'd3f1',
 'a1f1',
 'f3e1',
 'e1d2',
 'e3h6',
 'e3c1',
 'f5g4',
 'a8a3',
 'd3d7',
 'c6c2',
 'g1h1',
 'a6a5',
 'e6e5',
 'c3c4',
 'b1a1',
 'd2e2',
 'd1c1',
 'f6d5',
 'h1h2',
 'e8d7',
 'd1d2',
 'd5c7',
 'd8d7',
 'e6c4',
 'e8c8',
 'f5h3',
 'f6d7',
 'c8d7',
 'd1e2',
 'c6c5',
 'e2d4',
 'c8d7',
 'd3e3',
 'd7c6',
 'f1e1',
 'd3g3',
 'a8d8',
 

In [None]:
##DATA ANALYSIS##