In [1]:
##IMPORTS##
import chess
import numpy as np
import pandas as pd

df = pd.read_csv('chessData.csv')

def gen_legal_moves(board,frac=1):
    legal_moves = board.legal_moves
    legal_moves = [str(move) for move in legal_moves]
    if frac == 1:
        return legal_moves
    else:
        return np.random.choice(legal_moves, math.floor(frac*len(legal_moves)), replace=False)

In [2]:
df['FEN'][130]

'8/8/3k1p2/1pp4p/3nPK2/1P4PP/3N4/8 b - - 1 45'

In [3]:
gen_legal_moves(chess.Board(df['FEN'][130]))

['d6e7',
 'd6d7',
 'd6c7',
 'd6e6',
 'd6c6',
 'd4e6',
 'd4c6',
 'd4f5',
 'd4f3',
 'd4b3',
 'd4e2',
 'd4c2',
 'f6f5',
 'h5h4',
 'c5c4',
 'b5b4']

In [6]:
##SYNTETIC DATA GENERATION##
def generate_synthetic_data_dict(N,size,df,seed=1):
    if size == 1:
        return generate_synthetic_data_single(N,seed,df)
    data = []
    np.random.seed(seed)
    for i in range(N):
        dict = {}
        random_list = np.random.multinomial(size, np.ones(size)/size, size=1)[0]
        board = chess.Board(df['FEN'][i])
        legal_moves = gen_legal_moves(board)
        for j in range(size):
            if random_list[j] != 0:
                dict[np.random.choice(legal_moves)] = random_list[j]
        data.append(dict)

    return data

def generate_synthetic_data_single(N,seed,df):
    data = []
    np.random.seed(seed)
    for i in range(N):
        board = chess.Board(df['FEN'][i])
        legal_moves = gen_legal_moves(board)
        data.append(np.random.choice(legal_moves))
    return data

path=("stockfish-windows-x86-64\stockfish\stockfish-windows-x86-64.exe")


def evaluate_move(board, move, path):
    #Evaluates the move before move and after move return the difference
    board1 = board.copy()
    board1.push(chess.Move.from_uci(move))
    board2 = board.copy()
    #Return differnce of board1 and board2


def amount_legal_moves_taken(l , df):
    amount = 0
    for i in range(len(l)):
        board = chess.Board(df['FEN'][i])
        legal_moves = gen_legal_moves(board)
        if l[i] in legal_moves:
            amount += 1
    return amount

def amount_index_zero(l,df):
    amount = 0
    for i in range(len(l)):
        board = chess.Board(df['FEN'][i])
        legal_moves = gen_legal_moves(board)
        if l[i] == legal_moves[0]:
            amount += 1
    return amount

def amount_mean_taken_over_mu_moves():
    pass


amount = amount_index_zero(generate_synthetic_data_dict(10,1,df,1),df)
amount 

1

In [7]:
import chess
import chess.engine
import pandas as pd
import numpy as np

def evaluate_move(fen,move):
    # Path to Stockfish executable
    stockfish_path = "stockfish-windows-x86-64\stockfish\stockfish-windows-x86-64.exe"
    
    with chess.engine.SimpleEngine.popen_uci(stockfish_path) as engine:
        engine.configure({"Threads": 8})  # Increased number of threads for stability
        # Create a chess board from FEN
        board = chess.Board(fen)
        # Determine which player is to move
        is_white_to_move = board.turn == chess.WHITE
        # Evaluate the initial position
        initial_info = engine.analyse(board, chess.engine.Limit(time=0.1))  # Increased time limit for stability
        initial_score = initial_info["score"].white().score(mate_score=10000)  # Get the POV score for White
        
        # If it's Black to move, invert the score
        if not is_white_to_move:
            initial_score = -initial_score
        
        board = chess.Board(fen)
        board.push(chess.Move.from_uci(move))
        info = engine.analyse(board, chess.engine.Limit(time=0.1))  # Increased time limit for stability
        move_score_uci = info["score"].white().score(mate_score=10000)  # Get the POV score for White
        # If it's Black to move, invert the score
        if not is_white_to_move:
            move_score_uci = -move_score_uci

        relative_move_difference = (move_score_uci - initial_score)/initial_score
        return relative_move_difference

def moves_dataframe(fen):   
    legal_moves = gen_legal_moves(chess.Board(fen))
        
    # Evaluate all legal moves
    move_differences = []
    for move in legal_moves:
        score = evaluate_move(fen, move)
        move_differences.append((move, score))
        
    
    # Convert to DataFrame
    moves_df = pd.DataFrame(move_differences, columns=['move', 'score_difference'])   
    return moves_df
        

def percentiles(moves_df):
        score_75th_percentile = np.percentile(moves_df['score_difference'], 75)
        score_25th_percentile = np.percentile(moves_df['score_difference'], 25)
        score_50th_percentile = np.percentile(moves_df['score_difference'], 50)
        # Return results
        result = {
            "score_75th_percentile": score_75th_percentile,
            "score_50th_percentile": score_50th_percentile,
            "score_25th_percentile": score_25th_percentile,}
        return result

def in_percentile(score,percentile):
    if score >= percentile["score_75th_percentile"]:
        return np.array([0,0,0,1])
    elif percentile["score_50th_percentile"] <= score < percentile["score_75th_percentile"]:
        return np.array([0,0,1,0])
    elif percentile["score_25th_percentile"] <= score < percentile["score_50th_percentile"]:
        return np.array([0,1,0,0])
    else:
        return np.array([1,0,0,0])
    
def percentile_distribution(moves,df):
    result = np.zeros(4)
    for i in range(len(moves)):
        moves_df = moves_dataframe(df['FEN'][i],moves[i],df)
        move_difference = evaluate_move(df['FEN'][i],moves[i])
        result += in_percentile(move_difference, percentiles(moves_df))
    return result

def stockfish_score_function(N, df):
    stockfish_score = []
    for i in range(N):
        fen = df['FEN'][i]
        moves_df = moves_dataframe(fen)
        stockfish_score.append(max(moves_df['score_difference']))
    return stockfish_score   


In [66]:
import numpy as np
from scipy.stats import wasserstein_distance

def permutation_test(sample1, sample2, num_permutations=1000):
    combined = np.concatenate([sample1, sample2])
    observed_distance = wasserstein_distance(sample1, sample2)
    
    permuted_distances = []
    for _ in range(num_permutations):
        np.random.shuffle(combined)
        permuted_sample1 = combined[:len(sample1)]
        permuted_sample2 = combined[len(sample1):]
        permuted_distances.append(wasserstein_distance(permuted_sample1, permuted_sample2))
    
    p_value = np.mean(np.array(permuted_distances) >= observed_distance)
    return observed_distance, p_value



In [1]:
import numpy as np
from evaluation_pipeline_copy import evaluate_move
def mean_amount_moves(list):
    #Loop through the list of dicts and count the amount of keys for each dict
    amount_moves = []
    for i in range(len(list)):
        amount_moves.append(len(list[i].keys()))
    return round(np.mean(amount_moves),2)



In [2]:
from evaluation_pipeline_copy import get_ensemble_output_dict_varried_size,get_ensemble_output_varried_size

size_names = [5,10,15,20]

In [3]:
for size in size_names:
    print(f"{size}: E[#Moves]:{mean_amount_moves(get_ensemble_output_dict_varried_size(size))}")

5: E[#Moves]:1.44
10: E[#Moves]:1.79
15: E[#Moves]:2.12
20: E[#Moves]:2.55


In [2]:
import pandas as pd
#from evaluation_pipeline_copy import score
df = pd.read_csv('chessData.csv')

In [3]:
from scipy.stats import gaussian_kde

#val5 = score(get_ensemble_output_varried_size(5),df)
#val10 = score(get_ensemble_output_varried_size(10),df)
#val15 = score(get_ensemble_output_varried_size(15),df)
#val20 = score(get_ensemble_output_varried_size(20),df)



In [20]:
sample1 = kde_5_val
sample2 = [kde_1_val,kde_10_val,kde_15_val,kde_20_val]
for sample in sample2:
    observed_distance, p_value = permutation_test(sample1, sample)
    print(f"Observed Distance: {observed_distance}, p-value: {p_value}")

Observed Distance: 9.386628076745352e-06, p-value: 0.1243
Observed Distance: 2.2491724331825117e-06, p-value: 0.9384
Observed Distance: 1.8359343524229726e-06, p-value: 0.9799
Observed Distance: 2.3617852770865823e-06, p-value: 0.9204


In [6]:
from variables import *
import numpy as np
score_differences_2 = [score_difference_2_1350,score_difference_2_1800,score_difference_2_2250,score_difference_2_2700,score_difference_2_3150]
score_differences_3 = [score_difference_3_1350,score_difference_3_1800,score_difference_3_2250,score_difference_3_2700,score_difference_3_3150]
score_differences_ensemble = [score_difference_ensemble_1350,score_difference_ensemble_1800,score_difference_ensemble_2250,score_difference_ensemble_2700,score_difference_ensemble_3150]
score_differences_random = [score_difference_random_1350,score_difference_random_1800,score_difference_random_2250,score_difference_random_2700,score_difference_random_3150]


In [7]:
kdes_2 = [gaussian_kde(score_diff, bw_method=0.5) for score_diff in score_differences_2]
kdes_3 = [gaussian_kde(score_diff, bw_method=0.5) for score_diff in score_differences_3]
kdes_ensemble = [gaussian_kde(score_diff, bw_method=0.5) for score_diff in score_differences_ensemble]
kdes_random = [gaussian_kde(score_diff, bw_method=0.5) for score_diff in score_differences_random]

kde_vals_2 = [kde(np.linspace(min(score_diff), max(score_diff), 1000)) for kde,score_diff in zip(kdes_2,score_differences_2)]
kde_vals_3 = [kde(np.linspace(min(score_diff), max(score_diff), 1000)) for kde,score_diff in zip(kdes_3,score_differences_3)]
kde_vals_ensemble = [kde(np.linspace(min(score_diff), max(score_diff), 1000)) for kde,score_diff in zip(kdes_ensemble,score_differences_ensemble)]
kde_vals_random = [kde(np.linspace(min(score_diff), max(score_diff), 1000)) for kde,score_diff in zip(kdes_random,score_differences_random)]

In [8]:
scores = [1350,1800,2250,2700,3150]
kde_vals_list = [kde_vals_2,kde_vals_3,kde_vals_ensemble,kde_vals_random]
levels = {0:1350,1:1800,2:2250,3:2700,4:3150}
entities = {0:"2",1:"3",2:"Ensemble",3:"Random"}

In [38]:
#Try every combination of the different models and levels
for i in range(len(kde_vals_list)):
    for j in range(len(kde_vals_list)):
        if i != j:
            print(f"Model: {entities[i]} vs Model: {entities[j]}")
            sample1 = kde_vals_list[i]
            sample2 = kde_vals_list[j]
            for u in range(len(sample1)):
                for v in range(len(sample2)):
                    observed_distance, p_value = permutation_test(sample1[u], sample2[v])
                    print(f"{levels[u]} vs {levels[v]}: Observed Distance: {observed_distance}, p-value: {p_value}")
                print("\n")


Model: 2 vs Model: 3
1350 vs 1350: Observed Distance: 1.1836076716361472e-05, p-value: 0.0148
1350 vs 1800: Observed Distance: 1.1808697176502977e-05, p-value: 0.014
1350 vs 2250: Observed Distance: 1.593983621715049e-05, p-value: 0.0002
1350 vs 2700: Observed Distance: 1.1898888817922867e-05, p-value: 0.0125
1350 vs 3150: Observed Distance: 6.993035745236677e-06, p-value: 0.1657


1800 vs 1350: Observed Distance: 1.181840954445609e-05, p-value: 0.0164
1800 vs 1800: Observed Distance: 1.1790781414797212e-05, p-value: 0.0136
1800 vs 2250: Observed Distance: 1.590371339328555e-05, p-value: 0.0007
1800 vs 2700: Observed Distance: 1.188132790727091e-05, p-value: 0.0147
1800 vs 3150: Observed Distance: 6.983680762272049e-06, p-value: 0.1665


2250 vs 1350: Observed Distance: 1.1798696574928963e-05, p-value: 0.0112
2250 vs 1800: Observed Distance: 1.1771571754397485e-05, p-value: 0.0126
2250 vs 2250: Observed Distance: 1.5912861614382397e-05, p-value: 0.0002
2250 vs 2700: Observed Distance: 

In [62]:
# Bootstrap KL-divergence function
from scipy.special import rel_entr
from sklearn.utils import resample

def bootstrap_kl(sample1, sample2, num_bootstraps=1000000, alpha=0.05):


    observed_kl = np.sum(rel_entr(sample1, sample2))

    bootstrap_kl_divs = []
    for _ in range(num_bootstraps):
        boot_sample1 = resample(sample1, replace=True)
        boot_sample2 = resample(sample2, replace=True)
        

        kl_div = np.sum(rel_entr(boot_sample1, boot_sample2))
        bootstrap_kl_divs.append(kl_div)

    lower_bound = np.percentile(bootstrap_kl_divs, 100 * alpha / 2)
    upper_bound = np.percentile(bootstrap_kl_divs, 100 * (1 - alpha / 2))

    significant = observed_kl < lower_bound or observed_kl > upper_bound
    return round(observed_kl, 3), (round(lower_bound, 3), round(upper_bound, 3)), significant

In [None]:
#Try every combination of the different models and levels
for i in range(len(kde_vals_list)):
    for j in range(len(kde_vals_list)):
        if i != j:
            print(f"Model: {entities[i]} vs Model: {entities[j]}")
            sample1 = kde_vals_list[i]
            sample2 = kde_vals_list[j]
            for u in range(len(sample1)):
                for v in range(len(sample2)):
                    observed_kl, p_value  = bootstrap_kl(sample1[u], sample2[v])
                    print(f"{levels[u]} vs {levels[v]}: Observed Divergence: {observed_kl}, p-value: {p_value}")
                print("\n")

In [None]:
from scipy.stats import entropy
from sklearn.utils import resample
def permutation_test_kl(sample1, sample2, num_permutations=10000):
    # Ensure samples are probability distributions
    
    combined = np.concatenate([sample1, sample2])
    observed_kl = entropy(sample1, sample2)
    
    permuted_kls = []
    for _ in range(num_permutations):
        np.random.shuffle(combined)
        permuted_sample1 = combined[:len(sample1)]
        permuted_sample2 = combined[len(sample1):]
        
        permuted_kls.append(entropy(permuted_sample1, permuted_sample2))
    
    permuted_kls = np.array(permuted_kls)
   
    p_value = np.mean(permuted_kls >= observed_kl)
    return round(float(observed_kl), 3), p_value


#Try every combination of the different models and levels
for i in range(len(kde_vals_list)):
    for j in range(len(kde_vals_list)):
        if i != j:
            print(f"Model: {entities[i]} vs Model: {entities[j]}")
            sample1 = kde_vals_list[i]
            sample2 = kde_vals_list[j]
            for u in range(len(sample1)):
                for v in range(len(sample2)):
                    observed_kl, p_value  = permutation_test_kl(sample1[u], sample2[v])
                    print(f"{levels[u]} vs {levels[v]}: Observed Divergence: {observed_kl}, p-value: {p_value}")
                print("\n")



In [78]:
l = [score_differences_2 ,score_differences_3 ,
score_differences_ensemble ,
score_differences_random ]

n_best = []
for list in l:
    l_best = []
    for elo in list:
        best = 0
        for score in elo:
            if score == 0:
                best += 1
        l_best.append(best)
    n_best.append(l_best)
n_best

[[7, 5, 6, 7, 9], [9, 7, 10, 8, 10], [12, 13, 15, 11, 14], [9, 6, 7, 7, 8]]

In [83]:
from evaluation_pipeline_copy import *
sizes = [5,10,15,20]
moves = [get_ensemble_output_varried_size(size) for size in sizes]
scores = []
file_names = ["score_difference_5_2250","score_difference_10_2250","score_difference_15_2250","score_difference_20_2250"]

score_diffs = []
for i in range(len(sizes)):
    move_scores,moves_df_list,score_difference = move_difference(moves[i],df,file_names[i],2250)
    score_diffs.append(score_difference)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92

In [107]:

moves_1_2250 = get_ensemble_output_varried_size(1) 

move_scores_1_2250,moves_df_list_1_2250,score_difference_1_2250 = move_difference(moves_1_2250,df,"move_difference_1_2250",2250)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118


In [95]:
kdes_sizes = [gaussian_kde(score_diff, bw_method=0.5) for score_diff in score_diffs]
kde_vals_size = [kde(np.linspace(min(score_diff), max(score_diff), 1000)) for kde,score_diff in zip(kdes_sizes,score_diffs)]
size_dict = {0:5,1:10,2:15,3:20}


for i in range(len(kde_vals_size)):
    print(f"{size_dict[i]} vs Ensemble: {sum(rel_entr(kde_vals_ensemble[3],kde_vals_size[i],))}")
    

5 vs Ensemble: 0.008642689162259867
10 vs Ensemble: 0.008115809372990756
15 vs Ensemble: 0.008105015144793365
20 vs Ensemble: 0.015937069130253775


In [110]:
kde_1_2250 = gaussian_kde(score_difference_1_2250, bw_method=0.5)
kde_vals_1_2250 = kde_1_2250(np.linspace(min(score_difference_1_2250), max(score_difference_1_2250), 1000))

for i in range(len(kde_vals_size)):
    observed_distance, p_value = permutation_test(kde_vals_size[-1], kde_vals_size[1])
    print(f"{size_dict[i]} vs 20: Observed Distance: {observed_distance}, p-value: {p_value}")

5 vs 20: Observed Distance: 8.724852260076264e-06, p-value: 0.133
10 vs 20: Observed Distance: 8.724852260076264e-06, p-value: 0.152
15 vs 20: Observed Distance: 8.724852260076264e-06, p-value: 0.161
20 vs 20: Observed Distance: 8.724852260076264e-06, p-value: 0.175


In [112]:
#Get the first 100 moves from score_difference_3_2250
score_diff_3_2250_100 = score_difference_3_2250[:100]
kde_score_diff_3_2250_100 = gaussian_kde(score_diff_3_2250_100, bw_method=0.5)
kde_vals_3_2250_100 = kde_score_diff_3_2250_100(np.linspace(min(score_diff_3_2250_100), max(score_diff_3_2250_100), 1000))

observed_distance, p_value = permutation_test(kde_vals_3_2250_100, kde_vals_size[0])
print(f"1 vs 5 Observed Distance: {observed_distance}, p-value: {p_value}")

observed_distance, p_value = permutation_test(kde_vals_3_2250_100, kde_vals_size[1])
print(f"1 vs 10 Observed Distance: {observed_distance}, p-value: {p_value}")

observed_distance, p_value = permutation_test(kde_vals_3_2250_100, kde_vals_size[2])
print(f"1 vs 15 Observed Distance: {observed_distance}, p-value: {p_value}")

observed_distance, p_value = permutation_test(kde_vals_3_2250_100, kde_vals_size[3])
print(f"1 vs 20 Observed Distance: {observed_distance}, p-value: {p_value}")


1 vs 5 Observed Distance: 3.5674610087531856e-05, p-value: 0.0
1 vs 10 Observed Distance: 3.5420589014549953e-05, p-value: 0.0
1 vs 15 Observed Distance: 3.605097880910819e-05, p-value: 0.0
1 vs 20 Observed Distance: 2.6727474894872545e-05, p-value: 0.0


In [103]:
#Try every combination of levels
for i in range(len(kde_vals_ensemble)):
    for j in range(len(kde_vals_ensemble)):
            observed_distance, p_value = permutation_test(kde_vals_ensemble[i], kde_vals_ensemble[j])
            print(f"{levels[i]} vs {levels[j]}: Observed Distance: {observed_distance}, p-value: {p_value}")


1350 vs 1350: Observed Distance: 0.0, p-value: 1.0
1350 vs 1800: Observed Distance: 3.155475286960651e-07, p-value: 1.0
1350 vs 2250: Observed Distance: 1.5063368809021796e-05, p-value: 0.043
1350 vs 2700: Observed Distance: 2.0626555504302411e-07, p-value: 1.0
1350 vs 3150: Observed Distance: 1.4875471580172042e-05, p-value: 0.035
1800 vs 1350: Observed Distance: 3.155475286960651e-07, p-value: 1.0
1800 vs 1800: Observed Distance: 0.0, p-value: 1.0
1800 vs 2250: Observed Distance: 1.4793990225513315e-05, p-value: 0.024
1800 vs 2700: Observed Distance: 3.3387524464137735e-07, p-value: 1.0
1800 vs 3150: Observed Distance: 1.4605139559113032e-05, p-value: 0.05
2250 vs 1350: Observed Distance: 1.5063368809021796e-05, p-value: 0.045
2250 vs 1800: Observed Distance: 1.4793990225513315e-05, p-value: 0.042
2250 vs 2250: Observed Distance: 0.0, p-value: 1.0
2250 vs 2700: Observed Distance: 1.5126742234827063e-05, p-value: 0.043
2250 vs 3150: Observed Distance: 2.011999850252221e-07, p-value: 1

In [114]:
for i in range(len(score_differences_ensemble)):
    print(np.std(score_differences_ensemble[i]), np.std(score_differences_random[i]), np.std(score_differences_ensemble[i])- np.std(score_differences_random[i]))

935.4679991938324 1562.0050978506636 -626.5370986568312
937.6390976629976 1562.2845400602582 -624.6454423972606
1310.5426944376632 1561.7516218594956 -251.2089274218324
935.8082889090306 1562.0205965038572 -626.2123075948266
1310.3157788463684 1561.784099282063 -251.46832043569452
