# Import required libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load data

In [None]:
# Path to the metrics CSV analysed by the rest of this notebook
# (presumably validation metrics for the Maia-1600 tactics, judging by the file name).
data_filename = 'tactics/data/stats/metrics_valid_maia1600.csv'

# Read the whole CSV into a DataFrame; every later analysis cell starts from `df`.
df = pd.read_csv(filepath_or_buffer=data_filename)

# Do analysis

In [None]:
# Display pandas' summary statistics for the loaded metrics frame as the cell output.
df.describe()

## Group by tactic

In [None]:
# Group the per-row metrics by tactic; `df2` is the GroupBy object and `agg`
# holds the per-tactic column sums that the following cells build on.
df2 = df.groupby('tactic_text')

# Use the string alias 'sum' rather than the callable np.sum: recent pandas
# versions warn on the callable form and recommend the alias to keep the
# current behaviour. (The former mid-cell `df2.describe()` was dropped: its
# result was neither displayed nor stored.)
agg = df2.aggregate('sum')
agg

In [None]:
# Average divergence per tactic: summed divergence divided by the number of matches.
agg['avg_divergence'] = agg['divergence'] / agg['matches']

# Distribution of the per-tactic average divergence.
plt.hist(agg['avg_divergence'], bins=10)

# Dashed vertical line at the value of the `legal_move` tactic, used as the
# reference in the sibling coverage/accuracy cells as well. This expression had
# been spliced onto the end of the plt.hist(...) line, leaving axvline with an
# empty first argument (a SyntaxError); it is restored to its intended place.
plt.axvline(agg.loc[["f(A,B,C):-legal_move(B,C,A)"]]['avg_divergence'].values, linestyle='dashed')

plt.title('Histogram of Divergence $(T_{1600},$ Stockfish 14, $P_{test})$')
plt.xlabel('Divergence (Cp)')
plt.ylabel('Frequency')

In [None]:
# Look up the average divergence of the `legal_move` tactic directly on `df`.
# NOTE(review): this reads columns 'text' and 'avg_divergence' from `df`, whereas
# the other cells group `df` by 'tactic_text' and only add 'avg_divergence' to
# `agg` — confirm these columns exist in the CSV or whether this cell is stale.
df.loc[df['text'] == "f(A,B,C):-legal_move(B,C,A)", 'avg_divergence'].values

In [None]:
# Histogram of the 'avg' column of `df`, with a dashed line at the `legal_move`
# tactic's 'avg_divergence'.
# NOTE(review): 'avg', 'text' and 'avg_divergence' are not columns that any other
# visible cell reads from or writes to `df` — verify against the CSV schema.
plt.hist(df['avg'], bins=10)
plt.axvline(
    df.loc[df['text'] == "f(A,B,C):-legal_move(B,C,A)", 'avg_divergence'].values,
    linestyle='dashed',
)

In [None]:
# Coverage per tactic: its match count divided by the number of distinct
# (position, move) pairs present in `df`.
agg['coverage'] = agg['matches'].div(df.groupby(['position', 'move']).ngroups)

# Distribution of coverage across tactics.
plt.hist(agg['coverage'], bins=10)

# Dashed reference line at the `legal_move` tactic's coverage.
plt.axvline(
    agg.loc[["f(A,B,C):-legal_move(B,C,A)"], 'coverage'].values,
    linestyle='dashed',
)

plt.title('Histogram of Coverage $(T_{1600}$, $P_{test})$')
plt.xlabel('Coverage')
plt.ylabel('Frequency')

In [None]:
# Accuracy per tactic: summed 'correct_move' divided by the number of matches.
agg['accuracy'] = agg['correct_move'].div(agg['matches'])

# Distribution of accuracy across tactics.
plt.hist(agg['accuracy'], bins=10)

# Dashed reference line at the `legal_move` tactic's accuracy.
plt.axvline(
    agg.loc[["f(A,B,C):-legal_move(B,C,A)"], 'accuracy'].values,
    linestyle='dashed',
)

plt.title('Histogram of Accuracy $(T_{1600}$, $P_{test})$')
plt.xlabel('Accuracy')
plt.ylabel('Frequency')

In [None]:
# Distribution of the summed tactic evaluations across tactics.
plt.hist(agg['tactic_evals'], bins=10)

# Three dashed reference lines, all read from the `legal_move` tactic's row:
# one per evaluation column, each with its own colour and legend label.
for ref_column, ref_color, ref_label in (
    ('tactic_evals', 'blue', 'Random tactic evaluation'),
    ('ground_evals', 'green', 'Ground move evaluation'),
    ('best_move_evals', 'red', 'Stockfish 14 best move evaluation'),
):
    plt.axvline(
        agg.loc[["f(A,B,C):-legal_move(B,C,A)"], ref_column].values,
        linestyle='dashed',
        color=ref_color,
        label=ref_label,
    )

plt.xlabel('Total Evaluation Score (Cp)')
plt.ylabel('Frequency')
plt.legend(loc=1)

In [None]:
# Rank tactics by average divergence, smallest first (ascending is the default order).
final = agg.sort_values('avg_divergence')
final

In [None]:
# Tactic names (the index of `final`) in ranked order, as a plain Python list.
final.index.tolist()

# Calculate Dataset Metrics

In [None]:
from collections import Counter

# PGN dump that the dataset-metric cells below iterate over.
pgn_path = 'tactics/data/lichess_db_standard_rated_2013-01.pgn'

# Running tallies filled in by the PGN passes further down; all start at zero.
total_games = valid_elo = total_pos = total_elo = 0

# Number of games seen per 'Event' header value.
game_count = Counter()

In [None]:
# Imported here because this is the first cell that touches chess.pgn; the
# notebook's own `import chess.pgn` only appears in a later cell, so on a fresh
# kernel this cell used to fail with a NameError.
import chess.pgn

offsets = []

# Read just the headers of the first game in the PGN file. The context manager
# closes the file afterwards (the handle was previously left open); `handle`
# stays bound, but refers to a closed file once this cell finishes.
with open(pgn_path) as handle:
    header = chess.pgn.read_headers(handle)

In [None]:
# Show the 'Termination' header of the game whose headers were read above
# (`.get` yields None rather than raising when the tag is absent).
# NOTE(review): presumably `header` itself could be None for an empty PGN file — confirm.
header.get('Termination')


In [None]:
import chess.pgn

# First pass over the PGN file: count games, accumulate Elo ratings and tally
# games per event.
#
# The tallies are reset here (they are also initialised in an earlier cell) so
# that re-running this cell starts from zero instead of adding a second pass on
# top of the first.
total_games = 0
valid_elo = 0
total_elo = 0
game_count = Counter()

with open(pgn_path) as pgn:
    # read_game returns a falsy value once the file is exhausted, ending the loop.
    while game := chess.pgn.read_game(pgn):
        total_games += 1

        # A missing Elo tag is treated like the '?' placeholder: the game is
        # still counted, but contributes nothing to the Elo totals.
        white_elo = game.headers.get('WhiteElo', '?')
        black_elo = game.headers.get('BlackElo', '?')
        if '?' not in white_elo and '?' not in black_elo:
            total_elo += int(white_elo)
            total_elo += int(black_elo)
            # valid_elo counts GAMES with two usable ratings, not players.
            valid_elo += 1

        game_count[game.headers['Event']] += 1

In [None]:
print('Total games', total_games)

# Every game counted in valid_elo contributed two ratings (white and black)
# to total_elo, so the mean is taken over twice that many values.
rated_players = 2 * valid_elo
avg_elo = total_elo / rated_players
print('Average ELO', avg_elo)

In [None]:
# Second pass over the PGN file: accumulate the sum of squared deviations of
# every usable rating from avg_elo. Despite its name, `variance` holds this raw
# SUM; the division by the number of ratings happens where the SD is printed.
variance = 0

with open(pgn_path) as pgn:
    while game := chess.pgn.read_game(pgn):
        # NOTE: unlike the first pass, this loop must not touch total_games or
        # game_count. The copy-pasted increments that used to be here counted
        # every game a second time, doubling both tallies.
        white_elo = game.headers.get('WhiteElo', '?')
        black_elo = game.headers.get('BlackElo', '?')
        if '?' not in white_elo and '?' not in black_elo:
            variance += (int(white_elo) - avg_elo) ** 2
            variance += (int(black_elo) - avg_elo) ** 2

In [None]:
import math

# `variance` is a sum of squared deviations over BOTH players of each game with
# usable ratings, while valid_elo counts games. Divide by 2 * valid_elo (the
# same count the mean was computed over); dividing by valid_elo alone inflated
# the standard deviation by a factor of sqrt(2).
print('SD', math.sqrt(variance / (2 * valid_elo)))

# Test Evaluation

In [None]:
import chess
import chess.engine
from tactics.util import *

# Position under test, given as a FEN string.
board = chess.Board('r1bqk1nr/ppp2ppp/3b4/3p4/8/2PNP3/PP3PPP/RNBQKB1R b KQkq - 2 7')

# Two candidate moves from g8 to compare (to h6 and to f6).
move1, move2 = (chess.Move.from_uci(uci) for uci in ('g8h6', 'g8f6'))

# Engine command line: the lc0 command built by get_lc0_cmd plus an extra flag.
# Alternative kept from the original cell: engine_path = STOCKFISH
engine_path = get_lc0_cmd(LC0, MAIA_1900) + ['--verbose-move-stats']

# mate_score: value handed to Score.score() so mate scores map to a number.
# n: number of principal variations requested from the engine.
mate_score, n = 2000, 5

In [None]:
# Start the engine, analyse `board` at depth 1 with n principal variations, and
# collect (first move of the line, score from the side to move) for each line.
with chess.engine.SimpleEngine.popen_uci(engine_path) as engine:
    analysis = engine.analyse(
        board,
        limit=chess.engine.Limit(depth=1),
        multipv=n,
        game=object(),  # a fresh object is passed as the game identity on every run
    )

    top_results = []
    for info in analysis:
        first_move = info['pv'][0]
        relative_score = info['score'].relative.score(mate_score=mate_score)
        top_results.append((first_move, relative_score))

    # Keep at most the first n entries.
    top_n_results = top_results[:n]

In [None]:
# Display the (move, score) pairs collected by the analysis cell above.
top_n_results

In [None]:
# Start a fresh engine process and evaluate the two candidate moves on `board`.
# get_evals comes from the star import of tactics.util; its return format is not
# visible here — presumably one evaluation per move, TODO confirm.
with chess.engine.SimpleEngine.popen_uci(engine_path) as engine:
    evals = get_evals(engine, board, [move1, move2])
# Show the result as the cell output.
evals