Summarizes results of matches between finetuned adversaries and KataGo.

### Load libraries

In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports all modules
# before each cell runs, so edits to local helper modules take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import utils
from IPython.core.display import HTML
from IPython.display import display
from sgf_parser import game_info
from statsmodels.stats.proportion import proportion_confint

# Stack the style sheets in one call; matplotlib applies the list left to
# right, so settings from later entries override earlier ones.
plt.style.use(
    [
        "tableau-colorblind10",
        *(utils.get_style(sheet) for sheet in ("default", "1-col")),
    ]
)

### Parse data

In [3]:
# Load the games from every match directory below into one DataFrame.
# All directories share the same root; the third entry keeps its trailing
# slash so the path strings handed to the parser are unchanged.
# NOTE(review): no_victim_okay presumably tells the parser to accept games
# with no designated victim player -- confirm against utils.parse_sgfs.
df = utils.parse_sgfs(
    [
        f"/nas/ucb/k8/go-attack/match/{run_dir}"
        for run_dir in (
            "ttseng-eval-ft-vs-b60-20230524-160144",
            "ttseng-eval-ft-vs-b60-v1124-20230524-161516",
            "ttseng-eval-ft-vs-b60-20230526-140744/",
            "ttseng-eval-ft-vs-b60-v4096-20230526-140926",
            "ttseng-eval-b60ft-vs-cp505-20230526-152339",
            "ttseng-cyclic-vs-b60-s7702m-20230526-152118",
        )
    ],
    no_victim_okay=True,
)
# Sanity check: how many games each player name appears in, counting both the
# b_name and w_name columns.
pd.concat([df["b_name"], df["w_name"]]).value_counts()

b60-s7701m-v1600         1400
230520-s97528320-v600    1200
cp505-v1600              1000
230518-s34275840-v600     500
230520-s22809344-v600     500
230520-s75567360-v600     500
b60-s7701m-v4096          400
cyclic-adv-v600           400
b18-s5832m-v1600          300
dtype: int64

### Analyze data

In [4]:
# Per-adversary report: for each adversary name, print how many games it won
# against every distinct opponent it appears with in `df`.
for adv in [
    "cyclic-adv-v600",
    "230518-s34275840-v600",
    "230520-s22809344-v600",
    "230520-s75567360-v600",
    "230520-s97528320-v600",
]:
    # Every game in which this adversary is listed as b_name or w_name.
    sub_df = df[(df.b_name == adv) | (df.w_name == adv)]

    print(f"Adversary: {adv}")
    for opp in sorted(pd.concat([sub_df.b_name, sub_df.w_name]).unique()):
        if opp == adv:
            continue

        # Games between exactly this pair, whichever side each one played.
        # Plain boolean masks replace the previous string-assembled query
        # expression (which used placeholder-less f-strings).
        opp_df = sub_df[
            ((sub_df.b_name == opp) & (sub_df.w_name == adv))
            | ((sub_df.b_name == adv) & (sub_df.w_name == opp))
        ]
        tot_games = len(opp_df)
        if tot_games == 0:
            # Protects the win-rate division below.
            continue

        # A game counts as a win only when win_name equals the adversary;
        # any other value in win_name is not counted as a win.
        n_games_won = (opp_df.win_name == adv).sum()
        print(
            f"{adv} vs {opp}: {n_games_won}/{tot_games} ({n_games_won/tot_games:.2%})"
        )
    print()


Adversary: cyclic-adv-v600
cyclic-adv-v600 vs b60-s7701m-v1600: 0/400 (0.00%)

Adversary: 230518-s34275840-v600
230518-s34275840-v600 vs b18-s5832m-v1600: 20/100 (20.00%)
230518-s34275840-v600 vs b60-s7701m-v1600: 59/200 (29.50%)
230518-s34275840-v600 vs cp505-v1600: 130/200 (65.00%)

Adversary: 230520-s22809344-v600
230520-s22809344-v600 vs b18-s5832m-v1600: 13/100 (13.00%)
230520-s22809344-v600 vs b60-s7701m-v1600: 68/200 (34.00%)
230520-s22809344-v600 vs cp505-v1600: 107/200 (53.50%)

Adversary: 230520-s75567360-v600
230520-s75567360-v600 vs b18-s5832m-v1600: 15/100 (15.00%)
230520-s75567360-v600 vs b60-s7701m-v1600: 71/200 (35.50%)
230520-s75567360-v600 vs cp505-v1600: 102/200 (51.00%)

Adversary: 230520-s97528320-v600
230520-s97528320-v600 vs b60-s7701m-v1600: 187/400 (46.75%)
230520-s97528320-v600 vs b60-s7701m-v4096: 131/400 (32.75%)
230520-s97528320-v600 vs cp505-v1600: 268/400 (67.00%)

