This notebook parses SGF files generated by selfplay (or victimplay).

It presents the files in a searchable and filterable way,
and allows one to easily export files for viewing in Sabaki.

### Load libraries

In [None]:
import dataclasses
import os
import posix
import random
import sys
from typing import List

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from tqdm.auto import tqdm
from tqdm.contrib.concurrent import process_map

In [None]:
# Add the KataGo-custom python directory to the module search path.
# NOTE(review): the path is relative, so it resolves against the kernel's
# working directory — confirm the notebook is run from the expected folder.
sys.path.append("../engines/KataGo-custom/python")

from sgfmill import sgf, sgf_moves

### Load games

In [None]:
# Root directory; every immediate subdirectory (presumably one per bot) is
# expected to hold an "sgfs" folder of game-record files.
SELFPLAY_DIR = "/home/tony/code/go_attack/victimplay-training/svm3/selfplay"
# One string per game: every non-blank line of every file under */sgfs/.
RAW_SGF_STRS: List[str] = []


def _sorted_entries(path: str) -> List[os.DirEntry]:
    """Return the directory entries of `path`, ordered by name.

    os.scandir yields entries in arbitrary, filesystem-dependent order.
    Sorting makes the order of RAW_SGF_STRS stable, so any seeded random
    sampling from it draws the same games on every run.
    """
    with os.scandir(path) as entries:
        return sorted(entries, key=lambda de: de.name)


bot_de: os.DirEntry  # os.DirEntry is the portable, public name of the type.
for bot_de in tqdm([de for de in _sorted_entries(SELFPLAY_DIR) if de.is_dir()]):
    sgfs_path = os.path.join(bot_de.path, "sgfs")
    if not os.path.isdir(sgfs_path):
        # This subdirectory has no recorded games; skip it rather than
        # aborting the whole load.
        continue

    for sgf_de in [de for de in _sorted_entries(sgfs_path) if de.is_file()]:
        sgf_path: str = sgf_de.path
        with open(sgf_path) as f:
            # Stream the file instead of materialising it with readlines();
            # each line is treated as one game record.
            for line in f:
                game_str = line.strip()
                if game_str:  # A blank line is not a game record.
                    RAW_SGF_STRS.append(game_str)

len(RAW_SGF_STRS)

### Parse games

In [None]:
@dataclasses.dataclass
class GameInfo:
    """Summary of a single game between the adversary and the victim.

    Colors are the lowercase strings "b" (black) and "w" (white).
    """

    victim_color: str  # Color of the player whose name is "victim"
    adv_color: str  # The opposite color to victim_color
    win_color: str  # Winner's color; None when the game has no winner

    adv_win: bool  # True iff adv_color equals win_color
    # Winning margin seen from the adversary's side (negative if the
    # adversary lost, 0 when there is no winner). With komi.
    adv_minus_victim_score: float

    komi: float # Positive if white has the advantage
    adv_komi: float # Positive if adv has the advantage

    is_continuation: bool # Whether game is continuation of previous game

    sgf_str: str  # The raw SGF text this record was parsed from

    # TODO: Compute number of initial passes by one player or another.


def _parse_win_margin(result: str) -> float:
    """Extract the numeric winning margin from an SGF RE value like "B+3.5".

    Returns NaN when the margin is missing or not a number, e.g. "B+R"
    (resignation), "W+T" (time), "B+F" (forfeit) or a bare "B+".
    """
    parts = result.split("+")
    if len(parts) < 2:
        return float("nan")
    try:
        return float(parts[1])
    except ValueError:
        return float("nan")


def get_game_info(sgf_str: str) -> GameInfo:
    """Parse one SGF game record into a GameInfo.

    Args:
        sgf_str: The full SGF text of a single game. One of the two players
            must be named exactly "victim"; the other is the adversary.

    Returns:
        A GameInfo describing colors, winner, score margin and komi from the
        adversary's point of view. `adv_minus_victim_score` is 0.0 when the
        game has no winner and NaN when the winner is known but the result
        carries no numeric margin (e.g. a win by resignation).

    Raises:
        KeyError: If neither player is named "victim".
    """
    sgf_game = sgf.Sgf_game.from_string(sgf_str)
    root = sgf_game.get_root()

    b_name = sgf_game.get_player_name("b")
    w_name = sgf_game.get_player_name("w")

    # Look up which color the player named "victim" holds.
    victim_color = {b_name: "b", w_name: "w"}["victim"]
    adv_color = {"b": "w", "w": "b"}[victim_color]

    win_color = sgf_game.get_winner()

    if win_color is None:
        adv_minus_victim_score = 0.0
    else:
        # A winner exists, so the RE property is present; its margin part may
        # still be non-numeric, which _parse_win_margin maps to NaN.
        win_score = _parse_win_margin(root.get("RE"))
        adv_minus_victim_score = win_score if adv_color == win_color else -win_score

    komi = sgf_game.get_komi()
    # Komi is given from white's side; flip the sign when the adversary is black.
    adv_komi = {"w": komi, "b": -komi}[adv_color]

    return GameInfo(
        victim_color=victim_color,
        adv_color=adv_color,
        win_color=win_color,
        adv_win=adv_color == win_color,
        adv_minus_victim_score=adv_minus_victim_score,
        komi=komi,
        adv_komi=adv_komi,
        is_continuation=root.has_setup_stones(),
        sgf_str=sgf_str,
    )


# Upper bound on how many of the loaded games are parsed.
NUM_SAMPLED_GAMES = 10000

# Fixed seed: for the same RAW_SGF_STRS contents and order, the same subset
# of games is drawn on every run.
random.seed(42)
GAME_INFOS: List[GameInfo] = process_map(
    get_game_info,
    # random.sample raises ValueError when asked for more items than are
    # available, so clamp the sample size to the number of loaded games.
    random.sample(RAW_SGF_STRS, min(NUM_SAMPLED_GAMES, len(RAW_SGF_STRS))),
    max_workers=64,
    chunksize=50,
)

In [None]:
%%time
# Turn every GameInfo into a plain dict so that each game becomes one row
# and each dataclass field one column.
df = pd.DataFrame(list(map(dataclasses.asdict, GAME_INFOS)))
continuation_fraction = df["is_continuation"].mean()
print("Fraction of continuation games:", continuation_fraction)
df.head()

### Analyze data

In [None]:
# Two side-by-side histograms on explicit axes, so that every panel carries
# its own title and axis labels and can be read on its own.
fig, (score_ax, komi_ax) = plt.subplots(1, 2, figsize=(12, 4))

df.adv_minus_victim_score.hist(bins=100, ax=score_ax)
score_ax.set_title("Adversary score margin")
score_ax.set_xlabel("adv_minus_victim_score")
score_ax.set_ylabel("Number of games")

df.adv_komi.hist(bins=100, ax=komi_ax)
komi_ax.set_title("Komi in the adversary's favor")
komi_ax.set_xlabel("adv_komi")
komi_ax.set_ylabel("Number of games")

fig.tight_layout();

In [None]:
# One point per game: score margin against komi, both from the adversary's side.
df.plot(kind="scatter", x="adv_minus_victim_score", y="adv_komi")

In [None]:
# Keep the games in which komi works against the adversary, ordered from the
# adversary's largest margin downwards, with a fresh 0..n-1 index.
adv_komi_is_negative = df["adv_komi"] < 0
sub_df = (
    df.loc[adv_komi_is_negative]
    .sort_values(by="adv_minus_victim_score", ascending=False, ignore_index=True)
)
sub_df


In [None]:
# Raw SGF text of the game at position 5 of sub_df.
ss = sub_df["sgf_str"].iloc[5]
ss