This notebook parses SGF files generated by the `match` command
and loads them into a pandas DataFrame.

### Load libraries

In [None]:
import dataclasses
import html
import os
import random
from typing import List

import matplotlib.pyplot as plt
import pandas as pd
from tqdm.auto import tqdm
from tqdm.contrib.concurrent import process_map

from go_attack import game_info

In [None]:
# Directory whose SGF files are analyzed by this notebook.  The default is
# the small fixture checked into the repo; an alternative used previously is
# "/nas/ucb/tony/go-attack/matches/visit-exp3".
MATCH_DIR = "../tests/testdata/visits-truncated/"

# Discover the SGF files under MATCH_DIR, then read them into `raw_sgf_strs`.
sgf_paths = game_info.find_sgf_files(MATCH_DIR)
raw_sgf_strs = game_info.read_and_concat_all_files(sgf_paths)

# Display how many raw SGF strings were loaded.
len(raw_sgf_strs)

In [None]:
# Seed Python's global `random` module in this (parent) process.
# NOTE(review): nothing visible in this notebook draws from `random`;
# presumably this is for the parsing helpers — confirm it is still needed.
random.seed(42)

# Parse every raw SGF string into a GameInfo using a pool of worker processes.
# The pool size is capped at the machine's CPU count (at most 64, the value
# used originally): requesting 64 workers unconditionally oversubscribes
# small machines and exceeds the 61-worker limit that ProcessPoolExecutor
# enforces on Windows.
game_infos: List[game_info.GameInfo] = process_map(
    game_info.parse_game_info,
    raw_sgf_strs,
    max_workers=min(64, os.cpu_count() or 1),
    chunksize=50,
)

In [None]:
%%time
df = pd.DataFrame([gi.to_dict() for gi in GAME_INFOS])
print("gtypes:", df.gtype.unique())
print("Number of cleanup games:", (df.gtype == "cleanuptraining").sum())

# Filter to only normal games
df = df[(df.gtype == "normal")]
print("Fraction continuation:    ", df.is_continuation.mean())
print("Fraction used_initial_pos:", df.used_initial_position.mean())
print("max(init_turn_num)       :", df.init_turn_num.max())

df.head()

### Helper functions

In [None]:
@dataclasses.dataclass
class SGFUrl:
    """Link to an SGF string opened in the hosted sgf-viewer page.

    In a notebook the object renders as an HTML anchor (via `_repr_html_`);
    `str()` gives the bare URL.

    Attributes:
        sgf: SGF text appended to the viewer URL after `#sgf=`.
        text: Plain-text label shown for the link.
        url: Viewer URL, derived from `sgf` when the instance is created.
    """

    sgf: str
    text: str
    # Set in __post_init__.  Excluded from __init__/__repr__/__eq__ so the
    # dataclass-generated methods keep operating on `sgf` and `text` only.
    url: str = dataclasses.field(init=False, repr=False, compare=False)

    def sgf_str_to_url(self, sgf_str: str) -> str:
        """Return the sgf-viewer URL that embeds `sgf_str` in its fragment."""
        return f"https://humancompatibleai.github.io/sgf-viewer/#sgf={sgf_str}"

    def __post_init__(self):
        """Compute `url` from the `sgf` field."""
        self.url = self.sgf_str_to_url(self.sgf)

    def _repr_html_(self):
        """HTML link to this URL.

        Both the href and the label are HTML-escaped so that quotes, `<`,
        `>` or `&` in the SGF text or the label cannot break the markup.
        """
        href = html.escape(self.url, quote=True)
        label = html.escape(self.text, quote=False)
        return f'<a href="{href}">{label}</a>'

    def __str__(self):
        """Return the underlying string."""
        return self.url

### Analyze data

In [None]:
# Histogram of the winning colour for the cp127-v1 / cp63-v1024 pairing, one
# panel per colour assignment.  Explicit axes plus titles and axis labels
# make the two panels distinguishable when the notebook is skimmed.
fig, (ax_cp127_black, ax_cp127_white) = plt.subplots(1, 2)

df[(df.b_name == "cp127-v1") & (df.w_name == "cp63-v1024")].win_color.hist(
    ax=ax_cp127_black
)
ax_cp127_black.set(
    title="B: cp127-v1, W: cp63-v1024",
    xlabel="win_color",
    ylabel="Number of games",
)

df[(df.w_name == "cp127-v1") & (df.b_name == "cp63-v1024")].win_color.hist(
    ax=ax_cp127_white
)
ax_cp127_white.set(
    title="B: cp63-v1024, W: cp127-v1",
    xlabel="win_color",
    ylabel="Number of games",
)

# Keep the titles and labels of the two panels from overlapping; this call
# returns None, so the cell shows only the figure.
fig.tight_layout()

In [None]:
# Rows where cp63-v1024 is the winner and cp127-v1 is the loser.
cur_df = df.loc[
    (df["lose_name"] == "cp127-v1") & (df["win_name"] == "cp63-v1024")
]
# Number of such games.
cur_df.shape[0]

In [None]:
# Render a viewer link for the first game in `cur_df`.
SGFUrl(
    text="cp63-v1024 beats cp127-v1 (game2)",
    sgf=cur_df["sgf_str"].iloc[0],
)