In [None]:
import json
import re

from tqdm import tqdm
import pandas as pd

In [None]:
# Read the raw PBN records from disk into memory.
with open("../matches/raw_pbn.json") as raw_file:
    pbn_list = json.loads(raw_file.read())

In [None]:
# Show one raw record to inspect its layout before parsing.
# NOTE(review): index 124 looks like an arbitrary sample; this raises
# IndexError if the input holds fewer than 125 records.
print(pbn_list[124])

In [None]:
def extract(s):
    """Return the text enclosed by the first and the last double quote of ``s``.

    For a tag line such as ``[Event "Some Cup"]`` this yields ``Some Cup``.
    Quotes that sit between the outermost pair are kept as part of the value.

    Args:
        s: A single line of text.

    Returns:
        The substring strictly between the first and the last ``"``, or an
        empty string when ``s`` holds fewer than two double quotes.
    """
    first = s.find('"')
    last = s.rfind('"')
    # Without this guard a quote-less line would come back as s[:-1]
    # (find/rfind both yield -1), which is never a meaningful tag value.
    if first == -1 or first == last:
        return ""
    return s[first + 1:last]

# Maps the opening of a PBN tag line to the key under which the tag's quoted
# value is stored in each parsed record.
# Some entries are string prefixes of other tag names (e.g. "[Deal" is a
# prefix of "[DealId"), so whoever matches lines against these keys has to
# take care that a shorter key does not capture a longer tag.
features = {
    "[DealId": "id",
    "[Event": "event",
    "[Site": "site",
    "[Date": "date",
    "[Board": "board",
    "[Hometeam": "home_team",
    "[Visitteam": "visit_team",
    "[Deal": "deal",
    # NOTE(review): presumably the quoted value of the Auction tag is the
    # player who makes the first call -- confirm against the PBN data.
    "[Auction": "declarer_starter",
    "[Declarer": "declarer",
    "[Contract": "contract",
}

def _is_tag(line, tag):
    """Return True when ``line`` opens with ``tag`` as a complete tag name.

    A bare ``startswith`` check would let ``"[Deal"`` also match
    ``[DealId "..."]`` and ``"[Event"`` match ``[EventDate "..."]``, so the
    character following ``tag`` must not continue an identifier.
    """
    if not line.startswith(tag):
        return False
    following = line[len(tag):len(tag) + 1]
    return not (following.isalnum() or following == "_")


# Turn every raw PBN record into a flat dict holding the tags listed in
# `features`, plus a "bidding" entry with the calls that follow the
# [Auction tag (joined with single spaces).
result = []
for pbn in tqdm(pbn_list):
    data = {}
    lines = pbn.split("\n")
    for i, line in enumerate(lines):
        for prefix, col in features.items():
            if _is_tag(line, prefix):
                data[col] = extract(line)
                break  # with exact tag matching a line fits at most one entry
        if _is_tag(line, "[Auction"):
            # The auction body is every following line up to the next tag.
            bidding = []
            pivot = i+1
            while (pivot < len(lines)) and (not lines[pivot].startswith("[")):
                bidding.append(lines[pivot])
                pivot += 1
            data["bidding"] = ' '.join(bidding)
    result.append(data)

In [None]:
# Number of parsed records; one dict is appended per entry of pbn_list.
len(result)

In [None]:
# Special markers that can appear in a bidding string:
#   X               double
#   XX              redouble
#   AP              all pass
#   =<note_index>=  reference to a note
# The checks are plain substring tests, so the "X" count also includes
# every auction that contains "XX".
for label, marker in (
    ("Double:", "X"),
    ("Redouble:", "XX"),
    ("All pass:", "AP"),
    ("Note:", "="),
):
    print(label, sum(marker in data["bidding"] for data in result))

In [None]:
# Remove note references (=<note_index>=) from every bidding string.
for data in result:
    # [^=]+ confines each match to a single =...= marker. The greedy ".+"
    # used before also deleted every call sitting between the first and the
    # last marker of an auction.
    without_notes = re.sub(r'=[^=]+=', '', data["bidding"])
    # Collapse whitespace runs to one space. Replacing '  ' with '' glued the
    # calls on either side of a removed marker together ("1C  Pass" -> "1CPass").
    data["bidding"] = ' '.join(without_notes.split())
# Sanity check: should print 0 once all markers are gone.
print("Note:", sum("=" in data["bidding"] for data in result))

In [None]:
# Expand the "AP" (all pass) shorthand into three explicit passes.
for record in result:
    record["bidding"] = "Pass Pass Pass".join(record["bidding"].split("AP"))
# Sanity check: no "AP" should be left afterwards.
remaining_ap = sum("AP" in record["bidding"] for record in result)
print("All pass:", remaining_ap)

In [None]:
# Treat doubles (X) and redoubles (XX) as ordinary passes.
for record in result:
    # "XX?" consumes a redouble as one unit before falling back to a single X,
    # matching the effect of replacing "XX" first and "X" afterwards.
    record["bidding"] = re.sub(r"XX?", "Pass", record["bidding"])
# Sanity check: both counts should now be zero.
print("Double:", sum("X" in record["bidding"] for record in result))
print("Redouble:", sum("XX" in record["bidding"] for record in result))

In [None]:
# Load the parsed records into a DataFrame (one row per record, one column
# per extracted key) and display its per-column summary.
pd.DataFrame(result).describe()

In [None]:
# Persist the cleaned records as pretty-printed JSON.
with open("../matches/bidding.json", "w") as out_file:
    out_file.write(json.dumps(result, indent=4))