In [84]:
import pandas as pd
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 1000)
import json
import numpy
from IPython.display import display
from subprocess import check_output
import sys
import traceback
from tqdm.notebook import tqdm, trange

In [85]:
techs = {
    "tech1": "1q_1o",
    "tech2": "k_per_terra",
    "tech3": "4pip",
    "tech4": "7vp",
    "tech5": "1o_1pw",
    "tech6": "1k_1c",
    "tech7": "3vp_per_gaia_place",
    "tech8": "4c",
    "tech9": "4pw",
    # idk wtf this is --> "tech-ship0": "2c_per_trade"
}

adv_techs = {
    "advtech1": "3vp_per_fed_pass",
    "advtech2": "2vp_per_tech_bump",
    "advtech3": "1qic_5c_action",
    "advtech4": "2vp_per_mine",
    "advtech5": "3vp_per_rl_pass",
    "advtech6": "1o_per_sector",
    "advtech7": "1vp_per_terra_pass",
    "advtech8": "2vp_per_gaia",
    "advtech9": "4vp_per_ts",
    "advtech10": "2vp_per_sector",
    "advtech11": "3o_action",
    "advtech12": "5vp_per_fed",
    "advtech13": "3k_action",
    "advtech14": "3vp_per_mine_place",
    "advtech15": "3vp_per_ts_place",
}

feds = {
    "fed1": "12vp",
    "fed2": "qic",
    "fed3": "2pw",
    "fed4": "2o",
    "fed5": "6c",
    "fed6": "2k",
    "gleens": "gleens"
}

round_scorings = {
    "score1": "2vp_per_terra",
    "score2": "2vp_per_research_bump",
    "score3": "2vp_per_mine_place",
    "score4": "5vp_per_fed_place",
    "score5": "4vp_per_ts_place",
    "score6": "4vp_per_gaia_place",
    "score7": "5vp_per_3pip_place",
    "score8": "3vp_per_ts_place",
    "score9": "3vp_per_gaia_place",
    "score10": "5vp_per_3pip_place",
}

boosters = {

    "booster1": "1k_1o",
    "booster2": "2pwt_1o",
    "booster3": "1qic_2c",
    "booster4": "2c_terra",
    "booster5": "2pw_nav",
    "booster6": "1o_1vp_per_mine",
    "booster7": "1o_2vp_per_ts",
    "booster8": "1k_3vp_per_rl",
    "booster9": "4pw_4vp_per_3pip",
    "booster10": "4c_1vp_per_gaia",
}

bad_buildings = {"colony", 
                 "colonyShip", 
                 "tradeShip", 
                 "constructionShip", 
                 "researchShip",
                 "scout",
                 "frigate",
                 "battleShip",
                 "customsPost",
                 "tradePost"
                }

In [86]:
# for progressbar. Lines are very long jsons, so bash wc way better than native iteration
def wc(filename):
    return int(check_output(["wc", "-l", filename]).split()[0])

In [5]:
total_lines = wc('game_data_raw.txt')

In [92]:
"""
Parses non-expansion game data

ToDo: iterate through game moves to get first turn builings, and score vp disterbution
"""
def parse_tree(tree, errors_set):
    
    if not tree['cancelled'] and tree['status'] == 'ended':
        try:
            dat = {}
            dat['id'] = tree['_id']
            
            # check for expansions
            if 'expansions' in tree['game'].keys() and len(tree['game']['expansions']) > 0:
                raise ValueError('this game is with expansions: ' + ''.join(tree['game']['expansions']))
            if 'expansions' in tree['data'].keys() and tree['data']['expansions'] != 0:
                raise ValueError('this game is with expansions: ' + str(tree['data']['expansions']))

            # some jsons dont have layout. must be before the site supported that feature
            # assume standard
            if 'options' in tree['data'].keys() and 'layout' in tree['data']['options'].keys():
                dat['map_layout'] = tree['data']['options']['layout']
            else:
                dat['map_layout'] = 'standard'

            num_players = tree['options']['setup']['nbPlayers']
            dat['num_players'] = num_players
            tot_elo = 0

            #boosters in game
            found_boosters = tree['data']['tiles']['boosters']
            for i in range(10):
                booster_name = 'booster' + str(i + 1)
                if booster_name in found_boosters.keys():
                    dat[boosters[booster_name]] = True
                else:
                    dat[boosters[booster_name]] = False

            #tech locations
            found_techs = tree['data']['tiles']['techs']
            for loc in found_techs.keys():
                name = found_techs[loc]['tile']
                if name in techs:
                    dat['tech_' + loc] = techs[name]
                elif name in adv_techs:
                    dat['tech_' + loc] = adv_techs[name]
                else:
                    raise NameError('unknown tech: ' + name)
                    # dat['tech_' + loc] = name


            #scorings
            found_scorings = tree['data']['tiles']['scorings']['round']
            for i in range(len(found_scorings)):
                dat['round_' + str(i+1) +'_scoring'] = round_scorings[found_scorings[i]]
            dat['final_scoring_1'] = tree['data']['tiles']['scorings']['final'][0]
            dat['final_scoring_2'] = tree['data']['tiles']['scorings']['final'][1]


            #player data
            for i in range(num_players):
                #position
                pos = "pos_" + str(tree['players'][i]['ranking']) + "_"

                #elo
                elo = tree['players'][i]['elo']['initial']
                tot_elo += elo
                dat[pos + 'elo'] = elo

                #faction
                faction = tree['players'][i]['faction']
                dat[pos + 'faction'] = faction

                #score
                dat[pos + 'score'] = tree['players'][i]['score']

                #start pos
                dat[pos + 'start_pos'] = [i + 1 for i in range(len(tree['data']['setup'])) if tree['data']['setup'][i] == faction][0]

                #bid
                dat[pos + 'bid'] = tree['data']['players'][i]['data']['bid']

                #feds
                feds_taken = tree['data']['players'][i]['data']['tiles']['federations']
                dat[pos + 'feds_taken'] = len(feds_taken)
                for key in feds.keys():
                    dat[pos + 'fed_' + feds[key]] = 0
                for fed in feds_taken:
                    dat[pos + 'fed_' + feds[fed['tile']]] += 1

                #final buildings
                total_buildings = 0
                found_buildings = tree['data']['players'][i]['data']['buildings']
                for key in found_buildings.keys():
                    if key in bad_buildings:
                        if found_buildings[key] > 0:
                            raise ValueError('this is a game with expansions ' + key)
                        continue
                    elif key != 'gf' and key != 'sp':
                        dat[pos + 'build_' + key] = found_buildings[key]
                        total_buildings += found_buildings[key]
                dat[pos + 'num_structures'] = total_buildings

                #research
                tech_score = 0
                found_research = tree['data']['players'][i]['data']['research']
                for key in found_research.keys():
                    if key == 'dip':
                        raise ValueError('this is a game with expansions ' + key)
                        
                    dat[pos + 'research_level_' + key] = found_research[key]
                    if found_research[key] > 2:
                        tech_score += (4 * (found_research[key] - 2))
                dat[pos + 'tech_score'] = tech_score

                #techs taken
                total_techs = 0
                found_techs = tree['data']['players'][i]['data']['tiles']['techs']
                for key in techs.keys():
                    dat[pos + 'tech_taken_' + techs[key]] = False
                for key in adv_techs.keys():
                    dat[pos + 'adv_tech_taken_' + adv_techs[key]] = False
                for tech in found_techs:
                    total_techs += 1
                    name = tech['tile']
                    if name in techs.keys():
                        dat[pos + 'tech_taken_' + techs[name]] = True
                    elif name in adv_techs.keys():
                        dat[pos + 'adv_tech_taken_' + adv_techs[name]] = True
                    else:
                        dat[pos + 'tech_taken_' + adv_techs[name]] = True
                dat[pos + 'total_techs_taken'] = total_techs




            dat['average_elo'] = tot_elo / num_players
            dat = pd.DataFrame(dat,index=[0])
            return True, dat, num_players, errors_set
        except:
            errors_set[tree['_id']] = traceback.format_exc()
            return False, "", "", errors_set
            
    else:
        return False, "", "", errors_set

In [93]:
with open("game_data_raw.txt", "r") as game_data_raw:
    
    pbar = tqdm(total=total_lines)
    
    two_players = None
    three_players = None
    four_players = None
    
    errors_set = dict()
    
    lines = game_data_raw.readlines()
    line_num = 0
    for line in lines:
            game_tree = json.loads(line)
            success, df, num_player, errors_set = parse_tree(game_tree, errors_set)
            if success:
                if num_player == 2:
                    if two_players is not None:
                        two_players = pd.concat([two_players, df], axis=0, join="outer", ignore_index=True)
                    else:
                        two_players = df
                elif num_player == 3:
                    if three_players is not None:
                        three_players = pd.concat([three_players, df], axis=0, join="outer", ignore_index=True)
                    else:
                        three_players = df
                else:
                    if four_players is not None:
                        four_players = pd.concat([four_players, df], axis=0, join="outer", ignore_index=True)
                    else:
                        four_players = df
            pbar.update(1)

  0%|          | 0/31617 [00:00<?, ?it/s]

In [94]:
len(errors_set)

10215

In [95]:
four_players.head()

Unnamed: 0,id,map_layout,num_players,1k_1o,2pwt_1o,1qic_2c,2c_terra,2pw_nav,1o_1vp_per_mine,1o_2vp_per_ts,1k_3vp_per_rl,4pw_4vp_per_3pip,4c_1vp_per_gaia,tech_terra,tech_nav,tech_int,tech_gaia,tech_eco,tech_sci,tech_free1,tech_free2,tech_free3,tech_adv-terra,tech_adv-nav,tech_adv-int,tech_adv-gaia,tech_adv-eco,tech_adv-sci,round_1_scoring,round_2_scoring,round_3_scoring,round_4_scoring,round_5_scoring,round_6_scoring,final_scoring_1,final_scoring_2,pos_2_elo,pos_2_faction,pos_2_score,pos_2_start_pos,pos_2_bid,pos_2_feds_taken,pos_2_fed_12vp,pos_2_fed_qic,pos_2_fed_2pw,pos_2_fed_2o,pos_2_fed_6c,pos_2_fed_2k,pos_2_fed_gleens,pos_2_build_m,...,pos_3_start_pos,pos_3_bid,pos_3_feds_taken,pos_3_fed_12vp,pos_3_fed_qic,pos_3_fed_2pw,pos_3_fed_2o,pos_3_fed_6c,pos_3_fed_2k,pos_3_fed_gleens,pos_3_build_m,pos_3_build_ts,pos_3_build_lab,pos_3_build_PI,pos_3_build_ac1,pos_3_build_ac2,pos_3_num_structures,pos_3_research_level_terra,pos_3_research_level_nav,pos_3_research_level_int,pos_3_research_level_gaia,pos_3_research_level_eco,pos_3_research_level_sci,pos_3_tech_score,pos_3_tech_taken_1q_1o,pos_3_tech_taken_k_per_terra,pos_3_tech_taken_4pip,pos_3_tech_taken_7vp,pos_3_tech_taken_1o_1pw,pos_3_tech_taken_1k_1c,pos_3_tech_taken_3vp_per_gaia_place,pos_3_tech_taken_4c,pos_3_tech_taken_4pw,pos_3_adv_tech_taken_3vp_per_fed_pass,pos_3_adv_tech_taken_2vp_per_tech_bump,pos_3_adv_tech_taken_1qic_5c_action,pos_3_adv_tech_taken_2vp_per_mine,pos_3_adv_tech_taken_3vp_per_rl_pass,pos_3_adv_tech_taken_1o_per_sector,pos_3_adv_tech_taken_1vp_per_terra_pass,pos_3_adv_tech_taken_2vp_per_gaia,pos_3_adv_tech_taken_4vp_per_ts,pos_3_adv_tech_taken_2vp_per_sector,pos_3_adv_tech_taken_3o_action,pos_3_adv_tech_taken_5vp_per_fed,pos_3_adv_tech_taken_3k_action,pos_3_adv_tech_taken_3vp_per_mine_place,pos_3_adv_tech_taken_3vp_per_ts_place,pos_3_total_techs_taken,average_elo
0,Floating-stay-6137,standard,4,True,False,True,False,True,True,True,True,True,False,1q_1o,4pip,1k_1c,4pw,3vp_per_gaia_place,7vp,1o_1pw,4c,k_per_terra,1qic_5c_action,2vp_per_mine,4vp_per_ts,2vp_per_tech_bump,3vp_per_fed_pass,1o_per_sector,2vp_per_research_bump,4vp_per_gaia_place,3vp_per_gaia_place,4vp_per_ts_place,5vp_per_3pip_place,3vp_per_ts_place,structureFed,gaia,398.0,baltaks,195.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,5.0,...,4.0,0.0,4.0,1.0,0.0,2.0,0.0,0.0,0.0,1.0,4.0,4.0,1.0,1.0,1.0,0.0,11.0,3.0,5.0,0.0,0.0,1.0,0.0,16.0,True,True,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,3.0,306.0
1,Serene-sunshine-349,standard,4,True,True,True,False,True,True,True,False,True,False,1q_1o,1k_1c,3vp_per_gaia_place,4pw,4pip,k_per_terra,1o_1pw,7vp,4c,1vp_per_terra_pass,3vp_per_rl_pass,1o_per_sector,3vp_per_mine_place,2vp_per_mine,3o_action,4vp_per_ts_place,2vp_per_research_bump,5vp_per_3pip_place,3vp_per_gaia_place,2vp_per_terra,5vp_per_3pip_place,satellite,planetType,349.0,firaks,189.0,2.0,0.0,2.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,7.0,...,3.0,0.0,2.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,6.0,1.0,3.0,1.0,0.0,0.0,11.0,1.0,5.0,1.0,1.0,0.0,0.0,12.0,True,False,False,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,3.0,214.75
2,Dark-trade-2563,standard,4,True,True,True,False,True,True,False,True,True,False,1q_1o,3vp_per_gaia_place,4c,7vp,4pip,4pw,1o_1pw,1k_1c,k_per_terra,1vp_per_terra_pass,3vp_per_mine_place,3o_action,3vp_per_fed_pass,1qic_5c_action,5vp_per_fed,2vp_per_research_bump,2vp_per_terra,4vp_per_gaia_place,2vp_per_mine_place,3vp_per_gaia_place,5vp_per_3pip_place,sector,structure,152.0,ivits,161.0,1.0,0.0,4.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,4.0,...,2.0,0.0,3.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,8.0,1.0,2.0,1.0,1.0,1.0,14.0,4.0,5.0,0.0,1.0,1.0,4.0,28.0,True,False,True,False,True,False,True,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,6.0,113.75
3,Sweet-property-1343,standard,4,False,True,True,True,False,True,True,True,True,False,4c,k_per_terra,4pip,3vp_per_gaia_place,1o_1pw,7vp,1q_1o,4pw,1k_1c,4vp_per_ts,3vp_per_rl_pass,2vp_per_sector,2vp_per_tech_bump,1qic_5c_action,2vp_per_gaia,5vp_per_3pip_place,4vp_per_gaia_place,3vp_per_ts_place,2vp_per_terra,2vp_per_research_bump,3vp_per_gaia_place,structureFed,satellite,372.0,baltaks,189.0,1.0,0.0,3.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,6.0,...,2.0,0.0,3.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,6.0,0.0,3.0,1.0,1.0,1.0,12.0,4.0,4.0,4.0,0.0,2.0,2.0,24.0,False,True,False,True,True,False,False,True,True,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,6.0,221.25
4,real-time-game13,standard,4,False,False,False,True,True,True,True,True,True,True,3vp_per_gaia_place,4c,1k_1c,7vp,4pw,4pip,k_per_terra,1o_1pw,1q_1o,5vp_per_fed,1vp_per_terra_pass,3vp_per_rl_pass,3vp_per_ts_place,1qic_5c_action,1o_per_sector,4vp_per_gaia_place,3vp_per_gaia_place,4vp_per_ts_place,2vp_per_research_bump,5vp_per_3pip_place,5vp_per_fed_place,sector,structure,100.0,ivits,176.0,1.0,0.0,4.0,1.0,0.0,0.0,1.0,2.0,0.0,0.0,1.0,...,4.0,0.0,4.0,2.0,0.0,1.0,1.0,0.0,0.0,0.0,7.0,2.0,1.0,1.0,1.0,1.0,13.0,4.0,5.0,1.0,0.0,0.0,1.0,20.0,True,False,True,False,True,False,False,True,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,5.0,145.5


In [96]:
# monitor parsing errors
err_types = set(errors_set.values())
err_to_count = {}
for err in err_types:
    count = sum(map(str(err).__eq__, errors_set.values()))
    err_to_count[err] = count

err_to_count

{'Traceback (most recent call last):\n  File "/tmp/ipykernel_449958/3721108474.py", line 81, in parse_tree\n    dat[pos + \'bid\'] = tree[\'data\'][\'players\'][i][\'data\'][\'bid\']\nIndexError: list index out of range\n': 101,
 'Traceback (most recent call last):\n  File "/tmp/ipykernel_449958/3721108474.py", line 15, in parse_tree\n    raise ValueError(\'this game is with expansions: \' + \'\'.join(tree[\'game\'][\'expansions\']))\nValueError: this game is with expansions: frontiers\n': 22,
 'Traceback (most recent call last):\n  File "/tmp/ipykernel_449958/3721108474.py", line 48, in parse_tree\n    raise NameError(\'unknown tech: \' + name)\nNameError: unknown tech: tech-ship0\n': 8994,
 'Traceback (most recent call last):\n  File "/tmp/ipykernel_449958/3721108474.py", line 109, in parse_tree\n    raise ValueError(\'this is a game with expansions \' + key)\nValueError: this is a game with expansions dip\n': 726,
 'Traceback (most recent call last):\n  File "/tmp/ipykernel_449958/3

In [99]:
for col in four_players.columns:
    print(col)

id
map_layout
num_players
1k_1o
2pwt_1o
1qic_2c
2c_terra
2pw_nav
1o_1vp_per_mine
1o_2vp_per_ts
1k_3vp_per_rl
4pw_4vp_per_3pip
4c_1vp_per_gaia
tech_terra
tech_nav
tech_int
tech_gaia
tech_eco
tech_sci
tech_free1
tech_free2
tech_free3
tech_adv-terra
tech_adv-nav
tech_adv-int
tech_adv-gaia
tech_adv-eco
tech_adv-sci
round_1_scoring
round_2_scoring
round_3_scoring
round_4_scoring
round_5_scoring
round_6_scoring
final_scoring_1
final_scoring_2
pos_2_elo
pos_2_faction
pos_2_score
pos_2_start_pos
pos_2_bid
pos_2_feds_taken
pos_2_fed_12vp
pos_2_fed_qic
pos_2_fed_2pw
pos_2_fed_2o
pos_2_fed_6c
pos_2_fed_2k
pos_2_fed_gleens
pos_2_build_m
pos_2_build_ts
pos_2_build_lab
pos_2_build_PI
pos_2_build_ac1
pos_2_build_ac2
pos_2_num_structures
pos_2_research_level_terra
pos_2_research_level_nav
pos_2_research_level_int
pos_2_research_level_gaia
pos_2_research_level_eco
pos_2_research_level_sci
pos_2_tech_score
pos_2_tech_taken_1q_1o
pos_2_tech_taken_k_per_terra
pos_2_tech_taken_4pip
pos_2_tech_taken_7vp
pos

In [100]:
# get examples to check out jsons
# JSON: https://www.boardgamers.space/api/game/ + key
# see if game loads (usually doesnt if error): https://www.boardgamers.space/game/ + key
for err in err_types:
    for key in errors_set.keys():
        if errors_set[key] == err:
            print(key)
            print(err)
            break

Q-d-b-190221-tran-2le1
Traceback (most recent call last):
  File "/tmp/ipykernel_449958/3721108474.py", line 81, in parse_tree
    dat[pos + 'bid'] = tree['data']['players'][i]['data']['bid']
IndexError: list index out of range

Unholy-empire-2602
Traceback (most recent call last):
  File "/tmp/ipykernel_449958/3721108474.py", line 15, in parse_tree
    raise ValueError('this game is with expansions: ' + ''.join(tree['game']['expansions']))
ValueError: this game is with expansions: frontiers

Princely-heart-3901
Traceback (most recent call last):
  File "/tmp/ipykernel_449958/3721108474.py", line 48, in parse_tree
    raise NameError('unknown tech: ' + name)
NameError: unknown tech: tech-ship0

Obsessive-uranium-2818hjh
Traceback (most recent call last):
  File "/tmp/ipykernel_449958/3721108474.py", line 109, in parse_tree
    raise ValueError('this is a game with expansions ' + key)
ValueError: this is a game with expansions dip

1vs1live
Traceback (most recent call last):
  File "/tm

In [105]:
two_players.to_pickle("two_players_data")
three_players.to_pickle("three_players_data")
four_players.to_pickle("four_players_data")