In [None]:
import json
import gzip
import numpy as np
import pandas as pd
from datetime import datetime
from pprint import pprint
import os

In [None]:
#os.chdir("C:\\temp\\dpc")

In [None]:
# JSON text is UTF-8; pass the encoding explicitly because text-mode gzip.open
# otherwise falls back to the platform's locale encoding.
with gzip.open("DPC.json.gz", "rt", encoding="utf-8") as gz:
    dpc_json = json.load(gz)

In [None]:
def get_json_element_recursive(json, aKeys):
    """Look up a (possibly nested) value in decoded JSON.

    Parameters
    ----------
    json : dict or None
        Decoded JSON object to search.
    aKeys : hashable or list
        Either a single key, or a list of keys describing the path to follow
        from the outermost object inwards.

    Returns
    -------
    The value found at the end of the path, or None when `json` is None, a key
    along the path is missing, or an intermediate value is not a dict. An empty
    key list returns `json` itself.
    """
    if json is None:
        return None

    # Treat a single key as a one-element path so both forms share one code path.
    path = aKeys if isinstance(aKeys, list) else [aKeys]

    node = json
    for key in path:
        # A scalar/list/None in the middle of the path cannot be descended into;
        # report "not found" instead of raising.
        if not isinstance(node, dict) or key not in node:
            return None
        node = node[key]
    return node

In [None]:
def get_json_element(json, aKeys):
    """Iteratively look up a (possibly nested) value in decoded JSON.

    Parameters
    ----------
    json : dict or None
        Decoded JSON object to search.
    aKeys : hashable or list
        Either a single key, or a list of keys describing the path to follow
        from the outermost object inwards.

    Returns
    -------
    The value found at the end of the path, or None when `json` is None, a key
    along the path is missing, or an intermediate value is not a dict. An empty
    key list returns `json` itself.
    """
    if json is None:
        return None

    # Treat a single key as a one-element path so both forms share one code path.
    path = aKeys if isinstance(aKeys, list) else [aKeys]

    node = json
    for key in path:
        # Stop at the first missing key instead of skipping it, and never index
        # the final value a second time.
        if not isinstance(node, dict) or key not in node:
            return None
        node = node[key]
    return node

In [None]:
def get_observation_from_json(json, extract_keys):
    """Build one observation (a flat dict) from a JSON object.

    `extract_keys` maps each output field name to a spec that says how to fill it:
      * None      -> the field is set to None (the caller fills it in later),
      * callable  -> the field is the result of calling the spec with `json`,
      * otherwise -> the spec is a key or key path resolved with
                     get_json_element_recursive.

    Returns a new dict with the same keys, in the same order, as `extract_keys`.
    """
    def _resolve(spec):
        if spec is None:
            return None
        if callable(spec):
            return spec(json)
        return get_json_element_recursive(json, spec)

    return {field: _resolve(extract_keys[field]) for field in extract_keys}

In [None]:
# Output column -> key (or key path) in the match JSON for the basic match table.
# NOTE(review): the output column names "lobby_tye" and "negetive_votes" are
# misspelled but are kept as-is because they become CSV headers that downstream
# consumers may already rely on — confirm before renaming.
match_basics_keys = {
    "match_id": "match_id",
    "match_seq_num": "match_seq_num",
    "replay_url": "replay_url",
    "replay_salt": "replay_salt",
    "start_time": "start_time",
    "duration": "duration",
    "human_players": "human_players",
    "radiant_team_id": ["radiant_team", "team_id"],
    "dire_team_id": ["dire_team", "team_id"],
    "lobby_tye": "lobby_type",
    "game_mode": "game_mode",
    "skill": "skill",
    "region": "region",
    "patch": "patch",
    "positive_votes": "positive_votes",
    # The source field is spelled "negative_votes"; looking up the misspelled
    # key always produced None.
    "negetive_votes": "negative_votes",
    "version": "version"
}
match_basics = pd.DataFrame(columns = match_basics_keys.keys())

In [None]:
# Per-team table: match_id is not part of the team object, so it is left as
# None here and filled in by the conversion loop.
match_teams_keys = {"match_id": None}
match_teams_keys.update({field: field for field in ("team_id", "name", "tag")})
match_teams = pd.DataFrame(columns=list(match_teams_keys))

In [None]:
# League/series table: output column -> key or key path in the match JSON.
match_league_keys = dict([
    ("match_id", "match_id"),
    ("league_id", "leagueid"),
    ("league_name", ["league", "name"]),
    ("league_tier", ["league", "tier"]),
    ("series_id", "series_id"),
    ("series_type", "series_type"),
])
match_league = pd.DataFrame(columns=list(match_league_keys))

In [None]:
# Match outcome metrics: every output column is read from the JSON key of the
# same name.
match_metrics_keys = {
    field: field
    for field in (
        "match_id",
        "radiant_win",
        "first_blood_time",
        "dire_score",
        "radiant_score",
        "barracks_status_dire",
        "barracks_status_radiant",
        "tower_status_dire",
        "tower_status_radiant",
        "throw",
        "comeback",
        "loss",
        "win",
    )
}
match_metrics = pd.DataFrame(columns=list(match_metrics_keys))

In [None]:
# Gold/XP advantage table: columns are read from the JSON keys of the same name.
match_metrics_xp_gold_advantage_keys = {
    field: field for field in ("match_id", "radiant_gold_adv", "radiant_xp_adv")
}
match_metrics_xp_gold_advantage = pd.DataFrame(columns=list(match_metrics_xp_gold_advantage_keys))
def match_metrics_xp_gold_advantage_post_process(df):
    """Reshape the per-match advantage sequences into one row per (match, position).

    `df` must have the columns "match_id", "radiant_gold_adv" and
    "radiant_xp_adv", where the two advantage columns hold sequences. Each
    sequence is expanded so that element i becomes a row whose "minute" is i;
    the gold and XP rows are then inner-joined on ("match_id", "minute") and
    the advantage values are cast to int64.

    Returns a DataFrame with columns
    match_id, minute, radiant_gold_advantage, radiant_xp_advantage.
    """
    def _to_long(source_column, value_column):
        # One column per sequence position, then back to long form; dropna
        # removes the padding rows created for the shorter sequences.
        wide = df.set_index("match_id")[source_column].apply(pd.Series)
        long_form = wide.reset_index().melt(id_vars="match_id").dropna()
        long_form.columns = ["match_id", "minute", value_column]
        return long_form

    gold_long = _to_long("radiant_gold_adv", "radiant_gold_advantage")
    xp_long = _to_long("radiant_xp_adv", "radiant_xp_advantage")

    merged = pd.merge(gold_long, xp_long, on=["match_id", "minute"])
    for value_column in ("radiant_gold_advantage", "radiant_xp_advantage"):
        merged[value_column] = merged[value_column].astype(np.int64)
    return merged

In [None]:
# Objectives table: match_id is not part of an objective entry, so it is left
# as None here and filled in by the conversion loop; the other columns are read
# from the entry's key of the same name.
match_metrics_objectives_keys = {"match_id": None}
match_metrics_objectives_keys.update({
    field: field
    for field in ("key", "team", "player_slot", "slot", "time", "type", "unit")
})
match_metrics_objectives = pd.DataFrame(columns=list(match_metrics_objectives_keys))

In [None]:
# Per-player summary table: output column -> key (or key path) in a player
# object of the match JSON.
match_players_keys = {
    "match_id": "match_id",
    "isRadiant": "isRadiant",
    "player_slot": "player_slot",
    "account_id": "account_id",
    "personaname": "personaname",
    "name": "name",
    "rank_tier": "rank_tier",
    "actions_per_min": "actions_per_min",
    "hero_id": "hero_id",
    "kills": "kills",
    "deaths": "deaths",
    "assists": "assists",
    "kills_per_min": "kills_per_min",
    "kda": "kda",
    "multi_kills_2": ["multi_kills", "2"],
    "multi_kills_3": ["multi_kills", "3"],
    "multi_kills_4": ["multi_kills", "4"],
    "multi_kills_5": ["multi_kills", "5"],
    "level": "level",
    "total_xp": "total_xp",
    "xp_per_min": "xp_per_min",
    "total_gold": "total_gold",
    "gold": "gold",
    "gold_spent": "gold_spent",
    "gold_per_min": "gold_per_min",
    "last_hits": "last_hits",
    "denies": "denies",
    "firstblood_claimed": "firstblood_claimed",
    "hero_kills": "hero_kills",
    "lane_kills": "lane_kills",
    "tower_kills": "tower_kills",
    "ancient_kills": "ancient_kills",
    "neutral_kills": "neutral_kills",
    # Previously mapped to "neutral_kills" (copy-paste slip), which made this
    # column a duplicate of neutral_kills.
    "roshan_kills": "roshan_kills",
    "observer_kills": "observer_kills",
    "sentry_kills": "sentry_kills",
    "courier_kills": "courier_kills",
    "necronomicon_kills": "necronomicon_kills",
    "camps_stacked": "camps_stacked",
    "creeps_stacked": "creeps_stacked",
    "tower_damage": "tower_damage",
    "hero_damage": "hero_damage",
    "hero_healing": "hero_healing",
    "pings": "pings",
    "rune_pickups": "rune_pickups",
    "obs_placed": "obs_placed",
    "observer_uses": "observer_uses",
    "sen_placed": "sen_placed",
    "sentry_uses": "sentry_uses",
    "purchase_tpscroll": "purchase_tpscroll",
    "stuns": "stuns",
    "backpack_0": "backpack_0",
    "backpack_1": "backpack_1",
    "backpack_2": "backpack_2",
    "item_0": "item_0",
    "item_1": "item_1",
    "item_2": "item_2",
    "item_3": "item_3",
    "item_4": "item_4",
    "item_5": "item_5",
    "is_roaming": "is_roaming",
    "life_state_0": ["life_state", "0"],
    "life_state_1": ["life_state", "1"],
    "life_state_2": ["life_state", "2"],
    "life_state_dead": "life_state_dead",
    "teamfight_participation": "teamfight_participation"
}
match_players = pd.DataFrame(columns = match_players_keys.keys())

In [None]:
# Ability-upgrade table: every output column is read from the player key of the
# same name.
match_player_ability_upgrade_keys = {
    field: field
    for field in ("match_id", "player_slot", "account_id", "hero_id", "ability_upgrades_arr")
}
match_player_ability_upgrade = pd.DataFrame(columns=list(match_player_ability_upgrade_keys))
def match_player_ability_upgrade_post_process(df):
    """Reshape per-player ability-upgrade sequences into one row per upgrade.

    `df` must have the columns "match_id", "player_slot", "account_id",
    "hero_id" and "ability_upgrades_arr", the last holding a sequence per row.
    Element i of a sequence becomes a row with order == i and ability == the
    element, cast to int64.

    Returns a DataFrame with columns
    match_id, player_slot, account_id, hero_id, order, ability,
    sorted by match_id (rows of one match keep their relative order).
    """
    id_cols = ["match_id", "player_slot", "account_id", "hero_id"]

    # One column per sequence position, then back to long form.
    wide = df.set_index(id_cols)["ability_upgrades_arr"].apply(pd.Series)
    long_form = wide.reset_index().melt(id_vars=id_cols, var_name="order", value_name="ability")

    # Drop only the padding created for shorter sequences; a bare dropna() would
    # also discard every row whose account_id (or other id column) is missing.
    long_form = long_form.dropna(subset=["ability"])

    # sort_values returns a new frame, so the result has to be kept; mergesort
    # is stable, which preserves the upgrade order within each match.
    long_form = long_form.sort_values(by=["match_id"], kind="mergesort")

    return long_form.assign(ability=long_form["ability"].astype(np.int64))

In [None]:
# Kill-log table: the identifying columns are not part of a log entry, so they
# start as None and are filled in by the conversion loop.
match_player_kills_log_keys = dict.fromkeys(("match_id", "player_slot", "account_id"))
match_player_kills_log_keys.update({field: field for field in ("time", "key")})
match_player_kills_log = pd.DataFrame(columns=list(match_player_kills_log_keys))

In [None]:
# Per-minute player metrics: every output column is read from the player key of
# the same name.
match_player_per_min_metrics_keys = {
    field: field
    for field in ("match_id", "player_slot", "account_id", "xp_t", "gold_t", "lh_t", "dn_t")
}
match_player_per_min_metrics = pd.DataFrame(columns=list(match_player_per_min_metrics_keys))
def match_player_per_min_metrics_post_process(df):
    """Reshape per-player metric sequences into one row per (player, position).

    `df` must have the columns "match_id", "player_slot", "account_id" and the
    sequence columns "xp_t", "gold_t", "lh_t", "dn_t". Element i of each
    sequence becomes the value of that metric in the row whose "minute" is i.
    The four long tables are inner-joined on the id columns plus "minute" and
    the metric values are cast to int64.

    Returns a DataFrame with columns
    match_id, player_slot, account_id, minute, xp_t, gold_t, lh_t, dn_t.
    """
    id_cols = ["match_id", "player_slot", "account_id"]
    # Build the join keys once. The previous `id_cols.append("minute")` returned
    # None (so pandas fell back to joining on all shared columns) and grew
    # id_cols by one element on every call.
    merge_keys = id_cols + ["minute"]
    metrics = ["xp_t", "gold_t", "lh_t", "dn_t"]

    def _to_long(metric):
        # One column per sequence position, then back to long form.
        wide = df.set_index(id_cols)[metric].apply(pd.Series)
        long_form = wide.reset_index().melt(id_vars=id_cols, var_name="minute", value_name=metric)
        # Drop only the padding created for shorter sequences; a bare dropna()
        # would also discard every row whose account_id is missing.
        return long_form.dropna(subset=[metric])

    result = _to_long(metrics[0])
    for metric in metrics[1:]:
        result = pd.merge(result, _to_long(metric), on=merge_keys)

    for metric in metrics:
        result[metric] = result[metric].astype(np.int64)

    return result

In [None]:
# Item-purchase table: the identifying columns are not part of a log entry, so
# they start as None and are filled in by the conversion loop.
match_player_item_purchase_keys = dict.fromkeys(("match_id", "player_slot", "account_id"))
match_player_item_purchase_keys.update({"item_key": "key", "time": "time"})
match_player_item_purchase = pd.DataFrame(columns=list(match_player_item_purchase_keys))

In [None]:
# Buyback table: the identifying columns are not part of a log entry, so they
# start as None and are filled in by the conversion loop.
match_player_buyback_keys = dict.fromkeys(("match_id", "player_slot", "account_id"))
match_player_buyback_keys.update({field: field for field in ("time", "slot")})
match_player_buyback = pd.DataFrame(columns=list(match_player_buyback_keys))

In [None]:
# Rune-log table: the identifying columns are not part of a log entry, so they
# start as None and are filled in by the conversion loop.
match_player_rune_log_keys = dict.fromkeys(("match_id", "player_slot", "account_id"))
match_player_rune_log_keys.update({"rune_key": "key", "time": "time"})
match_player_rune_log = pd.DataFrame(columns=list(match_player_rune_log_keys))

In [None]:
# Reset every output table to an empty frame that has only its column headers.

# Match metrics df
match_basics = pd.DataFrame(columns=list(match_basics_keys))
match_teams = pd.DataFrame(columns=list(match_teams_keys))
match_league = pd.DataFrame(columns=list(match_league_keys))
match_metrics = pd.DataFrame(columns=list(match_metrics_keys))
match_metrics_xp_gold_advantage = pd.DataFrame(columns=list(match_metrics_xp_gold_advantage_keys))
match_metrics_objectives = pd.DataFrame(columns=list(match_metrics_objectives_keys))

# Player metrics df
match_players = pd.DataFrame(columns=list(match_players_keys))
match_player_ability_upgrade = pd.DataFrame(columns=list(match_player_ability_upgrade_keys))
match_player_per_min_metrics = pd.DataFrame(columns=list(match_player_per_min_metrics_keys))
match_player_kills_log = pd.DataFrame(columns=list(match_player_kills_log_keys))
match_player_item_purchase = pd.DataFrame(columns=list(match_player_item_purchase_keys))
match_player_buyback = pd.DataFrame(columns=list(match_player_buyback_keys))
match_player_rune_log = pd.DataFrame(columns=list(match_player_rune_log_keys))

In [None]:
# Conversion
# limit caps how many matches the conversion loop handles (0 means no cap);
# counter tracks how many matches have been handled so far.
counter = 0
limit = 0

def set_match_player_info(obs, match_id, player_slot, account_id):
    """Stamp the identifying fields onto an observation dict.

    Writes "match_id", "player_slot" and "account_id" into `obs` in place and
    returns the same dict.
    """
    obs.update(
        {
            "match_id": match_id,
            "player_slot": player_slot,
            "account_id": account_id,
        }
    )
    return obs

def list_exist(json, key):
    """Return True only when `json[key]` exists and is a non-empty list."""
    if key not in json:
        return False
    candidate = json[key]
    return isinstance(candidate, list) and len(candidate) > 0

# Collect plain dict records per output table and build every DataFrame once
# after the loop. DataFrame.append was removed in pandas 2.0, and growing a
# frame row by row copies the whole frame on each call.
def _records_to_frame(records, keys):
    """Build a DataFrame with one row per record dict and one column per entry of `keys`.

    dtype=object keeps the extracted Python values as they are, so an integer
    column that contains None in some rows is not upcast to float.
    """
    return pd.DataFrame(records, columns=list(keys), dtype=object)

# Match-level record lists
match_basics_rows = []
match_teams_rows = []
match_league_rows = []
match_metrics_rows = []
match_metrics_xp_gold_advantage_rows = []
match_metrics_objectives_rows = []

# Player-level record lists
match_players_rows = []
match_player_ability_upgrade_rows = []
match_player_per_min_metrics_rows = []
match_player_kills_log_rows = []
match_player_item_purchase_rows = []
match_player_buyback_rows = []
match_player_rune_log_rows = []

# (key of the per-player log list, extraction spec, destination record list)
player_log_tables = (
    ("kills_log", match_player_kills_log_keys, match_player_kills_log_rows),
    ("purchase_log", match_player_item_purchase_keys, match_player_item_purchase_rows),
    ("buyback_log", match_player_buyback_keys, match_player_buyback_rows),
    ("runes_log", match_player_rune_log_keys, match_player_rune_log_rows),
)

for key in dpc_json:
    # Key "1" is dummy
    if key == "1":
        continue

    match = dpc_json[key]

    # Match Information

    # match basics
    match_basics_rows.append(get_observation_from_json(match, match_basics_keys))

    # store match_id for future uses
    match_id = match["match_id"]

    # match teams: the team objects carry no match_id, so stamp it on each row
    for side in ("radiant_team", "dire_team"):
        if side in match:
            obs = get_observation_from_json(match[side], match_teams_keys)
            obs["match_id"] = match_id
            match_teams_rows.append(obs)

    # match_league
    match_league_rows.append(get_observation_from_json(match, match_league_keys))

    # match_metrics
    match_metrics_rows.append(get_observation_from_json(match, match_metrics_keys))

    # match_metrics_xp_gold_advantage
    match_metrics_xp_gold_advantage_rows.append(
        get_observation_from_json(match, match_metrics_xp_gold_advantage_keys)
    )

    # match_metrics_objectives
    if list_exist(match, "objectives"):
        for objective in match["objectives"]:
            obs = get_observation_from_json(objective, match_metrics_objectives_keys)
            obs["match_id"] = match_id
            match_metrics_objectives_rows.append(obs)

    # Players specific information starts here
    if list_exist(match, "players"):
        for p in match["players"]:
            # match_players
            match_players_rows.append(get_observation_from_json(p, match_players_keys))

            # store player_slot and account_id for future use
            player_slot = p["player_slot"]
            account_id = p["account_id"]

            # match_player_ability_upgrade
            match_player_ability_upgrade_rows.append(
                get_observation_from_json(p, match_player_ability_upgrade_keys)
            )

            # match_player_per_min_metrics
            match_player_per_min_metrics_rows.append(
                get_observation_from_json(p, match_player_per_min_metrics_keys)
            )

            # kills / purchases / buybacks / runes: one row per log entry,
            # stamped with the owning match and player
            for log_name, log_keys, log_rows in player_log_tables:
                if list_exist(p, log_name):
                    for entry in p[log_name]:
                        obs = get_observation_from_json(entry, log_keys)
                        obs = set_match_player_info(obs, match_id, player_slot, account_id)
                        log_rows.append(obs)

    # For small scale test
    if limit > 0:
        counter = counter+1
        if counter >= limit:
            break

# Match metrics df
match_basics = _records_to_frame(match_basics_rows, match_basics_keys)
match_teams = _records_to_frame(match_teams_rows, match_teams_keys)
match_league = _records_to_frame(match_league_rows, match_league_keys)
match_metrics = _records_to_frame(match_metrics_rows, match_metrics_keys)
match_metrics_xp_gold_advantage = _records_to_frame(
    match_metrics_xp_gold_advantage_rows, match_metrics_xp_gold_advantage_keys
)
match_metrics_objectives = _records_to_frame(match_metrics_objectives_rows, match_metrics_objectives_keys)

# Player metrics df
match_players = _records_to_frame(match_players_rows, match_players_keys)
match_player_ability_upgrade = _records_to_frame(
    match_player_ability_upgrade_rows, match_player_ability_upgrade_keys
)
match_player_per_min_metrics = _records_to_frame(
    match_player_per_min_metrics_rows, match_player_per_min_metrics_keys
)
match_player_kills_log = _records_to_frame(match_player_kills_log_rows, match_player_kills_log_keys)
match_player_item_purchase = _records_to_frame(match_player_item_purchase_rows, match_player_item_purchase_keys)
match_player_buyback = _records_to_frame(match_player_buyback_rows, match_player_buyback_keys)
match_player_rune_log = _records_to_frame(match_player_rune_log_rows, match_player_rune_log_keys)


In [None]:
# Debug view: raw JSON of the last match the conversion loop visited (relies on
# the loop variable `key` still being defined).
dpc_json[key]

In [None]:
# Post Processing
# Each call replaces the table with its reshaped long form under the same name,
# so this cell must run exactly once after the conversion cell.

# Match
match_metrics_xp_gold_advantage = match_metrics_xp_gold_advantage_post_process(match_metrics_xp_gold_advantage)

# Player
match_player_ability_upgrade = match_player_ability_upgrade_post_process(match_player_ability_upgrade)
match_player_per_min_metrics = match_player_per_min_metrics_post_process(match_player_per_min_metrics)


In [None]:
# Preview the first rows of match_basics.
match_basics.head()

In [None]:
# Preview the first rows of match_teams.
match_teams.head()

In [None]:
# Preview the first rows of match_league.
match_league.head()

In [None]:
# Preview the first rows of match_metrics.
match_metrics.head()

In [None]:
# Preview the first rows of match_metrics_xp_gold_advantage.
match_metrics_xp_gold_advantage.head()

In [None]:
# Preview the first rows of match_metrics_objectives.
match_metrics_objectives.head()

In [None]:
# Preview the first rows of match_players.
match_players.head()

In [None]:
# Preview the first rows of match_player_ability_upgrade.
match_player_ability_upgrade.head()

In [None]:
# Preview the first rows of match_player_per_min_metrics.
match_player_per_min_metrics.head()

In [None]:
# Preview the first rows of match_player_kills_log.
match_player_kills_log.head()

In [None]:
# Preview the first rows of match_player_item_purchase.
match_player_item_purchase.head()

In [None]:
# Preview the first rows of match_player_buyback.
match_player_buyback.head()

In [None]:
# Preview the first rows of match_player_rune_log.
match_player_rune_log.head()

In [None]:
# Write Output
# Each table is written to its own CSV file in the working directory, without
# the index column and encoded as UTF-8.
csv_outputs = [
    # Match metrics df
    (match_basics, "match_basics.csv"),
    (match_teams, "match_teams.csv"),
    (match_league, "match_league.csv"),
    (match_metrics, "match_metrics.csv"),
    (match_metrics_xp_gold_advantage, "match_metrics_xp_gold_advantage.csv"),
    (match_metrics_objectives, "match_metrics_objectives.csv"),
    # Player metrics df
    (match_players, "match_players.csv"),
    (match_player_ability_upgrade, "match_player_ability_upgrade.csv"),
    (match_player_per_min_metrics, "match_player_per_min_metrics.csv"),
    (match_player_kills_log, "match_player_kills_log.csv"),
    (match_player_item_purchase, "match_player_item_purchase.csv"),
    (match_player_buyback, "match_player_buyback.csv"),
    (match_player_rune_log, "match_player_rune_log.csv"),
]

for frame, filename in csv_outputs:
    frame.to_csv(filename, index=False, encoding='utf-8')