上位チームの戦略を知ることは、シミュレーションコンペではとても重要です。過去のシミュレーションコンペ（Halite & Football）では、上位チームの対戦ログを学習したチームが金メダルを取得しています。
このnotebookでは、最上位チーム(nagiss)の対戦ログをダウンロードします。

Knowing the top teams' strategies is very important in simulation competitions. In past simulation competitions (Halite & Football), teams that learned from the match logs of the top teams won gold medals.
In this notebook, we download the match logs of the top team (nagiss).

https://www.kaggle.com/masatomatsui/santa-episode-scraper
https://www.kaggle.com/lebroschar/1000-greedy-decision-tree-model

In [None]:
import pandas as pd
import numpy as np
import os
import requests
import json
import datetime
import time

In [None]:
# Starting value of the request counter used to pace episode downloads.
BUFFER = 1
# Ignore games with agents whose initial score is below this value.
MIN_SCORE = 1150

# Endpoints of Kaggle's episode service: one lists episodes, one fetches a replay.
base_url = "https://www.kaggle.com/requests/EpisodeService/"
list_url = f"{base_url}ListEpisodes"
get_url = f"{base_url}GetEpisodeReplay"

In [None]:
# Initial team list: query ListEpisodes for one sample submission and keep the
# 'teams' table that comes back in the response.

r = requests.post(list_url, json = {"submissionId":  18703109}) # ID is sample value
rj = r.json()

# One row per entry of rj['result']['teams'].
teams_df = pd.DataFrame(rj['result']['teams'])

In [None]:
# Sort the team table in place by public leaderboard rank, then preview the
# first ten rows (the bare expression is shown as the notebook cell's output).
teams_df.sort_values('publicLeaderboardRank', inplace = True)
teams_df.head(10)

## Get top team (nagiss) episodes

In [None]:
def get_filename(ep_id):
    """Return the relative path under ``games/`` of the replay for ``ep_id``."""
    return f'games/{ep_id}.json'

def saveEpisode(sub_id, epid, rj):
    """Download the replay of one episode and write it to its replay file.

    Args:
        sub_id: Submission id the episode was listed under. Not used by the
            active code; only the disabled episode-info dump refers to it.
        epid: Episode id. Converted to ``int`` for the request payload and
            used to derive the output path via ``get_filename``.
        rj: Parsed ListEpisodes response. Not used by the active code; only
            the disabled episode-info dump refers to it.
    """
    # request
    response = requests.post(get_url, json={"EpisodeId": int(epid)})

    # Extract the replay BEFORE touching the file system: if the response is
    # not the expected JSON this raises here, and no empty file is left behind
    # that a later os.path.exists() check would mistake for a finished download.
    replay = response.json()['result']['replay']

    # save replay
    replay_path = get_filename(epid)
    # open() does not create missing parent directories.
    os.makedirs(os.path.dirname(replay_path) or '.', exist_ok=True)
    with open(replay_path, 'w') as f:
        f.write(replay)

    # save episode info (disabled)
    # with open('{}_{}_info.json'.format(sub_id, epid), 'w') as f:
    #     json.dump([r for r in rj['result']['episodes'] if r['id']==epid][0], f)

In [None]:
def saveEpisodesForSubmission(sub):
    """Download every not-yet-saved, high-scoring episode of one submission.

    Lists the submission's episodes, skips those where any agent has no
    initial score or an initial score below ``MIN_SCORE``, skips episodes
    whose replay file already exists, and saves the rest via ``saveEpisode``.
    Downloads are paced so that the number of requests (plus ``BUFFER``)
    never runs ahead of the number of seconds elapsed.

    Args:
        sub: Submission id; converted to ``int`` for the request payload.
    """
    start_time = datetime.datetime.now()
    r = BUFFER
    result = requests.post(list_url, json={"submissionId": int(sub)})
    team_json = result.json()
    team_df = pd.DataFrame(team_json['result']['episodes'])
    print('{} games for {}'.format(len(team_df), sub))

    for i in range(len(team_df)):
        # Check EVERY agent of the episode; the previous code tested
        # agents[0] twice and never looked at the opponent.
        scores = [agent['initialScore'] for agent in team_df.agents.iloc[i]]
        if any(score is None for score in scores):
            continue

        if any(score < MIN_SCORE for score in scores):
            continue

        epid = team_df.id.iloc[i]

        filename = get_filename(epid)
        if os.path.exists(filename):
            continue

        saveEpisode(sub, epid, team_json)
        r += 1
        try:
            size = os.path.getsize(filename) / 1e6
        except OSError:
            # getsize raises OSError when the file is missing or unreadable.
            print('file {}.json did not seem to save'.format(epid))
        else:
            print('Saved Episode #{} @ {:.1f}MB'.format(i, size), end='\r')

        # total_seconds() keeps counting past one day, whereas the
        # timedelta.seconds attribute wraps around at 86400.
        elapsed = (datetime.datetime.now() - start_time).total_seconds()
        if r > elapsed:
            time.sleep(r - elapsed)

    print('\r')
    

In [None]:
# Download the episodes of the first ten submissions listed in teams_df.
for sub_id in teams_df['publicLeaderboardSubmissionId'].head(10):
    saveEpisodesForSubmission(sub_id)

# Parsing Game Files

In [None]:
import json

def get_game_data(filename):
    """Parse one replay file into a per-agent, per-step action log.

    Args:
        filename: Path of a replay JSON file containing ``info``
            (``EpisodeId``, ``TeamNames``) and ``steps``.

    Returns:
        A DataFrame holding the rows of agent 0 followed by those of agent 1,
        with the columns ``game_id``, ``agent``, ``step``, ``action``,
        ``total`` (the reported reward), ``reward`` (change of ``total``
        since the previous row), ``prev_reward``, ``prev_action`` and
        ``prev_opp_action`` (int32). The "previous" columns are 0 on each
        agent's first row.
    """
    with open(filename) as f:
        data = json.load(f)

    game_id = data['info']['EpisodeId']
    agent_names = data['info']['TeamNames']
    print("Parsing game", game_id, agent_names)

    base_columns = ['game_id', 'agent', 'step', 'action', 'total']
    # skip first step because it has no data in it
    steps = data['steps'][1:]

    frames = []
    for agent in [0, 1]:
        d = {
            'game_id': [game_id] * len(steps),
            'agent': [agent_names[agent]] * len(steps),
            'step': [step[0]['observation']['step'] for step in steps],
            'action': [step[agent]['action'] for step in steps],
            'total': [step[agent]['reward'] for step in steps],
        }
        df_a = pd.DataFrame(data=d, columns=base_columns)

        df_a['reward'] = df_a.total - df_a.total.shift(1, fill_value=0)
        df_a['prev_reward'] = df_a.reward.shift(1, fill_value=0)
        df_a['prev_action'] = df_a.action.shift(1, fill_value=0)

        frames.append(df_a)

    # Pair each agent's frame with the opponent's frame directly instead of
    # masking on the team name, so two teams with the same name still work.
    # Converted to a list because the two frames are aligned by position.
    for own, opponent in zip(frames, reversed(frames)):
        own['prev_opp_action'] = opponent.action.shift(1, fill_value=0).tolist()

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    df = pd.concat(frames, ignore_index=True)
    df = df.astype({'prev_opp_action': 'int32'})
    return df

# Turns one replay file into per-machine feature rows for an ML model.
def get_training_data(filename):
    """Build the training table for a single replay file.

    The table starts with one row per machine holding its initial threshold
    (step 0, agent 0, all counters 0). Then, for every later step and for
    each agent, one row is added per entry of that step's ``lastActions``,
    describing the machine from that agent's point of view. Parsing stops at
    the first step where either agent has status ``'INVALID'``.

    Args:
        filename: Path of a replay JSON file containing ``info``,
            ``configuration`` (``banditCount``) and ``steps``.

    Returns:
        A DataFrame with the columns ``step``, ``machine``, ``agent``,
        ``n_pulls``, ``n_success``, ``n_opp_pulls``, ``streak``,
        ``opp_streak``, ``win_streak`` and ``threshold`` (observed threshold
        divided by 100).
    """
    with open(filename) as handle:
        replay = json.load(handle)

    episode_id = replay['info']['EpisodeId']
    team_names = replay['info']['TeamNames']
    bandit_count = replay['configuration']['banditCount']
    print("Parsing game", episode_id, team_names)

    column_names = ('step', 'machine', 'agent', 'n_pulls', 'n_success',
                    'n_opp_pulls', 'streak', 'opp_streak', 'win_streak',
                    'threshold')
    table = {name: [] for name in column_names}

    def add_row(*values):
        # Values are given in column_names order.
        for name, value in zip(column_names, values):
            table[name].append(value)

    # Per-agent running reward total and the gain seen on the latest step.
    total_reward = [0, 0]
    latest_gain = [0, 0]
    # Per-machine counters; every inner list is indexed by agent (0 or 1).
    machines = [
        {'pulls': [0, 0], 'success': [0, 0], 'streak': [0, 0], 'win_streak': [0, 0]}
        for _ in range(bandit_count)
    ]

    # Seed the table with the initial threshold of every machine.
    for machine in range(bandit_count):
        initial = replay['steps'][0][0]['observation']['thresholds'][machine] / 100
        add_row(0, machine, 0, 0, 0, 0, 0, 0, 0, initial)

    for step in replay['steps'][1:]:
        # Give up on the rest of the game once an agent made an invalid move.
        if any(step[agent]['status'] == 'INVALID' for agent in (0, 1)):
            break

        # Reward gained on this step = reported total minus previous total.
        for agent in (0, 1):
            reward = step[agent]['reward']
            latest_gain[agent] = reward - total_reward[agent]
            total_reward[agent] = reward

        for agent in (0, 1):
            chosen = step[agent]['action']
            picked = machines[chosen]
            picked['pulls'][agent] += 1
            picked['success'][agent] += latest_gain[agent]

            # The streak grows on the chosen machine and resets on all others.
            for index, state in enumerate(machines):
                state['streak'][agent] = state['streak'][agent] + 1 if index == chosen else 0

            # The win streak is only tracked on the machine that was pulled.
            picked['win_streak'][agent] = picked['win_streak'][agent] + 1 if latest_gain[agent] else 0

        # One row per agent for each machine listed in lastActions.
        observation = step[0]['observation']
        for agent in (0, 1):
            opponent = 1 - agent
            for machine in observation['lastActions']:
                state = machines[machine]
                add_row(
                    observation['step'],
                    machine,
                    agent,
                    state['pulls'][agent],
                    state['success'][agent],
                    state['pulls'][opponent],
                    state['streak'][agent],
                    state['streak'][opponent],
                    state['win_streak'][agent],
                    observation['thresholds'][machine] / 100,
                )

    return pd.DataFrame(data=table)
    

In [None]:
# Load training game files into one data frame.
# Frames are collected across EVERY directory os.walk visits and concatenated
# once, so a nested directory can no longer overwrite the frames gathered from
# an earlier one. File names are sorted so the row order, and therefore the
# 1,000,000-row subset written below, is reproducible between runs.
game_frames = []
for dirname, _, filenames in os.walk('games'):
    print(f"Found {len(filenames)} game files")
    game_frames.extend(
        get_training_data(os.path.join(dirname, f)) for f in sorted(filenames)
    )

# Fail with a clear message instead of a NameError on an unset train_data.
if not game_frames:
    raise FileNotFoundError("No game files found under 'games'")
train_data = pd.concat(game_frames, ignore_index=True)

print(f"\nFound {train_data.shape[0]} training rows")
train_data.to_parquet('training_data_win_streak.parquet')
train_data.head(1000000).to_parquet('training_data_win_streak_1_000_000.parquet')