In [1]:
%%capture
# install the latest version of kaggle_environments
!pip install --upgrade kaggle_environments

In [3]:
import os
from KoreGymEnv import KoreGymEnv
from kaggle_environments import make
from kaggle_environments.envs.kore_fleets.kore_fleets import random_agent, attacker_agent, balanced_agent, miner_agent

from stable_baselines3.common.env_checker import check_env
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common import results_plotter

from matplotlib import pyplot as plt

from tqdm.notebook import tqdm

import pandas as pd

from random import shuffle
from KoreGymEnvHelper import transform_observation, transform_actions

# Number of logical CPUs reported by the OS; in the visible cells it is only printed.
N_CPU = os.cpu_count()
print('CPU Cores =', N_CPU)

# Build one environment against the random opponent and run the Stable-Baselines3
# environment checker on it before any training happens.
# NOTE(review): the recorded output of this cell ends with
# "TypeError: only integer scalar arrays can be converted to a scalar index",
# so this check was not passing when the notebook was last executed — confirm
# where the error originates (the traceback is not part of the saved output).
test_env = KoreGymEnv(random_agent)
check_env(test_env)

CPU Cores = 16


TypeError: only integer scalar arrays can be converted to a scalar index

In [2]:
# Directory that receives the Monitor log files created in make_env().
LOG_DIR = './log/'
# Directory and file name from which a previously trained model is loaded.
MODEL_DIR = '../input/model'
MODEL_FILE = 'model.pkl'
# Create both directories up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)

def make_env(opponent, rank=0):
    """Return a zero-argument factory that builds one monitored Kore environment.

    Nothing is constructed when ``make_env`` itself is called; the environment
    is only created once the returned callable is invoked.

    Args:
        opponent: Agent handed straight through to ``KoreGymEnv``.
        rank: Identifier used (as a string) for the Monitor log path inside
            ``LOG_DIR``.

    Returns:
        A callable taking no arguments that creates ``KoreGymEnv(opponent)``
        wrapped in a ``Monitor`` with ``allow_early_resets=True``.
    """
    def _build_monitored_env():
        # Arguments are evaluated left to right: the environment is built
        # first, then the log path is derived from LOG_DIR and the rank.
        return Monitor(
            KoreGymEnv(opponent),
            os.path.join(LOG_DIR, str(rank)),
            allow_early_resets=True,
        )

    return _build_monitored_env

class ProgressBar(BaseCallback):
    """Stable-Baselines3 callback that displays training progress as a tqdm bar.

    The bar is created when training starts (sized from the ``total_timesteps``
    entry of ``self.locals``), advanced on every callback step, redrawn at the
    start of each rollout and closed when training ends.

    Args:
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, verbose=0):
        super().__init__(verbose)
        # Created in _on_training_start; None whenever no training is running.
        self.pbar = None

    def _on_training_start(self):
        """Create the progress bar sized to the requested number of timesteps."""
        self.pbar = tqdm(total=self.locals['total_timesteps'])

    def _on_rollout_start(self):
        """Force a redraw so the bar is current when a new rollout begins."""
        self.pbar.refresh()

    def _on_step(self):
        """Advance the bar and signal that training should continue.

        Returns:
            bool: Always True. The callback contract expects a boolean here and
            treats a falsy value as a request to stop training; the previous
            implicit ``None`` return could therefore abort training after the
            first step, depending on the Stable-Baselines3 version.
        """
        # One callback step advances every sub-environment of the vectorised
        # training env by one timestep, so count all of them.
        self.pbar.update(self.training_env.num_envs)
        return True

    def _on_training_end(self):
        """Close the bar, tolerating the case where it was never created."""
        if self.pbar is not None:
            self.pbar.close()
            self.pbar = None

def get_actions(model, obs, config, deterministic=False):
    """Ask the model for its next move and convert it into game actions.

    Args:
        model: Object exposing ``predict(observation, deterministic=...)``
            that returns a pair whose first element is the predicted action.
        obs: Raw observation, passed to both helper transforms.
        config: Environment configuration, passed to both helper transforms.
        deterministic: Forwarded unchanged to ``model.predict``.

    Returns:
        Whatever ``transform_actions`` produces for the predicted action.
    """
    # The leading False and trailing 12 are passed exactly as before; their
    # meaning is defined by KoreGymEnvHelper — TODO confirm and name them.
    features = transform_observation(False, obs, config, 12)
    predicted, _unused_state = model.predict(features, deterministic=deterministic)
    return transform_actions(predicted, obs, config)

def run_test(model, opponent, deterministic=False):
    """Play one ``kore_fleets`` game with the model against ``opponent`` and render it.

    Args:
        model: Trained model; its actions are obtained through ``get_actions``.
        opponent: Agent occupying the other seat of the game.
        deterministic: Forwarded to ``get_actions`` on every turn.

    Returns:
        None. The list of agents is printed and the finished game is rendered
        with ``mode='ipython'``.
    """
    kore_env = make('kore_fleets', debug=True)
    game_config = kore_env.configuration

    # One seat is None and the other holds the opponent; the order is
    # randomised on every call. Presumably the None seat is the one driven
    # through the trainer below — confirm against kaggle_environments.
    seats = [None, opponent]
    shuffle(seats)
    print('Agents:', seats)

    trainer = kore_env.train(seats)
    observation = trainer.reset()

    # Keep stepping until the environment itself reports the game as finished.
    while not kore_env.done:
        next_actions = get_actions(model, observation, game_config,
                                   deterministic=deterministic)
        observation, _reward, _done, _info = trainer.step(next_actions)

    kore_env.render(mode='ipython', width=640, height=480)


In [4]:
# Opponent used both for training and for the test games further down.
opponent = random_agent
env = DummyVecEnv([make_env(opponent)])
model_path = os.path.join(MODEL_DIR, MODEL_FILE)
try:
    # Resume from a previously saved model when one can be loaded.
    model = PPO.load(model_path)
    model.set_env(env)
    print('Loaded model')

except Exception as load_error:
    # Best-effort fallback: any failure to load means a fresh model is built.
    # `except Exception` (instead of a bare `except:`) lets KeyboardInterrupt
    # and SystemExit propagate, and the reason for the fallback is reported
    # rather than silently discarded.
    print('Making model')
    print(f'  (could not load {model_path!r}: {load_error!r})')
    model = PPO(policy=MlpPolicy,
                env=env,
                verbose=0,
                n_steps=400,
                seed=None,
                batch_size=50,
                _init_setup_model=True,
                learning_rate=0.00025,
                gamma=0.99,
                ent_coef=0.01,
                vf_coef=0.5,
                max_grad_norm=0.5,
                policy_kwargs=None,
                tensorboard_log=None)

Making model


In [5]:
# Total number of timesteps requested from learn(); also reused by the plotting cells.
TIMESTEPS = 1000000
progressbar = ProgressBar()
# The return value of learn() is rebound to `model`; progress is reported via the callback.
model = model.learn(total_timesteps=TIMESTEPS, callback=progressbar)

# NOTE(review): this saves to MODEL_FILE in the current working directory, while the
# loading cell reads os.path.join(MODEL_DIR, MODEL_FILE) — confirm the saved file is
# meant to be moved there by hand before the next run, otherwise it is never reloaded.
model.save(MODEL_FILE)

  0%|          | 0/5 [00:00<?, ?it/s]

[ 0  1 18  9  1 45]
[ 0  1 18  1  1 34]
[1 1 1 1 1 4]
[ 0  0 14  8  1 39]
[ 1  0 18  3  0 67]
[ 1  0  1 10  1 66]
[ 0  0  7  1  0 32]
[ 2  1  9  1  1 92]
flight plan will be truncated: flight plan for 1 must be at most 1
flight plan will be truncated: flight plan for 1 must be at most 1
flight plan will be truncated: flight plan for 1 must be at most 1
[2 0 9 7 1 9]
[ 2  1 19 13  1 97]
[ 2  0  4 19  1  6]
[ 1  1 16 14  0 66]
[ 1  1  7 15  0 39]
[ 0  1  6  0  1 75]
[ 2  1  0 12  1 46]
flight plan will be truncated: flight plan for 1 must be at most 1
flight plan will be truncated: flight plan for 1 must be at most 1
flight plan will be truncated: flight plan for 1 must be at most 1
[ 0  1 13  5  0 70]
[ 2  1  0  3  1 17]
[ 1  0 18 17  0 60]
[ 2  1 19  4  1 48]
flight plan will be truncated: flight plan for 1 must be at most 1
flight plan will be truncated: flight plan for 1 must be at most 1
flight plan will be truncated: flight plan for 1 must be at most 1
[ 0  1 16  4  0 11]
[ 1  1 16

KeyboardInterrupt: 

In [None]:


# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and later
# releases no longer accept the old names, so use whichever name this
# installation provides (falling back to the original name otherwise).
WHITEGRID_STYLE = 'seaborn-v0_8-whitegrid'
if WHITEGRID_STYLE not in plt.style.available:
    WHITEGRID_STYLE = 'seaborn-whitegrid'
plt.style.use([WHITEGRID_STYLE])

# Plot the Monitor logs found in LOG_DIR twice: once against timesteps and
# once against episodes.
results_plotter.plot_results([LOG_DIR], TIMESTEPS,
                             results_plotter.X_TIMESTEPS, 'Kore Timesteps')

results_plotter.plot_results([LOG_DIR], TIMESTEPS,
                             results_plotter.X_EPISODES, 'Kore Episodes')

In [None]:
# Monitor appends ".monitor.csv" to the log path it is given, so the files in
# LOG_DIR are named "<rank>.monitor.csv" rather than plain "monitor.csv".
# Looking only for LOG_DIR/monitor.csv therefore found nothing and the cell
# silently produced no plots; collect every "*monitor.csv" file instead.
if os.path.isdir(LOG_DIR):
    log_files = sorted(
        os.path.join(LOG_DIR, file_name)
        for file_name in os.listdir(LOG_DIR)
        if file_name.endswith('monitor.csv')
        and os.path.isfile(os.path.join(LOG_DIR, file_name))
    )
else:
    log_files = []

# Size of the rolling-mean window, in episodes (one row of the log per episode).
ROLLING_WINDOW = TIMESTEPS // 1000

for i, log_file in enumerate(log_files):
    # The first line of a Monitor file is a metadata comment, hence skiprows=1.
    df = pd.read_csv(log_file, skiprows=1)

    # Left panel: smoothed episode reward ('r'); right panel: smoothed episode length ('l').
    fig, (reward_ax, length_ax) = plt.subplots(1, 2, figsize=(8, 2))
    df['r'].rolling(window=ROLLING_WINDOW).mean().plot(ax=reward_ax, title=f'Rewards {i}')
    df['l'].rolling(window=ROLLING_WINDOW).mean().plot(ax=length_ax, title=f'Lengths {i}')

    fig.tight_layout()
    plt.show()

In [None]:
# Play and render one game against `opponent`, requesting deterministic predictions.
run_test(model,opponent,deterministic=True)

In [None]:
# Play and render one game against `opponent`, with deterministic=False passed to the model.
run_test(model, opponent,deterministic=False)

In [None]:
# Another deterministic game. `opponent` is a required argument of run_test and
# the Python boolean literal is `True` (the name `TRUE` is not defined).
run_test(model, opponent, deterministic=True)

In [None]:
# Another non-deterministic game. `opponent` is a required argument of run_test,
# so omitting it raised a TypeError before the game could start.
run_test(model, opponent, deterministic=False)