In [1]:
# from psutil import virtual_memory
#
# gpu_info = !nvidia-smi
# gpu_info = '\n'.join(gpu_info)
# if gpu_info.find('failed') >= 0:
#   print('Not connected to a GPU')
# else:
#   print(gpu_info)
#
# ram_gb = virtual_memory().total / 1e9
# print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))
#
# if ram_gb < 20:
#   print('Not using a high-RAM runtime')
# else:
#   print('You are using a high-RAM runtime!')
#
# %tensorflow_version 2.x
# import tensorflow as tf
# print("Tensorflow version " + tf.__version__)
#
# try:
#   tpu = tf.distribute.cluster_resolver.TPUClusterResolver()  # TPU detection
#   print('Running on TPU ', tpu.cluster_spec().as_dict()['worker'])
# except ValueError:
#   raise BaseException('ERROR: Not connected to a TPU runtime; please see the previous cell in this notebook for instructions!')
#
# tf.config.experimental_connect_to_cluster(tpu)
# tf.tpu.experimental.initialize_tpu_system(tpu)
# tpu_strategy = tf.distribute.TPUStrategy(tpu)

In [2]:
%%capture
# install the latest version of kaggle_environments
!pip install kaggle_environments
!pip install gym
!pip install stable-baselines3

In [3]:
# import shutil
#
# shutil.rmtree('Kore_Bot', ignore_errors=True)
# !git clone -b RL-Main https://github.com/DerrianHarris/Kore_Bot.git

In [4]:
import os
# import sys
# from google.colab import drive

# Root directory under which the notebook keeps its log and model folders.
# Currently the working directory; the commented-out alternative at the end of
# the line points at a Google Drive folder instead.
DRIVE_LOC = './'#os.path.join('drive','MyDrive','Kore_Bot')
#
# drive.mount('/content/drive')
# sys.path.append('/content/Kore_Bot')

In [5]:
import gym
from KoreGymEnv import KoreGymEnv
from KoreGymEnvHelper import transform_observation, transform_actions

from kaggle_environments import make
from kaggle_environments.envs.kore_fleets.kore_fleets import random_agent, attacker_agent, balanced_agent, miner_agent

from stable_baselines3.common.env_checker import check_env
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.vec_env import VecCheckNan, VecNormalize, DummyVecEnv, VecMonitor, SubprocVecEnv
from stable_baselines3.common import results_plotter
from stable_baselines3.common.callbacks import CallbackList, EvalCallback, CheckpointCallback
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.utils import set_random_seed

from matplotlib import pyplot as plt

from tqdm.notebook import tqdm

import pandas as pd

from random import shuffle

from gym.envs.registration import register
import torch as th
import numpy as np

#th.autograd.set_detect_anomaly(True)
#np.seterr(all='raise')

# Upper bound on the number of steps in a single episode. Passed to the gym
# registration below as `max_episode_steps`.
MAX_EPISODE_STEPS = 400

# Register the Kore environment with gym so it can be created by id.
register(
    # unique identifier for the env, in `name-version` form
    id="Kore-v1",
    # class used to create the env
    # Note: entry_point also accepts a class as input (and not only a string)
    entry_point=KoreGymEnv,
    # maximum number of steps per episode
    max_episode_steps=MAX_EPISODE_STEPS,
)

# Build one environment against the random opponent and run the
# stable-baselines3 environment checker on it as a sanity check.
env = gym.make('Kore-v1',opponent=random_agent)#KoreGymEnv(random_agent)
check_env(env)

  f"It seems that your observation {key} is an image but the `dtype` "
  f"It seems that your observation space {key} is an image but the "
  "The minimal resolution for an image is 36x36 for the default `CnnPolicy`. "


In [11]:
# --- Output locations --------------------------------------------------------
# Everything is kept under DRIVE_LOC: logs in `log`, saved models in `input/model`.
LOG_DIR = os.path.join(DRIVE_LOC, 'log')
MODEL_DIR = os.path.join(DRIVE_LOC, 'input', 'model')
MODEL_FILE = 'best_model.zip'

# Sub-folders of MODEL_DIR: one for the best model, one for incremental checkpoints.
best_model_path, inc_model_path = (
    os.path.join(MODEL_DIR, folder) for folder in ('Best_Model', 'Inc_Models')
)
best_model_file = os.path.join(best_model_path, MODEL_FILE)

# Create the whole directory tree up front; folders that already exist are kept.
for _folder in (LOG_DIR, MODEL_DIR, best_model_path, inc_model_path):
    os.makedirs(_folder, exist_ok=True)

class ProgressBar(BaseCallback):
    """Training callback that displays a tqdm progress bar over the timestep budget.

    The bar is created when training starts (sized from the ``total_timesteps``
    entry of the callback's ``locals``), kept in sync with the number of
    timesteps collected so far, and closed when training ends.

    :param verbose: verbosity level forwarded to ``BaseCallback``
    """

    def __init__(self, verbose=0):
        super(ProgressBar, self).__init__(verbose)
        # Created in `_on_training_start`; None while no training run is active.
        self.pbar = None

    def _on_training_start(self):
        """Create the bar with the total number of timesteps to train for."""
        self.pbar = tqdm(total=self.locals['total_timesteps'])

    def _on_rollout_start(self):
        """Redraw the bar at the start of every rollout."""
        self.pbar.refresh()

    def _on_step(self):
        """Advance the bar to the current timestep count.

        :return: always ``True`` so that training continues; the callback
            contract treats a ``False`` result as a request to stop.
        """
        # The bar's total is expressed in timesteps, and one callback step can
        # account for several timesteps when training on a vectorised env, so
        # sync to `num_timesteps` instead of adding 1 per call.
        self.pbar.update(self.num_timesteps - self.pbar.n)
        return True

    def _on_training_end(self):
        """Close the bar; safe to call even if the bar was never created."""
        if self.pbar is not None:
            self.pbar.close()
            self.pbar = None

def get_actions(model, obs, config, deterministic=False):
    """Ask the model for its move and convert it into game actions.

    :param model: object exposing ``predict(observation, deterministic=...)``
    :param obs: raw observation, passed to both transform helpers
    :param config: environment configuration, passed to both transform helpers
    :param deterministic: forwarded unchanged to ``model.predict``
    :return: whatever ``transform_actions`` produces for the predicted actions
    """
    model_input = transform_observation(False, obs, config, 14)
    # predict() yields an (actions, state) pair; only the actions are used.
    predicted, _ = model.predict(model_input, deterministic=deterministic)
    return transform_actions(predicted, obs, config)

def run_test(model, opponent, deterministic=False):
    """Play one `kore_fleets` game with the model against `opponent` and render it.

    :param model: trained model used to choose actions via ``get_actions``
    :param opponent: agent occupying the other seat
    :param deterministic: forwarded to ``get_actions``
    """
    kore_env = make('kore_fleets', debug=True)
    game_config = kore_env.configuration

    # `None` is the seat driven through the trainer; seat order is randomised.
    seats = [None, opponent]
    shuffle(seats)
    print('Agents:', seats)

    trainer = kore_env.train(seats)
    observation = trainer.reset()

    # Keep stepping until the environment reports that the game is over.
    while not kore_env.done:
        next_actions = get_actions(
            model, observation, game_config, deterministic=deterministic
        )
        observation, _reward, _done, _info = trainer.step(next_actions)

    kore_env.render(mode='ipython', width=640, height=480)
  
def make_env(env_id, opponent, rank, seed=0):
    """
    Build an environment factory, e.g. for multiprocessed vectorised envs.

    :param env_id: (str) the environment ID
    :param opponent: opponent agent, forwarded to ``gym.make`` as ``opponent``
    :param rank: (int) index of the subprocess; added to ``seed`` for the env seed
    :param seed: (int) the initial seed for RNG
    :return: zero-argument callable that creates and seeds the environment
    """
    # Runs once, when the factory is built (not each time an env is created).
    set_random_seed(seed)

    def _init():
        created = gym.make(env_id, opponent=opponent)
        created.seed(seed + rank)
        return created

    return _init

def load_model(file, env):
    """Load a saved PPO model and attach an environment to it.

    :param file: path of the saved model, passed to ``PPO.load``
    :param env: environment handed to the loaded model's ``set_env``
    :return: the loaded model
    """
    restored = PPO.load(file)
    restored.set_env(env)
    return restored


In [8]:
# Opponent the agent trains against.
opponent = random_agent

# Training budget: number of games times the per-episode step cap.
NUM_GAMES = 1000
TIMESTEPS = NUM_GAMES * MAX_EPISODE_STEPS

# Number of parallel environments (hard-coded; os.cpu_count() is the alternative).
N_CPU = 8#os.cpu_count()
print('CPU Cores =', N_CPU)

if __name__ == '__main__':
    env = make_vec_env('Kore-v1',n_envs=N_CPU,env_kwargs={'opponent':opponent})
    try:
        # Resume from the best saved model when one is available.
        model = load_model(best_model_file,env)
        print('Loaded model')
    except Exception as load_error:
        # Best effort: any load failure falls back to a fresh model, but
        # KeyboardInterrupt/SystemExit are no longer swallowed and the reason
        # for the fallback is reported instead of being hidden.
        print(f'Could not load {best_model_file}: {load_error!r}')
        print('Making model')
        model = PPO(policy='MlpPolicy',
            env=env,
            verbose=0,
            n_steps=MAX_EPISODE_STEPS,
            batch_size=50,
            learning_rate=0.00025,
            policy_kwargs={'use_expln': True,'normalize_images':False, 'optimizer_class': th.optim.Adam, 'optimizer_kwargs' :{ 'eps' : 1e-04 }})



CPU Cores = 8
Making model


In [None]:
# Periodic checkpoints go to the incremental-model folder.
checkpoint_callback = CheckpointCallback(save_path=inc_model_path, save_freq=400)

# Periodic evaluation; the best model and the evaluation logs are written to disk.
eval_callback = EvalCallback(
    env,
    eval_freq=4000,
    log_path=LOG_DIR,
    best_model_save_path=best_model_path,
)

progressbar = ProgressBar()

# Callbacks run in this order on every step.
callback = CallbackList([progressbar, eval_callback, checkpoint_callback])
#np.set_printoptions(threshold=sys.maxsize)
model = model.learn(callback=callback, total_timesteps=TIMESTEPS)

In [None]:
# plt.style.use(['seaborn-whitegrid'])

# results_plotter.plot_results([LOG_DIR], TIMESTEPS, results_plotter.X_TIMESTEPS, 'Kore Timesteps')
# results_plotter.plot_results([LOG_DIR], TIMESTEPS, results_plotter.X_EPISODES, 'Kore Episodes')

In [None]:
# One monitor CSV is expected per parallel environment.
log_files = [os.path.join(LOG_DIR, f'{i}.monitor.csv') for i in range(N_CPU)]

for i, log_file in enumerate(log_files):
    # Environments without a monitor file are silently skipped.
    if not os.path.isfile(log_file):
        continue

    # The first line of the file is skipped before the CSV header.
    df = pd.read_csv(log_file, skiprows=1)

    fig = plt.figure(figsize=(8, 2))
    # Left panel: rolling mean of column 'r'; right panel: rolling mean of column 'l'.
    for panel, (column, caption) in enumerate(
        (('r', f'Rewards {i}'), ('l', f'Lengths {i}')), start=1
    ):
        plt.subplot(1, 2, panel, label=log_file)
        df[column].rolling(window=TIMESTEPS//1000).mean().plot(title=caption)

    plt.tight_layout()
    plt.show()

In [13]:
# Replay a single game using one specific checkpoint from the incremental-model folder.
eval_env = make_vec_env('Kore-v1', n_envs=1, env_kwargs={'opponent': opponent})
model_to_load = os.path.join(inc_model_path, 'rl_model_144000_steps.zip')

model = load_model(model_to_load, eval_env)
run_test(model, opponent, deterministic=False)

Agents: [<function random_agent at 0x0000025AFAE3E948>, None]
flight plan will be truncated: flight plan for 4 must be at most 3
flight plan will be truncated: flight plan for 4 must be at most 3
flight plan will be truncated: flight plan for 4 must be at most 3
flight plan will be truncated: flight plan for 1 must be at most 1
flight plan will be truncated: flight plan for 1 must be at most 1
flight plan will be truncated: flight plan for 1 must be at most 1
flight plan will be truncated: flight plan for 1 must be at most 1
flight plan will be truncated: flight plan for 1 must be at most 1
flight plan will be truncated: flight plan for 1 must be at most 1
flight plan will be truncated: flight plan for 5 must be at most 4
flight plan will be truncated: flight plan for 5 must be at most 4
flight plan will be truncated: flight plan for 5 must be at most 4
flight plan will be truncated: flight plan for 2 must be at most 2
flight plan will be truncated: flight plan for 2 must be at most 2
