In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from kaggle_environments import make
import sys

Loading environment football failed: No module named 'gfootball'


In [3]:
import os

# Directory appended to the import path ahead of the project-local imports
# that follow in this cell. Set the LUXAI_AGENTS_DIR environment variable to
# point at a different checkout; the original hard-coded location remains the
# default so existing setups keep working.
AGENTS_DIR = os.environ.get(
    'LUXAI_AGENTS_DIR',
    '/Users/jaime/Documents/MachineLearning/LuxAI/LuxAI-agents/',
)
# Guarded append: re-running the cell must not pile up duplicate entries.
if AGENTS_DIR not in sys.path:
    sys.path.append(AGENTS_DIR)
from agent_loader import AgentLoader
from agents.rl_agents import RLAgent
from agents.dqn import DQNAgent



## Proper training

In [29]:
"""
sources:
    - https://github.com/Kaggle/kaggle-environments#training
    - https://github.com/Lux-AI-Challenge/Lux-Design-2021/blob/master/analysis/.ipynb_checkpoints/stepandreset-checkpoint.ipynb
    - https://www.kaggle.com/aithammadiabdellatif/keras-lux-ai-reinforcement-learning/comments
"""

import datetime
import torch
from itertools import count

from lux.game import Game
from agents.dqn import DQNAgent

# Use the GPU when torch can see one; fall back to the CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# The learner is created for team 0 with training_mode switched on.
agent = DQNAgent(team=0, training_mode=True)

# Fixed-seed Lux AI 2021 environment.
# (Optional extra configuration keys: "loglevel": 2, "annotations": True.)
env = make("lux_ai_2021", configuration={"seed": 562124210}, debug=True)

# Slot 0 (None) is the agent being trained; its opponent is "simple_agent".
trainer = env.train([None, "simple_agent"])

num_episodes = 1
episode_stats = []  # one [final step, final reward, wall-clock seconds] row per episode
for i_episode in range(num_episodes):
    start_time = datetime.datetime.now()

    # New episode: reset the environment and rebuild the game state from the
    # first observation.
    obs = trainer.reset()
    game_state = Game()
    game_state._initialize(obs["updates"])
    game_state._update(obs["updates"][2:])
    game_state.id = obs.player

    # The initial state is an observation minus itself.
    o = agent.get_observation_as_tensor(game_state)
    state = o - o

    for t in count():
        # Act in the environment ------------------------------------
        actions, action_codes = agent.get_actions(game_state)
        obs, reward, done, info = trainer.step(actions)
        game_state._update(obs["updates"])

        # Build the transition for the replay memory ----------------
        # Terminal steps use the reward returned by the trainer; every other
        # step uses the reward carried by the observation.
        reward = torch.tensor([reward if done else obs['reward']], device=device)

        # The next state is a difference of observations, or None once the
        # episode has ended.
        # NOTE(review): agent.last_obs is presumably refreshed inside the
        # agent's own methods - confirm against agents/dqn.
        current_obs = agent.get_observation_as_tensor(game_state)
        next_state = None if done else current_obs - agent.last_obs

        agent.memory.push(state, action_codes, next_state, reward)
        state = next_state

        # One optimisation step (on the policy network).
        agent.optimize_model()

        if not done:
            continue

        # Episode over: record its statistics and leave the step loop.
        # (A replay could be saved here by writing env.render(mode="json")
        # to a file.)
        time_cost = datetime.datetime.now() - start_time
        episode_stats.append([obs["step"], reward.item(), time_cost.total_seconds()])
        break

    # Every agent.TARGET_UPDATE episodes, copy the policy network's weights
    # and biases into the target network.
    if i_episode % agent.TARGET_UPDATE == 0:
        agent.target_net.load_state_dict(agent.policy_net.state_dict())

print(f'COMPLETED {num_episodes} EPISODES OF TRAINING')
env.render(mode="ipython", width=1000, height=500)

{'remainingOverageTime': 60, 'step': 0, 'width': 32, 'height': 32, 'reward': 0, 'globalUnitIDCount': 2, 'globalCityIDCount': 2, 'player': 0, 'updates': ['0', '32 32', 'rp 0 0', 'rp 1 0', 'r wood 0 29 316', 'r wood 1 24 342', 'r wood 1 25 323', 'r uranium 1 31 332', 'r wood 2 24 329', 'r wood 2 25 397', 'r wood 2 26 364', 'r uranium 2 31 346', 'r wood 3 28 800', 'r wood 4 10 318', 'r wood 4 11 398', 'r uranium 4 17 336', 'r wood 4 27 800', 'r wood 4 28 800', 'r wood 5 10 308', 'r wood 5 11 326', 'r uranium 5 17 335', 'r coal 7 31 355', 'r coal 8 0 369', 'r coal 8 30 404', 'r coal 8 31 422', 'r wood 11 23 317', 'r wood 11 24 330', 'r wood 11 25 314', 'r wood 12 23 349', 'r wood 12 24 390', 'r wood 13 6 337', 'r wood 13 7 325', 'r wood 13 24 374', 'r wood 14 6 369', 'r wood 14 7 387', 'r coal 14 16 393', 'r coal 15 14 379', 'r coal 15 15 411', 'r coal 15 16 403', 'r coal 16 14 379', 'r coal 16 15 411', 'r coal 16 16 403', 'r wood 17 6 369', 'r wood 17 7 387', 'r coal 17 16 393', 'r wood 1

In [None]:
"""
https://pytorch.org/tutorials/beginner/saving_loading_models.html
"""
# Directory the checkpoint is written to (absolute path on the author's machine).
model_path = '/Users/jaime/Documents/MachineLearning/LuxAI/LuxAI-agents/models/'
# Timestamp formatted as YYYYMMDD-HHMMSS. The seconds field must be the
# upper-case %S directive: lower-case %s is a non-portable platform extension
# (epoch seconds where supported), not "seconds of the minute".
timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
model_name = f'dqn-target_net-{timestamp}.pt'

# Persist only the target network's weights (its state_dict) to the file
# <model_path><model_name>.
torch.save(agent.target_net.state_dict(), f'{model_path}{model_name}')

# Restoring later (template from the PyTorch saving/loading tutorial):
# model = TheModelClass(*args, **kwargs)
# model.load_state_dict(torch.load(model_path + model_name))
# model.eval()

## Test the agent loader

In [4]:
# NOTE(review): RandomContextualAgent is imported but not used in the cells
# visible here.
from agents.random_agents import RandomContextualAgent, RandomMaturingAgent
# Wrap RandomMaturingAgent in an AgentLoader and keep its `game_loop`
# attribute; the test-run cell passes it to `env.run` as the first agent.
# NOTE(review): this rebinds `agent`, the same name the DQN training section
# uses for its DQNAgent instance, so the two cells overwrite each other.
agent = AgentLoader(agent_class=RandomMaturingAgent).game_loop

In [24]:
# Smoke test: play the loaded agent against "simple_agent".
# No configuration overrides are passed; "seed", "loglevel" and "annotations"
# can be set in the configuration dict if needed.
env = make("lux_ai_2021", configuration={}, debug=True)

steps = env.run([agent, "simple_agent"])
# Print one less than the number of step records returned by the run.
print(len(steps) - 1)
env.render(mode="ipython", width=1000, height=800)

31


NameError: name 'game_state' is not defined

## Train agents

In [None]:
"""
sources:
    - https://github.com/Kaggle/kaggle-environments#training
    - https://github.com/Lux-AI-Challenge/Lux-Design-2021/blob/master/analysis/.ipynb_checkpoints/stepandreset-checkpoint.ipynb
    - https://www.kaggle.com/aithammadiabdellatif/keras-lux-ai-reinforcement-learning/comments
"""
from lux.game import Game

def _start_episode(trainer):
    """Reset `trainer` and build a fresh `Game` from its first observation.

    Args:
        trainer: the object returned by `env.train(...)`.

    Returns:
        An `(obs, game_state)` pair: the observation returned by
        `trainer.reset()` and a `Game` initialised from that observation's
        "updates", with `id` set to the observation's `player`.
    """
    obs = trainer.reset()
    game_state = Game()
    game_state._initialize(obs["updates"])
    game_state._update(obs["updates"][2:])
    game_state.id = obs.player
    return obs, game_state


# Initialise training agent
myagent = RLAgent(team=0)

# Make environment (no overrides; "seed", "loglevel" and "annotations" can be
# set in the configuration dict if needed)
env = make("lux_ai_2021", configuration={}, debug=True)

# Training agent in first position (player 1) against "simple_agent"
trainer = env.train([None, "simple_agent"])
obs, game_state = _start_episode(trainer)
eps = 1

max_steps = 359
# Defined up front so the summary after the loop is valid even when the loop
# body never runs.
reward, done = None, False
for turn in range(1, max_steps):

    # AGENT INTERACTION HERE
    # I guess reward shaping would happen inside get_actions
    actions = myagent.get_actions(game_state)

    obs, reward, done, info = trainer.step(actions)
    game_state._update(obs["updates"])

    if done:
        print(f'Episode {eps} complete.')
        print({"reward": reward,              # reward (final) is only given at the end of an episode (when "done")
               "obs_reward": obs["reward"],   # every observation (start of turn) also has an observation reward
               "obs_step": obs["step"]})      # print number of steps in this episode (to see if it made it to the end)

        # with open(f"replay-{eps}.json", "w") as f: # here we can save the replay as json for later
        #     f.write(env.render(mode="json"))

        # TODO: Perform parameter updates based on reward
        # reward_history = [ob['reward'] for ob in obs] # FIXME
        # reward_history.append(reward)  # Final reward
        # myagent.update_params(reward_history)

        # Re-init environment for the next episode
        obs, game_state = _start_episode(trainer)
        eps += 1

# Report the episode that was still running when the step budget ran out.
# If the very last step ended an episode, it has already been reported inside
# the loop, and `eps`/`obs` now describe a freshly reset episode, so nothing
# more is printed. The old unconditional "complete" message was wrong in both
# situations.
if not done:
    print(f'Episode {eps} still in progress (step budget exhausted).')
    print({"reward": reward,
           "obs_reward": obs["reward"],
           "obs_step": obs["step"]})
env.render(mode="ipython", width=1000, height=500)