In [8]:
import torch
import torch.nn.functional as F


logits = torch.tensor([float("-inf"), 0., 2., 5.])
policy = F.softmax(logits, dim=-1)
log_policy = F.log_softmax(logits, dim=-1)
log_policy_masked_zeroed = torch.where(
    log_policy.isneginf(),
    torch.zeros_like(log_policy),
    log_policy
)

logits, policy, log_policy, policy * log_policy_masked_zeroed

(tensor([-inf, 0., 2., 5.]),
 tensor([0.0000, 0.0064, 0.0471, 0.9465]),
 tensor([   -inf, -5.0550, -3.0550, -0.0550]),
 tensor([ 0.0000, -0.0322, -0.1440, -0.0520]))

In [8]:
import gym
import numpy as np
import torch

p = 2
x = 32
y = 32

action_space = gym.spaces.Dict({
    "worker": gym.spaces.MultiDiscrete(np.zeros((4, p, x, y), dtype=int) + 8),
    "cart": gym.spaces.MultiDiscrete(np.zeros((4, p, x, y), dtype=int) + 4),
    "city_tile": gym.spaces.MultiDiscrete(np.zeros((1, p, x, y), dtype=int) + 2),
})

{
    key: (*space.shape, 1) for key, space in action_space.spaces.items()
}

{'cart': (4, 2, 32, 32, 1),
 'city_tile': (1, 2, 32, 32, 1),
 'worker': (4, 2, 32, 32, 1)}

In [2]:
import atexit
import gym
import json
from kaggle_environments import make
import math
import numpy as np
from pathlib import Path
import random
from scipy.stats import rankdata
from subprocess import Popen, PIPE
import sys
import tqdm
from typing import *

from lux_ai.lux.game import Game
from lux_ai.lux_gym.lux_env import LuxEnv
from lux_ai.lux_gym.obs_spaces import ObsSpace

DIR_PATH = Path("/home/isaiah/GitHub/Kaggle/Lux_AI_2021/lux_ai/")
dimension_process = Popen(
    ["node", str(DIR_PATH / "dimensions/main.js")],
    stdin=PIPE,
    stdout=PIPE
)

env = make("lux_ai_2021")
env.reset()

initiate = {
    "type": "start",
    "agent_names": [], # unsure if this is provided?
    "config": env.configuration
}
dimension_process.stdin.write((json.dumps(initiate) + "\n").encode())
dimension_process.stdin.flush()

agent1res = json.loads(dimension_process.stdout.readline())
agent2res = json.loads(dimension_process.stdout.readline())

game_state = Game()
game_state._initialize(agent1res)
game_state._update(agent1res)

Loading environment football failed: No module named 'gfootball'


In [6]:
class Observation(Dict[str, any]):
    def __init__(self, player=0):
        self.player = player


def generate_replay(seed: int, all_actions: list[list[str]]) -> dict:
    replay = {}
    replay["seed"] = seed
    replay["allCommands"] = []
    # Always random, right?
    replay["mapType"] = "random"
    replay["teamDetails"] = [{"name": "", "tournamentID": ""} for _ in range(2)]
    # TODO: This will not remain correct
    replay["version"] = "1.2.1"
    replay["results"] = {
        # TODO: These ranks are bogus
        "ranks": [{"rank": i+1, "agentID": i} for i in range(2)],
        "replayFile": ""
    }
    for both_actions in all_actions:
        commands = []
        for i, player_actions in enumerate(both_actions):
            commands.extend([
                {"command": act, "agentID": i}
                for act in player_actions
            ])
        replay["allCommands"].append(commands)
    return replay


def run_game(agent_constructors, seed = None) -> dict:
    # Initialize environment
    if seed is None:
        env.configuration["seed"] += 1
    else:
        env.configuration["seed"] = seed
    initiate = {
        "type": "start",
        "agent_names": [], # unsure if this is provided?
        "config": env.configuration
    }
    dimension_process.stdin.write((json.dumps(initiate) + "\n").encode())
    dimension_process.stdin.flush()

    agent1res = json.loads(dimension_process.stdout.readline())
    agent2res = json.loads(dimension_process.stdout.readline())
    
    # Initialize agents
    obses = [Observation(0), Observation(1)]
    obses[0]["updates"] = agent1res
    obses[1]["updates"] = agent2res
    agents = [ac(obs, None) for ac, obs in zip(agent_constructors, obses)]
    
    all_actions = []
    with tqdm.tqdm() as pbar:
        match_status = {
            "status": None
        }
        while match_status["status"] != "finished":
            actions = [a(obs, None) for a, obs in zip(agents, obses)]
            all_actions.append(actions)
            dimension_process.stdin.write((json.dumps([{"action": act} for act in actions]) + "\n").encode())
            dimension_process.stdin.flush()

            agent1res = json.loads(dimension_process.stdout.readline())
            agent2res = json.loads(dimension_process.stdout.readline())
            match_status = json.loads(dimension_process.stdout.readline())
            obses = [Observation(0), Observation(1)]
            obses[0]["updates"] = agent1res
            obses[1]["updates"] = agent2res
            pbar.update()
    
    return generate_replay(env.configuration["seed"], all_actions)

In [8]:
from lux_ai.rl_agent import rl_agent

custom_replay = run_game([rl_agent.RLAgent, rl_agent.RLAgent], seed=42)
with open("/home/isaiah/lux_ai/replays/notebook_replay.json", "w") as f:
    f.write(json.dumps(custom_replay))
custom_replay

235it [00:11, 20.51it/s]


{'seed': 42,
 'allCommands': [[{'command': 'm u_1 e', 'agentID': 0},
   {'command': 'r 1 9', 'agentID': 0},
   {'command': "dst 'Turn: 2'", 'agentID': 0},
   {'command': 'p u_2', 'agentID': 1},
   {'command': 'r 14 9', 'agentID': 1},
   {'command': "dst 'Turn: 2'", 'agentID': 1}],
  [{'command': 'm u_1 s', 'agentID': 0},
   {'command': "dst 'Turn: 3'", 'agentID': 0},
   {'command': 'm u_2 e', 'agentID': 1},
   {'command': "dst 'Turn: 3'", 'agentID': 1}],
  [{'command': "dst 'Turn: 4'", 'agentID': 0},
   {'command': 'm u_2 w', 'agentID': 1},
   {'command': "dst 'Turn: 4'", 'agentID': 1}],
  [{'command': 'bcity u_1', 'agentID': 0},
   {'command': "dst 'Turn: 5'", 'agentID': 0},
   {'command': "dst 'Turn: 5'", 'agentID': 1}],
  [{'command': 'bcity u_1', 'agentID': 0},
   {'command': "dst 'Turn: 6'", 'agentID': 0},
   {'command': 'm u_2 e', 'agentID': 1},
   {'command': "dst 'Turn: 6'", 'agentID': 1}],
  [{'command': 'bcity u_1', 'agentID': 0},
   {'command': "dst 'Turn: 7'", 'agentID': 0}

In [5]:
with open("/home/isaiah/lux_ai/replays/1628197737825_f8GCRakX9s8f.json", "r") as f:
    replay = json.loads(f.readline())
replay

{'seed': 479063488,
 'allCommands': [[{'command': 'r 5 8', 'agentID': 0},
   {'command': 'm u_1 c', 'agentID': 0},
   {'command': "dt 5 8 'Can_act:_True' 16", 'agentID': 0},
   {'command': "dst 'Turn: 2'", 'agentID': 0},
   {'command': 'r 5 23', 'agentID': 1},
   {'command': "dt 5 23 'Can_act:_True' 16", 'agentID': 1},
   {'command': "dst 'Turn: 2'", 'agentID': 1}],
  [{'command': 'm u_1 c', 'agentID': 0},
   {'command': "dt 5 8 'Can_act:_True' 16", 'agentID': 0},
   {'command': "dst 'Turn: 3'", 'agentID': 0},
   {'command': "dt 5 23 'Can_act:_True' 16", 'agentID': 1},
   {'command': "dst 'Turn: 3'", 'agentID': 1}],
  [{'command': 'm u_1 c', 'agentID': 0},
   {'command': "dt 5 8 'Can_act:_True' 16", 'agentID': 0},
   {'command': "dst 'Turn: 4'", 'agentID': 0},
   {'command': "dt 5 23 'Can_act:_True' 16", 'agentID': 1},
   {'command': "dst 'Turn: 4'", 'agentID': 1}],
  [{'command': 'm u_1 w', 'agentID': 0},
   {'command': "dt 5 8 'Can_act:_True' 16", 'agentID': 0},
   {'command': "dst '

In [2]:
import json
from kaggle_environments import make
import tqdm

from lux_ai.rl_agent import rl_agent

env = make("lux_ai_2021", debug=True)
obses = env.reset()
while obses[0]["observation"]["updates"][1] != "32 32":
    env = make("lux_ai_2021", debug=True)
    obses = env.reset()

all_actions = []
agents = [rl_agent.RLAgent(o["observation"], None) for o in obses]
with tqdm.tqdm() as pbar:
    while not env.done:
        actions = [a(o["observation"], None) for a, o in zip(agents, obses)]
        all_actions.append(actions)
        
        obses = env.step(actions)
        pbar.update()

31it [00:01, 16.11it/s]


In [5]:
agents[0].game_state.players[0].cities["c_1"].citytiles[0].pos

Position: (4, 6)

In [4]:
env.render(mode='ipython', width=1200, height=1000)
#
#json.loads(env.render(mode='json'))["steps"]

In [13]:
#env.render(mode='html')
#env.render(mode='ipython', height=700, width=800)
with open("/home/isaiah/lux_ai/replays/TEMP.json", 'w') as f:
    f.write(env.render(mode="json"))

In [16]:
json.loads(env.render(mode="json"))

{'configuration': {'actTimeout': 1,
  'episodeSteps': 361,
  'mapType': 'random',
  'runTimeout': 1200,
  'seed': 366889077},
 'description': 'A Novel AI Programming Challenge about Lux',
 'id': '7cca92da-f61d-11eb-a3b1-a85e45512f7a',
 'info': {},
 'name': 'lux_ai_2021',
 'rewards': [0, 0],
 'schema_version': 1,
 'specification': {'action': {'default': 0,
   'description': 'Actions',
   'type': 'array'},
  'agents': [2],
  'configuration': {'actTimeout': {'default': 1,
    'description': 'Maximum runtime (seconds) to obtain an action from an agent.',
    'minimum': 0,
    'type': 'number'},
   'episodeSteps': {'default': 361,
    'description': 'Maximum number of steps the environment can run. Total is this number -1. One complete game is 360 steps',
    'minimum': 2,
    'type': 'integer'},
   'mapType': {'default': 'random',
    'description': 'Map type to use',
    'type': 'string'},
   'parameters': {'description': 'Parameters for Lux AI 2021',
    'type': 'object'},
   'runTimeout

In [14]:
gym.spaces.MultiBinary([10], dtype=np.int64).dtype

TypeError: __init__() got an unexpected keyword argument 'dtype'

In [29]:
env.game_state.map.height, env.game_state.map.width, env.info.keys()

(32, 12, dict_keys(['actions_taken', 'available_actions_mask']))

In [31]:
env.game_state.players[0].units[0].pos

Position: (2, 14)

In [27]:
for i in range(1, 361):
    if (i - 1) % 40 >= 30:
        time = "NIGHT"
    else:
        time = "DAY"
    print(f"{i}: {time}")

1: DAY
2: DAY
3: DAY
4: DAY
5: DAY
6: DAY
7: DAY
8: DAY
9: DAY
10: DAY
11: DAY
12: DAY
13: DAY
14: DAY
15: DAY
16: DAY
17: DAY
18: DAY
19: DAY
20: DAY
21: DAY
22: DAY
23: DAY
24: DAY
25: DAY
26: DAY
27: DAY
28: DAY
29: DAY
30: DAY
31: NIGHT
32: NIGHT
33: NIGHT
34: NIGHT
35: NIGHT
36: NIGHT
37: NIGHT
38: NIGHT
39: NIGHT
40: NIGHT
41: DAY
42: DAY
43: DAY
44: DAY
45: DAY
46: DAY
47: DAY
48: DAY
49: DAY
50: DAY
51: DAY
52: DAY
53: DAY
54: DAY
55: DAY
56: DAY
57: DAY
58: DAY
59: DAY
60: DAY
61: DAY
62: DAY
63: DAY
64: DAY
65: DAY
66: DAY
67: DAY
68: DAY
69: DAY
70: DAY
71: NIGHT
72: NIGHT
73: NIGHT
74: NIGHT
75: NIGHT
76: NIGHT
77: NIGHT
78: NIGHT
79: NIGHT
80: NIGHT
81: DAY
82: DAY
83: DAY
84: DAY
85: DAY
86: DAY
87: DAY
88: DAY
89: DAY
90: DAY
91: DAY
92: DAY
93: DAY
94: DAY
95: DAY
96: DAY
97: DAY
98: DAY
99: DAY
100: DAY
101: DAY
102: DAY
103: DAY
104: DAY
105: DAY
106: DAY
107: DAY
108: DAY
109: DAY
110: DAY
111: NIGHT
112: NIGHT
113: NIGHT
114: NIGHT
115: NIGHT
116: NIGHT
117: NIGHT
1

In [11]:
cell = game_state.map.get_cell_by_pos(game_state.players[0].cities['c_1'].citytiles[0].pos)
cell.has_resource():

False

In [132]:
import itertools

for cell in itertools.chain(*game_state.map.map):
    pass

In [135]:
cell.road

0

In [107]:
player = game_state.players[0]
unit = player.units[0]
unit.cargo.coal

0

In [39]:
env.reset()
while not env.done:
    if (env.game_state.turn // 2) % 2 == 0:
        move = 'n'
    else:
        move = 's'
    # print(move)
    if len(env.game_state.players[0].units) > 0:
        actions = [[env.game_state.players[0].units[0].move(move)], []]
    else:
        actions = [[], []]
    returned = env.step(actions)

In [40]:
env.game_state.players[1].cities, env.game_state.players[0].units, returned

({}, [], (<lux_ai.lux.game.Game at 0x7f399f349640>, [0.0, 0.0], True, {}))

In [25]:
state = [{
    'action': [p.units[0].move('n'), list(p.cities.values())[0].citytiles[0].research()],
    #'reward': 0,
    #'info': {},
    #'observation': {},
    #'status': {}
} for p in game_state.players] * 2
print(state)
dimension_process.stdin.write((json.dumps(state) + "\n").encode())
dimension_process.stdin.flush()

[{'action': ['m u_1 n', 'r 5 7']}, {'action': ['m u_2 n', 'r 10 7']}, {'action': ['m u_1 n', 'r 5 7']}, {'action': ['m u_2 n', 'r 10 7']}]


In [24]:
[l for l in json.loads(dimension_process.stdout.readline()) if l.startswith('u') or l.startswith('rp')]

[]

In [28]:
json.loads(dimension_process.stdout.readline())

{'status': 'running', 'turn': 1, 'max': 361}

'm u_1 NORTH'

In [43]:
dimension_process.stdin.write((json.dumps(env.state) + "\n").encode())

2856

In [51]:
dimension_process.stdout.readline()

b'["rp 0 0","rp 1 0","r uranium 0 2 270","r coal 0 4 336","r coal 1 3 426","r coal 1 4 369","r coal 2 11 312","r wood 3 2 867","r wood 3 5 1164","r wood 4 0 869","r wood 4 2 1180","r wood 4 3 1204","r wood 4 5 1077","r wood 4 6 1026","r uranium 5 11 252","r coal 7 0 440","r wood 7 5 850","r wood 7 11 951","r wood 8 5 770","r wood 8 11 1122","r wood 9 0 866","r wood 9 1 1050","r wood 9 5 850","r wood 9 6 1221","r wood 9 7 1030","r wood 10 0 1260","r wood 10 6 1140","r wood 11 0 1261","r coal 11 9 360","r wood 11 10 1073","r wood 12 0 1261","r coal 12 9 360","r wood 12 10 1073","r wood 13 0 1260","r wood 13 6 1140","r wood 14 0 866","r wood 14 1 1050","r wood 14 5 850","r wood 14 6 1221","r wood 14 7 1030","r wood 15 5 770","r wood 15 11 1122","r coal 16 0 440","r wood 16 5 850","r wood 16 11 951","r uranium 18 11 252","r wood 19 0 869","r wood 19 2 1180","r wood 19 3 1204","r wood 19 5 1077","r wood 19 6 1026","r wood 20 2 867","r wood 20 5 1164","r coal 21 11 312","r coal 22 3 426","r 

In [18]:
game_state._update(agent1res)

In [41]:
env.step([['f'], ['f']])

[{'action': ['f'],
  'reward': 1001,
  'info': {},
  'observation': {'remainingOverageTime': 60,
   'step': 3,
   'reward': 1001,
   'player': 0,
   'updates': ['rp 0 0',
    'rp 1 0',
    'r uranium 0 2 270',
    'r coal 0 4 336',
    'r coal 1 3 426',
    'r coal 1 4 369',
    'r coal 2 11 312',
    'r wood 3 2 867',
    'r wood 3 5 1164',
    'r wood 4 0 869',
    'r wood 4 2 1180',
    'r wood 4 3 1204',
    'r wood 4 5 1077',
    'r wood 4 6 1026',
    'r uranium 5 11 252',
    'r coal 7 0 440',
    'r wood 7 5 850',
    'r wood 7 11 951',
    'r wood 8 5 790',
    'r wood 8 11 1122',
    'r wood 9 0 866',
    'r wood 9 1 1050',
    'r wood 9 5 850',
    'r wood 9 6 1221',
    'r wood 9 7 1030',
    'r wood 10 0 1260',
    'r wood 10 6 1140',
    'r wood 11 0 1261',
    'r coal 11 9 360',
    'r wood 11 10 1073',
    'r wood 12 0 1261',
    'r coal 12 9 360',
    'r wood 12 10 1073',
    'r wood 13 0 1260',
    'r wood 13 6 1140',
    'r wood 14 0 866',
    'r wood 14 1 1050',
   