# Lux AI Deep Reinforcement Learning Environment Example
See https://github.com/glmcdona/LuxPythonEnvGym for environment project and updates.

This is a python replica of the Lux game engine to speed up training. It reformats the agent problem into making a action decision per-unit for the team.

In [None]:
!pip install git+https://github.com/glmcdona/LuxPythonEnvGym.git
!pip install kaggle-environments -U

## Use GPU if available
Note: GPU provides very little speedup. I recommend using a CPU-only notebook usually.

In [1]:
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device
print(device)

cuda


# Define the RL agent logic
Edit this agent logic to implement your own observations, action space, and reward shaping.

In [8]:
%%writefile agent_policy.py
from luxai2021.game.match_controller import ActionSequence
import sys
import time
from functools import partial  # pip install functools

import numpy as np
from gym import spaces
import copy
import random

from luxai2021.env.agent import Agent
from luxai2021.game.actions import *
from luxai2021.game.game_constants import GAME_CONSTANTS
from luxai2021.game.position import Position


# https://codereview.stackexchange.com/questions/28207/finding-the-closest-point-to-a-list-of-points
def closest_node(node, nodes):
    dist_2 = np.sum((nodes - node) ** 2, axis=1)
    return np.argmin(dist_2)
def furthest_node(node, nodes):
    dist_2 = np.sum((nodes - node) ** 2, axis=1)
    return np.argmax(dist_2)

def smart_transfer_to_nearby(game, team, unit_id, unit, target_type_restriction=None, **kwarg):
    """
    Smart-transfers from the specified unit to a nearby neighbor. Prioritizes any
    nearby carts first, then any worker. Transfers the resource type which the unit
    has most of. Picks which cart/worker based on choosing a target that is most-full
    but able to take the most amount of resources.

    Args:
        team ([type]): [description]
        unit_id ([type]): [description]

    Returns:
        Action: Returns a TransferAction object, even if the request is an invalid
                transfer. Use TransferAction.is_valid() to check validity.
    """

    # Calculate how much resources could at-most be transferred
    resource_type = None
    resource_amount = 0
    target_unit = None

    if unit != None:
        for type, amount in unit.cargo.items():
            if amount > resource_amount:
                resource_type = type
                resource_amount = amount

        # Find the best nearby unit to transfer to
        unit_cell = game.map.get_cell_by_pos(unit.pos)
        adjacent_cells = game.map.get_adjacent_cells(unit_cell)

        
        for c in adjacent_cells:
            for id, u in c.units.items():
                # Apply the unit type target restriction
                if target_type_restriction == None or u.type == target_type_restriction:
                    if u.team == team:
                        # This unit belongs to our team, set it as the winning transfer target
                        # if it's the best match.
                        if target_unit is None:
                            target_unit = u
                        else:
                            # Compare this unit to the existing target
                            if target_unit.type == u.type:
                                # Transfer to the target with the least capacity, but can accept
                                # all of our resources
                                if( u.get_cargo_space_left() >= resource_amount and 
                                    target_unit.get_cargo_space_left() >= resource_amount ):
                                    # Both units can accept all our resources. Prioritize one that is most-full.
                                    if u.get_cargo_space_left() < target_unit.get_cargo_space_left():
                                        # This new target it better, it has less space left and can take all our
                                        # resources
                                        target_unit = u
                                    
                                elif( target_unit.get_cargo_space_left() >= resource_amount ):
                                    # Don't change targets. Current one is best since it can take all
                                    # the resources, but new target can't.
                                    pass
                                    
                                elif( u.get_cargo_space_left() > target_unit.get_cargo_space_left() ):
                                    # Change targets, because neither target can accept all our resources and 
                                    # this target can take more resources.
                                    target_unit = u
                            elif u.type == Constants.UNIT_TYPES.CART:
                                # Transfer to this cart instead of the current worker target
                                target_unit = u
    
    # Build the transfer action request
    target_unit_id = None
    if target_unit is not None:
        target_unit_id = target_unit.id

        # Update the transfer amount based on the room of the target
        if target_unit.get_cargo_space_left() < resource_amount:
            resource_amount = target_unit.get_cargo_space_left()
    
    return TransferAction(team, unit_id, target_unit_id, resource_type, resource_amount)

########################################################################################################################
# This is the Agent that you need to design for the competition
########################################################################################################################
class AgentPolicy(Agent):
    def __init__(self, mode="train", model=None) -> None:
        """
        Arguments:
            mode: "train" or "inference", which controls if this agent is for training or not.
            model: The pretrained model, or if None it will operate in training mode.
        """
        super().__init__()
        self.model = model
        self.mode = mode
        
        self.stats = None
        self.stats_last_game = None
        self.all_rewards = []

        # Define action and observation space
        # They must be gym.spaces objects
        # Example when using discrete actions:
        self.actionSpaceMapUnits = [
            partial(MoveAction, direction=Constants.DIRECTIONS.CENTER),  # This is the do-nothing action
            partial(MoveAction, direction=Constants.DIRECTIONS.NORTH),
            partial(MoveAction, direction=Constants.DIRECTIONS.WEST),
            partial(MoveAction, direction=Constants.DIRECTIONS.SOUTH),
            partial(MoveAction, direction=Constants.DIRECTIONS.EAST),
            smart_transfer_to_nearby, # Transfer to nearby
            SpawnCityAction,
            #PillageAction,
        ]
        self.actionSpaceMapCities = [
            SpawnWorkerAction,
            SpawnCartAction,
            ResearchAction,
        ]

        self.action_space = spaces.Discrete(max(len(self.actionSpaceMapUnits), len(self.actionSpaceMapCities)))
        

        # Observation space: (Basic minimum for a miner agent)
        # Object:
        #   1x is worker
        #   1x is cart
        #   1x is citytile
        #
        #   5x direction_nearest_wood
        #   1x distance_nearest_wood
        #   1x amount
        #
        #   5x direction_nearest_coal
        #   1x distance_nearest_coal
        #   1x amount
        #
        #   5x direction_nearest_uranium
        #   1x distance_nearest_uranium
        #   1x amount
        #
        #   5x direction_nearest_city
        #   1x distance_nearest_city
        #   1x amount of fuel
        #
        #   28x (the same as above, but direction, distance, and amount to the furthest of each)
        #
        #   5x direction_nearest_worker
        #   1x distance_nearest_worker
        #   1x amount of cargo
        # Unit:
        #   1x cargo size
        # State:
        #   1x is night
        #   1x percent of game done
        #   2x citytile counts [cur player, opponent]
        #   2x worker counts [cur player, opponent]
        #   2x cart counts [cur player, opponent]
        #   1x research points [cur player]
        #   1x researched coal [cur player]
        #   1x researched uranium [cur player]
        self.observation_shape = (3 + 7 * 5 * 2 + 1 + 1 + 1 + 2 + 2 + 2 + 3,)
        self.observation_space = spaces.Box(low=0, high=1, shape=
        self.observation_shape, dtype=np.float16)

        self.object_nodes = {}

    def get_agent_type(self):
        """
        Returns the type of agent. Use AGENT for inference, and LEARNING for training a model.
        """
        if self.mode == "train":
            return Constants.AGENT_TYPE.LEARNING
        else:
            return Constants.AGENT_TYPE.AGENT

    def get_observation(self, game, unit, city_tile, team, is_new_turn):
        """
        Implements getting a observation from the current game for this unit or city
        """
        observation_index = 0
        if is_new_turn:
            # It's a new turn this event. This flag is set True for only the first observation from each turn.
            # Update any per-turn fixed observation space that doesn't change per unit/city controlled.

            # Build a list of object nodes by type for quick distance-searches
            self.object_nodes = {}

            # Add resources
            for cell in game.map.resources:
                if cell.resource.type not in self.object_nodes:
                    self.object_nodes[cell.resource.type] = np.array([[cell.pos.x, cell.pos.y]])
                else:
                    self.object_nodes[cell.resource.type] = np.concatenate(
                        (
                            self.object_nodes[cell.resource.type],
                            [[cell.pos.x, cell.pos.y]]
                        ),
                        axis=0
                    )

            # Add your own and opponent units
            for t in [team, (team + 1) % 2]:
                for u in game.state["teamStates"][team]["units"].values():
                    key = str(u.type)
                    if t != team:
                        key = str(u.type) + "_opponent"

                    if key not in self.object_nodes:
                        self.object_nodes[key] = np.array([[u.pos.x, u.pos.y]])
                    else:
                        self.object_nodes[key] = np.concatenate(
                            (
                                self.object_nodes[key],
                                [[u.pos.x, u.pos.y]]
                            )
                            , axis=0
                        )

            # Add your own and opponent cities
            for city in game.cities.values():
                for cells in city.city_cells:
                    key = "city"
                    if city.team != team:
                        key = "city_opponent"

                    if key not in self.object_nodes:
                        self.object_nodes[key] = np.array([[cells.pos.x, cells.pos.y]])
                    else:
                        self.object_nodes[key] = np.concatenate(
                            (
                                self.object_nodes[key],
                                [[cells.pos.x, cells.pos.y]]
                            )
                            , axis=0
                        )

        # Observation space: (Basic minimum for a miner agent)
        # Object:
        #   1x is worker
        #   1x is cart
        #   1x is citytile
        #   5x direction_nearest_wood
        #   1x distance_nearest_wood
        #   1x amount
        #
        #   5x direction_nearest_coal
        #   1x distance_nearest_coal
        #   1x amount
        #
        #   5x direction_nearest_uranium
        #   1x distance_nearest_uranium
        #   1x amount
        #
        #   5x direction_nearest_city
        #   1x distance_nearest_city
        #   1x amount of fuel
        #
        #   5x direction_nearest_worker
        #   1x distance_nearest_worker
        #   1x amount of cargo
        #
        #   28x (the same as above, but direction, distance, and amount to the furthest of each)
        #
        # Unit:
        #   1x cargo size
        # State:
        #   1x is night
        #   1x percent of game done
        #   2x citytile counts [cur player, opponent]
        #   2x worker counts [cur player, opponent]
        #   2x cart counts [cur player, opponent]
        #   1x research points [cur player]
        #   1x researched coal [cur player]
        #   1x researched uranium [cur player]
        obs = np.zeros(self.observation_shape)
        
        # Update the type of this object
        #   1x is worker
        #   1x is cart
        #   1x is citytile
        observation_index = 0
        if unit is not None:
            if unit.type == Constants.UNIT_TYPES.WORKER:
                obs[observation_index] = 1.0 # Worker
            else:
                obs[observation_index+1] = 1.0 # Cart
        if city_tile is not None:
            obs[observation_index+2] = 1.0 # CityTile
        observation_index += 3
        
        pos = None
        if unit is not None:
            pos = unit.pos
        else:
            pos = city_tile.pos

        if pos is None:
            observation_index += 7 * 5 * 2
        else:
            # Encode the direction to the nearest objects
            #   5x direction_nearest
            #   1x distance
            for distance_function in [closest_node, furthest_node]:
                for key in [
                    Constants.RESOURCE_TYPES.WOOD,
                    Constants.RESOURCE_TYPES.COAL,
                    Constants.RESOURCE_TYPES.URANIUM,
                    "city",
                    str(Constants.UNIT_TYPES.WORKER)]:
                    # Process the direction to and distance to this object type

                    # Encode the direction to the nearest object (excluding itself)
                    #   5x direction
                    #   1x distance
                    if key in self.object_nodes:
                        if (
                                (key == "city" and city_tile is not None) or
                                (unit is not None and str(unit.type) == key and len(game.map.get_cell_by_pos(unit.pos).units) <= 1 )
                        ):
                            # Filter out the current unit from the closest-search
                            closest_index = closest_node((pos.x, pos.y), self.object_nodes[key])
                            filtered_nodes = np.delete(self.object_nodes[key], closest_index, axis=0)
                        else:
                            filtered_nodes = self.object_nodes[key]

                        if len(filtered_nodes) == 0:
                            # No other object of this type
                            obs[observation_index + 5] = 1.0
                        else:
                            # There is another object of this type
                            closest_index = distance_function((pos.x, pos.y), filtered_nodes)

                            if closest_index is not None and closest_index >= 0:
                                closest = filtered_nodes[closest_index]
                                closest_position = Position(closest[0], closest[1])
                                direction = pos.direction_to(closest_position)
                                mapping = {
                                    Constants.DIRECTIONS.CENTER: 0,
                                    Constants.DIRECTIONS.NORTH: 1,
                                    Constants.DIRECTIONS.WEST: 2,
                                    Constants.DIRECTIONS.SOUTH: 3,
                                    Constants.DIRECTIONS.EAST: 4,
                                }
                                obs[observation_index + mapping[direction]] = 1.0  # One-hot encoding direction

                                # 0 to 1 distance
                                distance = pos.distance_to(closest_position)
                                obs[observation_index + 5] = min(distance / 20.0, 1.0)

                                # 0 to 1 value (amount of resource, cargo for unit, or fuel for city)
                                if key == "city":
                                    # City fuel as % of upkeep for 200 turns
                                    c = game.cities[game.map.get_cell_by_pos(closest_position).city_tile.city_id]
                                    obs[observation_index + 6] = min(
                                        c.fuel / (c.get_light_upkeep() * 200.0),
                                        1.0
                                    )
                                elif key in [Constants.RESOURCE_TYPES.WOOD, Constants.RESOURCE_TYPES.COAL,
                                             Constants.RESOURCE_TYPES.URANIUM]:
                                    # Resource amount
                                    obs[observation_index + 6] = min(
                                        game.map.get_cell_by_pos(closest_position).resource.amount / 500,
                                        1.0
                                    )
                                else:
                                    # Unit cargo
                                    obs[observation_index + 6] = min(
                                        next(iter(game.map.get_cell_by_pos(
                                            closest_position).units.values())).get_cargo_space_left() / 100,
                                        1.0
                                    )

                    observation_index += 7

        if unit is not None:
            # Encode the cargo space
            #   1x cargo size
            obs[observation_index] = unit.get_cargo_space_left() / GAME_CONSTANTS["PARAMETERS"]["RESOURCE_CAPACITY"][
                "WORKER"]
            observation_index += 1
        else:
            observation_index += 1

        # Game state observations

        #   1x is night
        obs[observation_index] = game.is_night()
        observation_index += 1

        #   1x percent of game done
        obs[observation_index] = game.state["turn"] / GAME_CONSTANTS["PARAMETERS"]["MAX_DAYS"]
        observation_index += 1

        #   2x citytile counts [cur player, opponent]
        #   2x worker counts [cur player, opponent]
        #   2x cart counts [cur player, opponent]
        max_count = 30
        for key in ["city", str(Constants.UNIT_TYPES.WORKER), str(Constants.UNIT_TYPES.CART)]:
            if key in self.object_nodes:
                obs[observation_index] = len(self.object_nodes[key]) / max_count
            if (key + "_opponent") in self.object_nodes:
                obs[observation_index + 1] = len(self.object_nodes[(key + "_opponent")]) / max_count
            observation_index += 2

        #   1x research points [cur player]
        #   1x researched coal [cur player]
        #   1x researched uranium [cur player]
        obs[observation_index] = game.state["teamStates"][team]["researchPoints"] / 200.0
        obs[observation_index+1] = float(game.state["teamStates"][team]["researched"]["coal"])
        obs[observation_index+2] = float(game.state["teamStates"][team]["researched"]["uranium"])

        return obs

    def action_code_to_action(self, action_code, game, unit=None, city_tile=None, team=None):
        """
        Takes an action in the environment according to actionCode:
            actionCode: Index of action to take into the action array.
        Returns: An action.
        """
        # Map actionCode index into to a constructed Action object
        try:
            x = None
            y = None
            if city_tile is not None:
                x = city_tile.pos.x
                y = city_tile.pos.y
            elif unit is not None:
                x = unit.pos.x
                y = unit.pos.y
            
            if city_tile != None:
                action =  self.actionSpaceMapCities[action_code%len(self.actionSpaceMapCities)](
                    game=game,
                    unit_id=unit.id if unit else None,
                    unit=unit,
                    city_id=city_tile.city_id if city_tile else None,
                    citytile=city_tile,
                    team=team,
                    x=x,
                    y=y
                )

                # If the city action is invalid, default to research action automatically
                if not action.is_valid(game, actions_validated=[]):
                    action = ResearchAction(
                        game=game,
                        unit_id=unit.id if unit else None,
                        unit=unit,
                        city_id=city_tile.city_id if city_tile else None,
                        citytile=city_tile,
                        team=team,
                        x=x,
                        y=y
                    )
            else:
                action =  self.actionSpaceMapUnits[action_code%len(self.actionSpaceMapUnits)](
                    game=game,
                    unit_id=unit.id if unit else None,
                    unit=unit,
                    city_id=city_tile.city_id if city_tile else None,
                    citytile=city_tile,
                    team=team,
                    x=x,
                    y=y
                )
            
            return action
        except Exception as e:
            # Not a valid action
            print(e)
            return None

    def take_action(self, action_code, game, unit=None, city_tile=None, team=None):
        """
        Takes an action in the environment according to actionCode:
            actionCode: Index of action to take into the action array.
        """
        action = self.action_code_to_action(action_code, game, unit, city_tile, team)
        self.match_controller.take_action(action)
    
    def game_start(self, game):
        """
        This funciton is called at the start of each game. Use this to
        reset and initialize per game. Note that self.team may have
        been changed since last game. The game map has been created
        and starting units placed.

        Args:
            game ([type]): Game.
        """
        self.last_generated_fuel = game.stats["teamStats"][self.team]["fuelGenerated"]
        self.last_resources_collected = copy.deepcopy(game.stats["teamStats"][self.team]["resourcesCollected"])
        if self.stats != None:
            self.stats_last_game =  self.stats
        self.stats = {
            "rew/r_total": 0,
            "rew/r_wood": 0,
            "rew/r_coal": 0,
            "rew/r_uranium": 0,
            "rew/r_research": 0,
            "rew/r_city_tiles_end": 0,
            "rew/r_fuel_collected":0,
            "rew/r_units":0,
            "rew/r_city_tiles":0,
            "game/turns": 0,
            "game/research": 0,
            "game/unit_count": 0,
            "game/cart_count": 0,
            "game/city_count": 0,
            "game/city_tiles": 0,
            "game/wood_rate_mined": 0,
            "game/coal_rate_mined": 0,
            "game/uranium_rate_mined": 0,
        }
        self.is_last_turn = False

        # Calculate starting map resources
        type_map = {
            Constants.RESOURCE_TYPES.WOOD: "WOOD",
            Constants.RESOURCE_TYPES.COAL: "COAL",
            Constants.RESOURCE_TYPES.URANIUM: "URANIUM",
        }

        self.fuel_collected_last = 0
        self.fuel_start = {}
        self.fuel_last = {}
        for type, type_upper in type_map.items():
            self.fuel_start[type] = 0
            self.fuel_last[type] = 0
            for c in game.map.resources_by_type[type]:
                self.fuel_start[type] += c.resource.amount * game.configs["parameters"]["RESOURCE_TO_FUEL_RATE"][type_upper]

        self.research_last = 0
        self.units_last = 0
        self.city_tiles_last = 0
    
    
    
    def get_reward(self, game, is_game_finished, is_new_turn, is_game_error):
        """
        Returns the reward function for this step of the game.
        """
        if is_game_error:
            # Game environment step failed, assign a game lost reward to not incentivise this
            print("Game failed due to error")
            return -1.0

        if not is_new_turn and not is_game_finished:
            # Only apply rewards at the start of each turn
            return 0

        # Get some basic stats
        unit_count = len(game.state["teamStates"][self.team % 2]["units"])
        cart_count = 0
        for id, u in game.state["teamStates"][self.team % 2]["units"].items():
            if u.type == Constants.UNIT_TYPES.CART:
                cart_count += 1

        unit_count_opponent = len(game.state["teamStates"][(self.team + 1) % 2]["units"])
        research = min(game.state["teamStates"][self.team]["researchPoints"], 200.0) # Cap research points at 200
        city_count = 0
        city_count_opponent = 0
        city_tile_count = 0
        city_tile_count_opponent = 0
        for city in game.cities.values():
            if city.team == self.team:
                city_count += 1
            else:
                city_count_opponent += 1

            for cell in city.city_cells:
                if city.team == self.team:
                    city_tile_count += 1
                else:
                    city_tile_count_opponent += 1
        
        # Basic stats
        self.stats["game/research"] = research
        self.stats["game/city_tiles"] = city_tile_count
        self.stats["game/city_count"] = city_count
        self.stats["game/unit_count"] = unit_count
        self.stats["game/cart_count"] = cart_count
        self.stats["game/turns"] = game.state["turn"]

        rewards = {}

        # Give up to 1.0 reward for each resource based on % of total mined.
        type_map = {
            Constants.RESOURCE_TYPES.WOOD: "WOOD",
            Constants.RESOURCE_TYPES.COAL: "COAL",
            Constants.RESOURCE_TYPES.URANIUM: "URANIUM",
        }
        fuel_now = {}
        for type, type_upper in type_map.items():
            fuel_now = game.stats["teamStats"][self.team]["resourcesCollected"][type] * game.configs["parameters"]["RESOURCE_TO_FUEL_RATE"][type_upper]
            rewards["rew/r_%s" % type] = (fuel_now - self.fuel_last[type]) / self.fuel_start[type]
            self.stats["game/%s_rate_mined" % type] = fuel_now / self.fuel_start[type]
            self.fuel_last[type] = fuel_now
        
        # Give more incentive for coal and uranium
        rewards["rew/r_%s" % Constants.RESOURCE_TYPES.COAL] *= 2
        rewards["rew/r_%s" % Constants.RESOURCE_TYPES.URANIUM] *= 4

        rewards["rew/r_%s" % Constants.RESOURCE_TYPES.WOOD] *= 20
        
        # Give a reward based on amount of fuel collected. 1.0 reward for each 20K fuel gathered.
        fuel_collected = game.stats["teamStats"][self.team]["fuelGenerated"]
        rewards["rew/r_fuel_collected"] = ( (fuel_collected - self.fuel_collected_last) / 20000 )
        self.fuel_collected_last = fuel_collected

        # Give a reward for unit creation/death. 0.05 reward per unit.
        rewards["rew/r_units"] = (unit_count - self.units_last) * 0.05
        self.units_last = unit_count

        # Give a reward for unit creation/death. 0.1 reward per city.
        rewards["rew/r_city_tiles"] = (city_tile_count - self.city_tiles_last) * 1 #change 0.1->1
        self.city_tiles_last = city_tile_count

        # Tiny reward for research to help. Up to 0.5 reward for this.
        rewards["rew/r_research"] = (research - self.research_last) / (200 * 2)
        self.research_last = research
        
        # Give a reward up to around 50.0 based on number of city tiles at the end of the game
        rewards["rew/r_city_tiles_end"] = 0
        if is_game_finished:
            self.is_last_turn = True
            rewards["rew/r_city_tiles_end"] = city_tile_count*10 #1->10
        
        
        # Update the stats and total reward
        reward = 0
        for name, value in rewards.items():
            self.stats[name] += value
            reward += value
        self.stats["rew/r_total"] += reward

        # Print the final game stats sometimes
        if is_game_finished and random.random() <= 0.15:
            stats_string = []
            for key, value in self.stats.items():
                stats_string.append("%s=%.2f" % (key, value))
            print(",".join(stats_string))


        return reward
    

    

    def process_turn(self, game, team):
        """
        Decides on a set of actions for the current turn. Not used in training, only inference.
        Returns: Array of actions to perform.
        """
        start_time = time.time()
        actions = []
        new_turn = True

        # Inference the model per-unit
        units = game.state["teamStates"][team]["units"].values()
        for unit in units:
            if unit.can_act():
                obs = self.get_observation(game, unit, None, unit.team, new_turn)
                action_code, _states = self.model.predict(obs, deterministic=False)
                if action_code is not None:
                    actions.append(
                        self.action_code_to_action(action_code, game=game, unit=unit, city_tile=None, team=unit.team))
                new_turn = False

        # Inference the model per-city
        cities = game.cities.values()
        for city in cities:
            if city.team == team:
                for cell in city.city_cells:
                    city_tile = cell.city_tile
                    if city_tile.can_act():
                        obs = self.get_observation(game, None, city_tile, city.team, new_turn)
                        action_code, _states = self.model.predict(obs, deterministic=False)
                        if action_code is not None:
                            actions.append(
                                self.action_code_to_action(action_code, game=game, unit=None, city_tile=city_tile,
                                                           team=city.team))
                        new_turn = False

        time_taken = time.time() - start_time
        if time_taken > 0.5:  # Warn if larger than 0.5 seconds.
            print("WARNING: Inference took %.3f seconds for computing actions. Limit is 1 second." % time_taken,
                  file=sys.stderr)

        return actions



Overwriting agent_policy.py


# Build the environment for training

Notes on metrics:
* An Episode is a single game between your RL agent and it's opponent. This is generally 360 turns, spanning more than 360 unit + city decision steps.
* Mean episode length (ep_len_mean) is the number of decision made per game. The larger this gets, means that it is making more unit + city decision per game, meaning that more units and cities were alive for longer during the game.
* Episode reward mean (ep_rew_mean), is set up as micro-reward funciton for faster learning. Per turn it gets a small reward based on the number of cities and units alive. It gets a really big reward based on the number of cities and units alive at the end of the game.

In [7]:
#!pip install stable-baselines3

In [15]:
import argparse
import glob
import os
import random
from typing import Callable

from stable_baselines3 import PPO  # pip install stable-baselines3
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.vec_env import SubprocVecEnv

from importlib import reload
import agent_policy
reload(agent_policy) # Reload the file from disk incase the above agent-writing cell block was edited
from agent_policy import AgentPolicy

from luxai2021.env.agent import Agent
from luxai2021.env.lux_env import LuxEnvironment
from luxai2021.game.constants import LuxMatchConfigs_Default


# Default Lux configs
configs = LuxMatchConfigs_Default

# Create a default opponent agent
opponent = AgentPolicy(mode="inference", model=PPO.load('/projectnb/ds598xz/students/ziyechen/MyLuxAI/examples2/models/model'))

# Create a RL agent in training mode
player = AgentPolicy(mode="train")

# Create the Lux environment
env = LuxEnvironment(configs=configs,
                     learning_agent=player,
                     opponent_agent=opponent)

# Define the model, you can pick other RL algos from Stable Baselines3 instead if you like
model = PPO("MlpPolicy",
                env,
                verbose=1,
                tensorboard_log="./lux_tensorboard/",
                learning_rate=0.001,
                gamma=0.999,
                gae_lambda=0.95,
                batch_size=2048 * 8,
                n_steps=2048 * 8
            )

# Define a learning rate schedule
# (number of steps, learning_rate)
schedule = [
    (1000000, 0.01),
    (4000000, 0.001),
    (4000000, 0.0001),
    #(600, 0.001),
    #(600, 0.0001),
]

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


Running in inference-only mode.


# Train the agent against a dummy opponent

In [None]:
from stable_baselines3.common.utils import get_schedule_fn

print("Training model...")
run_id = 1

# Save a checkpoint every 1M steps
checkpoint_callback = CheckpointCallback(save_freq=1000000,
                                         save_path='./models/',
                                         name_prefix=f'rl_model_{run_id}')

# Train the policy
NN=0
for steps, learning_rate in schedule:
    model.lr_schedule = get_schedule_fn(learning_rate)
    model.learn(total_timesteps=steps,
                callback=checkpoint_callback,
                reset_num_timesteps = False)
    NN=NN+1
    print(NN)
    model.save(path=f'models_new/model_new_{NN}.zip')
# Save final model
model.save(path=f'models/model.zip')

print("Done training model.")

Training model...
Logging to ./lux_tensorboard/PPO_0
rew/r_total=0.37,rew/r_wood=0.19,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.13,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.05,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=276.00,game/research=51.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.19,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.10,rew/r_wood=0.05,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.03,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=117.00,game/research=13.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.05,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.47,rew/r_wood=0.31,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.12,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.04,rew/r_units=-0.00,rew/r_city_tiles=0.00,gam

rew/r_total=0.20,rew/r_wood=0.14,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.05,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=236.00,game/research=21.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.14,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 116      |
|    ep_rew_mean     | 1.1      |
| time/              |          |
|    fps             | 95       |
|    iterations      | 1        |
|    time_elapsed    | 172      |
|    total_timesteps | 17130    |
---------------------------------
rew/r_total=0.03,rew/r_wood=0.01,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.02,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=36.00,game/research=6.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,

rew/r_total=0.07,rew/r_wood=0.04,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.02,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=111.00,game/research=10.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.04,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.07,rew/r_wood=0.05,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.01,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=71.00,game/research=4.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.05,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 117         |
|    ep_rew_mean          | 0.651       |
| time/                   |             |
|    fps                  | 

rew/r_total=0.06,rew/r_wood=0.03,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.02,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=75.00,game/research=9.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.03,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.08,rew/r_wood=0.03,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.04,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=151.00,game/research=15.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.03,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.08,rew/r_wood=0.05,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.02,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=74.00,game/research=9.00,game/unit_count=0.00,ga

rew/r_total=0.04,rew/r_wood=0.02,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.01,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.00,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=34.00,game/research=5.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.02,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.03,rew/r_wood=0.02,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.01,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=37.00,game/research=4.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.02,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.10,rew/r_wood=0.07,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.03,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.00,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=111.00,game/research=11.00,game/unit_count=0.00,ga

rew/r_total=66.85,rew/r_wood=0.36,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.16,rew/r_city_tiles_end=60.00,rew/r_fuel_collected=0.08,rew/r_units=0.25,rew/r_city_tiles=6.00,game/turns=191.00,game/research=64.00,game/unit_count=5.00,game/cart_count=0.00,game/city_count=4.00,game/city_tiles=6.00,game/wood_rate_mined=0.36,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.22,rew/r_wood=0.15,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.06,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.02,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=114.00,game/research=22.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.15,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=23.43,rew/r_wood=0.93,rew/r_coal=0.13,rew/r_uranium=0.00,rew/r_research=0.16,rew/r_city_tiles_end=20.00,rew/r_fuel_collected=0.16,rew/r_units=0.05,rew/r_city_tiles=2.00,game/turns=360.00,game/research=62.00,game/unit_count

rew/r_total=33.81,rew/r_wood=0.41,rew/r_coal=0.02,rew/r_uranium=0.00,rew/r_research=0.20,rew/r_city_tiles_end=30.00,rew/r_fuel_collected=0.13,rew/r_units=0.05,rew/r_city_tiles=3.00,game/turns=360.00,game/research=80.00,game/unit_count=1.00,game/cart_count=0.00,game/city_count=3.00,game/city_tiles=3.00,game/wood_rate_mined=0.41,game/coal_rate_mined=0.01,game/uranium_rate_mined=0.00
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 343         |
|    ep_rew_mean          | 9.07        |
| time/                   |             |
|    fps                  | 123         |
|    iterations           | 9           |
|    time_elapsed         | 1195        |
|    total_timesteps      | 148202      |
| train/                  |             |
|    approx_kl            | 0.008617438 |
|    clip_fraction        | 0.0867      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.77       |
|    explained_variance   | 

In [None]:
# Save final model
model.save(path=f'models_t/model_t.zip')

In [None]:
# Save final model
model.save(path=f'models/model.zip')

print("Done training model.")

# Set up a Kaggle Submission and lux replay environment for the agent

In [None]:
"""
This downloads two required python package dependencies that are not pre-installed
by Kaggle yet.

This places the following two packages in the current working directory:
    luxai2021
    stable_baselines3
"""

import os
import shutil
import subprocess
import tempfile

def localize_package(git, branch, folder):
    if os.path.exists(folder):
        print("Already localized %s" % folder)
    else:
        # https://stackoverflow.com/questions/51239168/how-to-download-single-file-from-a-git-repository-using-python
        # Create temporary dir
        t = tempfile.mkdtemp()

        args = ['git', 'clone', '--depth=1', git, t, '-b', branch]
        res = subprocess.Popen(args, stdout=subprocess.PIPE)
        output, _error = res.communicate()

        if not _error:
            print(output)
        else:
            print(_error)
        
        # Copy desired file from temporary dir
        shutil.move(os.path.join(t, folder), '.')
        # Remove temporary dir
        shutil.rmtree(t, ignore_errors=True)

localize_package('https://github.com/glmcdona/LuxPythonEnvGym.git', 'main', 'luxai2021')
localize_package('https://github.com/glmcdona/LuxPythonEnvGym.git', 'main', 'kaggle_submissions')
localize_package('https://github.com/DLR-RM/stable-baselines3.git', 'master', 'stable_baselines3')

In [None]:
# Move the dependent packages into kaggle submissions
!mv luxai2021 kaggle_submissions
!mv stable_baselines3 kaggle_submissions
!rm ./kaggle_submissions/agent_policy.py
!cp agent_policy.py kaggle_submissions

# Copy the agent and model to the submission 
!cp ./agent_policy.py kaggle_submissions
!cp ./models/model.zip kaggle_submissions

!ls kaggle_submissions

# 下面这部分在scc上运行不了，可以把上面训练好的model下载到本地去看动画

In [6]:
from kaggle_environments import make
import json
# run another match but with our empty agent
env = make("lux_ai_2021", configuration={"seed": 5621242, "loglevel": 2, "annotations": True}, debug=True)

# Play the environment where the RL agent plays against itself
steps = env.run(["./kaggle_submissions/main.py", "./kaggle_submissions/main.py"])

Loading environment llm_20_questions failed: No module named 'yaml'


KeyError: 'updates'

In [None]:
# Render the match
env.render(mode="ipython", width=1200, height=800)

# Prepare and submit the kaggle submission

In [None]:
!tar -czf submission.tar.gz -C kaggle_submissions .
!ls

# Another Model

In [7]:
import argparse
import glob
import os
import random
from typing import Callable

from stable_baselines3 import PPO  # pip install stable-baselines3
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3.common.utils import set_random_seed
from stable_baselines3.common.vec_env import SubprocVecEnv

from importlib import reload
import agent_policy
reload(agent_policy) # Reload the file from disk incase the above agent-writing cell block was edited
from agent_policy import AgentPolicy

from luxai2021.env.agent import Agent
from luxai2021.env.lux_env import LuxEnvironment
from luxai2021.game.constants import LuxMatchConfigs_Default


# Default Lux configs
configs = LuxMatchConfigs_Default

# Create a default opponent agent
opponent = AgentPolicy(mode="inference", model=model)

# Create a RL agent in training mode
player = AgentPolicy(mode="train")

# Create the Lux environment
env = LuxEnvironment(configs=configs,
                     learning_agent=player,
                     opponent_agent=opponent)

# Define the model, you can pick other RL algos from Stable Baselines3 instead if you like
model = PPO("MlpPolicy",
                env,
                verbose=1,
                tensorboard_log="./lux_tensorboard4/",
                learning_rate=0.001,
                gamma=0.999,
                gae_lambda=0.95,
                batch_size=2048 * 8,
                n_steps=2048 * 8
            )

# Define a learning rate schedule
# (number of steps, learning_rate)
schedule = [
    (2000000, 0.01),
    (4000000, 0.001),
    (4000000, 0.0001),
    #(600, 0.001),
    #(600, 0.0001),
]

NameError: name 'model' is not defined

In [None]:
from stable_baselines3.common.utils import get_schedule_fn

print("Training model...")
run_id = 1

# Save a checkpoint every 1M steps
checkpoint_callback = CheckpointCallback(save_freq=1000000, #1000000
                                         save_path='./models_4/',
                                         name_prefix=f'rl_model_{run_id}')

# Train the policy
for steps, learning_rate in schedule:
    model.lr_schedule = get_schedule_fn(learning_rate)
    model.learn(total_timesteps=steps,
                callback=checkpoint_callback,
                reset_num_timesteps = False)

# Save final model
model.save(path=f'models_4/model_4.zip')

print("Done training model.")

Training model...
Logging to ./lux_tensorboard4/PPO_0
rew/r_total=0.03,rew/r_wood=0.01,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.01,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=116.00,game/research=4.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.01,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.06,rew/r_wood=0.03,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.02,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=151.00,game/research=8.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.03,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.02,rew/r_wood=0.01,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.01,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.00,rew/r_units=0.00,rew/r_city_tiles=0.00,game/

rew/r_total=0.05,rew/r_wood=0.04,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.01,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.00,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=71.00,game/research=5.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.04,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.04,rew/r_wood=0.02,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.02,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=76.00,game/research=7.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.02,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.02,rew/r_wood=0.01,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.01,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.00,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=31.00,game/research=5.00,game/unit_count=0.00,game

rew/r_total=0.05,rew/r_wood=0.03,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.01,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=37.00,game/research=5.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.03,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.12,rew/r_wood=0.10,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.02,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.00,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=191.00,game/research=8.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.10,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.10,rew/r_wood=0.07,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.02,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=157.00,game/research=7.00,game/unit_count=0.00,ga

rew/r_total=0.18,rew/r_wood=0.14,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.03,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=151.00,game/research=13.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.14,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.46,rew/r_wood=0.33,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.09,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.04,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=158.00,game/research=34.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.33,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.06,rew/r_wood=0.02,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.02,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=72.00,game/research=8.00,game/unit_count=0.00,

rew/r_total=0.56,rew/r_wood=0.22,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.17,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.07,rew/r_units=0.10,rew/r_city_tiles=0.00,game/turns=360.00,game/research=66.00,game/unit_count=2.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.22,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.01,rew/r_wood=0.00,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.01,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.00,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=31.00,game/research=4.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.00,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.17,rew/r_wood=0.13,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.03,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=276.00,game/research=13.00,game/unit_count=0.00,

rew/r_total=0.25,rew/r_wood=0.17,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.07,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=314.00,game/research=29.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.17,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.04,rew/r_wood=0.02,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.02,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.00,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=71.00,game/research=7.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.02,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.18,rew/r_wood=0.13,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.04,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=111.00,game/research=16.00,game/unit_count=0.00,

rew/r_total=0.71,rew/r_wood=0.50,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.15,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.05,rew/r_units=-0.00,rew/r_city_tiles=0.00,game/turns=314.00,game/research=60.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.50,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=67.51,rew/r_wood=0.76,rew/r_coal=0.05,rew/r_uranium=0.00,rew/r_research=0.36,rew/r_city_tiles_end=60.00,rew/r_fuel_collected=0.18,rew/r_units=0.15,rew/r_city_tiles=6.00,game/turns=360.00,game/research=145.00,game/unit_count=3.00,game/cart_count=3.00,game/city_count=1.00,game/city_tiles=6.00,game/wood_rate_mined=0.76,game/coal_rate_mined=0.05,game/uranium_rate_mined=0.00
rew/r_total=0.12,rew/r_wood=0.09,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.02,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=72.00,game/research=9.00,game/unit_count=0

rew/r_total=0.18,rew/r_wood=0.15,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.03,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.00,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=155.00,game/research=13.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.15,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=56.30,rew/r_wood=0.92,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.23,rew/r_city_tiles_end=50.00,rew/r_fuel_collected=0.11,rew/r_units=0.05,rew/r_city_tiles=5.00,game/turns=233.00,game/research=90.00,game/unit_count=1.00,game/cart_count=0.00,game/city_count=1.00,game/city_tiles=5.00,game/wood_rate_mined=0.92,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.88,rew/r_wood=0.50,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.24,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.14,rew/r_units=-0.00,rew/r_city_tiles=0.00,game/turns=351.00,game/research=97.00,game/unit_count=

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 503         |
|    ep_rew_mean          | 14.5        |
| time/                   |             |
|    fps                  | 149         |
|    iterations           | 13          |
|    time_elapsed         | 1421        |
|    total_timesteps      | 212992      |
| train/                  |             |
|    approx_kl            | 0.009438872 |
|    clip_fraction        | 0.126       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.7        |
|    explained_variance   | 0.412       |
|    learning_rate        | 0.01        |
|    loss                 | 12.6        |
|    n_updates            | 120         |
|    policy_gradient_loss | -0.00272    |
|    value_loss           | 26.6        |
-----------------------------------------
rew/r_total=1.41,rew/r_wood=0.95,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.36,rew/r_city_tiles_end=0.00,rew/

rew/r_total=0.13,rew/r_wood=0.07,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.04,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.02,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=156.00,game/research=16.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.07,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=45.04,rew/r_wood=0.57,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.13,rew/r_city_tiles_end=40.00,rew/r_fuel_collected=0.09,rew/r_units=0.25,rew/r_city_tiles=4.00,game/turns=197.00,game/research=53.00,game/unit_count=5.00,game/cart_count=0.00,game/city_count=1.00,game/city_tiles=4.00,game/wood_rate_mined=0.57,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=11.17,rew/r_wood=0.11,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.03,rew/r_city_tiles_end=10.00,rew/r_fuel_collected=0.02,rew/r_units=0.00,rew/r_city_tiles=1.00,game/turns=160.00,game/research=14.00,game/unit_count

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 484         |
|    ep_rew_mean          | 14.7        |
| time/                   |             |
|    fps                  | 163         |
|    iterations           | 18          |
|    time_elapsed         | 1799        |
|    total_timesteps      | 294912      |
| train/                  |             |
|    approx_kl            | 0.023539234 |
|    clip_fraction        | 0.231       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.64       |
|    explained_variance   | 0.507       |
|    learning_rate        | 0.01        |
|    loss                 | 1.51        |
|    n_updates            | 170         |
|    policy_gradient_loss | -0.000771   |
|    value_loss           | 3.38        |
-----------------------------------------
rew/r_total=0.17,rew/r_wood=0.12,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.04,rew/r_city_tiles_end=0.00,rew/

rew/r_total=11.28,rew/r_wood=0.19,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.06,rew/r_city_tiles_end=10.00,rew/r_fuel_collected=0.03,rew/r_units=0.00,rew/r_city_tiles=1.00,game/turns=280.00,game/research=23.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=1.00,game/city_tiles=1.00,game/wood_rate_mined=0.19,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.53,rew/r_wood=0.30,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.16,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.07,rew/r_units=-0.00,rew/r_city_tiles=0.00,game/turns=319.00,game/research=65.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.30,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.44,rew/r_wood=0.32,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.08,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.03,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=233.00,game/research=33.00,game/unit_count=

rew/r_total=0.41,rew/r_wood=0.25,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.12,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.04,rew/r_units=-0.00,rew/r_city_tiles=0.00,game/turns=151.00,game/research=48.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.25,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
rew/r_total=0.09,rew/r_wood=0.06,rew/r_coal=0.00,rew/r_uranium=0.00,rew/r_research=0.02,rew/r_city_tiles_end=0.00,rew/r_fuel_collected=0.01,rew/r_units=0.00,rew/r_city_tiles=0.00,game/turns=73.00,game/research=8.00,game/unit_count=0.00,game/cart_count=0.00,game/city_count=0.00,game/city_tiles=0.00,game/wood_rate_mined=0.06,game/coal_rate_mined=0.00,game/uranium_rate_mined=0.00
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 733         |
|    ep_rew_mean          | 15.4        |
| time/                   |             |
|    fps                  |

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 711         |
|    ep_rew_mean          | 26.1        |
| time/                   |             |
|    fps                  | 188         |
|    iterations           | 29          |
|    time_elapsed         | 2526        |
|    total_timesteps      | 475136      |
| train/                  |             |
|    approx_kl            | 0.007736023 |
|    clip_fraction        | 0.0904      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.47       |
|    explained_variance   | 0.538       |
|    learning_rate        | 0.01        |
|    loss                 | 11.3        |
|    n_updates            | 280         |
|    policy_gradient_loss | -0.00489    |
|    value_loss           | 23.7        |
-----------------------------------------
rew/r_total=1.30,rew/r_wood=0.84,rew/r_coal=0.01,rew/r_uranium=0.00,rew/r_research=0.31,rew/r_city_tiles_end=0.00,rew/

In [14]:
# Move the dependent packages into kaggle submissions
!mv luxai2021 kaggle_submissions
!mv stable_baselines3 kaggle_submissions
!rm ./kaggle_submissions/agent_policy.py
!cp agent_policy.py kaggle_submissions

# Copy the agent and model to the submission 
!cp ./agent_policy.py kaggle_submissions
!cp ./models_3/model_3.zip kaggle_submissions

!ls kaggle_submissions

mv: cannot move 'luxai2021' to 'kaggle_submissions/luxai2021': File exists
mv: cannot move 'stable_baselines3' to 'kaggle_submissions/stable_baselines3': File exists
agent_policy.py		  luxai2021  main_lux-ai-2021.py  model_3.zip
download_dependencies.py  main.py    model.zip		  stable_baselines3


In [15]:
!tar -czf submission_2.tar.gz -C kaggle_submissions .
!ls

DQN_training.ipynb  models
__pycache__	    models_2
agent_policy.py     models_3
kaggle_submissions  reinforcement-learning-openai-ppo-with-python-game.ipynb
log.txt		    stable_baselines3
lux_tensorboard     submission.tar.gz
lux_tensorboard2    submission_2.tar.gz
lux_tensorboard3    train.py
luxai2021
