In [1]:
# Start writing code here...

In [2]:
!pip install flatland-rl
!pip install tensorforce


Collecting msgpack==0.6.1
  Using cached msgpack-0.6.1-cp37-cp37m-manylinux1_x86_64.whl (245 kB)
Processing /root/.cache/pip/wheels/32/34/78/36550f249167fda9e42e1dd9af84b400abf6c162d1c07ab4e1/gym-0.14.0-py3-none-any.whl
Collecting pyglet<=1.3.2,>=1.2.0
  Using cached pyglet-1.3.2-py2.py3-none-any.whl (1.0 MB)
[31mERROR: tensorforce 0.6.3 has requirement gym>=0.18.0, but you'll have gym 0.14.0 which is incompatible.[0m
[31mERROR: tensorforce 0.6.3 has requirement msgpack>=1.0.2, but you'll have msgpack 0.6.1 which is incompatible.[0m
Installing collected packages: msgpack, pyglet, gym
  Attempting uninstall: msgpack
    Found existing installation: msgpack 1.0.2
    Uninstalling msgpack-1.0.2:
      Successfully uninstalled msgpack-1.0.2
  Attempting uninstall: pyglet
    Found existing installation: pyglet 1.5.0
    Uninstalling pyglet-1.5.0:
      Successfully uninstalled pyglet-1.5.0
  Attempting uninstall: gym
    Found existing installation: gym 0.18.0
    Uninstalling gym-0.18

In [3]:
from tensorforce import Agent, Environment

In [21]:
from flatland.envs.rail_env import RailEnv, RailEnvActions
import numpy as np


class OurEnv(RailEnv):
    def reset(self, *args, render=True, **kwargs):
        observation, info_dict = super().reset(*args, **kwargs)
       #self.env_renderer = RenderTool(env)
        if render:
            self.step({0: RailEnvActions.MOVE_FORWARD})
            render_env(self)
        return observation

class TensorforceEnv(Environment):
    def __init__(self, rail_env, agent):
        self._rail_env = rail_env
        self.num_agent = agent

        state, _ = self._rail_env.reset()
        self._state = self.process_state(state)
        
        super().__init__()

    def process_state(self, state):
        return np.array(state[0][1]).flatten()

    def process_reward(self, reward):
        reward_sum = 0
        for _, train_reward in reward.items():
            reward_sum += train_reward
        return reward_sum

    def states(self):
        #state = self._rail_env._get_observations()
        #return dict(type='float', shape=(width,height,16))
        #return self._state
        return dict(type="float", min_value=-1000.0, max_value=1000.0, shape=self._state.shape)

    def actions(self):
        return dict(type='int', num_values=len(RailEnvActions), shape=(self.num_agent))

    # Optional additional steps to close environment
    def close(self):
        # Maybe render?
        super().close()

    def reset(self):
        state, info = self._rail_env.reset()
        state = self.process_state(state)
        self._state = state
        #state = np.random.random(size=(8,))
        return state

    def execute(self, actions):
        actions = {index: RailEnvActions(value) for index, value in enumerate(actions)}
        
        state, reward, done, info  = self._rail_env.step(actions)
        terminal = done["__all__"]
        state = self.process_state(state)
        reward = self.process_reward(reward)
        #needed?
        self._state = state
    
        
        return state, terminal, reward 

In [22]:
from flatland.utils.rendertools import RenderTool
import matplotlib.pyplot as plt

def render_env(env, figsize=(8, 8)):
  """Show the environment using matplotlib"""
  env_renderer = RenderTool(env, gl="PILSVG")
  # img is a numpy array
  img = env_renderer.render_env(show=True, return_image=True)

  plt.figure(figsize=figsize)
  plt.imshow(img)
  plt.show()


In [23]:
from flatland.envs.rail_generators import sparse_rail_generator
from flatland.envs.schedule_generators import sparse_schedule_generator
from flatland.envs.rail_env import RailEnv, RailEnvActions
from flatland.envs.rail_generators import complex_rail_generator
from flatland.envs.observations import GlobalObsForRailEnv

seed = 69 #nice 

width = 10 # @param{type: "integer"}
height = 10 # @param{type: "integer"}
agents =  5 # @param{type: "integer"}

WINDOW_LENGTH =   22# @param{type: "integer"}


random_rail_generator = complex_rail_generator(
    nr_start_goal=10, # @param{type:"integer"} number of start and end goals 
                      # connections, the higher the easier it should be for 
                      # the trains
    nr_extra=10, # @param{type:"integer"} extra connections 
                 # (useful for alternite paths), the higher the easier
    min_dist=10,
    max_dist=99999,
    seed=seed
)


env = RailEnv(
    width=width,
    height=height,
    rail_generator=random_rail_generator,
    obs_builder_object=GlobalObsForRailEnv(),
    number_of_agents=agents
)
_ = env.reset()
environment = TensorforceEnv(env, agents)

In [24]:
from tensorforce.agents import DeepQNetwork

agent = DeepQNetwork.create(
    agent='tensorforce',
    environment=environment,  # alternatively: states, actions, (max_episode_timesteps)
    memory=10000,
    update=dict(unit='timesteps', batch_size=64),
    optimizer=dict(type='adam', learning_rate=3e-4),
    policy=dict(network='auto'),
    objective='policy_gradient',
    reward_estimation=dict(horizon=20)
    
)

In [25]:
# Train for 300 episodes
for _ in range(300):

    # Initialize episode
    states = environment.reset()
    terminal = False

    while not terminal:
        # Episode timestep
        actions = agent.act(states=states)
        print(actions)
        states, terminal, reward = environment.execute(actions=actions)
        agent.observe(terminal=terminal, reward=reward)

agent.close()
environment.close()

[0 3 3 0 2]
[4 4 2 0 4]
[1 1 1 4 2]
[3 3 1 3 0]
[1 2 1 1 0]
[1 2 3 2 1]
[3 3 0 2 0]
[3 4 3 3 0]
[0 4 0 1 0]
[1 0 2 2 1]
[1 4 3 4 2]
[4 0 3 0 4]
[4 0 2 1 4]
[3 1 2 3 4]
[0 0 0 2 1]
[3 1 4 0 2]
[0 0 3 1 1]
[3 0 2 3 2]
[4 3 0 2 1]
[3 3 4 0 4]
[4 0 1 0 0]
[1 3 0 0 2]
[2 1 3 0 0]
[3 4 0 3 3]
[0 0 2 3 1]
[1 1 1 2 1]
[4 4 3 3 2]
[4 1 3 0 3]
[0 2 2 4 2]
[4 3 3 4 3]
[1 2 2 0 2]
[3 4 0 4 2]
[4 2 2 1 1]
[3 4 4 2 0]
[4 0 0 2 0]
[1 2 0 1 0]
[3 2 1 3 4]
[2 0 3 3 1]
[4 3 4 4 1]
[3 1 0 1 0]
[1 1 4 0 2]
[2 2 3 3 4]
[4 1 3 2 1]
[1 1 1 0 2]
[0 0 0 0 4]
[2 1 1 4 2]
[2 0 0 2 1]
[0 0 2 1 3]
[0 3 2 2 1]
[1 0 1 3 4]
[1 2 1 0 2]
[3 3 4 4 2]
[4 2 1 4 2]
[4 0 2 2 0]
[4 3 4 0 3]
[2 1 3 2 2]
[1 3 1 2 4]
[1 0 3 4 1]
[3 0 0 0 3]
[1 0 3 0 4]
[1 1 3 1 4]
[2 0 1 1 1]
[2 1 1 1 1]
[3 1 4 0 4]
[2 2 2 2 0]
[4 4 2 0 3]
[2 4 1 1 3]
[2 3 0 3 0]
[2 0 3 4 3]
[3 2 4 3 4]
[1 1 1 4 1]
[3 4 0 0 1]
[3 0 3 1 4]
[4 3 2 4 4]
[0 2 2 2 2]
[4 3 4 3 3]
[4 3 2 0 3]
[4 2 2 3 2]
[4 2 4 0 1]
[3 4 3 0 4]
[4 0 1 1 1]
[1 4 4 3 0]
[0 3 3 0 2]
[4 0

TypeError: 'NoneType' object is not subscriptable

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=c8b2a743-4403-48d9-b1f8-a1215902878c' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>