In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to the project's .py files
# take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import py_trees
import logging
from deep_traffic.libtraffic import env
from behavior_tree.agent import BasicBehaviorTreeAgent, GeneticProgrammingBehaviorTreeAgent
from behavior_tree.condition_checker import BasicConditionChecker, GeneticProgrammingConditionChecker
from behavior_tree.tree import BasicBehaviorTree, GeneticProgrammingBehaviorTree, ConditionSequenceNode, IntersectConditionSequenceNode
from train import train_GP_BT
from runner import play_episode
from config import *

In [3]:
# Configure the root logger to emit records at DEBUG level and above; the
# "INFO:root:..." lines in the training cell's output come through it.
# Note: basicConfig() is a no-op if the root logger already has handlers.
logging.basicConfig(level=logging.DEBUG)

##### Environment

In [4]:
# Instantiate the DeepTraffic environment. The upper-case arguments are not
# defined in this notebook; presumably they arrive via `from config import *`.
e = env.DeepTraffic(
    lanes_side=LANES_SIDE,
    patches_ahead=PATCHES_AHEAD,
    patches_behind=PATCHES_BEHIND,
    history=HISTORY,
    obs=OBS,
)

# Keep the observation shape the environment reports and show it
# ((7, 5, 20) in the recorded output).
obs_shape = e.obs_shape
print(obs_shape)

(7, 5, 20)


In [5]:
# Call the environment's reset() and keep the array it returns as `state`.
# Its printed shape, (7, 5, 20) in the recorded output, equals e.obs_shape.
state = e.reset()
print(state.shape)

(7, 5, 20)


In [6]:
# Show the first slice along the leading axis of `state`, transposed for
# display (20 rows of 5 values in the recorded output).
# NOTE(review): presumably columns are lanes and rows are road patches, and
# values below 1 mark occupied cells -- confirm against the environment code.
print(state[0].T)

[[1.     1.     1.     1.     1.    ]
 [1.     1.     1.     0.0325 1.    ]
 [1.     1.     1.     0.0325 1.    ]
 [1.     1.     1.     0.0325 0.0325]
 [1.     1.     1.     0.0325 0.0325]
 [1.     1.     1.     1.     0.0325]
 [1.     1.     0.0325 1.     0.0325]
 [1.     1.     0.0325 1.     1.    ]
 [1.     1.     0.0325 1.     1.    ]
 [1.     1.     0.0325 1.     1.    ]
 [1.     0.0325 1.     1.     1.    ]
 [1.     0.0325 1.     1.     1.    ]
 [1.     0.0325 1.     1.     1.    ]
 [1.     0.0325 1.     1.     1.    ]
 [1.     1.     1.     1.     1.    ]
 [1.     1.     0.04   1.     1.    ]
 [1.     1.     0.04   1.     1.    ]
 [1.     1.     0.04   1.     1.    ]
 [1.     1.     0.04   1.     0.0325]
 [1.     1.     1.     1.     0.0325]]


##### ConditionChecker

In [7]:
# Create a GeneticProgrammingConditionChecker with default arguments and hand
# it the first slice of the observation through update_state().
condition_checker = GeneticProgrammingConditionChecker()
condition_checker.update_state(state[0])

In [8]:
# Shape of the state now held by the checker: (5, 20) in the recorded output,
# i.e. the shape of the state[0] slice that was passed in.
print(condition_checker.state.shape)

(5, 20)


In [9]:
# Print the checker's stored state transposed; in the recorded output it is
# identical to the state[0].T grid printed earlier in the notebook.
print(condition_checker.state.T)

[[1.     1.     1.     1.     1.    ]
 [1.     1.     1.     0.0325 1.    ]
 [1.     1.     1.     0.0325 1.    ]
 [1.     1.     1.     0.0325 0.0325]
 [1.     1.     1.     0.0325 0.0325]
 [1.     1.     1.     1.     0.0325]
 [1.     1.     0.0325 1.     0.0325]
 [1.     1.     0.0325 1.     1.    ]
 [1.     1.     0.0325 1.     1.    ]
 [1.     1.     0.0325 1.     1.    ]
 [1.     0.0325 1.     1.     1.    ]
 [1.     0.0325 1.     1.     1.    ]
 [1.     0.0325 1.     1.     1.    ]
 [1.     0.0325 1.     1.     1.    ]
 [1.     1.     1.     1.     1.    ]
 [1.     1.     0.04   1.     1.    ]
 [1.     1.     0.04   1.     1.    ]
 [1.     1.     0.04   1.     1.    ]
 [1.     1.     0.04   1.     0.0325]
 [1.     1.     1.     1.     0.0325]]


##### BTAgent

In [10]:
# Create the behavior-tree agent (default constructor arguments) that the
# remaining cells inspect, train and run.
agent = GeneticProgrammingBehaviorTreeAgent()

In [11]:
# Print the structure of the agent's behavior tree as it is right after
# construction, before the training cell has been run.
agent.display_tree()

[o] root
    [-] InitRoot
        [o] SafetyNode
            --> SafetyCond
            --> SafetyAct
        --> LearnAct



In [12]:
# Pass the first observation slice to the agent's condition checker, refresh
# the blackboard from it, then print the blackboard's contents.
# NOTE(review): this cell was originally labelled "tick", but no explicit tree
# tick call is visible here -- confirm whether update_blackboard() ticks.
agent.update_condition_checker(state[0])
agent.update_blackboard()
print(agent.blackboard)

Blackboard Client
  Client Data
    name              : root
    namespace         : /
    unique_identifier : e95f649a-5b00-49a3-a7e2-14733b19b36d
    read              : {'/action'}
    write             : {'/speed', '/enable_learning', '/cell_condition'}
    exclusive         : set()
  Variables
    /action          : -
    /speed           : 80.0
    /cell_condition  : {'0': False, '1': False, '2': False, '3': False, '4': False, '5': False, '6': False, '7': False, '8': False, '9': False, '10': False, '11': False, '12': False, '13': False, '14': False, '15': False, '16': False, '17': False, '18': False, '19': False, '20': False, '21': False, '22': False, '23': False, '24': False, '25': False, '26': False, '27': False, '28': False, '29': False, '30': True, '31': True, '32': True, '33': True, '34': False, '35': False, '36': False, '37': False, '38': False, '39': False, '40': False, '41': False, '42': False, '43': False, '44': False, '45': False, '46': True, '47': True, '48': True, '49

##### Training

In [None]:
%%time
# %%time must stay on the first line of the cell; it reports how long the
# whole cell took to run.
# Uncomment the next line to switch py_trees' own logging to DEBUG.
#py_trees.logging.level = py_trees.logging.Level.DEBUG
# Train `agent` on environment `e`. The recorded log counts "Episode 1/10" and
# "step k/200", matching the episodes=10 and steps=200 arguments.
train_GP_BT(e, agent, episodes=10, steps=200)

INFO:root:Episode 1/10
INFO:root:step 1/200: Learn Action triggered
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
INFO:root:step 1/200: new action sets, appended as a new Behavior
INFO:root:step 2/200: Learn Action triggered
INFO:root:step 2/200: duplicated action sets, intersect conditions with an existed Behavior
INFO:root:step 3/200: Learn Action triggered
INFO:root:step 3/200: duplicated action sets, intersect conditions with an existed Behavior
INFO:root:step 4/200: Learn Action triggered
INFO:root:step 4/200: duplicated action sets, intersect conditions with an existed Behavior
INFO:root:step 6/200: Learn Action triggered
INFO:root:step 6/200: duplicated action sets, intersect conditions with an existed Behavior
INFO:root:step 7/200: Learn Action triggered
INFO:root:step 7/200: duplicated action sets, intersect conditions with an existed Behavior
INFO:root:step 8/200: Learn Action triggered
INFO:root:step 8/200: duplicated action sets, intersect conditions with an exi

In [None]:
# Print the agent's behavior tree again, this time after the training cell,
# for comparison with the tree printed right after construction.
agent.display_tree()

##### Play Episode

In [None]:
# Set the blackboard's enable_learning flag to False, then run one episode of
# the agent in environment `e` with verbose output requested.
# NOTE(review): presumably reset=True makes play_episode reset the environment
# first -- confirm in runner.play_episode.
agent.blackboard.enable_learning = False
play_episode(e, agent, verbose=True, reset=True)