In [1]:
# Enable IPython's autoreload extension so that edits to the imported project
# modules are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload modules automatically before each cell execution.
%autoreload 2

In [2]:
import py_trees
import logging
from deep_traffic.libtraffic import env
from behavior_tree.agent import BasicBehaviorTreeAgent, GeneticProgrammingBehaviorTreeAgent
from behavior_tree.condition_checker import BasicConditionChecker, GeneticProgrammingConditionChecker
from behavior_tree.tree import BasicBehaviorTree, GeneticProgrammingBehaviorTree, ConditionSequenceNode, IntersectConditionSequenceNode
from train import train_GP_BT
from runner import play_episode
from config import *

In [3]:
# Route DEBUG-and-above records from the root logger to the notebook output.
# The "INFO:root:..." progress lines shown under the training cell would be
# filtered out at the default WARNING level.
logging.basicConfig(level=logging.DEBUG)

##### Environment

In [4]:
# Build the DeepTraffic simulator. Every setting is an upper-case constant
# brought into the namespace by the notebook's `from config import *`.
e = env.DeepTraffic(
    lanes_side=LANES_SIDE,
    patches_ahead=PATCHES_AHEAD,
    patches_behind=PATCHES_BEHIND,
    history=HISTORY,
    obs=OBS,
)

# Keep the observation shape reported by the environment and show it.
obs_shape = e.obs_shape
print(obs_shape)

(7, 3, 20)


In [5]:
# Reset the environment and keep the observation it returns.
# Its shape matches the environment's `obs_shape` printed earlier: (7, 3, 20).
# NOTE(review): axis meaning is not shown here — presumably
# (history frames, lanes, patches); confirm against the env implementation.
state = e.reset()
print(state.shape)

(7, 3, 20)


In [6]:
# Show the first slice along axis 0, transposed so the (3, 20) slice prints
# as 20 rows of 3 values for easier reading.
print(state[0].T)

[[0.0325 1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [0.0325 1.     1.    ]
 [0.0325 1.     1.    ]
 [0.0325 1.     1.    ]
 [0.0325 1.     1.    ]
 [1.     0.04   1.    ]
 [1.     0.04   1.    ]
 [1.     0.04   1.    ]
 [1.     0.04   1.    ]
 [1.     1.     1.    ]]


##### ConditionChecker

In [7]:
# Create a stand-alone condition checker and feed it the first slice of the
# observation (the same slice printed above).
condition_checker = GeneticProgrammingConditionChecker()
condition_checker.update_state(state[0])

In [8]:
# Sanity check: the checker's stored state has the shape of one slice, (3, 20).
print(condition_checker.state.shape)

(3, 20)


In [9]:
# Print the stored state transposed (20 rows of 3 values); the saved output is
# identical to the `state[0].T` printout earlier in the notebook.
print(condition_checker.state.T)

[[0.0325 1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [1.     1.     1.    ]
 [0.0325 1.     1.    ]
 [0.0325 1.     1.    ]
 [0.0325 1.     1.    ]
 [0.0325 1.     1.    ]
 [1.     0.04   1.    ]
 [1.     0.04   1.    ]
 [1.     0.04   1.    ]
 [1.     0.04   1.    ]
 [1.     1.     1.    ]]


##### BTAgent

In [10]:
# Instantiate the genetic-programming behavior-tree agent with its defaults.
# This same object is later trained, displayed and evaluated.
agent = GeneticProgrammingBehaviorTreeAgent()

In [11]:
# Display the agent's behavior tree before any training: the saved output
# shows a root over InitRoot, which holds SafetyNode (SafetyCond, SafetyAct)
# and LearnAct.
agent.display_tree()

[o] root
    [-] InitRoot
        [-] SafetyNode
            --> SafetyCond
            --> SafetyAct
        --> LearnAct



In [12]:
# Push the current observation slice into the agent's condition checker, then
# refresh the agent's blackboard and print it for inspection.
# NOTE(review): this cell was originally labelled "tick", but no explicit tree
# tick is visible here — confirm whether update_blackboard() ticks the tree.
agent.update_condition_checker(state[0])
agent.update_blackboard()
# The printout lists the blackboard client's read/write keys and the current
# values of /action, /speed and /cell_condition; it is long.
print(agent.blackboard)

Blackboard Client
  Client Data
    name              : root
    namespace         : /
    unique_identifier : f93b7c55-16cb-4b77-8a64-4bd94ae50840
    read              : {'/action'}
    write             : {'/cell_condition', '/enable_learning', '/speed'}
    exclusive         : set()
  Variables
    /action          : -
    /speed           : 80.0
    /cell_condition  : {'0': True, '1': False, '2': False, '3': False, '4': False, '5': False, '6': False, '7': False, '8': False, '9': False, '10': False, '11': True, '12': True, '13': True, '14': True, '15': False, '16': False, '17': False, '18': False, '19': False, '20': False, '21': False, '22': False, '23': False, '24': False, '25': False, '26': False, '27': False, '28': False, '29': False, '30': False, '31': False, '32': False, '33': False, '34': False, '35': True, '36': True, '37': True, '38': True, '39': False, '40': False, '41': False, '42': False, '43': False, '44': False, '45': False, '46': False, '47': False, '48': False, '49':

##### Training

In [19]:
%%time
# `%%time` must remain the first line of this cell; it reports the wall time
# of the whole training run.
# Optional: uncomment the next line to get py_trees' own DEBUG-level output.
#py_trees.logging.level = py_trees.logging.Level.DEBUG
# Train the agent on environment `e` for 10 episodes of 200 steps each.
# Progress is reported through the root logger (the "INFO:root:..." lines).
train_GP_BT(e, agent, episodes=10, steps=200)

INFO:root:Episode 1/10
INFO:root:Episode 2/10
INFO:root:Episode 3/10
INFO:root:step 1/200: Learn Action triggered
INFO:root:step 1/200: no single action found, resort to idle action
INFO:root:step 2/200: Learn Action triggered
INFO:root:step 2/200: new action sets, appended as a new Behavior
INFO:root:step 4/200: Learn Action triggered
INFO:root:step 4/200: duplicated action sets, intersect conditions with an existed Behavior
INFO:root:Episode 4/10
INFO:root:step 21/200: Learn Action triggered
INFO:root:step 21/200: new action sets, appended as a new Behavior
INFO:root:Episode 5/10
INFO:root:step 1/200: Learn Action triggered
INFO:root:step 1/200: no single action found, resort to idle action
INFO:root:step 2/200: Learn Action triggered
INFO:root:step 2/200: duplicated action sets, intersect conditions with an existed Behavior
INFO:root:Episode 6/10
INFO:root:Episode 7/10
INFO:root:step 35/200: Learn Action triggered
INFO:root:step 35/200: new action sets, appended as a new Behavior
IN

Wall time: 30.9 s


In [18]:
# Display the behavior tree after training to inspect the learned
# Behavior/Cond nodes.
# NOTE(review): this cell's execution count (18) is lower than the training
# cell's (19), so the saved output may come from an earlier training run —
# re-run this cell after training to refresh it.
agent.display_tree()

[o] root
    [-] Behavior8
        [-] Cond229
            --> F0
            --> F1
            --> F2
            --> F3
            --> F4
            --> F5
            --> F6
            --> F7
            --> F8
            --> F9
            --> F10
            --> F11
            --> F12
            --> F13
            --> F14
            --> F15
            --> F16
            --> F17
            --> F18
            --> F19
            --> T20
            --> T21
            --> F22
            --> F23
            --> F24
            --> F25
            --> F26
            --> F27
            --> F28
            --> F29
            --> F30
            --> F31
            --> F32
            --> F33
            --> F34
            --> T35
            --> T36
            --> T37
            --> T38
            --> F39
            --> F40
            --> F41
            --> F42
            --> F43
            --> F44
            --> F45
            --> F46
            --> F47
   

##### Play Episode

In [22]:
# Turn off the blackboard's `enable_learning` flag before evaluating.
# NOTE(review): presumably this stops the learn action from modifying the tree
# during play — confirm in the agent implementation.
agent.blackboard.enable_learning = False
# Play one episode with per-step printing (verbose=True) and reset=True.
# NOTE(review): the number at the end of the saved output looks like the
# call's return value, presumably the episode's total reward — confirm.
play_episode(e, agent, verbose=True, reset=True)

Step: 0
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

X X _ _ X X X X _ _ _ _ _ _ _ O O O O _ 

_ _ _ X X X X _ _ _ X X X X _ _ _ _ _ _ 

Step: 0
noAction 80 3
Reward: 1.0
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

X X _ _ _ X X X X _ _ _ _ _ _ O O O O _ 

_ _ _ X X X X _ _ _ _ X X X X _ _ _ _ _ 

**************************************************

Step: 1
noAction 80 3
Reward: 1.0
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

X X _ _ _ _ X X X X _ _ _ _ _ O O O O _ 

_ _ _ X X X X _ _ _ _ X X X X _ _ _ _ _ 

**************************************************

Step: 2
action change: noAction -> decelerate
decelerate 79 3
Reward: 0.95
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

X X X _ _ _ X X X X _ _ _ _ _ O O O O _ 

_ _ _ _ X X X X _ _ _ X X X X _ _ _ _ _ 

**************************************************

Step: 3
decelerate 78 3
Reward: 0.9
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 

X X X _ _ _ _ X X X X _ _ _ _ O O O O _ 

_ _ _ _ X X X X _ _ _ _ X X X X _ _ _ _ 

***************************

35.88