In [1]:
from treelib import Node, Tree
import numpy as np
import tqdm
import time

# Explorer Class

In [2]:
class Explorer():
    """Greedy random tree search with backtracking.

    A ``treelib`` tree is grown from a node whose identifier is ``"root"``.
    Every call to :meth:`explore_node` creates the children of the current
    node that do not exist yet (one per action), each receiving a random
    ``perf`` score and random ``viable`` / ``success`` flags, then either

    * moves onto a child flagged ``success`` and stops the search,
    * moves onto the viable child with the highest ``perf``, or
    * marks the current node as non-viable and steps back to its parent
      (stopping the search if the current node is the root).

    Parameters
    ----------
    viable_probability : float
        Probability used when drawing the ``viable`` flag of a new node.
    success_probability : float
        Probability used when drawing the ``success`` flag of a new node.
    possible_actions : iterable, optional
        Actions available at every node. Defaults to ``[1, 2, 3]``.
    rng : object with a ``random()`` method, optional
        Source of uniform samples in ``[0, 1)``, e.g. a
        ``numpy.random.Generator`` for reproducible runs. Defaults to the
        global ``numpy.random`` module.

    Attributes
    ----------
    tree : treelib.Tree
        The tree explored so far.
    current_position : treelib.Node
        Node the explorer currently stands on.
    depth : int
        Number of forward moves minus number of backward moves made so far.
    terminated : bool
        True once the search has stopped, for either reason.
    succeeded : bool
        True only if the search stopped on a node flagged ``success``;
        stays False when the search stopped because the root ran out of
        viable children.
    """

    def __init__(self, viable_probability=0.8, success_probability=0.01,
                 possible_actions=None, rng=None):
        self.tree = Tree()
        self.tree.create_node(identifier="root", data={})
        self.current_position = self.tree["root"]
        if possible_actions is None:
            self.possible_actions = [1, 2, 3]
        else:
            # Copy so later changes to the caller's iterable do not leak in.
            self.possible_actions = list(possible_actions)
        self.terminated = False
        self.succeeded = False

        self.viable_probability = viable_probability
        self.success_probability = success_probability
        self.depth = 0

        # Both ``numpy.random`` and ``numpy.random.Generator`` expose
        # ``random()``, so either can be used interchangeably below.
        self._rng = np.random if rng is None else rng

    def explore_node(self):
        """Perform one search step from ``current_position``.

        Does nothing once the search has terminated, so extra calls cannot
        grow the tree or change ``depth`` after the result is known.
        """
        if self.terminated:
            return

        here = self.current_position.identifier

        # Create a child for every action that has not been tried from here.
        tried_actions = [node.data["action"]
                         for node in self.tree.children(here)]
        untried_actions = [action for action in self.possible_actions
                           if action not in tried_actions]
        for action in untried_actions:
            self.explore_leaf(action)

        best_perf = -np.inf
        future_node = None

        for child in self.tree.children(here):
            # A successful child ends the search immediately.
            if child.data["success"]:
                self.terminated = True
                self.succeeded = True
                future_node = child
                break

            # Otherwise remember the viable child with the highest score.
            if child.data["viable"] and child.data["perf"] > best_perf:
                best_perf = child.data["perf"]
                future_node = child

        if future_node is None:
            # Dead end: flag this node so its parent will not pick it again.
            self.current_position.data["viable"] = False

            if here == "root":
                # Nothing left to try anywhere: stop without success.
                self.terminated = True
            else:
                self.current_position = self.tree.parent(here)
                self.depth -= 1
        else:
            self.current_position = future_node
            self.depth += 1

    def explore_leaf(self, action):
        """Attach a new randomly-scored child for ``action`` to the current node.

        The three random draws are made in the order perf, viable, success.
        """
        data = {"action": action,
                "perf": self._rng.random(),
                "viable": self._rng.random() < self.viable_probability,
                "success": self._rng.random() < self.success_probability}
        # Pass the identifier, as every other tree call in this class does.
        self.tree.create_node(parent=self.current_position.identifier,
                              data=data)

    def reconstruct_trajectory(self):
        """Return the actions leading from the root to ``current_position``.

        Returns
        -------
        list or None
            ``None`` while the search has not terminated. Otherwise the list
            of actions in root-to-node order; this list is empty when the
            search stopped at the root without success (``succeeded`` is
            False in that case).
        """
        if not self.terminated:
            return None

        action_list = []
        node = self.current_position
        # Walk upwards collecting actions, then flip into root-first order.
        while node.identifier != "root":
            action_list.append(node.data["action"])
            node = self.tree.parent(node.identifier)

        action_list.reverse()
        return action_list

In [3]:
# Start with every new node viable and a 1% chance of success per node.
explorer = Explorer(viable_probability=1.0, success_probability=0.01)

# Keep stepping until the explorer reports that the search has stopped.
while not explorer.terminated:
    print(explorer.depth)
    explorer.explore_node()
    # Lower the viability probability after each step, so nodes created
    # later are less likely to be viable.
    explorer.viable_probability /= 1.02

    # Pause between steps so the printed depth can be followed live.
    time.sleep(0.5)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
25
24
23
22
21
22
23
24
25
26
25
24
25
26
27
26
25
24
23
22
21
22
21
20
19
20
21
22
23
24
23
24
25
26
27
26
25
24
23
22
21
20
19
18
19
20
19
18
19
20
21
22
21
22
21
20
21
20
19
18
17
18
17
16
17
18
17
16
15
14
15
16
17


In [4]:
# Display the actions taken from the root down to the node where the search
# stopped (an empty list means the search ended back at the root).
explorer.reconstruct_trajectory()

[1, 2, 2, 2, 3, 2, 3, 2, 1, 2, 3, 3, 2, 3, 1, 3, 1, 3]