## Class State

This class was designed to provide an agent the capability to travel between edges on a graph object. The class makes a game out of the task by providing options as an action space and keeping track of current state information.

In [1]:
from polypocket import StreetMap
from polypocket import Enviroment
from polypocket import generate



### Generating a polygon for testing class

In [20]:
center = (-121.885254, 37.335796)
dist = 350 # Meters

# Creates polygon with given dimensions
poly = StreetMap(center, dist)

n_houses = 3 # Houses
n_gasstations = 2 # Gas stations

# Instructs class to generate random coordinate points within polygon
objects = generate(streetmap=poly, houses=n_houses, gasstations=n_gasstations).get('osmids')

# Instantiates enviroment
env = Enviroment(poly.G, objects)

### Class Source Code

In [28]:
import numpy as np

class State():
       
    def __init__(self, enviroment, start_position, 
                 gas_level=20, kpg=18, refuel_at=0.25, gas_price=5, 
                 target_value=100):
        '''
        This class was designed to provide an agent the capability to travel 
        between edges on a graph object. The class makes a game out of the 
        task by providing options as an action space and keeping track of 
        current state information.
        
        params: 
                enviroment:        Generated via polypocket.Enviroment
                start_position:    Node_Id, should be a node available in enviroment
                gas_level:         Default:20, Gallons of gas available to agent
                kpg:               Default:18, Kilometers per gallon
                refuel_at:         Default:0.25, Threshhold percent of gas_level before agent should refuel
                gas_price:         Default:5, Cost per gallon of gas in dollars
                target_value:      Default:100, Reward for visiting a target node
        
        attributes:
                start:                Returns start_position
                distance:             Returns total distance traveled
                position:             Returns current position
                gas:                  Returns current gas_level in gallons
                max_gas:              Returns maximum gas_level in gallons
                kpg:                  Returns fuel efficiency (constant)
                refuel_at:            Returns threshhold percent of max_gas when agent will refuel
                gas_price:            Returns the price of gas per gallon (constant)
                cost:                 Returns the current total cost of the route
                route:                Returns array of shape(s,) for each state; [position|state(by index)]
                targets:              Returns array of shape(n,); [target]
                gasstations:          Returns array of shape(n,); [gasstation]
                remaining_targets:    Returns array of shape(n,); [remaining_target]
                target_value:         Returns the reward for visiting a target
                env:                  Returns array of shape(n, 5); 
                                          [edgeStart, edgeEnd, length, bool_is_remaining_target*1, bool_is_gasstation*1]
                choices:              Returns action space as array of shape(n, 4); [choice_node, edge_attribudes]
                
        methods: 
                gas_percent(self):               Returns current level of gas as percent of maximum
                choices(self):                   Returns available choices as rows in an array
                set_position(self, position):    Functional-Changes current position of agent to provided position
                                                 if position is valid choice
        '''
        # Validates start position
        if start_position not in np.array([enviroment.nodes[:,0], enviroment.norm_nodes[:,0]]):
            raise ValueError('start_position not within map.')
        
        self.start     = start_position
        self.distance  = 0 # Total distance travelled
        
        self.position  = start_position
        
        self.gas       = gas_level
        self.max_gas   = gas_level
        self.kpg       = kpg
        self.refuel_at = refuel_at
        
        self.gas_price = gas_price
        self.cost      = 0
        
        self.route       = np.array(start_position)    
        self.targets     = enviroment.norm_houses
        self.gasstations = enviroment.norm_gasstations
        
        self.remaining_targets = enviroment.norm_houses
        self.target_value = target_value
        self.env = enviroment.norm_flagged_edges
        
        self.choices = self.env[self.env[:,0]==self.position][:,1:]
        
        
    def gas_percent(self):
        return self.gas/self.max_gas
    
    
    def set_position(self, position):
        
        # Updates targets and env if position is target
        def update_targets(self):
            mask = np.where(self.remaining_targets == position) # Mask for updating targets
            self.remaining_targets = np.delete(self.remaining_targets, mask) # Updates targets
            self.env[:,3] = np.isin(self.env[:,1], self.remaining_targets)*1 # Updates enviroment
        
        # Refuels if position is gasstation
        def refuel(self):
            if self.gas_percent() <= self.refuel_at: # if gas is <= refuel at percent
                gas_needed = self.max_gas-self.gas # Defines amount of gas needed
                self.cost += gas_needed/self.gas_price # Adds cost of gas needed to route cost
                self.gas   = self.max_gas # Resets gas to mac_gas
   

        # Prevents travelling to current location
        if position == self.position:
            raise AttributeError(f'{position} already occupies the current state.')
        
        # Updates attributes if arg was valid
        if position in self.choices[:,0]:
            length         = self.choices[self.choices[:,0] == position][:,-1]
            self.distance += length
            self.gas      -= length/self.kpg
            self.position  = position
            self.choices   = self.env[self.env[:,0]==self.position][:,1:]
            self.route     = np.append(self.route, position)
        else:
            raise AttributeError(f'Can not travel to {position} from {self.position}. Refer to self.choices.')
        
        # Checks if position is target or gasstation
        if position in self.remaining_targets:
            update_targets(self)
        if position in self.gasstations:
            refuel(self)

### Example Usage

##### Creating an instance

In [22]:
start = 0
state = State(env, start)

##### Reviewing current data

In [23]:
print(f'Position: {state.position}')
print(f'Gas: {state.gas}')
print(f'Max Gas: {state.max_gas}')
print(f'Distance Travelled: {state.distance}')
print(f'Remaining Targets: {state.remaining_targets}')
print(f'Action Space: \n{state.choices}')

Position: 0
Gas: 20
Max Gas: 20
Distance Travelled: 0
Remaining Targets: [ 3  3 15]
Action Space: 
[[ 11.     52.473   0.      0.   ]
 [ 10.     72.482   0.      1.   ]
 [  1.    102.603   0.      0.   ]]


##### Travelling To A Target
via random choices

In [24]:
# Bool mask of choices
choices_is_target = np.isin(state.choices[:,0], state.remaining_targets)
target_nearby = any(choices_is_target)

# Updates position randomly until target is nearby
while not target_nearby:
    choice = np.random.choice(state.choices[:,0])
    state.set_position(choice)
    
    # Bool mask of choices
    choices_is_target = np.isin(state.choices[:,0], state.remaining_targets)
    target_nearby = any(choices_is_target)

# Prints data
print(f'Position: {state.position}')
print(f'Gas: {state.gas}')
print(f'Max Gas: {state.max_gas}')
print(f'Distance Travelled: {state.distance}')
print(f'Remaining Targets: {state.remaining_targets}')
print(f'Action Space: \n{state.choices}')

Position: 14.0
Gas: [19.61111111]
Max Gas: 20
Distance Travelled: [7.]
Remaining Targets: [ 3  3 15]
Action Space: 
[[ 22.     61.129   0.      0.   ]
 [ 15.    109.162   1.      0.   ]]


With the above status report, we can see that the nearby_target is 15. A mask can be used to select this, but I will do so manually below for a demonstration of how the status reprort updates remaining targets. Please keep in mind, the state does not currently end an episode if gas has run out. 

In [26]:
state.set_position(15)

# Prints data
print(f'Position: {state.position}')
print(f'Gas: {state.gas}')
print(f'Max Gas: {state.max_gas}')
print(f'Distance Travelled: {state.distance}')
print(f'Remaining Targets: {state.remaining_targets}')
print(f'Action Space: \n{state.choices}')

Position: 15
Gas: [19.61111111]
Max Gas: 20
Distance Travelled: [7.]
Remaining Targets: [3 3]
Action Space: 
[[  3.     27.169   1.      0.   ]
 [ 14.    109.162   0.      0.   ]]


You can also review the route taken thus far easily using the route attribute

In [27]:
state.route

array([ 0.,  1.,  0., 10.,  9., 10.,  9., 10.,  7., 16.,  8., 16.,  8.,
       16.,  7., 16.,  8.,  1.,  2.,  1.,  0.,  1.,  0., 10.,  7., 16.,
        8., 16.,  8., 16.,  7., 16.,  7., 16.,  8.,  1.,  2.,  1.,  2.,
       19., 20., 19.,  2.,  1.,  0., 11.,  0.,  1.,  0.,  1.,  2.,  1.,
        0., 11., 12., 11.,  0.,  1.,  2., 19., 20., 19., 20., 19.,  2.,
        1.,  0., 10.,  7., 16.,  8.,  1.,  2., 19., 13., 19.,  2.,  1.,
        2.,  1.,  2.,  1.,  0., 10.,  7., 16., 17., 16., 17., 16.,  8.,
        1.,  0., 11., 12., 11., 12., 11., 12., 11.,  0., 10.,  7., 16.,
        7., 16., 17., 16.,  8.,  1.,  0., 11., 12., 11., 12., 11., 12.,
       11.,  0., 11.,  0.,  1.,  2., 19.,  2., 19., 13., 22., 21., 22.,
       14., 15.])