In [1]:
# Dependencies
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from utils.config import load_config_nb

sns.set('notebook', font_scale=1.1, rc={'figure.figsize': (10, 3)})
sns.set_style('ticks', rc={'figure.facecolor': 'none', 'axes.facecolor': 'none'})
%config InlineBackend.figure_format = 'svg'

import yaml
from nocturne.envs.base_env import BaseEnv

### Settings

In [2]:
env_config = load_config_nb("env_config")

env_config.data_path = "../data_lp/"
env_config.subscriber.use_observations = False
env_config.subscriber.use_ego_state = False
env_config.subscriber.use_current_position = True
env_config.dt = 0.1 # Default is 0.1
env_config.normalize_state = False

# Action space
env_config.accel_discretization = 1
env_config.steering_discretization = 3
env_config.accel_lower_bound = 0.0
env_config.accel_upper_bound = 0.0
env_config.steering_lower_bound = -0.5
env_config.steering_upper_bound = 0.5

In [3]:
# Make environment
env = BaseEnv(env_config)

### Linear Programming

- TODO: Debug transitions. 

In [4]:
# Make environment
env = BaseEnv(env_config)
env.reset()

{3: array([ 9.03282031e+03, -2.71831323e+03,  1.10296458e-01,  0.00000000e+00])}

In [6]:
env.actions_to_idx

{(0.0, -0.5): [0], (0.0, 0.0): [1], (0.0, 0.5): [2]}

In [5]:
from itertools import combinations_with_replacement

# Constants
NUM_REP_ACTION = 1
AGENT_ID = 3

# Reset environment and get initial state
init_state = env.reset()[AGENT_ID]

# Get maximum number of actions
max_actions = 5
print(f"Max actions: {max_actions}")


# Define number of states, reward, transition matrix, and discount factor
num_actions = env.action_space.n
num_states = num_actions ** max_actions
states = {idx: None for idx in range(num_states)}
reward = np.full(fill_value=np.nan, shape=(num_states, ))
transition = np.zeros(shape=(num_states, num_actions, num_states))
gamma = 1.0

# Set initial state and reward
states[0] = init_state
reward[0] = 0

# Get all possible action sequences
action_seqs = list(combinations_with_replacement(env.idx_to_actions, r=max_actions))

# Dictionary to store final state reward
final_state_reward = {}

# Iterate through all action sequences
state_idx = 1
for action_seq_idx, action_seq in enumerate(action_seqs):

    # Print progress
    # print("\rAction index: {} / {}".format(action_seq_idx + 1, len(action_seqs)), end="")
    print(f"Action index: {action_seq_idx + 1} / {len(action_seqs)}")
    print(f"Action sequence: {action_seq}")
    
    # Reset environment
    obs = env.reset()
    
    # Step through scene using action sequence
    for action_idx, action in enumerate(action_seq):
        
        # Reset reward for action
        total_reward = 0

        # Repeat action
        for _ in range(NUM_REP_ACTION):
            obs, rew, done, info = env.step({AGENT_ID: action})
            total_reward += rew[AGENT_ID]
            if done['__all__']:  # Stop if agent is done
                final_state_reward[state_idx] = 10 if info[AGENT_ID]['goal_achieved'] else -10
                total_reward += final_state_reward[state_idx]
                break
        
        # Get next state
        next_state = obs[AGENT_ID]

        # Store state, reward, and transition
        states[state_idx] = next_state
        reward[state_idx] = total_reward
        transition[(state_idx - 1) if (action_idx != 0) else 0, action, state_idx] = 1

        # Increment state index
        state_idx += 1

        if done['__all__']:  # Stop if agent is done
            print("GOAL ACHIEVCED!" if info[AGENT_ID]['goal_achieved'] else "Oh no, the car crashed!")
            break
    if not done['__all__']:
        print("The car is not done driving...")
    print("\n")
print("\n")

# # Combine duplicate states
# for state1_idx in range(num_states):
#     state1 = states[state1_idx]
#     for state2_idx in range(state1_idx + 1, num_states):
#         state2 = states[state2_idx]
        
#         # Skip if state2 is None
#         if state2 is None:
#             continue
        
#         # If states are the same, combine them. That is, add the transition probabilities and reward of state2 to state1
#         # and remove state2.
#         if np.all(state1 == state2):
#             transition[state1_idx, :, :] = np.clip(transition[state1_idx, :, :] + transition[state2_idx, :, :], a_min=0, a_max=1)
#             transition[:, :, state1_idx] = np.clip(transition[:, :, state1_idx] + transition[:, :, state2_idx], a_min=0, a_max=1)
#             if state2_idx in final_state_reward:
#                 final_state_reward[state1_idx] = final_state_reward[state2_idx]
#             states[state2_idx] = None

# # Remove duplicate states
# reward = reward[[state is not None for state in states.values()]]
# states = [state for state in states.values() if state is not None]
# transition = transition[:len(states), :, :len(states)]

# Display number of states
print(f"Number of states: {len(states)}")

Max actions: 5
Action index: 1 / 21
Action sequence: (0, 0, 0, 0, 0)
The car is not done driving...


Action index: 2 / 21
Action sequence: (0, 0, 0, 0, 1)
The car is not done driving...


Action index: 3 / 21
Action sequence: (0, 0, 0, 0, 2)
The car is not done driving...


Action index: 4 / 21
Action sequence: (0, 0, 0, 1, 1)
The car is not done driving...


Action index: 5 / 21
Action sequence: (0, 0, 0, 1, 2)
The car is not done driving...


Action index: 6 / 21
Action sequence: (0, 0, 0, 2, 2)
The car is not done driving...


Action index: 7 / 21
Action sequence: (0, 0, 1, 1, 1)
The car is not done driving...


Action index: 8 / 21
Action sequence: (0, 0, 1, 1, 2)
The car is not done driving...


Action index: 9 / 21
Action sequence: (0, 0, 1, 2, 2)
The car is not done driving...


Action index: 10 / 21
Action sequence: (0, 0, 2, 2, 2)
The car is not done driving...


Action index: 11 / 21
Action sequence: (0, 1, 1, 1, 1)
The car is not done driving...


Action index: 12 / 21
Acti

In [85]:
try:
    from pyomo.environ import *
except ModuleNotFoundError:
    !pip install pyomo
    from pyomo.environ import *

In [86]:
# Step 0: Create an instance of the model
model = ConcreteModel()

# Step 1: Define index sets
states_set = list(range(len(states)))
actions_set = list(range(num_actions))

# Step 2: Define the decision 
model.v = Var(states_set, initialize=0)

# Step 3: Define Objective
@model.Objective(sense=minimize)
def objective(m):
    return sum([model.v[s] for s in states_set])

# Step 4: Constraints
@model.Constraint(states_set, actions_set)
def state_value_constraint(m, s, a):
    return model.v[s] >= reward[s] + gamma * sum([transition[s, a, s2] * model.v[s2] for s2 in states_set])

# Step 5: Solve
# results = SolverFactory('cbc').solve(model)
results = SolverFactory('glpk').solve(model)
results.write()

# = Solver Results                                         =
# ----------------------------------------------------------
#   Problem Information
# ----------------------------------------------------------
Problem: 
- Name: unknown
  Lower bound: 22.6284593015338
  Upper bound: 22.6284593015338
  Number of objectives: 1
  Number of constraints: 162
  Number of variables: 54
  Number of nonzeros: 265
  Sense: minimize
# ----------------------------------------------------------
#   Solver Information
# ----------------------------------------------------------
Solver: 
- Status: ok
  Termination condition: optimal
  Statistics: 
    Branch and bound: 
      Number of bounded subproblems: 0
      Number of created subproblems: 0
  Error rc: 0
  Time: 0.0028409957885742188
# ----------------------------------------------------------
#   Solution Information
# ----------------------------------------------------------
Solution: 
- number of solutions: 0
  number of solutions displayed: 0


In [87]:
model.display()

Model unknown

  Variables:
    v : Size=54, Index=v_index
        Key : Lower : Value              : Upper : Fixed : Stale : Domain
          0 :  None :   9.40449722058958 :  None : False : False :  Reals
          1 :  None :   2.34598886628482 :  None : False : False :  Reals
          2 :  None :  0.562390328977552 :  None : False : False :  Reals
          3 :  None :  0.135833511725475 :  None : False : False :  Reals
          4 :  None : 0.0388176533963026 :  None : False : False :  Reals
          5 :  None :  0.019434425027664 :  None : False : False :  Reals
          6 :  None :  0.834157425975136 :  None : False : False :  Reals
          7 :  None :  0.252332584389005 :  None : False : False :  Reals
          8 :  None : 0.0970672279043636 :  None : False : False :  Reals
          9 :  None : 0.0388665033359367 :  None : False : False :  Reals
         10 :  None :  0.019431914374765 :  None : False : False :  Reals
         11 :  None :  0.834000546099583 :  None : Fa

In [88]:
values = np.array([val.value for val in model.v.values()])

In [89]:
opt_path = [0]
for _ in range(15):
    opt_next = np.multiply(transition[opt_path[-1], :, :], values).sum(axis=1).argmax()
    opt_path.append(opt_next)
    print(opt_next)

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


In [90]:
pd.DataFrame(np.multiply(transition[opt_path[-1], :, :], values))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,44,45,46,47,48,49,50,51,52,53
0,0.0,2.345989,0.0,0.0,0.0,0.0,0.834157,0.0,0.0,0.0,...,0.0,0.0,0.659011,0.0,0.0,0.0,0.0,0.058112,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [94]:
transition[0, 0, :]

array([0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0.,
       0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0.,
       0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0.