In [1]:
import logging
import platform, psutil
import math

from fimdpenv import setup
from fimdpenv.UUVEnv import SingleAgentEnv
from fipomdp import ConsPOMDP
from fipomdp.pomcp import OnlineStrategy

# Call the `setup` function imported from fimdpenv (its effects are not visible in this notebook).
setup()
# Default logging threshold; the optional logging-configuration cell reassigns this name to INFO.
logging_level = logging.WARNING

## UUV POMDP Single Agent

Run the following two cells to enable logging.

In [2]:
# Base name of the log file; the ".log" extension is appended where logging is configured.
log_file_name = "UUVExperiments"  # Change to suit your needs
# Desired logging threshold for this session.
logging_level = logging.INFO # INFO lets the notebook's logging.info(...) messages through

In [3]:
# Configure the root logger to write to "<log_file_name>.log", truncating any previous file.
# The threshold is taken from `logging_level` instead of a hard-coded logging.INFO, so the
# value chosen in the configuration cell is the one that actually takes effect.
# NOTE: logging.basicConfig is a no-op when the root logger already has handlers, so
# re-running this cell in a live kernel does not reconfigure logging; restart the kernel first.
logging.basicConfig(filename=f"{log_file_name}.log",
                    filemode='w',  # Erase previous log
                    format='%(asctime)s %(levelname)-8s %(message)s',
                    level=logging_level,
                    datefmt='%Y-%m-%d %H:%M:%S')
# Marker for the start of a run; only recorded when the threshold is INFO or lower.
logging.info('START')

Device logs

In [4]:
# Log a snapshot of the host machine (platform.uname() fields plus total RAM).
uname = platform.uname()
for host_line in (
    f"Node name: {uname.node}",
    f"System: {uname.system}",
    f"Release: {uname.release}",
    f"Version: {uname.version}",
    f"Machine: {uname.machine}",
    f"Processor: {uname.processor}",
):
    logging.info(host_line)

# Total memory reported by psutil, converted from bytes by dividing by 1024**3 and
# rounded to a whole number.
total_ram_gb = round(psutil.virtual_memory().total / (1024.0 ** 3))
logging.info(f"RAM: {total_ram_gb} GB")

Node name: simonbrlej-ThinkPad-T14-Gen-1
System: Linux
Release: 5.11.0-44-generic
Version: #48~20.04.2-Ubuntu SMP Tue Dec 14 15:36:44 UTC 2021
Machine: x86_64
Processor: x86_64
RAM: 15 GB


##### Create environment with observations

In [2]:
from fipomdp.environment_utils import set_cross_observations_to_UUV_grid

In [2]:
logging.info('Creating UUV environment with observations.')

# 20x20 grid environment with capacity 20; reload and target lists are given as integer ids.
env = SingleAgentEnv(
    grid_size=[20, 20],
    capacity=20,
    reloads=[64, 69, 74, 164, 169, 174, 264, 269, 274, 364, 369, 374],
    targets=[103, 209, 210, 270],
    init_state=399,
    enhanced_actionspace=0,
)

# Extract the model and target set from the environment, then re-tag the model object
# as a ConsPOMDP in place before observations are attached to it.
mdp, targets = env.get_consmdp()
mdp.__class__ = ConsPOMDP
grid_dims = (env.grid_size[0], env.grid_size[1])
set_cross_observations_to_UUV_grid(mdp, grid_dims)

logging.info('Environment created')

NameError: name 'logging' is not defined

##### Compute the belief-support CMDP and the guessing CMDP

In [None]:
# Start state of the 20x20 scenario, named once and reused for the belief support and
# the guessing-CMDP initial state below.
init_state = 399

# Inputs for the online strategy.
cpomdp = mdp
capacity = env.capacities[0]
init_energy = capacity  # start with full energy
init_obs = 399
init_bel_supp = (init_state,)
exploration = 0.9
random_seed = 1

cpomdp.compute_guessing_cmdp_initial_state([init_state])
# NOTE(review): the trailing False is passed positionally; its meaning is not visible
# here (presumably a verbosity/debug switch) — confirm against OnlineStrategy.
strategy = OnlineStrategy(
    cpomdp, capacity, init_energy, init_obs, init_bel_supp,
    targets, exploration, random_seed, False,
)

In [None]:
# Display the `action_shield` attribute of the strategy's tree.
strategy.tree.action_shield

In [3]:
import math

# Minimal 2x2 scenario: reload at 0, target at 2, initial state 0.
# This rebinds env, mdp, targets, cpomdp and strategy from any earlier cell.
env = SingleAgentEnv(
    grid_size=[2, 2],
    capacity=20,
    reloads=[0],
    targets=[2],
    init_state=0,
    enhanced_actionspace=0,
)

# Extract the model and targets, re-tag the model as a ConsPOMDP, attach observations.
mdp, targets = env.get_consmdp()
mdp.__class__ = ConsPOMDP
grid_dims = (env.grid_size[0], env.grid_size[1])
set_cross_observations_to_UUV_grid(mdp, grid_dims)

# Inputs for the online strategy.
cpomdp = mdp
capacity = env.capacities[0]
init_energy = capacity  # start with full energy
init_obs = 0
init_bel_supp = (0,)
exploration = 0.9
random_seed = 1

# NOTE(review): the trailing True is passed positionally; its meaning is not visible
# here (presumably a verbosity/debug switch) — confirm against OnlineStrategy.
strategy = OnlineStrategy(
    cpomdp, capacity, init_energy, init_obs, init_bel_supp,
    targets, exploration, random_seed, True,
)

# Show the tree's action shield, the environment's first capacity, and the initial energy.
print(strategy.tree.action_shield)
print(env.capacities[0])
print(init_energy)

[0, -1, -1, -1, -1, -1, -1, -1]
[0, 2, -1, -1, -1, -1, -1, -1]
[0, 2, 2, -1, -1, -1, -1, -1]
[0, 2, 2, 4, -1, -1, -1, -1]
[0, 2, 2, 4, -1, -1, -1, -1]
[0, 2, 2, 4, 4, -1, -1, -1]
[0, 2, 2, 4, 4, -1, -1, -1]
[0, 2, 2, 4, 4, 4, -1, -1]
[0, 2, 2, 4, 4, 4, -1, -1]
[0, 2, 2, 4, 4, 4, 4, -1]
[0, 2, 2, 4, 4, 4, 4, -1]
[0, 2, 2, 4, 4, 4, 4, -1]
[0, 2, 2, 4, 4, 4, 4, -1]
[0, 2, 2, 4, 4, 4, 4, -1]
[0, 2, 2, 4, 4, 4, 4, -1]
[0, 2, 2, 4, 4, 4, 4, 4]
[0, 2, 2, 4, 4, 4, 4, 4]
[0, 2, 2, 4, 4, 4, 4, 4]
[0, 2, 2, 4, 4, 4, 4, 4]
[0, 2, 2, 4, 4, 4, 4, 4]
[0, 2, 2, 4, 4, 4, 4, 4]
[(4, 0——Strong East[2]——>{1: 1.0}), (2, 0——Strong North[2]——>{0: 1.0}), (4, 0——Strong South[2]——>{2: 1.0}), (2, 0——Strong West[2]——>{0: 1.0}), (5, 0——Weak East[1]——>{1: 0.3333, 2: 0.3333, 3: 0.3334}), (5, 0——Weak North[1]——>{1: 0.3333, 2: 0.3333, 3: 0.3334}), (5, 0——Weak South[1]——>{1: 0.3333, 2: 0.3333, 3: 0.3334}), (5, 0——Weak West[1]——>{1: 0.3333, 2: 0.3333, 3: 0.3334}), (4, 1——Strong East[2]——>{1: 1.0}), (4, 1——Strong North[2

In [5]:
from fimdp.objectives import BUCHI, SAFE, POS_REACH
# Display the `guess_min_levels` entry stored under the BUCHI objective on the strategy's solver.
strategy.solver.guess_min_levels[BUCHI]



[0, 2, 2, 4, 4, 4, inf, 4, inf, 4, inf, 4, inf, 4, inf, 4, 4, 4, inf, inf, inf]

In [4]:
# Manual rollout, query 1: ask the strategy for its next action.
# NOTE(review): the meaning of the argument 10 is not visible in this notebook
# (presumably a simulation/iteration budget) — confirm against OnlineStrategy.next_action.
strategy.next_action(10)


ALL BELIEF SUPPS: [(1,), (2,), (1, 2)]
BELIEF SUPP ACITON: 0——Weak South[1]——>{1: 0.3333, 2: 0.3333, 3: 0.3334}, SRC BELIEF: [0]
OBS_DISTR: {2: 0.95, 3: 0.05}
MATCHING STATE ACITON: 0——Weak South[1]——>{1: 0.2, 2: 0.8}
PICKED BELIEF SUPP: (2,)

ALL BELIEF SUPPS: [(1,)]
BELIEF SUPP ACITON: 0——Strong East[2]——>{1: 1.0}, SRC BELIEF: [0]
OBS_DISTR: {1: 0.95, 3: 0.05}
MATCHING STATE ACITON: 0——Strong East[2]——>{1: 1.0}
PICKED BELIEF SUPP: (1,)

ALL BELIEF SUPPS: [(1,), (2,), (1, 2)]
BELIEF SUPP ACITON: 0——Weak East[1]——>{1: 0.3333, 2: 0.3333, 3: 0.3334}, SRC BELIEF: [0]
OBS_DISTR: {2: 0.95, 3: 0.05}
MATCHING STATE ACITON: 0——Weak East[1]——>{1: 0.65, 2: 0.35}
PICKED BELIEF SUPP: (2,)

ALL BELIEF SUPPS: [(1,), (2,), (1, 2)]
BELIEF SUPP ACITON: 0——Weak West[1]——>{1: 0.3333, 2: 0.3333, 3: 0.3334}, SRC BELIEF: [0]
OBS_DISTR: {2: 0.95, 3: 0.05}
MATCHING STATE ACITON: 0——Weak West[1]——>{1: 0.02, 2: 0.98}
PICKED BELIEF SUPP: (2,)

ALL BELIEF SUPPS: [(1,), (2,), (1, 2)]
BELIEF SUPP ACITON: 0——Weak N

0——Strong East[2]——>{1: 1.0}

In [5]:
# Pass the value 1 to the strategy's update_obs before the next query.
# NOTE(review): presumably 1 is the observation received after the last action — confirm.
strategy.update_obs(1)

In [6]:
# Manual rollout, query 2: ask the strategy for its next action (same argument, 10, as the other queries).
strategy.next_action(10)


ALL BELIEF SUPPS: [(1,)]
BELIEF SUPP ACITON: 1——Strong North[2]——>{1: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.95, 3: 0.05}
MATCHING STATE ACITON: 1——Strong North[2]——>{1: 1.0}
PICKED BELIEF SUPP: (1,)

ALL BELIEF SUPPS: [(1,)]
BELIEF SUPP ACITON: 1——Strong East[2]——>{1: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.95, 3: 0.05}
MATCHING STATE ACITON: 1——Strong East[2]——>{1: 1.0}
PICKED BELIEF SUPP: (1,)

ALL BELIEF SUPPS: [(0,), (3,)]
BELIEF SUPP ACITON: 1——Weak North[1]——>{0: 0.5, 4: 0.5}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.05, 2: 0.05, 3: 0.9}
MATCHING STATE ACITON: 1——Weak North[1]——>{0: 0.14, 3: 0.86}
PICKED BELIEF SUPP: (3,)

ALL BELIEF SUPPS: [(0,), (3,)]
BELIEF SUPP ACITON: 1——Weak West[1]——>{0: 0.5, 4: 0.5}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.05, 2: 0.05, 3: 0.9}
MATCHING STATE ACITON: 1——Weak West[1]——>{0: 0.64, 3: 0.36}
PICKED BELIEF SUPP: (3,)

ALL BELIEF SUPPS: [(0,)]
BELIEF SUPP ACITON: 1——Strong West[2]——>{0: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {0: 1.0}
MATCHING STATE ACITON: 1——Strong

1——Strong East[2]——>{1: 1.0}

In [7]:
# Pass the value 1 to update_obs again before query 3 (presumably the received observation — confirm).
strategy.update_obs(1)

In [8]:
# Manual rollout, query 3: ask the strategy for its next action (same argument, 10, as the other queries).
strategy.next_action(10)


ALL BELIEF SUPPS: [(1,)]
BELIEF SUPP ACITON: 1——Strong North[2]——>{1: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.95, 3: 0.05}
MATCHING STATE ACITON: 1——Strong North[2]——>{1: 1.0}
PICKED BELIEF SUPP: (1,)

ALL BELIEF SUPPS: [(1,)]
BELIEF SUPP ACITON: 1——Strong East[2]——>{1: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.95, 3: 0.05}
MATCHING STATE ACITON: 1——Strong East[2]——>{1: 1.0}
PICKED BELIEF SUPP: (1,)

ALL BELIEF SUPPS: [(0,), (3,)]
BELIEF SUPP ACITON: 1——Weak North[1]——>{0: 0.5, 4: 0.5}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.05, 2: 0.05, 3: 0.9}
MATCHING STATE ACITON: 1——Weak North[1]——>{0: 0.14, 3: 0.86}
PICKED BELIEF SUPP: (3,)

ALL BELIEF SUPPS: [(0,), (3,)]
BELIEF SUPP ACITON: 1——Weak West[1]——>{0: 0.5, 4: 0.5}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.05, 2: 0.05, 3: 0.9}
MATCHING STATE ACITON: 1——Weak West[1]——>{0: 0.64, 3: 0.36}
PICKED BELIEF SUPP: (3,)

ALL BELIEF SUPPS: [(0,)]
BELIEF SUPP ACITON: 1——Strong West[2]——>{0: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {0: 1.0}
MATCHING STATE ACITON: 1——Strong

1——Strong East[2]——>{1: 1.0}

In [9]:
# Pass the value 1 to update_obs again before query 4 (presumably the received observation — confirm).
strategy.update_obs(1)

In [10]:
# Manual rollout, query 4: ask the strategy for its next action (same argument, 10, as the other queries).
strategy.next_action(10)


ALL BELIEF SUPPS: [(1,)]
BELIEF SUPP ACITON: 1——Strong North[2]——>{1: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.95, 3: 0.05}
MATCHING STATE ACITON: 1——Strong North[2]——>{1: 1.0}
PICKED BELIEF SUPP: (1,)

ALL BELIEF SUPPS: [(1,)]
BELIEF SUPP ACITON: 1——Strong East[2]——>{1: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.95, 3: 0.05}
MATCHING STATE ACITON: 1——Strong East[2]——>{1: 1.0}
PICKED BELIEF SUPP: (1,)

ALL BELIEF SUPPS: [(0,), (3,)]
BELIEF SUPP ACITON: 1——Weak North[1]——>{0: 0.5, 4: 0.5}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.05, 2: 0.05, 3: 0.9}
MATCHING STATE ACITON: 1——Weak North[1]——>{0: 0.14, 3: 0.86}
PICKED BELIEF SUPP: (3,)

ALL BELIEF SUPPS: [(0,), (3,)]
BELIEF SUPP ACITON: 1——Weak West[1]——>{0: 0.5, 4: 0.5}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.05, 2: 0.05, 3: 0.9}
MATCHING STATE ACITON: 1——Weak West[1]——>{0: 0.64, 3: 0.36}
PICKED BELIEF SUPP: (3,)

ALL BELIEF SUPPS: [(0,)]
BELIEF SUPP ACITON: 1——Strong West[2]——>{0: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {0: 1.0}
MATCHING STATE ACITON: 1——Strong

1——Strong East[2]——>{1: 1.0}

In [11]:
# Pass the value 1 to update_obs again before query 5 (presumably the received observation — confirm).
strategy.update_obs(1)

In [12]:
# Manual rollout, query 5: ask the strategy for its next action (same argument, 10, as the other queries).
strategy.next_action(10)


ALL BELIEF SUPPS: [(1,)]
BELIEF SUPP ACITON: 1——Strong North[2]——>{1: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.95, 3: 0.05}
MATCHING STATE ACITON: 1——Strong North[2]——>{1: 1.0}
PICKED BELIEF SUPP: (1,)

ALL BELIEF SUPPS: [(1,)]
BELIEF SUPP ACITON: 1——Strong East[2]——>{1: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.95, 3: 0.05}
MATCHING STATE ACITON: 1——Strong East[2]——>{1: 1.0}
PICKED BELIEF SUPP: (1,)

ALL BELIEF SUPPS: [(0,), (3,)]
BELIEF SUPP ACITON: 1——Weak North[1]——>{0: 0.5, 4: 0.5}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.05, 2: 0.05, 3: 0.9}
MATCHING STATE ACITON: 1——Weak North[1]——>{0: 0.14, 3: 0.86}
PICKED BELIEF SUPP: (3,)

ALL BELIEF SUPPS: [(0,), (3,)]
BELIEF SUPP ACITON: 1——Weak West[1]——>{0: 0.5, 4: 0.5}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.05, 2: 0.05, 3: 0.9}
MATCHING STATE ACITON: 1——Weak West[1]——>{0: 0.64, 3: 0.36}
PICKED BELIEF SUPP: (3,)

ALL BELIEF SUPPS: [(0,)]
BELIEF SUPP ACITON: 1——Strong West[2]——>{0: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {0: 1.0}
MATCHING STATE ACITON: 1——Strong

1——Strong East[2]——>{1: 1.0}

In [13]:
# Pass the value 1 to update_obs again before query 6 (presumably the received observation — confirm).
strategy.update_obs(1)

In [14]:
# Manual rollout, query 6: ask the strategy for its next action.
# In the recorded run this call raised
# "ValueError: Couldn't match observation, for belief_supps: [(3,)] and state: 1".
strategy.next_action(10)


ALL BELIEF SUPPS: [(1,)]
BELIEF SUPP ACITON: 1——Strong North[2]——>{1: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.95, 3: 0.05}
MATCHING STATE ACITON: 1——Strong North[2]——>{1: 1.0}
PICKED BELIEF SUPP: (1,)

ALL BELIEF SUPPS: [(1,)]
BELIEF SUPP ACITON: 1——Strong East[2]——>{1: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.95, 3: 0.05}
MATCHING STATE ACITON: 1——Strong East[2]——>{1: 1.0}
PICKED BELIEF SUPP: (1,)

ALL BELIEF SUPPS: [(0,), (3,)]
BELIEF SUPP ACITON: 1——Weak North[1]——>{0: 0.5, 4: 0.5}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.05, 2: 0.05, 3: 0.9}
MATCHING STATE ACITON: 1——Weak North[1]——>{0: 0.14, 3: 0.86}
PICKED BELIEF SUPP: (3,)

ALL BELIEF SUPPS: [(0,), (3,)]
BELIEF SUPP ACITON: 1——Weak West[1]——>{0: 0.5, 4: 0.5}, SRC BELIEF: [1]
OBS_DISTR: {1: 0.05, 2: 0.05, 3: 0.9}
MATCHING STATE ACITON: 1——Weak West[1]——>{0: 0.64, 3: 0.36}
PICKED BELIEF SUPP: (3,)

ALL BELIEF SUPPS: [(0,)]
BELIEF SUPP ACITON: 1——Strong West[2]——>{0: 1.0}, SRC BELIEF: [1]
OBS_DISTR: {0: 1.0}
MATCHING STATE ACITON: 1——Strong

ValueError: Couldn't match observation, for belief_supps: [(3,)] and state: 1

In [3]:
# Call get_state_obs_probs(1) on the cpomdp held by the strategy's tree and display the result
# (presumably the observation probabilities of state 1 — confirm against ConsPOMDP).
# The recorded run raised NameError because `strategy` was not defined in that kernel session;
# the cell that builds the strategy must be run first.
strategy.tree.cpomdp.get_state_obs_probs(1)

NameError: name 'strategy' is not defined

In [None]:
from collections import deque

# Breadth-first walk starting at the root of the strategy's tree. For every child
# reached, print its `visits` and `val` followed by an empty line. The root itself
# is not printed.
pending = deque([strategy.tree.root])
while pending:
    current = pending.popleft()
    for successor in current.children:
        pending.append(successor)
        print(successor.visits, successor.val)
        print()

In [None]:
from fipomdp.energy_solvers import ConsPOMDPBasicES

# Build a ConsPOMDPBasicES solver from the current cpomdp, capacity and targets.
# NOTE(review): the positional arguments [0] and False are not explained in this notebook
# (presumably an initial belief support and a verbosity flag) — confirm against ConsPOMDPBasicES.
solver = ConsPOMDPBasicES(cpomdp, [0], capacity, targets, False)
# Run the solver's posreach computation, then its buchi computation, in this order.
solver.compute_posreach()
solver.compute_buchi()

In [None]:


# Run the solver's safe computation, then display the `guess_min_levels` entry stored under SAFE.
# SAFE comes from the `from fimdp.objectives import ...` line in an earlier cell, so that
# cell must have been executed in the current kernel.
solver.compute_safe()
solver.guess_min_levels[SAFE]