In [1]:
%load_ext autoreload
%autoreload 2


# Adjusting the python path to manage compatability between the notebook and the server
import sys
import os
from pathlib import Path
sys.path.append(os.path.abspath(os.path.join('..')))

import numpy as np
from solver.pomdp import *
from solver.helpers import *
from fractions import Fraction as frac

/Users/marcos/Documents/github_projects/POMDPs/visualizer/notebooks
/Users/marcos/Documents/github_projects/POMDPs/visualizer/notebooks
/Users/marcos/Documents/github_projects/POMDPs/visualizer/solver/utility_functions/generated_models


## Budget 1

5x5 grid with target at the bottom right

In [3]:
budget = 1
gridSize = (5, 5)
target = Node(4, 4)
strategies = list([  
    Strategy({ Action.DOWN: frac(1, 2), Action.RIGHT: frac(1, 2) }) 
])
enumerated_strategies = dict(enumerate(strategies, start=1))

pomdp = POMDP(gridSize=gridSize, target=target, model='grid', budget=budget)

# Assign strategies to the nodes
for node in pomdp.nodes: node.assign_strategy(enumerated_strategies[1], 1)
observations =  {n: n.strategy_id for n in pomdp.nodes if n != target}

# ---------------------------------------------------------
# get one specific utility value
# print(pomdp.utility(enumerated_strategies, assignments)[0])
# ---------------------------------------------------------

combinations_, U = pomdp.generate_points(enumerated_strategies, observations, sections=15, write_to_file=True)
plot_actions_3D(combinations_, U, actions = [Action.DOWN, Action.RIGHT])

# get the probability distribution of the actions that maximizes the utility
max_index = np.argmax(U)
approx = np.around(list(map(float, combinations_[max_index][0])), 2)

print('Max utility value: ')
print(combinations_[max_index][0])
print(approx)
print(U[max_index])

[[0. 0. 1. 0.]
 [0. 1. 0. 0.]] (2, 4)


Max utility value: 
[Fraction(0, 1) Fraction(1, 2) Fraction(1, 2) Fraction(0, 1)]
[0.  0.5 0.5 0. ]
0.15641547861507127


5x5 grid with the target in the bottom middle

In [4]:
budget = 1
gridSize = (5, 5)
target = Node(4, 2)
strategies = list([ 
    Strategy({ Action.DOWN: frac(1, 3), Action.RIGHT: frac(1, 3), Action.LEFT: frac(1, 3) }) 
])
enumerated_strategies = dict(enumerate(strategies, start=1))

pomdp = POMDP(gridSize=gridSize, target=target, model='grid', budget=budget)

# Assign strategies to the nodes
for node in pomdp.nodes: node.assign_strategy(enumerated_strategies[1], 1)
observations =  {n: n.strategy_id for n in pomdp.nodes if n != target}

# ---------------------------------------------------------
# get one specific utility value
# print(pomdp.utility(enumerated_strategies, assignments)[0])
# ---------------------------------------------------------

combinations_, U = pomdp.generate_points(enumerated_strategies, observations, sections=10, write_to_file=True)
plot_actions_4D(combinations_, U, actions = [Action.DOWN, Action.RIGHT, Action.LEFT])

# get the probability distribution of the actions that maximizes the utility
max_index = np.argmax(U)
approx = np.around(list(map(float, combinations_[max_index][0])), 2)

print('Max utility value: ')
print(combinations_[max_index][0])
print(approx)
print(U[max_index]) # 0.07480985633519664

Max utility value: 
[Fraction(0, 1) Fraction(1, 3) Fraction(1, 3) Fraction(1, 3)]
[0.   0.33 0.33 0.33]
0.08


5x5 grid with the target close to the bottom left.

In [5]:
budget = 1
gridSize = (5, 5)
target = Node(4, 1)
strategies = list([ 
    Strategy({ Action.DOWN: frac(1, 3), Action.RIGHT: frac(1, 3), Action.LEFT: frac(1, 3) }) 
])
enumerated_strategies = dict(enumerate(strategies, start=1))

pomdp = POMDP(gridSize=gridSize, target=target, model='grid', budget=budget)

# Assign strategies to the nodes
for node in pomdp.nodes: node.assign_strategy(enumerated_strategies[1], 1)
observations =  {n: n.strategy_id for n in pomdp.nodes if n != target}

# ---------------------------------------------------------
# get one specific utility value
# print(pomdp.utility(enumerated_strategies, assignments)[0])
# ---------------------------------------------------------

combinations_, U = pomdp.generate_points(enumerated_strategies, observations, sections=10, write_to_file=True)
plot_actions_4D(combinations_, U, actions = [Action.DOWN, Action.RIGHT, Action.LEFT])

# get the probability distribution of the actions that maximizes the utility
max_index = np.argmax(U)
approx = np.around(list(map(float, combinations_[max_index][0])), 2)

print('Max utility value: ')
print(combinations_[max_index][0])
print(approx)
print(U[max_index])

Max utility value: 
[Fraction(0, 1) Fraction(1, 9) Fraction(5, 9) Fraction(1, 3)]
[0.   0.11 0.56 0.33]
0.09577670518660968


15x15 grid with the target close to the bottom left.

In [4]:
budget = 1
gridSize = (15, 15)
target = Node(14, 1)
strategies = list([ 
    Strategy({ Action.DOWN: frac(1, 3), Action.RIGHT: frac(1, 3), Action.LEFT: frac(1, 3) }) 
])
enumerated_strategies = dict(enumerate(strategies, start=1))

pomdp = POMDP(gridSize=gridSize, target=target, model='grid', budget=budget)

# Assign strategies to the nodes
for node in pomdp.nodes: node.assign_strategy(enumerated_strategies[1], 1)
observations =  {n: n.strategy_id for n in pomdp.nodes if n != target}

# ---------------------------------------------------------
# get one specific utility value
# print(pomdp.utility(enumerated_strategies, assignments)[0])
# ---------------------------------------------------------

combinations_, U = pomdp.generate_points(enumerated_strategies, observations, sections=40, write_to_file=True)
plot_actions_4D(combinations_, U, actions = [Action.DOWN, Action.RIGHT, Action.LEFT])

# get the probability distribution of the actions that maximizes the utility
max_index = np.argmax(U)
approx = np.around(list(map(float, combinations_[max_index][0])), 2)

print('Max utility value: ')
print(combinations_[max_index][0])
print(approx)
print(U[max_index])

Max utility value: 
[Fraction(0, 1) Fraction(1, 13) Fraction(19, 39) Fraction(17, 39)]
[0.   0.08 0.49 0.44]
0.036534377905936506


25x25 grid with the target close to the bottom left.

In [12]:
budget = 1
gridSize = (25, 25)
target = Node(24, 1)
strategies = list([ 
    Strategy({ Action.DOWN: frac(1, 3), Action.RIGHT: frac(1, 3), Action.LEFT: frac(1, 3) }) 
])
enumerated_strategies = dict(enumerate(strategies, start=1))

pomdp = POMDP(gridSize=gridSize, target=target, model='grid', budget=budget)

# Assign strategies to the nodes
for node in pomdp.nodes: node.assign_strategy(enumerated_strategies[1], 1)
observations =  {n: n.strategy_id for n in pomdp.nodes if n != target}

# ---------------------------------------------------------
# get one specific utility value
# print(pomdp.utility(enumerated_strategies, assignments)[0])
# ---------------------------------------------------------

combinations_, U = pomdp.generate_points(enumerated_strategies, observations, sections=15, write_to_file=True)
plot_actions_4D(combinations_, U, actions = [Action.DOWN, Action.RIGHT, Action.LEFT])

# get the probability distribution of the actions that maximizes the utility
max_index = np.argmax(U)
approx = np.around(list(map(float, combinations_[max_index][0])), 2)

print('Max utility value: ')
print(combinations_[max_index][0])
print(approx)
print(U[max_index])

Max utility value: 
[Fraction(0, 1) Fraction(1, 14) Fraction(1, 2) Fraction(3, 7)]
[0.   0.07 0.5  0.43]
0.022927422605160164


# Budget 2

5x5 grid with target close to the bottom left

In [6]:
budget = 2
gridSize = (5, 5)
target = Node(4, 1)
strategies = list([
    Strategy({ Action.RIGHT: frac(1, 3) }),
    Strategy({ Action.DOWN: frac(1, 3), Action.LEFT: frac(1, 3) }), 
])
enumerated_strategies = dict(enumerate(strategies, start=1))

pomdp = POMDP(gridSize=gridSize, target=target, model='grid', budget=budget)

# Assign strategies to the nodes
for node in pomdp.nodes: 
    if node.j < 1: node.assign_strategy(enumerated_strategies[1], 1)
    else: node.assign_strategy(enumerated_strategies[2], 2)

observations =  {n: n.strategy_id for n in pomdp.nodes if n != target}

# ---------------------------------------------------------
# get one specific utility value
# print(pomdp.utility(enumerated_strategies, assignments)[0])
# ---------------------------------------------------------

combinations_, U = pomdp.generate_points(enumerated_strategies, observations, sections=90, write_to_file=True)

# get the probability distribution of the actions that maximizes the utility
max_index = np.argmax(U)
print('Max utility value: ')

print(combinations_[max_index])
print([list(map(lambda x : round(float(x), 2), x)) for x in combinations_[max_index]])
print(U[max_index]) 

Max utility value: 
[[Fraction(0, 1) Fraction(1, 1) Fraction(0, 1) Fraction(0, 1)]
 [Fraction(0, 1) Fraction(0, 1) Fraction(56, 89) Fraction(33, 89)]]
[[0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 0.63, 0.37]]
0.15807418381325913


In [7]:
budget = 2
gridSize = (5, 5)
target = Node(4, 1)
strategies = list([
    Strategy({ Action.RIGHT: frac(1, 3) }),
    Strategy({ Action.DOWN: frac(1, 3), Action.LEFT: frac(1, 3) }), 
])
enumerated_strategies = dict(enumerate(strategies, start=1))

pomdp = POMDP(gridSize=gridSize, target=target, model='grid', budget=budget)

# Assign strategies to the nodes
for node in pomdp.nodes: 
    if node.j <= 1: node.assign_strategy(enumerated_strategies[1], 1)
    else: node.assign_strategy(enumerated_strategies[2], 2)

observations =  {n: n.strategy_id for n in pomdp.nodes if n != target}

# ---------------------------------------------------------
# get one specific utility value
# print(pomdp.utility(enumerated_strategies, assignments)[0])
# ---------------------------------------------------------

combinations_, U = pomdp.generate_points(enumerated_strategies, observations, sections=90, write_to_file=True)

# get the probability distribution of the actions that maximizes the utility
max_index = np.argmax(U)
print('Max utility value: ')

print(combinations_[max_index])
print([list(map(lambda x : round(float(x), 2), x)) for x in combinations_[max_index]])
print(U[max_index]) 

Max utility value: 
[[Fraction(0, 1) Fraction(1, 1) Fraction(0, 1) Fraction(0, 1)]
 [Fraction(0, 1) Fraction(0, 1) Fraction(55, 89) Fraction(34, 89)]]
[[0.0, 1.0, 0.0, 0.0], [0.0, 0.0, 0.62, 0.38]]
0.12026029388044755


In [8]:
budget = 2
gridSize = (5, 5)
target = Node(4, 1)
strategies = list([
    Strategy({ Action.DOWN: frac(1, 3), Action.RIGHT: frac(1, 3) }), 
    Strategy({ Action.LEFT: frac(1, 3) }),
])
enumerated_strategies = dict(enumerate(strategies, start=1))

pomdp = POMDP(gridSize=gridSize, target=target, model='grid', budget=budget)

# Assign strategies to the nodes
for node in pomdp.nodes: 
    if node.j < 1: node.assign_strategy(enumerated_strategies[1], 1)
    else: node.assign_strategy(enumerated_strategies[2], 2)

observations =  {n: n.strategy_id for n in pomdp.nodes if n != target}

# ---------------------------------------------------------
# get one specific utility value
# print(pomdp.utility(enumerated_strategies, assignments)[0])
# ---------------------------------------------------------

combinations_, U = pomdp.generate_points(enumerated_strategies, observations, sections=90, write_to_file=True)

# get the probability distribution of the actions that maximizes the utility
max_index = np.argmax(U)
print('Max utility value: ')

print(combinations_[max_index])
print([list(map(lambda x : round(float(x), 2), x)) for x in combinations_[max_index]])
print(U[max_index]) 

Max utility value: 
[[Fraction(0, 1) Fraction(28, 89) Fraction(61, 89) Fraction(0, 1)]
 [Fraction(0, 1) Fraction(0, 1) Fraction(0, 1) Fraction(1, 1)]]
[[0.0, 0.31, 0.69, 0.0], [0.0, 0.0, 0.0, 1.0]]
0.11502425801889572


In [4]:
budget = 2
gridSize = (5, 5)
target = Node(4, 1)
strategies = list([
    Strategy({ Action.DOWN: frac(1, 3), Action.RIGHT: frac(1, 3) }), 
    Strategy({ Action.LEFT: frac(1, 3) }),
])
enumerated_strategies = dict(enumerate(strategies, start=1))

pomdp = POMDP(gridSize=gridSize, target=target, model='grid', budget=budget)

# Assign strategies to the nodes
for node in pomdp.nodes: 
    if node.j <= 1: node.assign_strategy(enumerated_strategies[1], 1)
    else: node.assign_strategy(enumerated_strategies[2], 2)

observations =  {n: n.strategy_id for n in pomdp.nodes if n != target}

# ---------------------------------------------------------
# get one specific utility value
# print(pomdp.utility(enumerated_strategies, assignments)[0])
# ---------------------------------------------------------

combinations_, U = pomdp.generate_points(enumerated_strategies, observations, sections=90, write_to_file=True)

# get the probability distribution of the actions that maximizes the utility
max_index = np.argmax(U)
print('Max utility value: ')

print(combinations_[max_index])
print([list(map(lambda x : round(float(x), 2), x)) for x in combinations_[max_index]])
print(U[max_index]) 

Max utility value: 
[[Fraction(0, 1) Fraction(16, 89) Fraction(73, 89) Fraction(0, 1)]
 [Fraction(0, 1) Fraction(0, 1) Fraction(0, 1) Fraction(1, 1)]]
[[0.0, 0.18, 0.82, 0.0], [0.0, 0.0, 0.0, 1.0]]
0.20021738975729864


In [3]:
budget = 2
gridSize = (5, 5)
target = Node(4, 1)
strategies = list([
    Strategy({ Action.LEFT: frac(1, 3), Action.RIGHT: frac(1, 3) }), 
    Strategy({ Action.DOWN: frac(1, 3) }),
])
enumerated_strategies = dict(enumerate(strategies, start=1))

pomdp = POMDP(gridSize=gridSize, target=target, model='grid', budget=budget)

# Assign strategies to the nodes
for node in pomdp.nodes: 
    if node.i == 4: node.assign_strategy(enumerated_strategies[1], 1)
    else: node.assign_strategy(enumerated_strategies[2], 2)

observations =  {n: n.strategy_id for n in pomdp.nodes if n != target}

# ---------------------------------------------------------
# get one specific utility value
# print(pomdp.utility(enumerated_strategies, assignments)[0])
# ---------------------------------------------------------

combinations_, U = pomdp.generate_points(enumerated_strategies, observations, sections=90, write_to_file=True)

# get the probability distribution of the actions that maximizes the utility
max_index = np.argmax(U)
print('Max utility value: ')

print(combinations_[max_index])
print([list(map(lambda x : round(float(x), 2), x)) for x in combinations_[max_index]])
print(U[max_index]) 

Max utility value: 
[[Fraction(0, 1) Fraction(18, 89) Fraction(0, 1) Fraction(71, 89)]
 [Fraction(0, 1) Fraction(0, 1) Fraction(1, 1) Fraction(0, 1)]]
[[0.0, 0.2, 0.0, 0.8], [0.0, 0.0, 1.0, 0.0]]
0.19756139927484928


# Convergence Analysis

Current Implementation is fidgety since python bugs out when generating so many files, a new approach is to be found.

In [7]:
def f(x):
    budget = 1
    gridSize = (x, x)

    target = Node(x-1, 1)
    strategies = list([ 
        Strategy({ Action.DOWN: frac(1, 3), Action.RIGHT: frac(1, 3), Action.LEFT: frac(1, 3) }) 
    ])
    enumerated_strategies = dict(enumerate(strategies, start=1))

    pomdp = POMDP(gridSize=gridSize, target=target, model='grid', budget=budget)
    
    # Assign strategies to the nodes
    for node in pomdp.nodes: node.assign_strategy(enumerated_strategies[1], 1)
    observations =  {n: n.strategy_id for n in pomdp.nodes if n != target}

    # ---------------------------------------------------------
    # get one specific utility value
    # print(pomdp.utility(enumerated_strategies, assignments)[0])
    # ---------------------------------------------------------
    # print(len(pomdp.ordered_nodes))
    # print(len(observations))
    combinations_, U = pomdp.generate_points(enumerated_strategies, observations, sections=20, write_to_file=True)
    # plot_actions_4D(combinations_, U, actions = [Action.DOWN, Action.RIGHT, Action.LEFT])

    # get the probability distribution of the actions that maximizes the utility
    max_index = np.argmax(U)
    approx = np.around(list(map(float, combinations_[max_index][0])), 2) # the probability distribution of the actions that maximizes the utility
    
    # print('Max utility value: ')
    # print(combinations_[max_index][0])
    # print(approx)
    # print(U[max_index])
    
    return approx

    
    
for i in range(3,15):
    print(f'{i}: {f(i)}')

NameError: name 'ys12_1' is not defined

# Simulated Annealing

In [6]:
budget = 1
gridSize = (10, 10)
target = Node(9, 9)
strategies = list([  
    Strategy({ Action.DOWN: frac(1, 2), Action.RIGHT: frac(1, 2) }) 
])
enumerated_strategies = dict(enumerate(strategies, start=1))

pomdp = POMDP(gridSize=gridSize, target=target, model='grid', budget=budget)

# Assign strategies to the nodes
for node in pomdp.nodes: node.assign_strategy(enumerated_strategies[1], 1)
observations =  {n: n.strategy_id for n in pomdp.nodes if n != target}

# ---------------------------------------------------------
# get one specific utility value
# print(pomdp.utility(enumerated_strategies, assignments)[0])
# ---------------------------------------------------------
# print(len(pomdp.ordered_nodes))
# print(len(observations))

initial_temperature = 10.0  # High initial temperature to encourage exploration
cooling_rate = 0.01  # Low cooling rate to increase exploitation
max_iterations = 200  # Sufficient iterations to explore the search space
neighborhood_scale = 0.1  # Small perturbations for neighborhood exploration around (0.5, 0.5)


best_solution, best_U, points, utilities = pomdp.simulated_annealing(
    enumerated_strategies, 
    observations, 
    initial_temperature, 
    cooling_rate, 
    max_iterations, 
    neighborhood_scale, 
)

plot_actions_3D(np.array(points), utilities, actions = [Action.DOWN, Action.RIGHT])

approximated_solution = np.around(list(map(float, best_solution[0])), 2)

print('Best solution: ', approximated_solution)
print('Best utility: ', best_U)

Best solution:  [0.  0.5 0.5 0. ]
Best utility:  0.076220597530872


In [9]:
budget = 1
gridSize = (10, 10)
target = Node(9, 1)
strategies = list([  
    Strategy({ Action.DOWN: frac(1, 3), Action.RIGHT: frac(1, 3), Action.LEFT: frac(1, 3) }) 
])
enumerated_strategies = dict(enumerate(strategies, start=1))

pomdp = POMDP(gridSize=gridSize, target=target, model='grid', budget=budget)

# Assign strategies to the nodes
for node in pomdp.nodes: node.assign_strategy(enumerated_strategies[1], 1)
observations =  {n: n.strategy_id for n in pomdp.nodes if n != target}

# ---------------------------------------------------------
# get one specific utility value
# print(pomdp.utility(enumerated_strategies, assignments)[0])
# ---------------------------------------------------------
# print(len(pomdp.ordered_nodes))
# print(len(observations))

initial_temperature = 20.0  # High initial temperature to encourage exploration
cooling_rate = 0.99  # Gradual cooling rate to balance exploration and exploitation
max_iterations = 10  # Sufficient iterations to explore the search space
neighborhood_scale = 0.1  # Small perturbations for neighborhood exploration around (0.5, 0.5)

# initial_temperature = 0.01  # Low initial temperature to focus search near the known maximum point
# cooling_rate = 0.9999  # Faster cooling rate to shift towards exploitation sooner
# max_iterations = 100  # Reduced maximum iterations to limit exploration
# neighborhood_scale = 0.1  # Small perturbations for narrow exploration around the target point

best_solution, best_U, points, utilities = pomdp.simulated_annealing(
    enumerated_strategies, 
    observations, 
    initial_temperature, 
    cooling_rate, 
    max_iterations, 
    neighborhood_scale, 
)

print(points)
print(utilities)

plot_actions_4D(np.array(points), utilities, actions = [Action.DOWN, Action.RIGHT, Action.LEFT])

approximated_solution = np.around(list(map(float, best_solution[0])), 2)

print('Best solution: ', approximated_solution)
print('Best utility: ', best_U)

ValueError: Exceeds the limit (4300 digits) for integer string conversion: value has 5007 digits; use sys.set_int_max_str_digits() to increase the limit