In [1]:
import algorithmes.policy_iteration as pi
import environnements.lineworld as lw
import environnements.gridworld as gw
from utils import load_config

In [2]:
# Path to the YAML file that holds the settings for every environment in this notebook.
config_file = "config.yaml"
# Backward-compatible alias: the cells below still refer to the original, misspelled name.
congig_file = config_file

# LineWorld Environment

## Create LineWorld environment

In [3]:
# Load the settings requested under the name "LineWorld"; the result is indexed
# by string keys ("states", "actions", ...) in the following cell.
config_lineworld = load_config(congig_file, "LineWorld")
# Environment tag forwarded as the first argument to pi.policy_iteration.
game = "lineworld"

In [4]:
# Unpack the MDP ingredients from the LineWorld configuration in one pass.
# S, A, R, T are bound to the "states", "actions", "rewards" and "terminals"
# entries respectively, in that order.
S, A, R, T = (
    config_lineworld[key]
    for key in ("states", "actions", "rewards", "terminals")
)

In [5]:
# Build the LineWorld model from the states, actions and rewards; the result is
# handed to pi.policy_iteration and lw.play_game in later cells.
lineworld_mdp = lw.create_lineworld(S, A, R)

## Dynamic Programming

### Policy Iteration

In [6]:
# Run policy iteration on the LineWorld model with gamma=0.999, then show the
# returned policy and value function, each on the line after its heading.
policy, V = pi.policy_iteration(game, lineworld_mdp, S, A, R, T, gamma=0.999)
print("Optimal Policy:", policy, sep="\n")
print("Value Function:", V, sep="\n")

Iteration: 1
__X__
_X___
X____
Steps: [2, np.int64(1), np.int64(0)]
Total Reward: -1
Iteration: 2
__X__
___X_
____X
Steps: [2, np.int64(3), np.int64(4)]
Total Reward: 1
Optimal Policy:
[[1. 0.]
 [0. 1.]
 [0. 1.]
 [0. 1.]
 [1. 0.]]
Value Function:
[0.       0.998001 0.999    1.       0.      ]


In [7]:
# Roll out one LineWorld game that follows the policy computed above,
# then report the visited steps and the accumulated reward.
steps, total_reward = lw.play_game(policy, lineworld_mdp, R, T)
print(
    f"Final Steps: {steps}",
    f"Final Total Reward: {total_reward}",
    sep="\n",
)

__X__
___X_
____X
Final Steps: [2, np.int64(3), np.int64(4)]
Final Total Reward: 1


# GridWorld Environment


## Create GridWorld environment

In [8]:
# Load the settings requested under the name "GridWorld"; the result is indexed
# by string keys ("states", "actions", ...) in the following cell.
config_gridworld = load_config(congig_file, "GridWorld")
# Environment tag forwarded as the first argument to pi.policy_iteration.
# This rebinds `game`, which previously held "lineworld".
game = "gridworld"

In [9]:
# Unpack the MDP ingredients from the GridWorld configuration in one pass.
# S, A, R, T are bound to the "states", "actions", "rewards" and "terminals"
# entries respectively; this rebinds the names that earlier held the LineWorld values.
S, A, R, T = (
    config_gridworld[key]
    for key in ("states", "actions", "rewards", "terminals")
)

In [10]:
# Build the GridWorld model from the states, actions and rewards; the result is
# handed to pi.policy_iteration and gw.play_game in later cells.
gridworld_mdp = gw.create_gridworld(S, A, R)

## Dynamic Programming

### Policy Iteration

In [11]:
# Run policy iteration on the GridWorld model with gamma=0.999, then show the
# returned policy and value function, each on the line after its heading.
# `policy` and `V` are rebound here, replacing the LineWorld results.
policy, V = pi.policy_iteration(game, gridworld_mdp, S, A, R, T, gamma=0.999)
print("Optimal Policy:", policy, sep="\n")
print("Value Function:", V, sep="\n")

Iteration: 1
_ _ _ _ _
_ _ _ _ _
_ _ X _ _
_ _ _ _ _
_ _ _ _ _
******************************
_ _ _ _ _
_ _ X _ _
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
******************************
******************************
_ _ X _ _
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
******************************
Steps: [12, np.int64(7), np.int64(2)]
Total Reward: -1
Iteration: 2
_ _ _ _ _
_ _ _ _ _
_ _ X _ _
_ _ _ _ _
_ _ _ _ _
******************************
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
_ _ X _ _
_ _ _ _ _
******************************
******************************
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
_ _ _ X _
_ _ _ _ _
******************************
Steps: [12, np.int64(17), np.int64(18)]
Total Reward: 1
Iteration: 3
_ _ _ _ _
_ _ _ _ _
_ _ X _ _
_ _ _ _ _
_ _ _ _ _
******************************
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
_ _ X _ _
_ _ _ _ _
******************************
******************************
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
_ _ _ X _
_ _ _ _ _
******************************
Steps: [12, np.int64(17), np

In [12]:
# Play the game with the optimal policy, this time passing an explicit fifth argument.
# NOTE(review): that argument appears to be the start state (the recorded steps
# begin at this value) — confirm against the signature of gw.play_game.
start_state = 6
steps, total_reward = gw.play_game(policy, gridworld_mdp, R, T, start_state)
print(f"Final Steps: {steps}")
print(f"Final Total Reward: {total_reward}")

_ _ _ _ _
_ X _ _ _
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
******************************
_ _ _ _ _
_ _ _ _ _
_ X _ _ _
_ _ _ _ _
_ _ _ _ _
******************************
******************************
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
_ X _ _ _
_ _ _ _ _
******************************
******************************
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
_ _ X _ _
_ _ _ _ _
******************************
******************************
_ _ _ _ _
_ _ _ _ _
_ _ _ _ _
_ _ _ X _
_ _ _ _ _
******************************
Final Steps: [6, np.int64(11), np.int64(16), np.int64(17), np.int64(18)]
Final Total Reward: 1
