# Example usage

## Install the package with pip

In [1]:
%%capture
# Install required packages
! pip install git+https://github.com/CassandraDurr/value_iteration.git

## Import functions from the package 
In this example we perform value iteration in two ways. In the first example we pass Python objects directly to the `value_iteration` function; in the second example we supply the same data, but loaded from a CSV file.

In [2]:
from value_iteration import value_iteration, load_mdp_from_csv

In [3]:
# Example 1: Build the Markov Decision Process (MDP) inputs directly in Python

# State space: the two conditions the agent can be in.
S = ["healthy", "sick"]

# Actions available in each state. Both "relax" and "party" are allowed in
# every state, so each state gets its own copy of the same action list.
A = {state: ["relax", "party"] for state in S}

# Transition probabilities, keyed by (current state, action, next state).
# For every (state, action) pair the probabilities over next states sum to 1.
P = {
    # Outcomes of choosing "relax"
    ("healthy", "relax", "healthy"): 0.95,
    ("healthy", "relax", "sick"): 0.05,
    ("sick", "relax", "healthy"): 0.5,
    ("sick", "relax", "sick"): 0.5,
    # Outcomes of choosing "party"
    ("healthy", "party", "healthy"): 0.7,
    ("healthy", "party", "sick"): 0.3,
    ("sick", "party", "healthy"): 0.1,
    ("sick", "party", "sick"): 0.9,
}

# Immediate rewards, keyed by (current state, action).
R = {
    ("healthy", "relax"): 7,
    ("healthy", "party"): 10,
    ("sick", "relax"): 0,
    ("sick", "party"): 2,
}

# Solve the MDP. printing=True emits the per-iteration "max value change"
# lines shown in the cell output.
# NOTE(review): gamma and theta are presumably the discount factor and the
# convergence threshold - confirm against the package documentation.
optimal_values, optimal_policy = value_iteration(
    S,
    A,
    P,
    R,
    gamma=0.9,
    theta=1e-6,
    printing=True,
)

# Report the two results returned by the solver.
print("Optimal State Values:", optimal_values)
print("Optimal Policy:", optimal_policy)

Iteration 1, max value change: 10.0
Iteration 2, max value change: 6.84
Iteration 3, max value change: 5.2272
Iteration 4, max value change: 4.537296000000001
Iteration 5, max value change: 4.053473279999995
Iteration 6, max value change: 3.642709190400005
Iteration 7, max value change: 3.277463254272
Iteration 8, max value change: 2.949541425768956
Iteration 9, max value change: 2.654555692638411
Iteration 10, max value change: 2.3890944370749096
Iteration 11, max value change: 2.150183969833492
Iteration 12, max value change: 1.935165388614024
Iteration 13, max value change: 1.7416488165901214
Iteration 14, max value change: 1.5674839289618632
Iteration 15, max value change: 1.4107355349912112
Iteration 16, max value change: 1.2696619812986825
Iteration 17, max value change: 1.1426957831340019
Iteration 18, max value change: 1.0284262048143447
Iteration 19, max value change: 0.9255835843317755
Iteration 20, max value change: 0.8330252258984032
Iteration 21, max value change: 0.749722

In [4]:
# Example 2: Read the MDP definition from a CSV file instead of defining it inline

# load_mdp_from_csv returns four values that unpack into the states, actions,
# transition probabilities and rewards. The path is relative to the notebook's
# working directory.
S, A, P, R = load_mdp_from_csv(
    transitions_filepath="example_data/transitions.csv",
)

# Solve with the same settings that were used in Example 1.
optimal_values, optimal_policy = value_iteration(
    S,
    A,
    P,
    R,
    gamma=0.9,
    theta=1e-6,
    printing=True,
)

# Report the two results returned by the solver.
print("Optimal State Values:", optimal_values)
print("Optimal Policy:", optimal_policy)

Iteration 1, max value change: 10.0
Iteration 2, max value change: 6.84
Iteration 3, max value change: 5.2272
Iteration 4, max value change: 4.537296000000001
Iteration 5, max value change: 4.053473279999995
Iteration 6, max value change: 3.642709190400005
Iteration 7, max value change: 3.277463254272
Iteration 8, max value change: 2.949541425768956
Iteration 9, max value change: 2.654555692638411
Iteration 10, max value change: 2.3890944370749096
Iteration 11, max value change: 2.150183969833492
Iteration 12, max value change: 1.935165388614024
Iteration 13, max value change: 1.7416488165901214
Iteration 14, max value change: 1.5674839289618632
Iteration 15, max value change: 1.4107355349912112
Iteration 16, max value change: 1.2696619812986825
Iteration 17, max value change: 1.1426957831340019
Iteration 18, max value change: 1.0284262048143447
Iteration 19, max value change: 0.9255835843317755
Iteration 20, max value change: 0.8330252258984032
Iteration 21, max value change: 0.749722