# Example usage

## Install the package with pip

In [1]:
%%capture
# Install required packages
! pip install git+https://github.com/CassandraDurr/value_iteration.git

## Import functions from the package 
In this notebook we perform value iteration in two ways. In the first example we pass Python objects directly to the `value_iteration` function; in the second example we load the same data from CSV files with `load_mdp_from_csv` and run the same algorithm.

In [2]:
from value_iteration import value_iteration, load_mdp_from_csv

In [3]:
# Example 1: Build the Markov Decision Process (MDP) inputs directly in Python

# State space
S = list("ABCD")

# Actions ("1", "2" and "3") available in each state ("A", "B", "C", "D")
A = {"A": ["1", "2"], "B": ["1", "3"], "C": ["2", "3"], "D": ["1"]}

# Every transition is listed once as
# (current state, action, next state, probability, reward)
# so the probability and reward dictionaries below always share the same keys.
# NOTE(review): ("C", "3") and ("D", "1") are offered in A but have no rows
# here. The recorded output gives D a value of 0.0, so value_iteration
# presumably treats actions without transitions as worth nothing; confirm
# against the package.
transition_table = [
    ("A", "1", "B", 0.8, 5),
    ("A", "1", "C", 0.2, 10),
    ("A", "2", "C", 1.0, 10),
    ("B", "1", "D", 1.0, -1),
    ("B", "3", "C", 0.5, 2),
    ("B", "3", "D", 0.5, 0),
    ("C", "2", "D", 1.0, 8),
]

# Transition probabilities, keyed by (current state, action, next state)
P = {
    (state, action, next_state): probability
    for state, action, next_state, probability, _ in transition_table
}

# Reward function, keyed by (current state, action, next state)
R = {
    (state, action, next_state): reward
    for state, action, next_state, _, reward in transition_table
}

# Run the value iteration algorithm.
# gamma and theta are presumably the discount factor and the convergence
# threshold (see the package documentation); printing=True appears to produce
# the per-iteration progress lines shown in the output.
solver_settings = dict(gamma=0.9, theta=1e-9, printing=True)
optimal_values, optimal_policy = value_iteration(S, A, P, R, **solver_settings)

# Show the value of each state and the action chosen in each state
print("Optimal State Values:", optimal_values)
print("Optimal Policy:", optimal_policy)

Iteration 1, max value change: 10.0
Iteration 2, max value change: 7.199999999999999
Iteration 3, max value change: 0
Optimal State Values: {'A': 17.2, 'B': 4.6, 'C': 8.0, 'D': 0.0}
Optimal Policy: {'A': '2', 'B': '3', 'C': '2', 'D': '1'}


In [4]:
# Example 2: Read the MDP definition from csv files

# Locations of the input files.
# NOTE(review): these paths are relative to the working directory, so this cell
# presumably has to run from a checkout that contains `example_data/`; confirm
# whether the pip-installed package provides these files.
transitions_csv = "example_data/transitions.csv"
state_actions_csv = "example_data/state_actions.csv"

# Load states, actions, transition probabilities and rewards
S, A, P, R = load_mdp_from_csv(
    transitions_filepath=transitions_csv,
    state_actions_filepath=state_actions_csv,
)

# Run value iteration with the same settings as Example 1
solver_settings = dict(gamma=0.9, theta=1e-9, printing=True)
optimal_values, optimal_policy = value_iteration(S, A, P, R, **solver_settings)

# Show the value of each state and the action chosen in each state
print("Optimal State Values:", optimal_values)
print("Optimal Policy:", optimal_policy)

Iteration 1, max value change: 10.0
Iteration 2, max value change: 7.199999999999999
Iteration 3, max value change: 0
Optimal State Values: {'A': 17.2, 'B': 4.6, 'C': 8.0, 'D': 0.0}
Optimal Policy: {'A': '2', 'B': '3', 'C': '2', 'D': '1'}
