# Example usage

## Install the package with pip

In [1]:
%%capture
# Install required packages
! pip install git+https://github.com/CassandraDurr/value_iteration.git

## Import functions from the package 
In this example we perform value iteration in two ways. In the first example we pass Python objects directly to the `value_iteration` function; in the second example we load the same data from a CSV file with `load_mdp_from_csv` and pass the result to `value_iteration`.

In [2]:
from value_iteration import value_iteration, load_mdp_from_csv

In [None]:
# Example 1: Build the Markov Decision Process (MDP) inputs by hand

# State space
S = ["healthy", "sick"]

# Actions available in each state: both "relax" and "party" can be taken
# whether the current state is "healthy" or "sick".
A = {state: ["relax", "party"] for state in S}

# Transition probabilities, keyed by (current state, action, next state).
# Each row below reads: current state, action, next state, probability.
P = {
    (state, action, next_state): probability
    for state, action, next_state, probability in [
        ("healthy", "relax", "healthy", 0.95),
        ("healthy", "relax", "sick", 0.05),
        ("sick", "relax", "healthy", 0.5),
        ("sick", "relax", "sick", 0.5),
        ("healthy", "party", "healthy", 0.7),
        ("healthy", "party", "sick", 0.3),
        ("sick", "party", "healthy", 0.1),
        ("sick", "party", "sick", 0.9),
    ]
}

# Reward function, keyed by (current state, action).
# Each row below reads: current state, action, reward.
R = {
    (state, action): reward
    for state, action, reward in [
        ("healthy", "relax", 7),
        ("healthy", "party", 10),
        ("sick", "relax", 0),
        ("sick", "party", 2),
    ]
}

# Solve the MDP with the value iteration algorithm
optimal_values, optimal_policy = value_iteration(
    S,
    A,
    P,
    R,
    gamma=0.9,
    theta=1e-9,
    printing=True,
)

# Show the results
print("Optimal State Values:", optimal_values)
print("Optimal Policy:", optimal_policy)

In [None]:
# Example 2: Read the MDP inputs from a csv file

# Load the states, actions, transition probabilities and rewards.
# NOTE(review): the path is relative, so it presumably resolves against the
# notebook's current working directory - confirm the file is present there.
S, A, P, R = load_mdp_from_csv(
    transitions_filepath="example_data/transitions.csv",
)

# Solve the MDP with the same settings as in Example 1
optimal_values, optimal_policy = value_iteration(
    S,
    A,
    P,
    R,
    gamma=0.9,
    theta=1e-9,
    printing=True,
)

# Show the results
print("Optimal State Values:", optimal_values)
print("Optimal Policy:", optimal_policy)