# Example usage

## Install the package with pip

In [1]:
%%capture
# Install required packages
! pip install git+https://github.com/CassandraDurr/value_iteration.git

## Import functions from the package 
In this example we are going to perform the same value iteration in a number of ways.
- In the first example we will provide Python inputs to the value iteration algorithm. We will solve the problem in a synchronous fashion.
- The second example follows the same format as the first; however, the problem is solved asynchronously.
- In the last example we will input the same data, but from a `csv` file, and we will solve the problem with synchronous value iteration.

In [2]:
from value_iteration import ValueIteration, AsynchValueIteration, load_mdp_from_csv, MDP

### Synchronous value iteration with Python inputs

In [None]:
# Example 1: Create Markov Decision Process (MDP) inputs as Python objects
# and solve the problem with synchronous value iteration.
# S, A, P and R defined here are reused by the asynchronous example below.

# Define state space
S = ["healthy", "sick"]

# Define which actions ("relax" and "party") can occur at each state ("healthy" and "sick").
# Keys are states; each value is the list of actions available in that state.
A = {
    "healthy": ["relax", "party"],
    "sick": ["relax", "party"],
}

# Define the transition probabilities. The key is in the form (current state, action, next state).
# For every (current state, action) pair below, the probabilities over next states sum to 1.
P = {
    ("healthy", "relax", "healthy"): 0.95,
    ("healthy", "relax", "sick"): 0.05,
    ("sick", "relax", "healthy"): 0.5,
    ("sick", "relax", "sick"): 0.5,
    ("healthy", "party", "healthy"): 0.7,
    ("healthy", "party", "sick"): 0.3,
    ("sick", "party", "healthy"): 0.1,
    ("sick", "party", "sick"): 0.9,
}

# Define the reward function. The key is in the form (current state, action).
# Every (state, action) pair allowed by A has exactly one reward entry.
R = {
    ("healthy", "relax"): 7,
    ("healthy", "party"): 10,
    ("sick", "relax"): 0,
    ("sick", "party"): 2,
}

# Create MDP data class from the four inputs defined above
mdp = MDP(states=S, actions=A, probabilities=P, rewards=R)

# Setup value iteration class (synchronous version)
# NOTE(review): gamma is presumably the discount factor, theta the convergence
# threshold and printing a progress-output switch -- confirm against the package docs.
value_itr = ValueIteration(mdp=mdp, gamma=0.9, theta=1e-6, printing=True)

# Run value iteration algorithm; the call returns two objects, unpacked here as
# the state values and the policy.
optimal_values, optimal_policy = value_itr.value_iteration()

# Display results
print("Optimal State Values:", optimal_values)
print("Optimal Policy:", optimal_policy)

### Asynchronous value iteration with Python inputs

In [None]:
# Example 2: Solve the same MDP with asynchronous value iteration.
# No new inputs are created here: S, A, P and R come from the Example 1 cell,
# so that cell must have been run before this one.

# Create MDP data class
mdp = MDP(states=S, actions=A, probabilities=P, rewards=R)

# Setup value iteration class (asynchronous version), using the same gamma,
# theta and printing settings as the synchronous example.
value_itr = AsynchValueIteration(mdp=mdp, gamma=0.9, theta=1e-6, printing=True)

# Run value iteration algorithm; the call returns two objects, unpacked here as
# the state values and the policy.
optimal_values, optimal_policy = value_itr.value_iteration()

# Display results
print("Optimal State Values:", optimal_values)
print("Optimal Policy:", optimal_policy)

### Synchronous value iteration with csv input

In [None]:
# Example 3: Loading data from csv files and solving with synchronous value iteration

# Obtain states, actions, transition probabilities and rewards from one transitions file.
# The path is relative, so the example_data folder is presumably expected in the
# notebook's working directory -- confirm where the file lives before running.
# NOTE: this rebinds S, A, P and R, replacing the Python-defined inputs from Example 1.
S, A, P, R = load_mdp_from_csv(transitions_filepath="example_data/transitions.csv")

# Create MDP data class
mdp = MDP(states=S, actions=A, probabilities=P, rewards=R)

# Setup value iteration class (synchronous version), using the same gamma,
# theta and printing settings as the earlier examples.
value_itr = ValueIteration(mdp=mdp, gamma=0.9, theta=1e-6, printing=True)

# Run value iteration algorithm; the call returns two objects, unpacked here as
# the state values and the policy.
optimal_values, optimal_policy = value_itr.value_iteration()

# Display results
print("Optimal State Values:", optimal_values)
print("Optimal Policy:", optimal_policy)