In [5]:
# Install the MDP toolbox
!pip install pymdptoolbox

# Import necessary packages
import mdptoolbox.example
import mdptoolbox.mdp
import numpy as np

# ---------------------------------------
# Step 1: Define Tiny Forest MDP
# ---------------------------------------
# Calling forest() with no arguments builds the library's default example.
# P is indexed by action first (P[0] = Wait, P[1] = Cut); R has one column
# per action (R[:, 0] = Wait, R[:, 1] = Cut).
P, R = mdptoolbox.example.forest()

# Show Transition and Reward Matrices.
# One print call with a newline separator emits each heading followed by its
# matrix; the leading "\n" in a heading produces the blank line between
# sections.
print(
    "Transition Matrix for Wait (P[0]):",
    P[0],
    "\nTransition Matrix for Cut (P[1]):",
    P[1],
    "\nReward for Wait:",
    R[:, 0],
    "\nReward for Cut:",
    R[:, 1],
    sep="\n",
)

# ---------------------------------------
# Step 2: Policy Iteration - Base Case
# ---------------------------------------
# Solve the MDP defined by P and R with a discount factor (gamma) of 0.9.
model = mdptoolbox.mdp.PolicyIteration(P, R, discount=0.9)
model.run()  # populates model.V and model.policy

# Output results: value of each state, then the action chosen in each state.
print(
    "\nOptimal Value Function:",
    model.V,
    "\nOptimal Policy (0=Wait, 1=Cut):",
    model.policy,
    sep="\n",
)



Transition Matrix for Wait (P[0]):
[[0.1 0.9 0. ]
 [0.1 0.  0.9]
 [0.1 0.  0.9]]

Transition Matrix for Cut (P[1]):
[[1. 0. 0.]
 [1. 0. 0.]
 [1. 0. 0.]]

Reward for Wait:
[0. 0. 4.]

Reward for Cut:
[0. 1. 2.]

Optimal Value Function:
(26.244000000000014, 29.484000000000016, 33.484000000000016)

Optimal Policy (0=Wait, 1=Cut):
(0, 0, 0)


In [6]:
# Install the MDP toolbox
!pip install pymdptoolbox

# Imports
import mdptoolbox.example
import mdptoolbox.mdp

# Default Tiny Forest MDP
P, R = mdptoolbox.example.forest()

# Sanity check: the Cut column of the default reward matrix should be
# [0, 1, 2]. Asserting it (instead of only noting it in a comment) makes the
# cell fail loudly if the library's default example ever changes.
assert R[:, 1].tolist() == [0.0, 1.0, 2.0], "unexpected default Cut rewards"
print("Reward for Cut:", R[:, 1])  # Expect [0. 1. 2.]

# Solve using Policy Iteration with a discount factor (gamma) of 0.9.
model = mdptoolbox.mdp.PolicyIteration(P, R, discount=0.9)
model.run()  # populates model.V and model.policy

print("\nOptimal Value Function:", model.V)
print("Optimal Policy (0=Wait, 1=Cut):", model.policy)


Reward for Cut: [0. 1. 2.]

Optimal Value Function: (26.244000000000014, 29.484000000000016, 33.484000000000016)
Optimal Policy (0=Wait, 1=Cut): (0, 0, 0)


In [7]:
# High-fire-risk variant: same S, r1 and r2 as before (3, 4, 2), but the fire
# probability p is raised to 0.8. Keyword arguments make each value's role
# explicit at the call site.
P2, R2 = mdptoolbox.example.forest(S=3, r1=4, r2=2, p=0.8)

# Solve again with the same discount factor (0.9) so that only the fire
# probability differs from the earlier run.
model2 = mdptoolbox.mdp.PolicyIteration(P2, R2, discount=0.9)
model2.run()

print("Policy with High Fire Risk (p=0.8):", model2.policy)


Policy with High Fire Risk (p=0.8): (0, 1, 0)
