In [9]:
%pip install gym

Note: you may need to restart the kernel to use updated packages.


In [10]:
import gymnasium as gym
import numpy as np

In [11]:
env = gym.make("FrozenLake-v1", is_slippery=True)
n_states, n_actions = env.observation_space.n, env.action_space.n
policy = np.ones((n_states, n_actions)) / n_actions

In [12]:
learning_rate, gamma, episodes = 0.1, 0.99, 2000

In [13]:
for _ in range(episodes):
    state, _ = env.reset()
    trajectory = [] # Store (state, action, reward)
    
    # Generate Episode
    done = False
    while not done:
        action = np.random.choice(n_actions, p=policy[state])
        next_state, reward, done, _, _ = env.step(action)
        trajectory.append((state, action, reward))
        state = next_state

    # Update Policy (Monte Carlo)
    G = 0
    for s, a, r in reversed(trajectory):
        G = r + gamma * G
        # Simple Gradient Ascent Update
        policy[s, a] += learning_rate * G 
        # Normalize to keep probabilities valid
        policy[s] = np.maximum(policy[s], 0)
        policy[s] /= np.sum(policy[s])

In [14]:
success_count = 0
for _ in range(1000):
    state, _ = env.reset()
    done = False
    while not done:
        state, reward, done, _, _ = env.step(np.argmax(policy[state]))
        success_count += reward

In [15]:
print(f"Success Rate: {success_count/1000:.2%}")

# 4. Display Learned Policy
arrows = ["<", "v", ">", "^"]
print("\nLearned Best Actions:")
for s in range(n_states):
    best_a = np.argmax(policy[s])
    print(f"State {s:02d}: {arrows[best_a]} (Prob: {policy[s, best_a]:.2f})")

Success Rate: 46.30%

Learned Best Actions:
State 00: > (Prob: 1.00)
State 01: ^ (Prob: 1.00)
State 02: > (Prob: 1.00)
State 03: ^ (Prob: 1.00)
State 04: < (Prob: 1.00)
State 05: < (Prob: 0.25)
State 06: > (Prob: 0.99)
State 07: < (Prob: 0.25)
State 08: ^ (Prob: 1.00)
State 09: v (Prob: 1.00)
State 10: < (Prob: 1.00)
State 11: < (Prob: 0.25)
State 12: < (Prob: 0.25)
State 13: > (Prob: 1.00)
State 14: v (Prob: 1.00)
State 15: < (Prob: 0.25)
