### Import Packages

In [4]:
import numpy as np
import random

### Monte Carlo

In [7]:


def _reset_env(env):
    """Reset ``env`` and return only the initial observation.

    Older Gym releases return the bare observation from ``reset()``; newer
    Gym/Gymnasium releases return an ``(observation, info)`` pair. Both shapes
    are accepted here.
    """
    result = env.reset()
    # An (obs, info_dict) pair marks the newer API. Observations handled by the
    # caller are discrete state indices, so a tuple cannot be a plain state.
    if isinstance(result, tuple) and len(result) == 2 and isinstance(result[1], dict):
        return result[0]
    return result


def _step_env(env, action):
    """Take one step and return ``(next_state, reward, done)``.

    Accepts both the 4-tuple ``(obs, reward, done, info)`` and the 5-tuple
    ``(obs, reward, terminated, truncated, info)`` step results.
    """
    result = env.step(action)
    if len(result) == 5:
        next_state, reward, terminated, truncated, _ = result
        # An episode ends on either a terminal state or a truncation.
        return next_state, reward, bool(terminated or truncated)
    next_state, reward, done, _ = result
    return next_state, reward, done


def monte_carlo_policy_evaluation(env, num_episodes, gamma, policy=None):
    """Estimate the state-value function with first-visit Monte Carlo.

    For every episode the discounted return following the *first* visit to
    each state is recorded, and ``V[state]`` is the average of those returns
    over all episodes seen so far.

    Parameters
    ----------
    env : object
        Environment exposing ``observation_space.n``, ``action_space.n``,
        ``reset()`` and ``step(action)``. Observations are used directly as
        indices into the value array.
    num_episodes : int
        Number of episodes to sample.
    gamma : float
        Discount factor applied to future rewards.
    policy : callable, optional
        Maps a state to an action. When omitted, actions are drawn uniformly
        at random with the ``random`` module (the original behaviour).

    Returns
    -------
    numpy.ndarray
        Array of length ``env.observation_space.n`` with the estimated value
        of each state; states that were never visited keep the value 0.
    """
    num_states = env.observation_space.n
    num_actions = env.action_space.n

    V = np.zeros(num_states)
    # Running totals replace the per-state list of returns: the mean is then
    # a constant-time update instead of re-averaging a growing list.
    return_sums = np.zeros(num_states)
    visit_counts = np.zeros(num_states, dtype=int)

    for _ in range(num_episodes):
        # Generate one episode, recording the state occupied at each step and
        # the reward received for the action taken from it.
        episode_states = []
        episode_rewards = []

        state = _reset_env(env)
        done = False
        while not done:
            if policy is None:
                action = random.randrange(num_actions)
            else:
                action = policy(state)
            next_state, reward, done = _step_env(env, action)

            episode_states.append(state)
            episode_rewards.append(reward)
            state = next_state

        # Index of the first occurrence of each state, computed once per
        # episode so the first-visit test below is a dictionary lookup.
        first_visit = {}
        for t, visited in enumerate(episode_states):
            first_visit.setdefault(visited, t)

        # Walk the episode backwards so the return can be accumulated
        # incrementally: G_t = r_t + gamma * G_{t+1}.
        G = 0.0
        for t in reversed(range(len(episode_states))):
            state = episode_states[t]
            G = gamma * G + episode_rewards[t]

            if first_visit[state] == t:
                visit_counts[state] += 1
                return_sums[state] += G
                V[state] = return_sums[state] / visit_counts[state]

    return V


In [8]:
import gym

# Fixed seed so the sampled episodes, and therefore the printed estimates,
# are repeatable from a fresh kernel.
SEED = 42

# Create the environment
env = gym.make('FrozenLake-v1')

# Set the number of episodes and discount factor
num_episodes = 1000
gamma = 0.99

# Seed the action sampler (the evaluation routine draws actions with `random`).
random.seed(SEED)
# Seed the environment's own transition randomness. Newer gym releases take
# the seed through reset(seed=...); older ones reject that keyword and expose
# env.seed(...) instead.
try:
    env.reset(seed=SEED)
except TypeError:
    env.seed(SEED)

# Perform Monte Carlo policy evaluation
V = monte_carlo_policy_evaluation(env, num_episodes, gamma)

# Release the environment's resources now that sampling is finished.
env.close()

# Print the estimated value function
print("Estimated Value Function:")
for state, value in enumerate(V):
    print(f"State {state}: {value}")


Estimated Value Function:
State 0: 0.0078944648793757
State 1: 0.00453153966813202
State 2: 0.010454998545621399
State 3: 0.00896216680679798
State 4: 0.00982435616075753
State 5: 0.0
State 6: 0.032044606434241604
State 7: 0.0
State 8: 0.023840947054415314
State 9: 0.05231187469575378
State 10: 0.11572921429711924
State 11: 0.0
State 12: 0.0
State 13: 0.11692833452142856
State 14: 0.3086344482758621
State 15: 0.0
