# Frozen Lake Example for Value Iteration
![value_iteration](../images/lecture_1/value_iteration.png)

In [1]:
import gymnasium as gym
import numpy as np
from typing import List
# Build the 4x4 FrozenLake environment with slipping enabled.
env = gym.make(
    "FrozenLake-v1",
    map_name="4x4",
    is_slippery=True,
)

## State-Action Value Q(s, a)

In [2]:
def q_state_action(v: np.ndarray, state: int, action: int, P, gamma):
    """Return the one-step look-ahead value Q(state, action) under ``v``.

    Args:
        v: Current state-value estimates, indexed by state.
        state: State in which the action is taken.
        action: Action to evaluate.
        P: Transition table; ``P[state][action]`` is an iterable of
            ``(probability, next_state, reward, _)`` 4-tuples. The fourth
            element is ignored here.
        gamma: Discount factor applied to the next state's value.

    Returns:
        The sum over all listed outcomes of
        ``probability * (reward + gamma * v[next_state])``; ``0`` when the
        outcome list is empty.
    """
    expected_return = 0
    for prob, successor, immediate_reward, _ in P[state][action]:
        # Backed-up return of this single outcome, weighted by its likelihood.
        backed_up = immediate_reward + gamma * v[successor]
        expected_return = expected_return + prob * backed_up
    return expected_return


In [3]:
def value_iteration(num_states: int, num_actions: int, P, gamma,epsilon=1e-5):
    """Run value iteration and return a greedy policy.

    Each sweep performs a synchronous Bellman optimality backup: every
    state's new value is the maximum over actions of ``q_state_action``
    evaluated on the previous sweep's values. The greedy action is recorded
    during that same backup, so a policy is always available when the loop
    stops (including when the very first sweep already meets the stopping
    rule) and no separate policy-extraction pass is needed.

    Args:
        num_states: Number of states; states are ``0 .. num_states - 1``.
        num_actions: Number of actions; actions are ``0 .. num_actions - 1``.
        P: Transition table, passed unchanged to ``q_state_action``.
        gamma: Discount factor, passed unchanged to ``q_state_action``.
        epsilon: Stopping threshold. It is compared against the largest
            *squared* per-state value change of a sweep, so the effective
            tolerance on the absolute change is ``sqrt(epsilon)``.

    Returns:
        A list of length ``num_states`` whose entry ``s`` is the action (a
        plain ``int``) that is greedy with respect to the values the final
        sweep started from. Ties go to the lowest action index.
    """
    policy = [0] * num_states
    v_prev = np.zeros(num_states)
    while True:
        # Synchronous backup: read only from v_prev, write only to v_next.
        v_next = v_prev.copy()
        delta = 0.0
        for s_i in range(num_states):
            qs = [q_state_action(v_prev, s_i, action, P, gamma)
                  for action in range(num_actions)]
            # The maximising action is the greedy choice for this state and
            # its Q-value is the backed-up state value.
            best_action = int(np.argmax(qs))
            best_q = qs[best_action]
            policy[s_i] = best_action
            v_next[s_i] = best_q
            delta = max(delta, (best_q - v_prev[s_i]) ** 2)
        if delta < epsilon:
            break
        # Not converged yet: the new values become the next sweep's input.
        v_prev = v_next

    return policy

In [4]:
# Solve the MDP with discount factor 0.9. The transition table is read from
# the unwrapped environment: gym.make() returns the environment inside
# wrappers, and Gymnasium 1.0+ no longer forwards attribute lookups such as
# `P` through them.
policy = value_iteration(
    env.observation_space.n,
    env.action_space.n,
    env.unwrapped.P,
    0.9,
)
policy

[0, 3, 0, 3, 0, 0, 0, 0, 3, 1, 0, 0, 0, 2, 1, 0]

## Test the Selected Policy

In [5]:
def policy_test(env, policy: List, num_test: int):
    """Play ``num_test`` episodes with ``policy`` and print how many were won.

    Args:
        env: Environment whose ``reset()`` returns a sequence with the initial
            state first and whose ``step(action)`` returns
            ``(next_state, reward, terminated, truncated, info)``.
        policy: Sequence indexed by state giving the action to take.
        num_test: Number of episodes to play.

    The environment is reset at the start of every episode. An episode ends
    when the step reports either ``terminated`` or ``truncated``; the reward
    of that final step is added to the tally. Treating truncation as the end
    of the episode keeps the loop from stepping an environment that must be
    reset first, and from spinning forever on one that only ever truncates.
    """
    total_reward = 0
    for _ in range(num_test):
        state = env.reset()[0]
        while True:
            state, reward, is_terminated, truncated, _info = env.step(
                policy[state])
            if is_terminated or truncated:
                # Only the final step's reward is counted, so the tally reads
                # as "games won" when the environment pays 1 solely for
                # reaching the goal (assumed from the message below).
                total_reward += reward
                break
    print("won %d of %d games" % (total_reward, num_test))
    
# Evaluate the computed policy over 1000 episodes.
policy_test(env, policy, num_test=1000)


won 767 of 1000 games
