In [9]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [10]:
import gymnasium as gym
import numpy as np
import time
from gymnasium.envs.registration import register, registry
from gymnasium.envs.toy_text.frozen_lake import generate_random_map


def log(message):
    print(f"🔹 {message}")


#  Core Dynamic Programming: Value Iteration
def value_iteration(env, discount=0.99, threshold=1e-8):
    log(" Starting Value Iteration...")
    start_time = time.time()

    transition_probs = env.unwrapped.P  # Access raw environment transitions
    num_states = env.observation_space.n
    num_actions = env.action_space.n

    value_function = np.zeros(num_states)
    policy = np.zeros(num_states, dtype=int)

    iteration = 0
    while True:
        delta = 0
        for state in range(num_states):
            action_values = np.zeros(num_actions)
            for action in range(num_actions):
                if action not in transition_probs[state]:
                    continue
                for prob, next_state, reward, done in transition_probs[state][action]:
                    action_values[action] += prob * (reward + discount * value_function[next_state])
            best_action = np.argmax(action_values)
            delta = max(delta, abs(action_values[best_action] - value_function[state]))
            value_function[state] = action_values[best_action]
            policy[state] = best_action
        iteration += 1
        if delta < threshold:
            break

    elapsed = time.time() - start_time
    log(f" Converged in {iteration} iterations ( {elapsed:.3f} sec)")
    return policy, value_function


#  Test a learned policy by running episodes
def evaluate_policy(env, policy, episodes=100):
    total_rewards = []
    for episode in range(episodes):
        state, _ = env.reset()
        done = False
        reward_sum = 0
        while not done:
            action = policy[state]
            state, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated
            reward_sum += reward
        total_rewards.append(reward_sum)
    avg_reward = np.mean(total_rewards)
    log(f" Average reward over {episodes} episodes: {avg_reward:.3f}")
    return avg_reward


#  Register custom environments (with checks)
def register_custom_envs():
    log(" Setting up custom Frozen Lake environments...")

    # Remove existing registrations (if re-running)
    for env_id in ["CustomFrozenLake-v1", "ExpandedFrozenLake-v1"]:
        if env_id in registry:
            del registry[env_id]

    # 5x5 custom map
    custom_map = generate_random_map(size=5)

    # Custom Frozen Lake with no slipperiness
    register(
        id="CustomFrozenLake-v1",
        entry_point="gymnasium.envs.toy_text:FrozenLakeEnv",
        kwargs={"desc": custom_map, "is_slippery": False},
        max_episode_steps=100,
    )

    #  Expanded Frozen Lake with a teleport action (action 4)
    class ExpandedFrozenLake(gym.envs.toy_text.FrozenLakeEnv):
        def __init__(self, **kwargs):
            super().__init__(**kwargs)
            self.action_space = gym.spaces.Discrete(5)  # Original 4 + 1 teleport

            # Add teleport transitions to the transition table (P)
            for state in self.P:
                teleport_target = self.np_random.integers(0, self.observation_space.n)
                self.P[state][4] = [(1.0, teleport_target, 0.0, False)]

        def step(self, action):
            if action == 4:  # Teleport to random state
                self.s = self.np_random.integers(0, self.observation_space.n)
                return self.s, 0.0, False, False, {}
            return super().step(action)

    # Register the expanded version
    register(
        id="ExpandedFrozenLake-v1",
        entry_point=lambda **kwargs: ExpandedFrozenLake(**kwargs),
        kwargs={"desc": custom_map, "is_slippery": False},
        max_episode_steps=100,
    )


#  Run value iteration across all environments
def run_all_environments():
    register_custom_envs()

    # Setup environments
    envs = {
        "Original FrozenLake": gym.make("FrozenLake-v1", is_slippery=False),
        "Custom FrozenLake": gym.make("CustomFrozenLake-v1"),
        "Expanded FrozenLake": gym.make("ExpandedFrozenLake-v1"),
    }

    results = {}

    for env_name, env in envs.items():
        log(f"\n Running Value Iteration on {env_name}")
        policy, _ = value_iteration(env)
        avg_reward = evaluate_policy(env, policy)
        results[env_name] = avg_reward

    log("\n Summary of All Results:")
    for name, reward in results.items():
        print(f" {name}: Average Reward = {reward:.3f}")


# Run everything
if __name__ == "__main__":
    run_all_environments()


🔹  Setting up custom Frozen Lake environments...
🔹 
 Running Value Iteration on Original FrozenLake
🔹  Starting Value Iteration...
🔹  Converged in 7 iterations ( 0.002 sec)
🔹  Average reward over 100 episodes: 1.000
🔹 
 Running Value Iteration on Custom FrozenLake
🔹  Starting Value Iteration...
🔹  Converged in 9 iterations ( 0.005 sec)
🔹  Average reward over 100 episodes: 1.000
🔹 
 Running Value Iteration on Expanded FrozenLake
🔹  Starting Value Iteration...
🔹  Converged in 919 iterations ( 0.171 sec)
🔹  Average reward over 100 episodes: 1.000
🔹 
 Summary of All Results:
 Original FrozenLake: Average Reward = 1.000
 Custom FrozenLake: Average Reward = 1.000
 Expanded FrozenLake: Average Reward = 1.000
