In [10]:
import pandas as pd
import numpy as np

# Grid-world configuration shared by the cells below.
actions = [
    'up',
    'down',
    'left',
    'right',
]
goal_state = 24               # state whose value is initialised to +1
hole_states = [6, 9, 14, 20]  # states whose value is initialised to -1
gamma = 0.9                   # discount factor

def index_to_coords(index):
    """Convert a flat state index into 1-based ``(row, col)`` coordinates.

    States are laid out row by row, five per row, so index 0 maps to
    ``(1, 1)`` and index 24 maps to ``(5, 5)``.
    """
    row_zero_based, col_zero_based = divmod(index, 5)
    return row_zero_based + 1, col_zero_based + 1

def coords_to_index(row, col):
    """Convert 1-based ``(row, col)`` coordinates into a flat state index.

    Rows are five cells wide, so ``(1, 1)`` maps to 0 and ``(5, 5)`` maps to 24.
    """
    row_offset = (row - 1) * 5
    col_offset = col - 1
    return row_offset + col_offset

def get_next_state(state, action):
    """Return the state index reached by taking ``action`` from ``state``.

    The move is applied in 1-based (row, col) space. If the resulting cell
    lies outside rows 1..5 / columns 1..5, the original ``state`` is returned
    unchanged. An action that matches none of the four names moves nowhere.
    """
    row, col = index_to_coords(state)
    # (action name, row delta, col delta)
    for name, d_row, d_col in (
        ('up', -1, 0),
        ('down', 1, 0),
        ('left', 0, -1),
        ('right', 0, 1),
    ):
        if action == name:
            row, col = row + d_row, col + d_col
            break
    inside_grid = 1 <= row <= 5 and 1 <= col <= 5
    return coords_to_index(row, col) if inside_grid else state

In [11]:
import os  # local import: only needed to create the output directory below

# Initial value table: 0 everywhere, +1 at the goal, -1 at every hole.
V = np.zeros(25)
V[goal_state] = 1
V[hole_states] = -1

# Save initial values: one record per state, labelled "S<index>(<row>,<col>)"
# using the 1-based coordinates returned by index_to_coords.
v_data = []
for state in range(25):
    row, col = index_to_coords(state)
    v_data.append({'state': f"S{state}({row},{col})", 'V(s)': V[state]})

df_v_values = pd.DataFrame(v_data)

# to_csv does not create missing parent directories, so make sure assets/
# exists; otherwise this cell fails on a fresh checkout.
os.makedirs("assets", exist_ok=True)
df_v_values.to_csv("assets/v_values.csv", index=False)
print("✅ Saved: v_values.csv")

✅ Saved: v_values.csv


In [16]:
import numpy as np
import pandas as pd

import os  # local import so this cell still runs on its own

# Example: goal_state and hole_states (flat indices into the 5x5 grid)
goal_state = 24  # Goal state (index 24, S24)
hole_states = [6, 9, 14, 20]  # Hole states (indices)

# Initial value table: 0 everywhere, +1 at the goal, -1 at each hole.
V = np.zeros(25)
V[goal_state] = 1
for hole in hole_states:
    V[hole] = -1

# Save initial values in DataFrame.
# Labels use 1-based (row, col) coordinates so they match the labels produced
# by index_to_coords elsewhere in this notebook (divmod alone is 0-based).
v_data = []
for state in range(25):
    zero_row, zero_col = divmod(state, 5)
    row, col = zero_row + 1, zero_col + 1
    v_data.append({
        'state': f"S{state}({row},{col})",
        'V(s)': V[state]
    })

# Convert to DataFrame and save to CSV.
df_v_values = pd.DataFrame(v_data)

# to_csv does not create missing parent directories, so make sure assets/
# exists; otherwise this cell fails on a fresh checkout.
os.makedirs("assets", exist_ok=True)
df_v_values.to_csv("assets/v_values.csv", index=False)

print("✅ Saved: v_values.csv")

✅ Saved: v_values.csv


In [22]:
import pandas as pd
import numpy as np

# Environment setup
actions = [
    'up',
    'down',
    'left',
    'right',
]
goal_state = 24               # state whose value is initialised to +1
hole_states = [6, 9, 14, 20]  # states whose value is initialised to -1
gamma = 0.9                   # discount factor
theta = 1e-4                  # threshold for convergence

# Mapping from index to coordinates
def index_to_coords(index):
    """Map a flat state index to ``(row, col)`` in 1-based coordinates.

    With five states per row, index 0 becomes ``(1, 1)`` and index 24
    becomes ``(5, 5)``.
    """
    full_rows, remainder = divmod(index, 5)
    return full_rows + 1, remainder + 1

def coords_to_index(row, col):
    """Map 1-based ``(row, col)`` coordinates back to a flat state index.

    With five states per row, ``(1, 1)`` becomes 0 and ``(5, 5)`` becomes 24.
    """
    states_before_row = (row - 1) * 5
    offset_in_row = col - 1
    return states_before_row + offset_in_row

# Transition model
def get_next_state(state, action):
    """Return the state index reached by taking ``action`` from ``state``.

    'up'/'down' change the row by -1/+1 and 'left'/'right' change the column
    by -1/+1 (1-based coordinates). A move that would leave rows 1..5 or
    columns 1..5 is invalid and results in staying in the same state. Any
    other action value leaves the position unchanged.
    """
    # Each comparison contributes +1, -1 or 0 to the corresponding axis.
    d_row = int(action == 'down') - int(action == 'up')
    d_col = int(action == 'right') - int(action == 'left')

    row, col = index_to_coords(state)
    target_row = row + d_row
    target_col = col + d_col

    if not (1 <= target_row <= 5 and 1 <= target_col <= 5):
        return state  # invalid move results in staying in same state
    return coords_to_index(target_row, target_col)

import os  # local import so this cell still runs on its own

# Initialize value table: 0 everywhere, +1 at the goal, -1 at each hole.
V = np.zeros(25)
V[goal_state] = 1
for hole in hole_states:
    V[hole] = -1

# Store initial values. iterations_dict maps iteration number -> {label: value};
# labels look like "S<index>(<row>,<col>)" with 1-based coordinates.
iterations_dict = {}
row_labels = []
for state in range(25):
    row, col = index_to_coords(state)
    row_labels.append(f"S{state}({row},{col})")
iterations_dict[0] = {label: round(V[i], 4) for i, label in enumerate(row_labels)}  # Initial values

# Value Iteration: synchronous sweeps (each sweep reads the previous V and
# writes into a copy) until the largest per-state change drops below theta.
iteration = 1
while True:
    delta = 0
    new_V = V.copy()
    for state in range(25):
        # Goal and hole states keep their fixed initial values.
        if state == goal_state or state in hole_states:
            continue
        values = []
        for action in actions:
            next_state = get_next_state(state, action)
            if next_state == goal_state:
                reward = 1
            elif next_state in hole_states:
                reward = -1
            else:
                reward = 0
            # NOTE(review): V[goal] / V[hole] stay at +1 / -1, so entering a
            # terminal state is worth reward + gamma * V[terminal] (e.g.
            # 1 + 0.9 * 1 for the goal). Confirm this double counting is intended.
            values.append(reward + gamma * V[next_state])
        new_V[state] = max(values)
        delta = max(delta, abs(V[state] - new_V[state]))
    V = new_V
    iterations_dict[iteration] = {label: round(V[i], 4) for i, label in enumerate(row_labels)}
    iteration += 1
    if delta < theta:
        break

# Convert to DataFrame (rows = states, columns = iteration numbers) and save.
df = pd.DataFrame(iterations_dict)
df.index.name = 'State'

# to_csv does not create missing parent directories, so make sure assets/
# exists; otherwise this cell fails on a fresh checkout.
os.makedirs("assets", exist_ok=True)
df.to_csv("assets/value_iteration.csv")
print("✅ Saved: assets/value_iteration.csv")

✅ Saved: assets/value_iteration.csv


In [19]:
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Reload the saved table: one row per state, one column per iteration.
df = pd.read_csv("assets/value_iteration.csv", index_col=0)
os.makedirs("assets/heatmaps", exist_ok=True)

# Render one annotated 5x5 heatmap per iteration column and save it as a PNG.
for column in df.columns:
    grid = df[column].values.reshape(5, 5)
    fig, ax = plt.subplots(figsize=(6,5))
    sns.heatmap(grid, annot=True, cmap="YlGnBu", cbar=False, fmt=".2f", ax=ax)
    ax.set_title(f"Iteration {column}")
    fig.savefig(f"assets/heatmaps/iter_{column}.png")
    plt.close(fig)  # release the figure so they do not pile up across iterations

print("✅ Saved heatmaps to assets/heatmaps/")

✅ Saved heatmaps to assets/heatmaps/


In [15]:
import imageio
import glob

def _frame_number(path):
    """Return the integer between the last '_' in ``path`` and the following '.'."""
    tail = path.split('_')[-1]
    return int(tail.split('.')[0])


# Collect every PNG in the heatmap folder and order the frames numerically
# (a plain string sort would put iter_10 before iter_2).
img_paths = glob.glob("assets/heatmaps/*.png")
img_paths.sort(key=_frame_number)
images = list(map(imageio.v2.imread, img_paths))

# NOTE(review): the unit of `duration` (seconds vs. milliseconds) may depend on
# the installed imageio version; confirm the GIF plays at the intended speed.
imageio.mimsave("assets/value_iteration.gif", images, duration=0.8, loop=0)
print("✅ GIF saved to assets/value_iteration.gif")

✅ GIF saved to assets/value_iteration.gif
