In [11]:
import gymnasium
import gymnasium_env
from gymnasium.wrappers import FlattenObservation
import matplotlib.pyplot as plt
import numpy as np

env = gymnasium.make('gymnasium_env/KnightWorldEnv-v0', size=12)

alpha = 0.1
gamma = 0.9
epsilon = 0.9
episodes = 10000
max_steps = 100
epsilon_decay = 0.999

state_space_size = env.observation_space.n
action_space_size = env.action_space.n
Q = np.zeros((state_space_size, action_space_size))

rewards = []

for episode in range(episodes):
    state, _ = env.reset()
    total_reward = 0

    for step in range(max_steps):
        if np.random.rand() < epsilon:
            action = env.action_space.sample()
        else:
            action = np.argmax(Q[state])

        next_state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated

        old_value = Q[state, action]
        next_max = np.max(Q[next_state])
        Q[state, action] = old_value + alpha * (reward + gamma * next_max - old_value)

        state = next_state
        total_reward += reward

        if done:
            break
    if epsilon > 0.01:
        epsilon *= epsilon_decay

    rewards.append(total_reward)

window = 1000
if len(rewards) > window:
    avg_rewards = [np.mean(rewards[i-window:i+1]) for i in range(window, len(rewards))]

    plt.plot(avg_rewards)
    plt.title("Krzywa uczenia - KnightWorldEnv (Q-learning)")
    plt.xlabel("Epizod")
    plt.ylabel("Średnia nagroda")
    plt.grid(True)
    plt.show()
else:
    print("Not enough episodes to calculate average rewards.")

np.save("Q_table.npy", Q)

AttributeError: 'Dict' object has no attribute 'n'

In [None]:
import time


env = gym.make("gymnasium_env/KnightWorldEnv-v0", is_slippery=True, render_mode="human")

Q = np.load("Q_table.npy")

state, _ = env.reset()
done = False
total_reward = 0
max_steps = 100

print("\nWytrenowany agent w akcji:")

for _ in range(max_steps):
    time.sleep(0.2)
    action = np.argmax(Q[state])
    next_state, reward, terminated, truncated, _ = env.step(action)
    done = terminated or truncated

    total_reward += reward
    state = next_state

    if done:
        break

print(f"\nSuma nagród zdobytych przez agenta: {total_reward}")
env.close()