In [None]:
import gym
import numpy as np

# Create the FrozenLake-v1 environment with the slippery option enabled.
env = gym.make('FrozenLake-v1', is_slippery=True)


In [None]:
# Q-learning hyperparameters.
# Learning-rate schedule: starting value, lower bound and decay factor.
initial_learning_rate, min_learning_rate, decay_rate = 1.0, 0.003, 0.005
# Discount applied to the bootstrapped next-state value.
discount_factor = 0.99
# Epsilon-greedy exploration: starting value, lower bound and decay multiplier.
epsilon, min_epsilon, epsilon_decay = 1.0, 0.1, 0.995

# Q-table: one row per state, one column per action, initialised to zero.
Q = np.zeros(shape=(env.observation_space.n, env.action_space.n))

def choose_action(state):
    """Pick an action for ``state`` with an epsilon-greedy rule.

    A uniform random number is drawn; if it falls below the module-level
    ``epsilon`` a random action is sampled from the environment's action
    space, otherwise the action with the highest Q-value for ``state`` is
    returned.
    """
    explore = np.random.rand() < epsilon
    if explore:
        return env.action_space.sample()
    return np.argmax(Q[state])


In [None]:
def get_learning_rate(episode):
    """Return the learning rate to use for the given (0-based) episode index.

    The rate follows ``1 - log10((episode + 1) * decay_rate)`` and is then
    clamped to the range ``[min_learning_rate, initial_learning_rate]``.
    """
    decayed = 1.0 - np.log10((episode + 1) * decay_rate)
    # Cap at the initial rate first, then enforce the lower bound.
    capped = min(initial_learning_rate, decayed)
    return max(min_learning_rate, capped)

# Train the agent with tabular Q-learning (updates Q and epsilon in place).
num_episodes = 2000
for episode in range(num_episodes):
    # The learning rate depends only on the episode index, so compute it once
    # per episode instead of on every environment step.
    learning_rate = get_learning_rate(episode)

    # gym >= 0.26 returns (observation, info) from reset(); older releases
    # return the bare observation. Accept both.
    reset_result = env.reset()
    state = reset_result[0] if isinstance(reset_result, tuple) else reset_result

    done = False
    while not done:
        action = choose_action(state)

        # gym >= 0.26 returns (obs, reward, terminated, truncated, info);
        # older releases return (obs, reward, done, info).
        step_result = env.step(action)
        if len(step_result) == 5:
            next_state, reward, terminated, truncated, _ = step_result
            done = terminated or truncated
        else:
            next_state, reward, done, _ = step_result

        # Q-learning update: bootstrap from the greedy action in next_state.
        best_next_action = np.argmax(Q[next_state])
        td_target = reward + discount_factor * Q[next_state][best_next_action]
        td_error = td_target - Q[state][action]
        Q[state][action] += learning_rate * td_error
        state = next_state

    # Decay exploration after each episode, never going below min_epsilon.
    epsilon = max(min_epsilon, epsilon * epsilon_decay)


In [None]:
# Evaluate the agent by acting greedily with respect to Q.
def evaluate_agent(num_episodes=100):
    """Run the greedy policy from ``Q`` and return the mean reward per episode.

    Args:
        num_episodes: Number of evaluation episodes to run; must be non-zero
            because the accumulated reward is divided by it.

    Returns:
        The sum of all rewards collected across the episodes divided by
        ``num_episodes``.
    """
    total_rewards = 0
    for _ in range(num_episodes):
        # gym >= 0.26 returns (observation, info) from reset(); older releases
        # return the bare observation. Accept both.
        reset_result = env.reset()
        state = reset_result[0] if isinstance(reset_result, tuple) else reset_result

        done = False
        while not done:
            action = np.argmax(Q[state])

            # gym >= 0.26 returns (obs, reward, terminated, truncated, info);
            # older releases return (obs, reward, done, info).
            step_result = env.step(action)
            if len(step_result) == 5:
                next_state, reward, terminated, truncated, _ = step_result
                done = terminated or truncated
            else:
                next_state, reward, done, _ = step_result

            total_rewards += reward
            state = next_state
    return total_rewards / num_episodes

# Evaluate the agent (evaluate_agent() is called with its default number of
# episodes) and print the resulting average reward per episode.
print(f"Rata-rata imbalan per episode: {evaluate_agent()}")


In [None]:
# Visualise how the final policy scores across repeated evaluation runs.
import matplotlib.pyplot as plt

# evaluate_agent() does not modify Q, so every point below is an independent
# evaluation of the same Q-table; the x-axis counts evaluation runs (one per
# value in range(num_episodes)), and each y-value is an average reward per
# episode as returned by evaluate_agent(), not a total.
episodes = range(num_episodes)
rewards = [evaluate_agent() for _ in episodes]

fig, ax = plt.subplots()
ax.plot(episodes, rewards)
# Labels describe what is actually plotted: the average reward of each
# evaluation run, rather than a per-training-episode total.
ax.set_xlabel('Putaran Evaluasi')
ax.set_ylabel('Rata-rata Imbalan per Episode')
ax.set_title('Rata-rata Imbalan Kebijakan Akhir per Putaran Evaluasi')
plt.show()