In [None]:
#@title Define the Environment
import numpy as np
import matplotlib.pyplot as plt

# Grid-world dimensions: 16 states (indexed 0..15) and 4 actions per state.
n_states, n_actions = 16, 4
goal_state = 15  # state index that the training loop treats as the goal

# One row per state, one column per action; every estimate starts at zero.
Q_table = np.zeros(shape=(n_states, n_actions))

In [None]:
#@title Set Hyperparameters
epochs = 1_000             # number of training episodes run by the learning loop
learning_rate = 0.8        # alpha: step size applied to each Q-value correction
discount_factor = 0.95     # gamma: weight on the best Q-value of the next state
exploration_prob = 0.2     # epsilon: probability of picking a random action

In [None]:
#@title Define the State Transition Function
def get_next_state(state, action, grid_size=4):
    """Return the state reached by taking ``action`` from ``state`` on a square grid.

    States are numbered row-major, so ``state == row * grid_size + col``.

    Args:
        state: Index of the current cell.
        action: Move to attempt: 0 decreases the column, 1 increases the
            column, 2 decreases the row, 3 increases the row. Any other
            value leaves the state unchanged.
        grid_size: Number of rows and columns of the grid. Defaults to 4,
            which matches the 16-state environment.

    Returns:
        Index of the resulting cell. A move that would leave the grid is
        ignored and the original state is returned.
    """
    row, col = divmod(state, grid_size)
    last = grid_size - 1  # highest valid row/column index

    if action == 0 and col > 0:
        col -= 1
    elif action == 1 and col < last:
        col += 1
    elif action == 2 and row > 0:
        row -= 1
    elif action == 3 and row < last:
        row += 1

    return row * grid_size + col

In [None]:
#@title Implement the Q-Learning Algorithm

# Tabular Q-learning: each epoch is one episode that starts in a random state
# and ends as soon as the agent steps into goal_state.
for epoch in range(epochs):
    current_state = np.random.randint(0, n_states)

    # The goal is terminal. An episode that starts there has nothing to learn,
    # and updating from it would give Q_table[goal_state] a non-zero value that
    # every transition into the goal would then bootstrap from.
    if current_state == goal_state:
        continue

    while True:
        # Epsilon-greedy action selection: explore with probability
        # exploration_prob, otherwise take the currently best-rated action.
        if np.random.rand() < exploration_prob:
            action = np.random.randint(0, n_actions)
        else:
            action = np.argmax(Q_table[current_state])

        next_state = get_next_state(current_state, action)
        reached_goal = next_state == goal_state

        # Reward is 1 when the move lands on the goal, 0 otherwise.
        reward = 1 if reached_goal else 0

        # No future return exists beyond a terminal state, so the target for a
        # goal-reaching move is the reward alone.
        future_value = 0.0 if reached_goal else np.max(Q_table[next_state])
        td_target = reward + discount_factor * future_value

        Q_table[current_state, action] += learning_rate * (
            td_target - Q_table[current_state, action]
        )

        if reached_goal:
            break

        current_state = next_state

In [None]:
#@title Output the Learned Q-Table

# Highest Q-value of each state, arranged on the 4x4 grid in row-major order.
q_values_grid = Q_table.max(axis=1).reshape(4, 4)

fig, ax = plt.subplots(figsize=(6, 6))
heatmap = ax.imshow(q_values_grid, cmap='coolwarm', interpolation='nearest')
fig.colorbar(heatmap, ax=ax, label='Q-value')
ax.set_title('Learned Q-values for Each State')

# Label both axes with the grid indices 0..3.
tick_positions = np.arange(4)
tick_labels = ['0', '1', '2', '3']
ax.set_xticks(tick_positions)
ax.set_xticklabels(tick_labels)
ax.set_yticks(tick_positions)
ax.set_yticklabels(tick_labels)
ax.invert_yaxis()  # flip the vertical axis direction
ax.grid(True)

# Write each cell's value (two decimals) at the centre of the cell.
for (i, j), value in np.ndenumerate(q_values_grid):
    ax.text(j, i, f'{value:.2f}', ha='center', va='center', color='black')

plt.show()

# Dump the full state-by-action table below the figure.
print("Learned Q-table:")
print(Q_table)

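# How to read the heatmap colours (the numeric ranges below were read from one
# recorded run; exact values vary between runs):
# Red / dark red (16.5-17.0) = VERY HIGH Q-value
# Orange / light red (15.0-16.5) = HIGH Q-value
# Blue (13.0-14.5) = LOW Q-value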

