# Q-Learning example

The following trains a **QController** to play the game. The **QController** keeps a Q-table that stores the Q-value for each (state, action) pair. Because the game's real states and actions are continuous, they are discretized before being looked up in the table.

In [None]:
from pod.board import PodBoard
from pod.ai.q_controller import QController
from pod.ai.rewards import regood

# Build a circular board and shuffle it.
# NOTE(review): the argument 5 is presumably the number of checkpoints — confirm
# against PodBoard.circle.
circle_board = PodBoard.circle(5)
board = circle_board.shuffle()

# The controller receives the board and the `regood` function imported from
# pod.ai.rewards.
q_con = QController(
    board,
    regood,
)

Here, we train the controller in rounds: the learning rate is decreased progressively, and for each learning rate the probability of taking a random (exploratory) action is lowered step by step.

In [None]:
import matplotlib.pyplot as plt

# Training schedule: five learning rates (0.75 down to 0.35 in steps of 0.1) and,
# for each of them, ten random-action probabilities (1.0 down to 0.1).
learning_rates = [(7.5 - step) / 10 for step in range(5)]
explore_probs = [(10 - step) / 10 for step in range(10)]

# One entry per call to q_con.train: the mean of the values that call returned.
rewards = []
for lr in learning_rates:
    print("------ Learning rate {}".format(lr))
    for prob in explore_probs:
        episode_results = q_con.train(
            num_episodes=3000,
            prob_rand_action=prob,
            learning_rate=lr,
        )
        mean_reward = sum(episode_results) / len(episode_results)
        print("  P(random move) = {} ---> Average best reward: {}".format(prob, mean_reward))
        rewards.append(mean_reward)

# Plot the averaged value of every training round, in the order they were run.
plt.plot(rewards)
plt.legend(["Average best reward per epoch"])
plt.show()

Now that it has been trained, let's see the result!

In [None]:
from pod.drawer import Drawer
from pod.controller import SimpleController

# The trained Q-controller and a fresh SimpleController are both handed to the
# Drawer for the same board.
contenders = [q_con, SimpleController(board)]
drawer = Drawer(board, controllers=contenders)

# Debug aid (disabled): place the first player's pod halfway between
# checkpoints 0 and 1 before animating.
#drawer.players[0].pod.pos = (board.checkpoints[0] + board.checkpoints[1]) * 0.5

# Kept as the last expression so the notebook displays whatever animate() returns.
# NOTE(review): 100 is presumably the number of frames/turns — confirm in Drawer.animate.
drawer.animate(100)

In [None]:
# Chart rewards via the drawer, passing `regood` as the reward function.
# NOTE(review): presumably this covers the players animated in the previous cell — confirm
# in Drawer.chart_rewards.
drawer.chart_rewards(regood)

# Scratchpad

In [None]:
# Number of entries currently held in the controller's Q-table.
len(q_con.q_table)

In [None]:
# Call the reward function directly for the first player's pod.
# NOTE(review): the second argument is passed as None; its meaning is not visible here —
# check the signature in pod.ai.rewards.
regood(board, None, drawer.players[0].pod)

In [None]:
import matplotlib.pyplot as plt

#plt.rcParams['figure.figsize'] = [Constants.world_x() / 1000, Constants.world_y() / 1000]
plt.rcParams['figure.dpi'] = 100

# Five vertically stacked axes: one histogram per component of the Q-table keys.
fig, axes = plt.subplots(5, 1, figsize=(10, 10))

component_titles = ['vel.x', 'vel.y', 'check.x', 'check.y', 'check2 angle']

# Snapshot the keys once; each key is indexed by component position below.
table_keys = list(q_con.q_table.keys())

for idx, (ax, title) in enumerate(zip(axes, component_titles)):
    component_values = [key[idx] for key in table_keys]
    ax.set_title(title)
    ax.hist(component_values)