In [2]:
# reference: https://www.youtube.com/watch?v=ZxXKISVkH6Y
import sys
import numpy as np
import math
import random

import gym
import gym_game

def simulate():
    """Train the module-level ``q_table`` with epsilon-greedy tabular Q-learning.

    Runs ``MAX_EPISODES`` episodes of at most ``MAX_TRY`` steps each against
    the module-level ``env``, rendering after every step and printing a
    one-line summary when an episode ends.

    Reads the globals ``env``, ``q_table``, ``MAX_EPISODES``, ``MAX_TRY``,
    ``learning_rate``, ``gamma`` and ``epsilon_decay``. Updates ``q_table``
    in place and rebinds the global ``epsilon`` as exploration decays.

    Returns:
        None.
    """
    # Only ``epsilon`` is rebound here; every other global is merely read.
    global epsilon
    for episode in range(MAX_EPISODES):

        # Init environment
        # NOTE(review): assumes the observation can index q_table directly
        # (e.g. an int or a tuple of ints) -- confirm against the environment.
        state = env.reset()
        total_reward = 0

        # AI tries up to MAX_TRY times
        for t in range(MAX_TRY):

            # Epsilon-greedy: explore with probability epsilon, else exploit
            if random.uniform(0, 1) < epsilon:
                action = env.action_space.sample()
            else:
                action = np.argmax(q_table[state])

            # Do action and get result
            next_state, reward, done, _ = env.step(action)
            total_reward += reward

            # Current estimate for the (state, action) pair just taken
            q_value = q_table[state][action]

            # A terminal transition has no future return, so do not bootstrap
            # from whatever happens to be stored at the terminal state's index.
            best_q = 0.0 if done else np.max(q_table[next_state])

            # Q(s, a) <- (1 - lr) * Q(s, a) + lr * (reward + gamma * max_a' Q(s', a'))
            q_table[state][action] = (1 - learning_rate) * q_value + learning_rate * (reward + gamma * best_q)

            # Set up for the next iteration
            state = next_state

            # Draw games
            env.render()

            # When the episode is done (or the step budget is spent), print reward
            if done or t >= MAX_TRY - 1:
                print("Episode %d finished after %i time steps with total reward = %f." % (episode, t, total_reward))
                break

        # Exploration-rate decay, applied once per episode until it drops
        # below the 0.005 floor
        if epsilon >= 0.005:
            epsilon *= epsilon_decay



# --- Hyperparameters -------------------------------------------------------
MAX_EPISODES = 999       # number of training episodes
MAX_TRY = 1000           # step budget per episode
learning_rate = 0.1      # weight given to each new Q-learning target
gamma = 0.6              # discount applied to the next state's best Q-value
epsilon = 0.5            # initial probability of taking a random action
epsilon_decay = 0.999    # per-episode multiplier applied to epsilon

# --- Environment and Q-table -----------------------------------------------
env = gym.make("Pygame-v0")

# One table axis per observation component, sized ``high + 1``, followed by
# one axis for the discrete actions.
# NOTE(review): sizing from ``high`` alone presumes observations are
# non-negative integers -- confirm against the environment.
num_box = tuple(
    (env.observation_space.high + np.ones(env.observation_space.shape)).astype(int)
)
q_table = np.zeros(num_box + (env.action_space.n,))

# --- Train -----------------------------------------------------------------
simulate()

Episode 0 finished after 25 time steps with total reward = -9804.500000.
Episode 1 finished after 14 time steps with total reward = -9932.500000.
Episode 2 finished after 17 time steps with total reward = -9857.500000.
Episode 3 finished after 9 time steps with total reward = -9953.500000.
Episode 4 finished after 61 time steps with total reward = -9429.500000.
Episode 5 finished after 19 time steps with total reward = -9911.500000.
Episode 6 finished after 53 time steps with total reward = -9551.500000.
Episode 7 finished after 34 time steps with total reward = -9685.000000.
Episode 8 finished after 21 time steps with total reward = -9824.000000.
Episode 9 finished after 14 time steps with total reward = -9889.500000.
Episode 10 finished after 18 time steps with total reward = -9865.500000.
Episode 11 finished after 40 time steps with total reward = -9634.000000.
Episode 12 finished after 41 time steps with total reward = -9649.000000.
Episode 13 finished after 13 time steps with tota

Episode 111 finished after 47 time steps with total reward = -9586.500000.
Episode 112 finished after 17 time steps with total reward = -9855.500000.
Episode 113 finished after 20 time steps with total reward = -9874.000000.
Episode 114 finished after 20 time steps with total reward = -9862.500000.
Episode 115 finished after 49 time steps with total reward = -9546.500000.
Episode 116 finished after 51 time steps with total reward = -9534.500000.
Episode 117 finished after 56 time steps with total reward = -9490.000000.
Episode 118 finished after 47 time steps with total reward = -9601.000000.
Episode 119 finished after 17 time steps with total reward = -9877.000000.
Episode 120 finished after 19 time steps with total reward = -9901.000000.
Episode 121 finished after 45 time steps with total reward = -9585.500000.
Episode 122 finished after 44 time steps with total reward = -9614.500000.
Episode 123 finished after 61 time steps with total reward = -9477.000000.
Episode 124 finished afte

Episode 221 finished after 19 time steps with total reward = -9846.000000.
Episode 222 finished after 33 time steps with total reward = -9703.500000.
Episode 223 finished after 18 time steps with total reward = -9846.000000.
Episode 224 finished after 12 time steps with total reward = -9921.500000.
Episode 225 finished after 63 time steps with total reward = -9396.000000.
Episode 226 finished after 61 time steps with total reward = -9433.000000.
Episode 227 finished after 17 time steps with total reward = -9872.500000.
Episode 228 finished after 41 time steps with total reward = -9625.000000.
Episode 229 finished after 63 time steps with total reward = -9441.500000.
Episode 230 finished after 46 time steps with total reward = -9566.500000.
Episode 231 finished after 62 time steps with total reward = -9453.000000.
Episode 232 finished after 63 time steps with total reward = -9421.000000.
Episode 233 finished after 43 time steps with total reward = -9611.500000.
Episode 234 finished afte

Episode 331 finished after 61 time steps with total reward = -9422.000000.
Episode 332 finished after 39 time steps with total reward = -9643.500000.
Episode 333 finished after 22 time steps with total reward = -9822.000000.
Episode 334 finished after 19 time steps with total reward = -9904.000000.
Episode 335 finished after 62 time steps with total reward = -9437.000000.
Episode 336 finished after 46 time steps with total reward = -9572.000000.
Episode 337 finished after 60 time steps with total reward = -9448.000000.
Episode 338 finished after 16 time steps with total reward = -9874.500000.
Episode 339 finished after 38 time steps with total reward = -9686.000000.
Episode 340 finished after 15 time steps with total reward = -9901.500000.
Episode 341 finished after 56 time steps with total reward = -9487.000000.
Episode 342 finished after 20 time steps with total reward = -9832.000000.
Episode 343 finished after 11 time steps with total reward = -9915.000000.
Episode 344 finished afte

Episode 441 finished after 118 time steps with total reward = -8887.000000.
Episode 442 finished after 61 time steps with total reward = -9439.000000.
Episode 443 finished after 62 time steps with total reward = -9459.000000.
Episode 444 finished after 14 time steps with total reward = -9921.500000.
Episode 445 finished after 91 time steps with total reward = -9251.000000.
Episode 446 finished after 67 time steps with total reward = -9405.500000.
Episode 447 finished after 10 time steps with total reward = -9937.000000.
Episode 448 finished after 13 time steps with total reward = -9909.000000.
Episode 449 finished after 11 time steps with total reward = -9933.000000.
Episode 450 finished after 65 time steps with total reward = -9422.000000.
Episode 451 finished after 15 time steps with total reward = -9892.000000.
Episode 452 finished after 61 time steps with total reward = -9417.500000.
Episode 453 finished after 60 time steps with total reward = -9436.000000.
Episode 454 finished aft

Episode 551 finished after 60 time steps with total reward = -9433.000000.
Episode 552 finished after 66 time steps with total reward = -9387.000000.
Episode 553 finished after 61 time steps with total reward = -9442.500000.
Episode 554 finished after 69 time steps with total reward = -9409.000000.
Episode 555 finished after 24 time steps with total reward = -9781.500000.
Episode 556 finished after 116 time steps with total reward = -8909.500000.
Episode 557 finished after 65 time steps with total reward = -9428.000000.
Episode 558 finished after 42 time steps with total reward = -9607.000000.
Episode 559 finished after 52 time steps with total reward = -9513.500000.
Episode 560 finished after 61 time steps with total reward = -9432.500000.
Episode 561 finished after 67 time steps with total reward = -9398.000000.
Episode 562 finished after 55 time steps with total reward = -9478.500000.
Episode 563 finished after 65 time steps with total reward = -9396.000000.
Episode 564 finished aft

Episode 661 finished after 63 time steps with total reward = -9425.500000.
Episode 662 finished after 121 time steps with total reward = -8897.000000.
Episode 663 finished after 48 time steps with total reward = -9563.000000.
Episode 664 finished after 68 time steps with total reward = -9405.500000.
Episode 665 finished after 59 time steps with total reward = -9452.500000.
Episode 666 finished after 80 time steps with total reward = -9265.000000.
Episode 667 finished after 15 time steps with total reward = -9875.500000.
Episode 668 finished after 63 time steps with total reward = -9429.000000.
Episode 669 finished after 84 time steps with total reward = -9252.500000.
Episode 670 finished after 23 time steps with total reward = -9808.500000.
Episode 671 finished after 32 time steps with total reward = -9704.000000.
Episode 672 finished after 57 time steps with total reward = -9478.000000.
Episode 673 finished after 47 time steps with total reward = -9557.500000.
Episode 674 finished aft

Episode 770 finished after 45 time steps with total reward = -9597.000000.
Episode 771 finished after 129 time steps with total reward = -8809.500000.
Episode 772 finished after 49 time steps with total reward = -9548.500000.
Episode 773 finished after 65 time steps with total reward = -9398.000000.
Episode 774 finished after 138 time steps with total reward = -8705.000000.
Episode 775 finished after 29 time steps with total reward = -9740.500000.
Episode 776 finished after 117 time steps with total reward = -8892.500000.
Episode 777 finished after 113 time steps with total reward = -8938.000000.
Episode 778 finished after 121 time steps with total reward = -8883.500000.
Episode 779 finished after 67 time steps with total reward = -9405.500000.
Episode 780 finished after 77 time steps with total reward = -9318.500000.
Episode 781 finished after 28 time steps with total reward = -9752.000000.
Episode 782 finished after 138 time steps with total reward = -8710.000000.
Episode 783 finishe

Episode 879 finished after 241 time steps with total reward = -7760.500000.
Episode 880 finished after 244 time steps with total reward = -7789.000000.
Episode 881 finished after 224 time steps with total reward = -7913.500000.
Episode 882 finished after 154 time steps with total reward = -8606.500000.
Episode 883 finished after 82 time steps with total reward = -9264.000000.
Episode 884 finished after 114 time steps with total reward = -8939.000000.
Episode 885 finished after 240 time steps with total reward = -7754.000000.
Episode 886 finished after 241 time steps with total reward = -7779.500000.
Episode 887 finished after 79 time steps with total reward = -9255.500000.
Episode 888 finished after 59 time steps with total reward = -9438.500000.
Episode 889 finished after 18 time steps with total reward = -9862.000000.
Episode 890 finished after 171 time steps with total reward = -8400.500000.
Episode 891 finished after 254 time steps with total reward = -7720.500000.
Episode 892 fini

Episode 988 finished after 243 time steps with total reward = -7766.000000.
Episode 989 finished after 67 time steps with total reward = -9384.000000.
Episode 990 finished after 119 time steps with total reward = -8901.500000.
Episode 991 finished after 130 time steps with total reward = -8839.000000.
Episode 992 finished after 66 time steps with total reward = -9398.500000.
Episode 993 finished after 63 time steps with total reward = -9422.500000.
Episode 994 finished after 125 time steps with total reward = -8857.500000.
Episode 995 finished after 118 time steps with total reward = -8943.000000.
Episode 996 finished after 248 time steps with total reward = -7745.500000.
Episode 997 finished after 273 time steps with total reward = -7580.000000.
Episode 998 finished after 139 time steps with total reward = -8789.500000.


In [4]:
# Inspect the Q-table dimensions: one axis per observation component plus a
# trailing axis for the actions.
q_table.shape

(11, 11, 11, 11, 11, 3)