## 1. Import libraries

In [1]:
import sys
sys.path.append('../')

%matplotlib inline
from collections import defaultdict
from typing import List
from sklearn.preprocessing import StandardScaler

import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns

import gym

import src.agents as agents
import src.episode as episode
import src.environment as environment
import src.aux_plots as aux_plots
import src.serializer as serializer

## 2. Auxiliary functions

### 2.1. Reduce state space

In [2]:
def reduce_state(ob):
    # Doesn't matter where we were hit
    ob[16] = 1 if ob[16] != 255 else 0

    # Reduce chicken y-position
    ob[14] = ob[14] // 3

    for b in range(108, 118):
        # The chicken is in the x-posistion ~49
        if ob[b] < 20 or ob[b] > 80:
            # We don't need to represent cars far from the chicken
            ob[b] = 0
        else:
            # Reduce the cars x-positions sample space
            ob[b] = ob[b] // 3

    return ob

### 2.2. Reward policy

In [3]:
def reward_policy(reward, ob, action):
    if reward == 1:
        reward = reward_policy.REWARD_IF_CROSS
    elif ob[16] == 1:  # Collision!
        reward = reward_policy.REWARD_IF_COLISION
    elif action != 1:  # Don't incentivate staying still
        reward = reward_policy.REWARD_IF_STILL

    return reward

### 2.3. Print results

In [4]:
def print_result(i, scores, total_reward, score):
#     if i % 10 == 0:
        print(f"Run [{i:4}] - Total reward: {total_reward:7.2f} Mean scores: {sum(scores) / len(scores):.2f} Means Scores[:-10]: {sum(scores[-10:]) / len(scores[-10:]):5.2f} Score: {score:2} ")

## 3. Running algorithms

### 3.1. Hyperparameters

In [5]:
RAM_mask = [
      14  # Chicken Y
    , 16  # Chicken Lane Collide
    , 108, 109, 110, 111, 112, 113, 114, 115, 116, 117  # Car X Coords
]

GAMMA = 0.99
AVAILABLE_ACTIONS = 2
N0 = 2.5
ALPHA = 0.00001

reward_policy.REWARD_IF_CROSS = 50
reward_policy.REWARD_IF_COLISION = -1
reward_policy.REWARD_IF_STILL = -0.1

### 3.2. Monte Carlo

In [None]:
env, initial_state = environment.get_env()

mc_agent = agents.MonteCarloControl(gamma=GAMMA, available_actions=AVAILABLE_ACTIONS, N0=N0)

In [None]:
def MonteCarloES(agent, reduce_s, reward_p, RAM_mask: List[int], render: bool=False):
    epi = episode.generate_episode(env, reduce_state=reduce_s, reward_policy=reward_p, agent=agent, RAM_mask=RAM_mask, render=render)
    return agent.update_policy(epi)

In [None]:
# %%time
# MonteCarloES(agent=mc_agent,reduce_s=reduce_state, reward_p=reward_policy, RAM_mask=RAM_mask, render=False)

In [None]:
%%time
scores = []
total_rewards = []


n_runs = 5

for i in range(n_runs):
    render = i % 201 == 200

    score, total_reward = MonteCarloES(agent=mc_agent,reduce_s=reduce_state, reward_p=reward_policy, RAM_mask=RAM_mask, render=render)

    scores.append(score)
    total_rewards.append(total_reward)

    print_result(i, scores, total_reward, score)

### 3.3. Q Learning

In [None]:
env, initial_state = environment.get_env()
ql_agent = agents.QLearning(gamma=GAMMA, available_actions=AVAILABLE_ACTIONS, N0=N0)

In [None]:
%%time
scores = []
total_rewards = []

n_runs = 1
render = False
for i in range(n_runs):
#     render = i % 200 == 0

    game_over = False
    state = env.reset()
    state = reduce_state(state)[RAM_mask].data.tobytes()  # Select useful bytes
    action = ql_agent.act(state)
    
    score = 0
    total_reward = 0

    while not game_over:
        if render:
            time.sleep(0.025)
            env.render()

        old_state = state
        ob, reward, game_over, _ = env.step(action)

        ob = reduce_state(ob)
        reward = reward_policy(reward, ob, action)

        total_reward += reward

        if reward == reward_policy.REWARD_IF_CROSS:
            score += 1

        state = ob[RAM_mask].data.tobytes()

        ql_agent.update_Q(old_state, state, action, reward)

        action = ql_agent.act(state)  # Next action

    scores.append(score)
    total_rewards.append(total_reward)

    print_result(i, scores, total_reward, score)

In [None]:
for key in ql_agent.Q:
    print('{}:{}'.format(np.frombuffer(key, dtype=np.uint8, count=-1), ql_agent.Q[key]))

### 3.4. Q Learning Approximator

In [6]:
env, initial_state = environment.get_env()
ql_agent_app = agents.QLearningLinearApprox(alpha=ALPHA, gamma=GAMMA, available_actions=AVAILABLE_ACTIONS, N0=N0, weights_length=len(RAM_mask), fixed_alpha=True, feat_type='all') #len(RAM_mask)
ql_agent_app.trainScaler(env, RAM_mask)

In [None]:
# state = env.reset()
# ql_agent_app.createFeature(reduce_state(state)[RAM_mask].data.tobytes(), 1)

In [None]:
# %%time
# scores = []
# total_rewards = []

# n_runs = 2
# render = False
# for i in range(n_runs):
# #     render = i % 200 == 0

#     game_over = False
#     state = env.reset()
#     state = reduce_state(state)[RAM_mask].data.tobytes()  # Select useful bytes
#     action = ql_agent_app.act(state)
    
#     score = 0
#     total_reward = 0

#     count = 0
#     print("Episode ",i)
#     while not game_over:
#         if render:
#             time.sleep(0.025)
#             env.render()

#         old_state = state
#         ob, reward, game_over, _ = env.step(action)

#         ob = reduce_state(ob)
#         reward = reward_policy(reward, ob, action)

#         total_reward += reward

#         if reward == reward_policy.REWARD_IF_CROSS:
#             score += 1

#         state = ob[RAM_mask].data.tobytes()
#         print("Run ", count)
#         ql_agent_app.update_W(old_state, state, action, reward)

#         action = ql_agent_app.act(state)  # Next action
#         count+=1
#         print('------------------------------------')
#     scores.append(score)
#     total_rewards.append(total_reward)

#     print_result(i, scores, total_reward, score)
    

In [7]:
%%time
scores = []
total_rewards = []
views = []

n_runs = 2
render = False
for i in range(n_runs):
#     render = i % 200 == 0

    game_over = False
    state = env.reset()
    views.append(state[RAM_mask])
    state = reduce_state(state)[RAM_mask].data.tobytes()  # Select useful bytes
    action = ql_agent_app.act(state)
    
    score = 0
    total_reward = 0

    count = 0
    print("Episode ",i)
    while not game_over:
        if render:
            time.sleep(0.025)
            env.render()

        old_state = state
        ob, reward, game_over, _ = env.step(action)

        views.append(ob[RAM_mask])
        ob = reduce_state(ob)
        reward = reward_policy(reward, ob, action)

        total_reward += reward

        if reward == reward_policy.REWARD_IF_CROSS:
            score += 1

        state = ob[RAM_mask].data.tobytes()
        print("Run ", count)
        ql_agent_app.update_W(old_state, state, action, reward)

        action = ql_agent_app.act(state)  # Next action
        count+=1
        print('------------------------------------')
    scores.append(score)
    total_rewards.append(total_reward)

    print_result(i, scores, total_reward, score)
    

Episode  0
Run  0
------------------------------------
Run  1
------------------------------------
Run  2
------------------------------------
Run  3
------------------------------------
Run  4
------------------------------------
Run  5
------------------------------------
Run  6
------------------------------------
Run  7
------------------------------------
Run  8
------------------------------------
Run  9
------------------------------------
Run  10
------------------------------------
Run  11
------------------------------------
Run  12
------------------------------------
Run  13
------------------------------------
Run  14
------------------------------------
Run  15
------------------------------------
Run  16
------------------------------------
Run  17
------------------------------------
Run  18
------------------------------------
Run  19
------------------------------------
Run  20
------------------------------------
Run  21
------------------------------------
Run  22
-

Run  186
------------------------------------
Run  187
------------------------------------
Run  188
------------------------------------
Run  189
------------------------------------
Run  190
------------------------------------
Run  191
------------------------------------
Run  192
------------------------------------
Run  193
------------------------------------
Run  194
------------------------------------
Run  195
------------------------------------
Run  196
------------------------------------
Run  197
------------------------------------
Run  198
------------------------------------
Run  199
------------------------------------
Run  200
------------------------------------
Run  201
------------------------------------
Run  202
------------------------------------
Run  203
------------------------------------
Run  204
------------------------------------
Run  205
------------------------------------
Run  206
------------------------------------
Run  207
-------------------------

Run  393
------------------------------------
Run  394
------------------------------------
Run  395
------------------------------------
Run  396
------------------------------------
Run  397
------------------------------------
Run  398
------------------------------------
Run  399
------------------------------------
Run  400
------------------------------------
Run  401
------------------------------------
Run  402
------------------------------------
Run  403
------------------------------------
Run  404
------------------------------------
Run  405
------------------------------------
Run  406
------------------------------------
Run  407
------------------------------------
Run  408
------------------------------------
Run  409
------------------------------------
Run  410
------------------------------------
Run  411
------------------------------------
Run  412
------------------------------------
Run  413
------------------------------------
Run  414
-------------------------

------------------------------------
Run  597
------------------------------------
Run  598
------------------------------------
Run  599
------------------------------------
Run  600
------------------------------------
Run  601
------------------------------------
Run  602
------------------------------------
Run  603
------------------------------------
Run  604
------------------------------------
Run  605
------------------------------------
Run  606
------------------------------------
Run  607
------------------------------------
Run  608
------------------------------------
Run  609
------------------------------------
Run  610
------------------------------------
Run  611
------------------------------------
Run  612
------------------------------------
Run  613
------------------------------------
Run  614
------------------------------------
Run  615
------------------------------------
Run  616
------------------------------------
Run  617
----------------------------------

------------------------------------
Run  797
------------------------------------
Run  798
------------------------------------
Run  799
------------------------------------
Run  800
------------------------------------
Run  801
------------------------------------
Run  802
------------------------------------
Run  803
------------------------------------
Run  804
------------------------------------
Run  805
------------------------------------
Run  806
------------------------------------
Run  807
------------------------------------
Run  808
------------------------------------
Run  809
------------------------------------
Run  810
------------------------------------
Run  811
------------------------------------
Run  812
------------------------------------
Run  813
------------------------------------
Run  814
------------------------------------
Run  815
------------------------------------
Run  816
------------------------------------
Run  817
----------------------------------

Run  1005
------------------------------------
Run  1006
------------------------------------
Run  1007
------------------------------------
Run  1008
------------------------------------
Run  1009
------------------------------------
Run  1010
------------------------------------
Run  1011
------------------------------------
Run  1012
------------------------------------
Run  1013
------------------------------------
Run  1014
------------------------------------
Run  1015
------------------------------------
Run  1016
------------------------------------
Run  1017
------------------------------------
Run  1018
------------------------------------
Run  1019
------------------------------------
Run  1020
------------------------------------
Run  1021
------------------------------------
Run  1022
------------------------------------
Run  1023
------------------------------------
Run  1024
------------------------------------
Run  1025
------------------------------------
Run  1026
---

Run  1204
------------------------------------
Run  1205
------------------------------------
Run  1206
------------------------------------
Run  1207
------------------------------------
Run  1208
------------------------------------
Run  1209
------------------------------------
Run  1210
------------------------------------
Run  1211
------------------------------------
Run  1212
------------------------------------
Run  1213
------------------------------------
Run  1214
------------------------------------
Run  1215
------------------------------------
Run  1216
------------------------------------
Run  1217
------------------------------------
Run  1218
------------------------------------
Run  1219
------------------------------------
Run  1220
------------------------------------
Run  1221
------------------------------------
Run  1222
------------------------------------
Run  1223
------------------------------------
Run  1224
------------------------------------
Run  1225
---

Run  1412
------------------------------------
Run  1413
------------------------------------
Run  1414
------------------------------------
Run  1415
------------------------------------
Run  1416
------------------------------------
Run  1417
------------------------------------
Run  1418
------------------------------------
Run  1419
------------------------------------
Run  1420
------------------------------------
Run  1421
------------------------------------
Run  1422
------------------------------------
Run  1423
------------------------------------
Run  1424
------------------------------------
Run  1425
------------------------------------
Run  1426
------------------------------------
Run  1427
------------------------------------
Run  1428
------------------------------------
Run  1429
------------------------------------
Run  1430
------------------------------------
Run  1431
------------------------------------
Run  1432
------------------------------------
Run  1433
---

Run  1616
------------------------------------
Run  1617
------------------------------------
Run  1618
------------------------------------
Run  1619
------------------------------------
Run  1620
------------------------------------
Run  1621
------------------------------------
Run  1622
------------------------------------
Run  1623
------------------------------------
Run  1624
------------------------------------
Run  1625
------------------------------------
Run  1626
------------------------------------
Run  1627
------------------------------------
Run  1628
------------------------------------
Run  1629
------------------------------------
Run  1630
------------------------------------
Run  1631
------------------------------------
Run  1632
------------------------------------
Run  1633
------------------------------------
Run  1634
------------------------------------
Run  1635
------------------------------------
Run  1636
------------------------------------
Run  1637
---

Run  1824
------------------------------------
Run  1825
------------------------------------
Run  1826
------------------------------------
Run  1827
------------------------------------
Run  1828
------------------------------------
Run  1829
------------------------------------
Run  1830
------------------------------------
Run  1831
------------------------------------
Run  1832
------------------------------------
Run  1833
------------------------------------
Run  1834
------------------------------------
Run  1835
------------------------------------
Run  1836
------------------------------------
Run  1837
------------------------------------
Run  1838
------------------------------------
Run  1839
------------------------------------
Run  1840
------------------------------------
Run  1841
------------------------------------
Run  1842
------------------------------------
Run  1843
------------------------------------
Run  1844
------------------------------------
Run  1845
---

Run  2027
------------------------------------
Run  2028
------------------------------------
Run  2029
------------------------------------
Run  2030
------------------------------------
Run  2031
------------------------------------
Run  2032
------------------------------------
Run  2033
------------------------------------
Run  2034
------------------------------------
Run  2035
------------------------------------
Run  2036
------------------------------------
Run  2037
------------------------------------
Run  2038
------------------------------------
Run  2039
------------------------------------
Run  2040
------------------------------------
Run  2041
------------------------------------
Run  2042
------------------------------------
Run  2043
------------------------------------
Run  2044
------------------------------------
Run  2045
------------------------------------
Run  2046
------------------------------------
Run  2047
------------------------------------
Run  2048
---

Run  2227
------------------------------------
Run  2228
------------------------------------
Run  2229
------------------------------------
Run  2230
------------------------------------
Run  2231
------------------------------------
Run  2232
------------------------------------
Run  2233
------------------------------------
Run  2234
------------------------------------
Run  2235
------------------------------------
Run  2236
------------------------------------
Run  2237
------------------------------------
Run  2238
------------------------------------
Run  2239
------------------------------------
Run  2240
------------------------------------
Run  2241
------------------------------------
Run  2242
------------------------------------
Run  2243
------------------------------------
Run  2244
------------------------------------
Run  2245
------------------------------------
Run  2246
------------------------------------
Run  2247
------------------------------------
Run  2248
---

------------------------------------
Run  2429
------------------------------------
Run  2430
------------------------------------
Run  2431
------------------------------------
Run  2432
------------------------------------
Run  2433
------------------------------------
Run  2434
------------------------------------
Run  2435
------------------------------------
Run  2436
------------------------------------
Run  2437
------------------------------------
Run  2438
------------------------------------
Run  2439
------------------------------------
Run  2440
------------------------------------
Run  2441
------------------------------------
Run  2442
------------------------------------
Run  2443
------------------------------------
Run  2444
------------------------------------
Run  2445
------------------------------------
Run  2446
------------------------------------
Run  2447
------------------------------------
Run  2448
------------------------------------
Run  2449
-------------

Run  2642
------------------------------------
Run  2643
------------------------------------
Run  2644
------------------------------------
Run  2645
------------------------------------
Run  2646
------------------------------------
Run  2647
------------------------------------
Run  2648
------------------------------------
Run  2649
------------------------------------
Run  2650
------------------------------------
Run  2651
------------------------------------
Run  2652
------------------------------------
Run  2653
------------------------------------
Run  2654
------------------------------------
Run  2655
------------------------------------
Run  2656
------------------------------------
Run  2657
------------------------------------
Run  2658
------------------------------------
Run  2659
------------------------------------
Run  2660
------------------------------------
Run  2661
------------------------------------
Run  2662
------------------------------------
Run  2663
---

Run  115
------------------------------------
Run  116
------------------------------------
Run  117
------------------------------------
Run  118
------------------------------------
Run  119
------------------------------------
Run  120
------------------------------------
Run  121
------------------------------------
Run  122
------------------------------------
Run  123
------------------------------------
Run  124
------------------------------------
Run  125
------------------------------------
Run  126
------------------------------------
Run  127
------------------------------------
Run  128
------------------------------------
Run  129
------------------------------------
Run  130
------------------------------------
Run  131
------------------------------------
Run  132
------------------------------------
Run  133
------------------------------------
Run  134
------------------------------------
Run  135
------------------------------------
Run  136
-------------------------

Run  321
------------------------------------
Run  322
------------------------------------
Run  323
------------------------------------
Run  324
------------------------------------
Run  325
------------------------------------
Run  326
------------------------------------
Run  327
------------------------------------
Run  328
------------------------------------
Run  329
------------------------------------
Run  330
------------------------------------
Run  331
------------------------------------
Run  332
------------------------------------
Run  333
------------------------------------
Run  334
------------------------------------
Run  335
------------------------------------
Run  336
------------------------------------
Run  337
------------------------------------
Run  338
------------------------------------
Run  339
------------------------------------
Run  340
------------------------------------
Run  341
------------------------------------
Run  342
-------------------------

Run  524
------------------------------------
Run  525
------------------------------------
Run  526
------------------------------------
Run  527
------------------------------------
Run  528
------------------------------------
Run  529
------------------------------------
Run  530
------------------------------------
Run  531
------------------------------------
Run  532
------------------------------------
Run  533
------------------------------------
Run  534
------------------------------------
Run  535
------------------------------------
Run  536
------------------------------------
Run  537
------------------------------------
Run  538
------------------------------------
Run  539
------------------------------------
Run  540
------------------------------------
Run  541
------------------------------------
Run  542
------------------------------------
Run  543
------------------------------------
Run  544
------------------------------------
Run  545
-------------------------

Run  734
------------------------------------
Run  735
------------------------------------
Run  736
------------------------------------
Run  737
------------------------------------
Run  738
------------------------------------
Run  739
------------------------------------
Run  740
------------------------------------
Run  741
------------------------------------
Run  742
------------------------------------
Run  743
------------------------------------
Run  744
------------------------------------
Run  745
------------------------------------
Run  746
------------------------------------
Run  747
------------------------------------
Run  748
------------------------------------
Run  749
------------------------------------
Run  750
------------------------------------
Run  751
------------------------------------
Run  752
------------------------------------
Run  753
------------------------------------
Run  754
------------------------------------
Run  755
-------------------------

Run  933
------------------------------------
Run  934
------------------------------------
Run  935
------------------------------------
Run  936
------------------------------------
Run  937
------------------------------------
Run  938
------------------------------------
Run  939
------------------------------------
Run  940
------------------------------------
Run  941
------------------------------------
Run  942
------------------------------------
Run  943
------------------------------------
Run  944
------------------------------------
Run  945
------------------------------------
Run  946
------------------------------------
Run  947
------------------------------------
Run  948
------------------------------------
Run  949
------------------------------------
Run  950
------------------------------------
Run  951
------------------------------------
Run  952
------------------------------------
Run  953
------------------------------------
Run  954
-------------------------

Run  1134
------------------------------------
Run  1135
------------------------------------
Run  1136
------------------------------------
Run  1137
------------------------------------
Run  1138
------------------------------------
Run  1139
------------------------------------
Run  1140
------------------------------------
Run  1141
------------------------------------
Run  1142
------------------------------------
Run  1143
------------------------------------
Run  1144
------------------------------------
Run  1145
------------------------------------
Run  1146
------------------------------------
Run  1147
------------------------------------
Run  1148
------------------------------------
Run  1149
------------------------------------
Run  1150
------------------------------------
Run  1151
------------------------------------
Run  1152
------------------------------------
Run  1153
------------------------------------
Run  1154
------------------------------------
Run  1155
---

Run  1340
------------------------------------
Run  1341
------------------------------------
Run  1342
------------------------------------
Run  1343
------------------------------------
Run  1344
------------------------------------
Run  1345
------------------------------------
Run  1346
------------------------------------
Run  1347
------------------------------------
Run  1348
------------------------------------
Run  1349
------------------------------------
Run  1350
------------------------------------
Run  1351
------------------------------------
Run  1352
------------------------------------
Run  1353
------------------------------------
Run  1354
------------------------------------
Run  1355
------------------------------------
Run  1356
------------------------------------
Run  1357
------------------------------------
Run  1358
------------------------------------
Run  1359
------------------------------------
Run  1360
------------------------------------
Run  1361
---

Run  1541
------------------------------------
Run  1542
------------------------------------
Run  1543
------------------------------------
Run  1544
------------------------------------
Run  1545
------------------------------------
Run  1546
------------------------------------
Run  1547
------------------------------------
Run  1548
------------------------------------
Run  1549
------------------------------------
Run  1550
------------------------------------
Run  1551
------------------------------------
Run  1552
------------------------------------
Run  1553
------------------------------------
Run  1554
------------------------------------
Run  1555
------------------------------------
Run  1556
------------------------------------
Run  1557
------------------------------------
Run  1558
------------------------------------
Run  1559
------------------------------------
Run  1560
------------------------------------
Run  1561
------------------------------------
Run  1562
---

Run  1743
------------------------------------
Run  1744
------------------------------------
Run  1745
------------------------------------
Run  1746
------------------------------------
Run  1747
------------------------------------
Run  1748
------------------------------------
Run  1749
------------------------------------
Run  1750
------------------------------------
Run  1751
------------------------------------
Run  1752
------------------------------------
Run  1753
------------------------------------
Run  1754
------------------------------------
Run  1755
------------------------------------
Run  1756
------------------------------------
Run  1757
------------------------------------
Run  1758
------------------------------------
Run  1759
------------------------------------
Run  1760
------------------------------------
Run  1761
------------------------------------
Run  1762
------------------------------------
Run  1763
------------------------------------
Run  1764
---

------------------------------------
Run  1950
------------------------------------
Run  1951
------------------------------------
Run  1952
------------------------------------
Run  1953
------------------------------------
Run  1954
------------------------------------
Run  1955
------------------------------------
Run  1956
------------------------------------
Run  1957
------------------------------------
Run  1958
------------------------------------
Run  1959
------------------------------------
Run  1960
------------------------------------
Run  1961
------------------------------------
Run  1962
------------------------------------
Run  1963
------------------------------------
Run  1964
------------------------------------
Run  1965
------------------------------------
Run  1966
------------------------------------
Run  1967
------------------------------------
Run  1968
------------------------------------
Run  1969
------------------------------------
Run  1970
-------------

Run  2158
------------------------------------
Run  2159
------------------------------------
Run  2160
------------------------------------
Run  2161
------------------------------------
Run  2162
------------------------------------
Run  2163
------------------------------------
Run  2164
------------------------------------
Run  2165
------------------------------------
Run  2166
------------------------------------
Run  2167
------------------------------------
Run  2168
------------------------------------
Run  2169
------------------------------------
Run  2170
------------------------------------
Run  2171
------------------------------------
Run  2172
------------------------------------
Run  2173
------------------------------------
Run  2174
------------------------------------
Run  2175
------------------------------------
Run  2176
------------------------------------
Run  2177
------------------------------------
Run  2178
------------------------------------
Run  2179
---

Run  2361
------------------------------------
Run  2362
------------------------------------
Run  2363
------------------------------------
Run  2364
------------------------------------
Run  2365
------------------------------------
Run  2366
------------------------------------
Run  2367
------------------------------------
Run  2368
------------------------------------
Run  2369
------------------------------------
Run  2370
------------------------------------
Run  2371
------------------------------------
Run  2372
------------------------------------
Run  2373
------------------------------------
Run  2374
------------------------------------
Run  2375
------------------------------------
Run  2376
------------------------------------
Run  2377
------------------------------------
Run  2378
------------------------------------
Run  2379
------------------------------------
Run  2380
------------------------------------
Run  2381
------------------------------------
Run  2382
---

------------------------------------
Run  2571
------------------------------------
Run  2572
------------------------------------
Run  2573
------------------------------------
Run  2574
------------------------------------
Run  2575
------------------------------------
Run  2576
------------------------------------
Run  2577
------------------------------------
Run  2578
------------------------------------
Run  2579
------------------------------------
Run  2580
------------------------------------
Run  2581
------------------------------------
Run  2582
------------------------------------
Run  2583
------------------------------------
Run  2584
------------------------------------
Run  2585
------------------------------------
Run  2586
------------------------------------
Run  2587
------------------------------------
Run  2588
------------------------------------
Run  2589
------------------------------------
Run  2590
------------------------------------
Run  2591
-------------

### 3.4. Q Learning Approximator 2

In [8]:
env, initial_state = environment.get_env()
ql_agent_app = agents.QLearningLinearApprox2(alpha=ALPHA, gamma=GAMMA, available_actions=AVAILABLE_ACTIONS, N0=N0, weights_length=len(RAM_mask), fixed_alpha=True, feat_type='all') #len(RAM_mask)
ql_agent_app.trainScaler(env, RAM_mask)

In [10]:
%%time
# scores = []
# total_rewards = []

n_runs = 500
render = False
for i in range(n_runs):
#     render = i % 200 == 0

    game_over = False
    state = env.reset()
    state = reduce_state(state)[RAM_mask].data.tobytes()  # Select useful bytes
    action = ql_agent_app.act(state)
    
    score = 0
    total_reward = 0

    count = 0
    print("Episode ",i)
    while not game_over:
        if render:
            time.sleep(0.025)
            env.render()

        old_state = state
        ob, reward, game_over, _ = env.step(action)

        ob = reduce_state(ob)
        reward = reward_policy(reward, ob, action)

        total_reward += reward

        if reward == reward_policy.REWARD_IF_CROSS:
            score += 1

        state = ob[RAM_mask].data.tobytes()
#         print("Run ", count)
        ql_agent_app.update_W(old_state, state, action, reward)

        action = ql_agent_app.act(state)  # Next action
        count+=1
#         print('------------------------------------')
    scores.append(score)
    total_rewards.append(total_reward)

    print_result(i, scores, total_reward, score)
    

Episode  0
Run [   0] - Total reward:  947.20 Mean scores: 19.26 Means Scores[:-10]: 22.00 Score: 21 
Episode  1
Run [   1] - Total reward:  948.10 Mean scores: 19.26 Means Scores[:-10]: 21.90 Score: 21 
Episode  2
Run [   2] - Total reward: 1009.90 Mean scores: 19.27 Means Scores[:-10]: 22.00 Score: 22 
Episode  3
Run [   3] - Total reward:  958.20 Mean scores: 19.27 Means Scores[:-10]: 21.90 Score: 21 
Episode  4
Run [   4] - Total reward: 1013.80 Mean scores: 19.28 Means Scores[:-10]: 22.10 Score: 22 
Episode  5
Run [   5] - Total reward:  958.00 Mean scores: 19.28 Means Scores[:-10]: 22.10 Score: 21 
Episode  6
Run [   6] - Total reward:  958.40 Mean scores: 19.28 Means Scores[:-10]: 21.90 Score: 21 
Episode  7
Run [   7] - Total reward:  959.90 Mean scores: 19.29 Means Scores[:-10]: 21.50 Score: 21 
Episode  8
Run [   8] - Total reward:  957.80 Mean scores: 19.29 Means Scores[:-10]: 21.30 Score: 21 
Episode  9
Run [   9] - Total reward: 1178.10 Mean scores: 19.30 Means Scores[:-10

Run [  80] - Total reward:  961.80 Mean scores: 19.56 Means Scores[:-10]: 20.90 Score: 21 
Episode  81
Run [  81] - Total reward: 1015.10 Mean scores: 19.57 Means Scores[:-10]: 21.00 Score: 22 
Episode  82
Run [  82] - Total reward: 1067.90 Mean scores: 19.57 Means Scores[:-10]: 21.30 Score: 23 
Episode  83
Run [  83] - Total reward: 1012.60 Mean scores: 19.58 Means Scores[:-10]: 21.40 Score: 22 
Episode  84
Run [  84] - Total reward: 1061.50 Mean scores: 19.58 Means Scores[:-10]: 21.50 Score: 23 
Episode  85
Run [  85] - Total reward:  900.40 Mean scores: 19.59 Means Scores[:-10]: 21.30 Score: 20 
Episode  86
Run [  86] - Total reward: 1005.50 Mean scores: 19.59 Means Scores[:-10]: 21.40 Score: 22 
Episode  87
Run [  87] - Total reward: 1122.40 Mean scores: 19.60 Means Scores[:-10]: 21.80 Score: 24 
Episode  88
Run [  88] - Total reward: 1007.30 Mean scores: 19.60 Means Scores[:-10]: 21.90 Score: 22 
Episode  89
Run [  89] - Total reward:  967.90 Mean scores: 19.60 Means Scores[:-10]:

Run [ 159] - Total reward: 1073.70 Mean scores: 19.83 Means Scores[:-10]: 21.70 Score: 23 
Episode  160
Run [ 160] - Total reward: 1071.90 Mean scores: 19.83 Means Scores[:-10]: 21.80 Score: 23 
Episode  161
Run [ 161] - Total reward: 1022.60 Mean scores: 19.84 Means Scores[:-10]: 21.80 Score: 22 
Episode  162
Run [ 162] - Total reward:  971.80 Mean scores: 19.84 Means Scores[:-10]: 21.90 Score: 21 
Episode  163
Run [ 163] - Total reward: 1070.30 Mean scores: 19.84 Means Scores[:-10]: 22.00 Score: 23 
Episode  164
Run [ 164] - Total reward: 1183.10 Mean scores: 19.85 Means Scores[:-10]: 22.30 Score: 25 
Episode  165
Run [ 165] - Total reward: 1073.70 Mean scores: 19.85 Means Scores[:-10]: 22.50 Score: 23 
Episode  166
Run [ 166] - Total reward: 1027.20 Mean scores: 19.86 Means Scores[:-10]: 22.60 Score: 22 
Episode  167
Run [ 167] - Total reward:  979.30 Mean scores: 19.86 Means Scores[:-10]: 22.70 Score: 21 
Episode  168
Run [ 168] - Total reward: 1061.60 Mean scores: 19.86 Means Scor

Run [ 238] - Total reward: 1085.80 Mean scores: 20.08 Means Scores[:-10]: 22.60 Score: 23 
Episode  239
Run [ 239] - Total reward: 1021.70 Mean scores: 20.09 Means Scores[:-10]: 22.30 Score: 22 
Episode  240
Run [ 240] - Total reward:  963.70 Mean scores: 20.09 Means Scores[:-10]: 22.20 Score: 21 
Episode  241
Run [ 241] - Total reward: 1071.50 Mean scores: 20.09 Means Scores[:-10]: 22.10 Score: 23 
Episode  242
Run [ 242] - Total reward: 1012.00 Mean scores: 20.09 Means Scores[:-10]: 22.20 Score: 22 
Episode  243
Run [ 243] - Total reward: 1181.50 Mean scores: 20.10 Means Scores[:-10]: 22.40 Score: 25 
Episode  244
Run [ 244] - Total reward: 1064.60 Mean scores: 20.10 Means Scores[:-10]: 22.30 Score: 23 
Episode  245
Run [ 245] - Total reward: 1021.80 Mean scores: 20.11 Means Scores[:-10]: 22.60 Score: 22 
Episode  246
Run [ 246] - Total reward: 1118.10 Mean scores: 20.11 Means Scores[:-10]: 22.70 Score: 24 
Episode  247
Run [ 247] - Total reward:  967.40 Mean scores: 20.11 Means Scor

Run [ 317] - Total reward: 1066.60 Mean scores: 20.34 Means Scores[:-10]: 22.60 Score: 23 
Episode  318
Run [ 318] - Total reward: 1073.10 Mean scores: 20.34 Means Scores[:-10]: 22.50 Score: 23 
Episode  319
Run [ 319] - Total reward: 1016.70 Mean scores: 20.34 Means Scores[:-10]: 22.40 Score: 22 
Episode  320
Run [ 320] - Total reward: 1083.80 Mean scores: 20.35 Means Scores[:-10]: 22.30 Score: 23 
Episode  321
Run [ 321] - Total reward: 1081.70 Mean scores: 20.35 Means Scores[:-10]: 22.50 Score: 23 
Episode  322
Run [ 322] - Total reward:  914.70 Mean scores: 20.35 Means Scores[:-10]: 22.30 Score: 20 
Episode  323
Run [ 323] - Total reward: 1126.20 Mean scores: 20.35 Means Scores[:-10]: 22.50 Score: 24 
Episode  324
Run [ 324] - Total reward: 1022.30 Mean scores: 20.36 Means Scores[:-10]: 22.50 Score: 22 
Episode  325
Run [ 325] - Total reward: 1082.40 Mean scores: 20.36 Means Scores[:-10]: 22.80 Score: 23 
Episode  326
Run [ 326] - Total reward: 1082.70 Mean scores: 20.36 Means Scor

Run [ 396] - Total reward: 1023.80 Mean scores: 20.55 Means Scores[:-10]: 22.50 Score: 22 
Episode  397
Run [ 397] - Total reward: 1053.60 Mean scores: 20.55 Means Scores[:-10]: 22.50 Score: 23 
Episode  398
Run [ 398] - Total reward: 1078.40 Mean scores: 20.55 Means Scores[:-10]: 22.60 Score: 23 
Episode  399
Run [ 399] - Total reward: 1018.90 Mean scores: 20.56 Means Scores[:-10]: 22.70 Score: 22 
Episode  400
Run [ 400] - Total reward: 1016.20 Mean scores: 20.56 Means Scores[:-10]: 22.60 Score: 22 
Episode  401
Run [ 401] - Total reward: 1065.10 Mean scores: 20.56 Means Scores[:-10]: 22.60 Score: 23 
Episode  402
Run [ 402] - Total reward: 1021.30 Mean scores: 20.56 Means Scores[:-10]: 22.60 Score: 22 
Episode  403
Run [ 403] - Total reward: 1030.00 Mean scores: 20.56 Means Scores[:-10]: 22.60 Score: 22 
Episode  404
Run [ 404] - Total reward: 1023.20 Mean scores: 20.56 Means Scores[:-10]: 22.50 Score: 22 
Episode  405
Run [ 405] - Total reward:  969.50 Mean scores: 20.57 Means Scor

Run [ 475] - Total reward: 1025.30 Mean scores: 20.73 Means Scores[:-10]: 22.90 Score: 22 
Episode  476
Run [ 476] - Total reward: 1070.00 Mean scores: 20.73 Means Scores[:-10]: 22.80 Score: 23 
Episode  477
Run [ 477] - Total reward: 1024.20 Mean scores: 20.74 Means Scores[:-10]: 22.80 Score: 22 
Episode  478
Run [ 478] - Total reward:  972.50 Mean scores: 20.74 Means Scores[:-10]: 22.50 Score: 21 
Episode  479
Run [ 479] - Total reward:  966.00 Mean scores: 20.74 Means Scores[:-10]: 22.30 Score: 21 
Episode  480
Run [ 480] - Total reward: 1123.90 Mean scores: 20.74 Means Scores[:-10]: 22.30 Score: 24 
Episode  481
Run [ 481] - Total reward: 1022.90 Mean scores: 20.74 Means Scores[:-10]: 22.50 Score: 22 
Episode  482
Run [ 482] - Total reward: 1064.70 Mean scores: 20.74 Means Scores[:-10]: 22.40 Score: 23 
Episode  483
Run [ 483] - Total reward: 1127.30 Mean scores: 20.75 Means Scores[:-10]: 22.50 Score: 24 
Episode  484
Run [ 484] - Total reward: 1175.20 Mean scores: 20.75 Means Scor

The previous algorithms aim to solve control problems using a model free approach that depends on Q tables, which are structures that store the values associated with how good is to take an action A in a state S, called Q values.

Although using Q tables are good for solving small learning problems, they suffer with a trouble called the curse of dimensionality, in which even some small environments can generate a huge amount of possible states, requiring a large amount of memory that is not available. This drawback often prevents those techniques to be used in many tasks from real life that could benefit from it.

One of the solutions developed to deal with this problem is Function Approximation. On it, instead of trying to find the optimal values for a very large table, we try to find the best parameters for a parameterized function whose objective is to approximate the optimal values that we would find on that table.

Mathematically, we say that we have a family of parameterized functions $\mathcal{Q}$ given by $Q_{\theta}: S\times A \rightarrow \mathbb{R}$, where $\theta$ is an array of parameters in $\mathbb{R}^d$, called weights, and $d << |S|$. Given that, the objective of a function approximator is to find the array of weights $\theta^*$ that produces the $Q_{\theta}^*$ that better approximates the optimal Q values ($Q^*$) for the problem addressed. Some of the greatest advantages of these algorithms is that they learn to generalize for unseen states and requires a much smaller set of values to be learned (d instead of |S|).

For this project, we experimented with linear function approximators, which are given by the following equation, where $\hat{q}(s,a)$ is the approximated value of choosing action **a** in state **s**, $\theta_i$ is the i-th element of the array of parameters and $x_i$ is a function that turns the action-state pair into the i-th feature of a d-dimensional feature array.

$\hat{q}(s,a,\theta) = \sum_{i = 1}^d \theta_i*x_i(s,a)$



In order to discover the best parameters, we applied a Stochastic Gradient Descent algorithm to update the weights, given by the equation below:

$\theta = \theta + \alpha*(target - \hat{q}(S_t, A_t, \theta))*x(S_t, A_t)$

The **target** is the value that our function approximator tries to achieve at each update and it changes for each algorithm approximated (Monte Carlo, Q Learning and Sarsa Lambda), and $\alpha$ is a learning step size.

One of the critical points of function approximators is the choice of the features to be used to represent the states. Depending on their choice, we can build good or bad function approximators. For this project, we experimented with a set of different features, as described in the next sections.
