In [24]:
import random
import gym
import numpy as np
from collections import deque
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

In [25]:
# Gym environment id handed to gym.make().
ENV_NAME = "CartPole-v1"

# --- Q-learning / optimiser settings ---------------------------------------
# Multiplier applied to the best next-state Q-value when building the target.
GAMMA = 0.95
# Step size passed to the Adam optimiser.
LEARNING_RATE = 0.001

# --- Replay buffer settings ------------------------------------------------
# Upper bound on stored transitions (deque maxlen).
MEMORY_SIZE = 10 ** 6
# Number of transitions sampled per experience_replay() call; replay is
# skipped until the buffer holds at least this many.
BATCH_SIZE = 20

# --- Epsilon-greedy exploration schedule -----------------------------------
# Starting probability of taking a random action.
EXPLORATION_MAX = 1.0
# Floor below which the exploration rate is never reduced.
EXPLORATION_MIN = 0.01
# Factor the exploration rate is multiplied by after each replay pass.
EXPLORATION_DECAY = 0.995

In [26]:
class DQNSolver:
    """Epsilon-greedy agent backed by a small Keras Q-network and a replay buffer.

    Attributes:
        exploration_rate: Current probability of picking a random action.
        action_space: Number of discrete actions (also the network's output width).
        memory: Bounded deque of (state, action, reward, next_state, done) tuples.
        model: Compiled Keras model mapping a state to one Q-value per action.
    """

    def __init__(self, observation_space, action_space):
        """Build the Q-network and an empty replay buffer.

        Args:
            observation_space: Length of a single observation vector.
            action_space: Number of discrete actions.
        """
        self.exploration_rate = EXPLORATION_MAX

        self.action_space = action_space
        self.memory = deque(maxlen=MEMORY_SIZE)

        # Two hidden ReLU layers of 24 units, then a linear head with one
        # output per action.
        network = Sequential()
        for layer in (
            Dense(24, input_shape=(observation_space,), activation="relu"),
            Dense(24, activation="relu"),
            Dense(self.action_space, activation="linear"),
        ):
            network.add(layer)
        # NOTE(review): newer Keras releases call this keyword `learning_rate`;
        # confirm against the installed version before changing it.
        network.compile(loss="mse", optimizer=Adam(lr=LEARNING_RATE))
        self.model = network

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the replay buffer."""
        transition = (state, action, reward, next_state, done)
        self.memory.append(transition)

    def act(self, state):
        """Choose an action for `state` using the epsilon-greedy rule.

        With probability `exploration_rate` a uniformly random action index is
        returned; otherwise the index of the largest predicted Q-value.
        `state` is passed straight to `model.predict`, and row 0 of the
        prediction is used.
        """
        explore = np.random.rand() < self.exploration_rate
        if explore:
            return random.randrange(self.action_space)
        predicted = self.model.predict(state)
        return np.argmax(predicted[0])

    def experience_replay(self):
        """Fit the network on a random minibatch, then decay the exploration rate.

        Does nothing until the buffer holds at least BATCH_SIZE transitions.
        Each sampled transition is fitted individually: the target for the
        taken action is the reward, plus GAMMA times the best predicted
        next-state Q-value when the transition is not terminal.
        """
        if len(self.memory) >= BATCH_SIZE:
            for sample in random.sample(self.memory, BATCH_SIZE):
                state, action, reward, state_next, terminal = sample
                if terminal:
                    target = reward
                else:
                    best_next = np.amax(self.model.predict(state_next)[0])
                    target = (reward + GAMMA * best_next)
                # Only the taken action's Q-value is moved towards the target;
                # the other outputs keep their current predictions.
                targets = self.model.predict(state)
                targets[0][action] = target
                self.model.fit(state, targets, verbose=0)
            decayed = self.exploration_rate * EXPLORATION_DECAY
            self.exploration_rate = max(EXPLORATION_MIN, decayed)

In [31]:
def cartpole(max_runs=None):
    """Train a DQNSolver on the ENV_NAME environment, printing one line per episode.

    Each episode resets the environment, then repeatedly asks the solver for an
    action, stores the resulting transition, and runs one experience-replay
    pass per non-final step. When the episode ends, the run number, current
    exploration rate and step count are printed.

    Args:
        max_runs: Optional number of episodes to run. ``None`` (the default)
            trains until the process is interrupted.
    """
    env = gym.make(ENV_NAME)
    try:
        observation_space = env.observation_space.shape[0]
        action_space = env.action_space.n
        dqn_solver = DQNSolver(observation_space, action_space)
        run = 0
        while max_runs is None or run < max_runs:
            run += 1
            state = env.reset()
            # The network expects a batch dimension: shape (1, observation_space).
            state = np.reshape(state, [1, observation_space])
            step = 0
            while True:
                step += 1
                # env.render()  # uncomment to watch the agent
                action = dqn_solver.act(state)
                state_next, reward, terminal, info = env.step(action)
                # An episode cut off by gym's time limit is a success, not a
                # failure: it must not be penalised or stored as terminal,
                # otherwise the agent is taught that surviving is bad. Gym
                # versions that do not set this key fall back to treating every
                # `done` as a failure.
                truncated = bool(info.get("TimeLimit.truncated", False))
                failed = terminal and not truncated
                reward = -reward if failed else reward
                state_next = np.reshape(state_next, [1, observation_space])
                dqn_solver.remember(state, action, reward, state_next, failed)
                state = state_next
                if terminal:
                    print("Run: " + str(run) + ", exploration: " + str(dqn_solver.exploration_rate) + ", score: " + str(step))
                    break
                dqn_solver.experience_replay()
    finally:
        # Release the environment even when training is interrupted.
        env.close()

In [None]:
# Entry point: start training when this cell/script is executed directly.
# cartpole() is called with no arguments and loops over episodes until the
# kernel/process is interrupted.
if __name__ == "__main__":
    cartpole()

Run: 1, exploration: 0.9322301194154049, score: 34
Run: 2, exploration: 0.798065677681905, score: 32
Run: 3, exploration: 0.7590483508202912, score: 11
Run: 4, exploration: 0.5819594443402982, score: 54
Run: 5, exploration: 0.5452463540625918, score: 14
Run: 6, exploration: 0.5185893309484582, score: 11
Run: 7, exploration: 0.4883155414435353, score: 13
Run: 8, exploration: 0.4484282034609769, score: 18
Run: 9, exploration: 0.42013897252428334, score: 14
Run: 10, exploration: 0.3858205374665315, score: 18
Run: 11, exploration: 0.3596735257153405, score: 15
Run: 12, exploration: 0.3237376186352221, score: 22
Run: 13, exploration: 0.29729358661854943, score: 18
Run: 14, exploration: 0.2649210072611673, score: 24
Run: 15, exploration: 0.2494556624678441, score: 13
Run: 16, exploration: 0.23489314109365644, score: 13
Run: 17, exploration: 0.22007483514733558, score: 14
Run: 18, exploration: 0.20209834538617025, score: 18
Run: 19, exploration: 0.19415447453059972, score: 9
Run: 20, explorat

Run: 199, exploration: 0.01, score: 358
Run: 200, exploration: 0.01, score: 222
Run: 201, exploration: 0.01, score: 101
Run: 202, exploration: 0.01, score: 128
Run: 203, exploration: 0.01, score: 57
Run: 204, exploration: 0.01, score: 117
Run: 205, exploration: 0.01, score: 204
Run: 206, exploration: 0.01, score: 230
Run: 207, exploration: 0.01, score: 500
Run: 208, exploration: 0.01, score: 107
Run: 209, exploration: 0.01, score: 357
Run: 210, exploration: 0.01, score: 241
Run: 211, exploration: 0.01, score: 105
Run: 212, exploration: 0.01, score: 118
Run: 213, exploration: 0.01, score: 496
Run: 214, exploration: 0.01, score: 161
Run: 215, exploration: 0.01, score: 226
Run: 216, exploration: 0.01, score: 250
Run: 217, exploration: 0.01, score: 34
Run: 218, exploration: 0.01, score: 191
Run: 219, exploration: 0.01, score: 153
Run: 220, exploration: 0.01, score: 336
Run: 221, exploration: 0.01, score: 118
Run: 222, exploration: 0.01, score: 250
Run: 223, exploration: 0.01, score: 296
Ru

Run: 406, exploration: 0.01, score: 206
Run: 407, exploration: 0.01, score: 173
Run: 408, exploration: 0.01, score: 377
Run: 409, exploration: 0.01, score: 475
Run: 410, exploration: 0.01, score: 500
Run: 411, exploration: 0.01, score: 213
Run: 412, exploration: 0.01, score: 164
Run: 413, exploration: 0.01, score: 115
Run: 414, exploration: 0.01, score: 149
Run: 415, exploration: 0.01, score: 214
Run: 416, exploration: 0.01, score: 257
Run: 417, exploration: 0.01, score: 349
Run: 418, exploration: 0.01, score: 138
Run: 419, exploration: 0.01, score: 239
Run: 420, exploration: 0.01, score: 144
Run: 421, exploration: 0.01, score: 68
Run: 422, exploration: 0.01, score: 114
Run: 423, exploration: 0.01, score: 94
Run: 424, exploration: 0.01, score: 121
Run: 425, exploration: 0.01, score: 101
Run: 426, exploration: 0.01, score: 127
Run: 427, exploration: 0.01, score: 127
Run: 428, exploration: 0.01, score: 112
Run: 429, exploration: 0.01, score: 269
Run: 430, exploration: 0.01, score: 163
Ru

Run: 613, exploration: 0.01, score: 12
Run: 614, exploration: 0.01, score: 26
Run: 615, exploration: 0.01, score: 16
Run: 616, exploration: 0.01, score: 32
Run: 617, exploration: 0.01, score: 108
Run: 618, exploration: 0.01, score: 145
Run: 619, exploration: 0.01, score: 500
Run: 620, exploration: 0.01, score: 277
Run: 621, exploration: 0.01, score: 465
Run: 622, exploration: 0.01, score: 79
Run: 623, exploration: 0.01, score: 76
Run: 624, exploration: 0.01, score: 55
Run: 625, exploration: 0.01, score: 29
Run: 626, exploration: 0.01, score: 108
Run: 627, exploration: 0.01, score: 153
Run: 628, exploration: 0.01, score: 94
Run: 629, exploration: 0.01, score: 110
Run: 630, exploration: 0.01, score: 90
Run: 631, exploration: 0.01, score: 147
Run: 632, exploration: 0.01, score: 14
Run: 633, exploration: 0.01, score: 98
Run: 634, exploration: 0.01, score: 500
Run: 635, exploration: 0.01, score: 58
Run: 636, exploration: 0.01, score: 47
Run: 637, exploration: 0.01, score: 93
Run: 638, explo

Run: 824, exploration: 0.01, score: 9
Run: 825, exploration: 0.01, score: 10
Run: 826, exploration: 0.01, score: 10
Run: 827, exploration: 0.01, score: 9
Run: 828, exploration: 0.01, score: 10
Run: 829, exploration: 0.01, score: 9
Run: 830, exploration: 0.01, score: 10
Run: 831, exploration: 0.01, score: 11
Run: 832, exploration: 0.01, score: 10
Run: 833, exploration: 0.01, score: 9
Run: 834, exploration: 0.01, score: 10
Run: 835, exploration: 0.01, score: 10
Run: 836, exploration: 0.01, score: 9
Run: 837, exploration: 0.01, score: 9
Run: 838, exploration: 0.01, score: 9
Run: 839, exploration: 0.01, score: 10
Run: 840, exploration: 0.01, score: 10
Run: 841, exploration: 0.01, score: 10
Run: 842, exploration: 0.01, score: 10
Run: 843, exploration: 0.01, score: 10
Run: 844, exploration: 0.01, score: 10
Run: 845, exploration: 0.01, score: 8
Run: 846, exploration: 0.01, score: 10
Run: 847, exploration: 0.01, score: 10
Run: 848, exploration: 0.01, score: 10
Run: 849, exploration: 0.01, scor

Run: 1036, exploration: 0.01, score: 9
Run: 1037, exploration: 0.01, score: 10
Run: 1038, exploration: 0.01, score: 9
Run: 1039, exploration: 0.01, score: 10
Run: 1040, exploration: 0.01, score: 9
Run: 1041, exploration: 0.01, score: 8
Run: 1042, exploration: 0.01, score: 8
Run: 1043, exploration: 0.01, score: 9
Run: 1044, exploration: 0.01, score: 8
Run: 1045, exploration: 0.01, score: 9
Run: 1046, exploration: 0.01, score: 8
Run: 1047, exploration: 0.01, score: 9
Run: 1048, exploration: 0.01, score: 8
Run: 1049, exploration: 0.01, score: 9
Run: 1050, exploration: 0.01, score: 9
Run: 1051, exploration: 0.01, score: 9
Run: 1052, exploration: 0.01, score: 8
Run: 1053, exploration: 0.01, score: 9
Run: 1054, exploration: 0.01, score: 8
Run: 1055, exploration: 0.01, score: 8
Run: 1056, exploration: 0.01, score: 9
Run: 1057, exploration: 0.01, score: 10
Run: 1058, exploration: 0.01, score: 10
Run: 1059, exploration: 0.01, score: 9
Run: 1060, exploration: 0.01, score: 9
Run: 1061, exploratio

Run: 1244, exploration: 0.01, score: 9
Run: 1245, exploration: 0.01, score: 10
Run: 1246, exploration: 0.01, score: 10
Run: 1247, exploration: 0.01, score: 9
Run: 1248, exploration: 0.01, score: 10
Run: 1249, exploration: 0.01, score: 8
Run: 1250, exploration: 0.01, score: 9
Run: 1251, exploration: 0.01, score: 9
Run: 1252, exploration: 0.01, score: 10
Run: 1253, exploration: 0.01, score: 9
Run: 1254, exploration: 0.01, score: 9
Run: 1255, exploration: 0.01, score: 10
Run: 1256, exploration: 0.01, score: 10
Run: 1257, exploration: 0.01, score: 10
Run: 1258, exploration: 0.01, score: 9
Run: 1259, exploration: 0.01, score: 10
Run: 1260, exploration: 0.01, score: 11
Run: 1261, exploration: 0.01, score: 9
Run: 1262, exploration: 0.01, score: 10
Run: 1263, exploration: 0.01, score: 11
Run: 1264, exploration: 0.01, score: 10
Run: 1265, exploration: 0.01, score: 10
Run: 1266, exploration: 0.01, score: 10
Run: 1267, exploration: 0.01, score: 9
Run: 1268, exploration: 0.01, score: 12
Run: 1269,

Run: 1452, exploration: 0.01, score: 8
Run: 1453, exploration: 0.01, score: 10
Run: 1454, exploration: 0.01, score: 8
Run: 1455, exploration: 0.01, score: 10
Run: 1456, exploration: 0.01, score: 9
Run: 1457, exploration: 0.01, score: 10
Run: 1458, exploration: 0.01, score: 9
Run: 1459, exploration: 0.01, score: 8
Run: 1460, exploration: 0.01, score: 10
Run: 1461, exploration: 0.01, score: 10
Run: 1462, exploration: 0.01, score: 10
Run: 1463, exploration: 0.01, score: 10
Run: 1464, exploration: 0.01, score: 9
Run: 1465, exploration: 0.01, score: 10
Run: 1466, exploration: 0.01, score: 9
Run: 1467, exploration: 0.01, score: 11
Run: 1468, exploration: 0.01, score: 11
Run: 1469, exploration: 0.01, score: 10
Run: 1470, exploration: 0.01, score: 8
Run: 1471, exploration: 0.01, score: 9
Run: 1472, exploration: 0.01, score: 9
Run: 1473, exploration: 0.01, score: 10
Run: 1474, exploration: 0.01, score: 11
Run: 1475, exploration: 0.01, score: 9
Run: 1476, exploration: 0.01, score: 10
Run: 1477, 

Run: 1658, exploration: 0.01, score: 18
Run: 1659, exploration: 0.01, score: 17
Run: 1660, exploration: 0.01, score: 19
Run: 1661, exploration: 0.01, score: 18
Run: 1662, exploration: 0.01, score: 16
Run: 1663, exploration: 0.01, score: 17
Run: 1664, exploration: 0.01, score: 18
Run: 1665, exploration: 0.01, score: 16
Run: 1666, exploration: 0.01, score: 18
Run: 1667, exploration: 0.01, score: 14
Run: 1668, exploration: 0.01, score: 14
Run: 1669, exploration: 0.01, score: 17
Run: 1670, exploration: 0.01, score: 18
Run: 1671, exploration: 0.01, score: 16
Run: 1672, exploration: 0.01, score: 16
Run: 1673, exploration: 0.01, score: 16
Run: 1674, exploration: 0.01, score: 14
Run: 1675, exploration: 0.01, score: 14
Run: 1676, exploration: 0.01, score: 19
Run: 1677, exploration: 0.01, score: 17
Run: 1678, exploration: 0.01, score: 16
Run: 1679, exploration: 0.01, score: 19
Run: 1680, exploration: 0.01, score: 17
Run: 1681, exploration: 0.01, score: 20
Run: 1682, exploration: 0.01, score: 18


Run: 1863, exploration: 0.01, score: 17
Run: 1864, exploration: 0.01, score: 21
Run: 1865, exploration: 0.01, score: 20
Run: 1866, exploration: 0.01, score: 16
Run: 1867, exploration: 0.01, score: 20
Run: 1868, exploration: 0.01, score: 16
Run: 1869, exploration: 0.01, score: 21
Run: 1870, exploration: 0.01, score: 20
Run: 1871, exploration: 0.01, score: 17
Run: 1872, exploration: 0.01, score: 16
Run: 1873, exploration: 0.01, score: 17
Run: 1874, exploration: 0.01, score: 19
Run: 1875, exploration: 0.01, score: 21
Run: 1876, exploration: 0.01, score: 21
Run: 1877, exploration: 0.01, score: 16
Run: 1878, exploration: 0.01, score: 17
Run: 1879, exploration: 0.01, score: 19
Run: 1880, exploration: 0.01, score: 17
Run: 1881, exploration: 0.01, score: 15
Run: 1882, exploration: 0.01, score: 17
Run: 1883, exploration: 0.01, score: 21
Run: 1884, exploration: 0.01, score: 20
Run: 1885, exploration: 0.01, score: 21
Run: 1886, exploration: 0.01, score: 18
Run: 1887, exploration: 0.01, score: 21


Run: 2068, exploration: 0.01, score: 42
Run: 2069, exploration: 0.01, score: 44
Run: 2070, exploration: 0.01, score: 37
Run: 2071, exploration: 0.01, score: 39
Run: 2072, exploration: 0.01, score: 39
Run: 2073, exploration: 0.01, score: 42
Run: 2074, exploration: 0.01, score: 35
Run: 2075, exploration: 0.01, score: 35
Run: 2076, exploration: 0.01, score: 37
Run: 2077, exploration: 0.01, score: 37
Run: 2078, exploration: 0.01, score: 41
Run: 2079, exploration: 0.01, score: 33
Run: 2080, exploration: 0.01, score: 36
Run: 2081, exploration: 0.01, score: 30
Run: 2082, exploration: 0.01, score: 30
Run: 2083, exploration: 0.01, score: 32
Run: 2084, exploration: 0.01, score: 33
Run: 2085, exploration: 0.01, score: 33
Run: 2086, exploration: 0.01, score: 35
Run: 2087, exploration: 0.01, score: 33
Run: 2088, exploration: 0.01, score: 37
Run: 2089, exploration: 0.01, score: 40
Run: 2090, exploration: 0.01, score: 35
Run: 2091, exploration: 0.01, score: 35
Run: 2092, exploration: 0.01, score: 34


Run: 2274, exploration: 0.01, score: 10
Run: 2275, exploration: 0.01, score: 8
Run: 2276, exploration: 0.01, score: 11
Run: 2277, exploration: 0.01, score: 8
Run: 2278, exploration: 0.01, score: 12
Run: 2279, exploration: 0.01, score: 10
Run: 2280, exploration: 0.01, score: 10
Run: 2281, exploration: 0.01, score: 8
Run: 2282, exploration: 0.01, score: 9
Run: 2283, exploration: 0.01, score: 11
Run: 2284, exploration: 0.01, score: 9
Run: 2285, exploration: 0.01, score: 9
Run: 2286, exploration: 0.01, score: 9
Run: 2287, exploration: 0.01, score: 10
Run: 2288, exploration: 0.01, score: 10
Run: 2289, exploration: 0.01, score: 10
Run: 2290, exploration: 0.01, score: 10
Run: 2291, exploration: 0.01, score: 10
Run: 2292, exploration: 0.01, score: 27
Run: 2293, exploration: 0.01, score: 11
Run: 2294, exploration: 0.01, score: 10
Run: 2295, exploration: 0.01, score: 9
Run: 2296, exploration: 0.01, score: 13
Run: 2297, exploration: 0.01, score: 12
Run: 2298, exploration: 0.01, score: 8
Run: 2299

Run: 2480, exploration: 0.01, score: 27
Run: 2481, exploration: 0.01, score: 11
Run: 2482, exploration: 0.01, score: 10
Run: 2483, exploration: 0.01, score: 42
Run: 2484, exploration: 0.01, score: 41
Run: 2485, exploration: 0.01, score: 30
Run: 2486, exploration: 0.01, score: 21
Run: 2487, exploration: 0.01, score: 22
Run: 2488, exploration: 0.01, score: 14
Run: 2489, exploration: 0.01, score: 32
Run: 2490, exploration: 0.01, score: 36
Run: 2491, exploration: 0.01, score: 50
Run: 2492, exploration: 0.01, score: 8
Run: 2493, exploration: 0.01, score: 17
Run: 2494, exploration: 0.01, score: 18
Run: 2495, exploration: 0.01, score: 29
Run: 2496, exploration: 0.01, score: 60
Run: 2497, exploration: 0.01, score: 32
Run: 2498, exploration: 0.01, score: 20
Run: 2499, exploration: 0.01, score: 39
Run: 2500, exploration: 0.01, score: 15
Run: 2501, exploration: 0.01, score: 11
Run: 2502, exploration: 0.01, score: 55
Run: 2503, exploration: 0.01, score: 23
Run: 2504, exploration: 0.01, score: 30
R

Run: 2684, exploration: 0.01, score: 65
Run: 2685, exploration: 0.01, score: 500
Run: 2686, exploration: 0.01, score: 102
Run: 2687, exploration: 0.01, score: 344
Run: 2688, exploration: 0.01, score: 76
Run: 2689, exploration: 0.01, score: 267
Run: 2690, exploration: 0.01, score: 329
Run: 2691, exploration: 0.01, score: 172
Run: 2692, exploration: 0.01, score: 196
Run: 2693, exploration: 0.01, score: 333
Run: 2694, exploration: 0.01, score: 170
Run: 2695, exploration: 0.01, score: 435
Run: 2696, exploration: 0.01, score: 451
Run: 2697, exploration: 0.01, score: 135
Run: 2698, exploration: 0.01, score: 403
Run: 2699, exploration: 0.01, score: 194
Run: 2700, exploration: 0.01, score: 117
Run: 2701, exploration: 0.01, score: 199
Run: 2702, exploration: 0.01, score: 473
Run: 2703, exploration: 0.01, score: 500
Run: 2704, exploration: 0.01, score: 202
Run: 2705, exploration: 0.01, score: 109
Run: 2706, exploration: 0.01, score: 235
Run: 2707, exploration: 0.01, score: 161
Run: 2708, explora

Run: 2885, exploration: 0.01, score: 500
Run: 2886, exploration: 0.01, score: 500
Run: 2887, exploration: 0.01, score: 500
Run: 2888, exploration: 0.01, score: 500
Run: 2889, exploration: 0.01, score: 500
Run: 2890, exploration: 0.01, score: 348
Run: 2891, exploration: 0.01, score: 500
Run: 2892, exploration: 0.01, score: 500
Run: 2893, exploration: 0.01, score: 500
Run: 2894, exploration: 0.01, score: 425
Run: 2895, exploration: 0.01, score: 461
Run: 2896, exploration: 0.01, score: 500
Run: 2897, exploration: 0.01, score: 435
Run: 2898, exploration: 0.01, score: 361
Run: 2899, exploration: 0.01, score: 500
Run: 2900, exploration: 0.01, score: 500
Run: 2901, exploration: 0.01, score: 500
Run: 2902, exploration: 0.01, score: 500
Run: 2903, exploration: 0.01, score: 500
Run: 2904, exploration: 0.01, score: 300
Run: 2905, exploration: 0.01, score: 354
Run: 2906, exploration: 0.01, score: 188
Run: 2907, exploration: 0.01, score: 281
Run: 2908, exploration: 0.01, score: 34
Run: 2909, explor

Run: 3086, exploration: 0.01, score: 116
Run: 3087, exploration: 0.01, score: 174
Run: 3088, exploration: 0.01, score: 206
Run: 3089, exploration: 0.01, score: 329
Run: 3090, exploration: 0.01, score: 149
Run: 3091, exploration: 0.01, score: 308
Run: 3092, exploration: 0.01, score: 35
Run: 3093, exploration: 0.01, score: 56
Run: 3094, exploration: 0.01, score: 104
Run: 3095, exploration: 0.01, score: 351
Run: 3096, exploration: 0.01, score: 147
Run: 3097, exploration: 0.01, score: 99
Run: 3098, exploration: 0.01, score: 127
Run: 3099, exploration: 0.01, score: 177
Run: 3100, exploration: 0.01, score: 251
Run: 3101, exploration: 0.01, score: 242
Run: 3102, exploration: 0.01, score: 440
Run: 3103, exploration: 0.01, score: 120
Run: 3104, exploration: 0.01, score: 367
Run: 3105, exploration: 0.01, score: 96
Run: 3106, exploration: 0.01, score: 127
Run: 3107, exploration: 0.01, score: 163
Run: 3108, exploration: 0.01, score: 76
Run: 3109, exploration: 0.01, score: 66
Run: 3110, exploration

Run: 3287, exploration: 0.01, score: 166
Run: 3288, exploration: 0.01, score: 378
Run: 3289, exploration: 0.01, score: 29
Run: 3290, exploration: 0.01, score: 154
Run: 3291, exploration: 0.01, score: 171
Run: 3292, exploration: 0.01, score: 273
Run: 3293, exploration: 0.01, score: 159
Run: 3294, exploration: 0.01, score: 145
Run: 3295, exploration: 0.01, score: 126
Run: 3296, exploration: 0.01, score: 125
Run: 3297, exploration: 0.01, score: 251
Run: 3298, exploration: 0.01, score: 118
Run: 3299, exploration: 0.01, score: 137
Run: 3300, exploration: 0.01, score: 268
Run: 3301, exploration: 0.01, score: 124
Run: 3302, exploration: 0.01, score: 218
Run: 3303, exploration: 0.01, score: 266
Run: 3304, exploration: 0.01, score: 417
Run: 3305, exploration: 0.01, score: 126
Run: 3306, exploration: 0.01, score: 33
Run: 3307, exploration: 0.01, score: 98
Run: 3308, exploration: 0.01, score: 158
Run: 3309, exploration: 0.01, score: 165
Run: 3310, exploration: 0.01, score: 22
Run: 3311, explorati

Run: 3488, exploration: 0.01, score: 202
Run: 3489, exploration: 0.01, score: 366
Run: 3490, exploration: 0.01, score: 243
Run: 3491, exploration: 0.01, score: 91
Run: 3492, exploration: 0.01, score: 146
Run: 3493, exploration: 0.01, score: 158
Run: 3494, exploration: 0.01, score: 108
Run: 3495, exploration: 0.01, score: 366
Run: 3496, exploration: 0.01, score: 232
Run: 3497, exploration: 0.01, score: 61
Run: 3498, exploration: 0.01, score: 332
Run: 3499, exploration: 0.01, score: 302
Run: 3500, exploration: 0.01, score: 343
Run: 3501, exploration: 0.01, score: 117
Run: 3502, exploration: 0.01, score: 157
Run: 3503, exploration: 0.01, score: 361
Run: 3504, exploration: 0.01, score: 495
Run: 3505, exploration: 0.01, score: 247
Run: 3506, exploration: 0.01, score: 129
Run: 3507, exploration: 0.01, score: 223
Run: 3508, exploration: 0.01, score: 60
Run: 3509, exploration: 0.01, score: 86
Run: 3510, exploration: 0.01, score: 356
Run: 3511, exploration: 0.01, score: 500
Run: 3512, explorati

Run: 3689, exploration: 0.01, score: 116
Run: 3690, exploration: 0.01, score: 163
Run: 3691, exploration: 0.01, score: 244
Run: 3692, exploration: 0.01, score: 171
Run: 3693, exploration: 0.01, score: 446
Run: 3694, exploration: 0.01, score: 137
Run: 3695, exploration: 0.01, score: 279
Run: 3696, exploration: 0.01, score: 217
Run: 3697, exploration: 0.01, score: 222
Run: 3698, exploration: 0.01, score: 340
Run: 3699, exploration: 0.01, score: 245
Run: 3700, exploration: 0.01, score: 189
Run: 3701, exploration: 0.01, score: 330
Run: 3702, exploration: 0.01, score: 166
Run: 3703, exploration: 0.01, score: 171
Run: 3704, exploration: 0.01, score: 316
Run: 3705, exploration: 0.01, score: 251
Run: 3706, exploration: 0.01, score: 162
Run: 3707, exploration: 0.01, score: 500
Run: 3708, exploration: 0.01, score: 500
Run: 3709, exploration: 0.01, score: 325
Run: 3710, exploration: 0.01, score: 142
Run: 3711, exploration: 0.01, score: 216
Run: 3712, exploration: 0.01, score: 500
Run: 3713, explo

Run: 3890, exploration: 0.01, score: 488
Run: 3891, exploration: 0.01, score: 335
Run: 3892, exploration: 0.01, score: 500
Run: 3893, exploration: 0.01, score: 299
Run: 3894, exploration: 0.01, score: 317
Run: 3895, exploration: 0.01, score: 145
Run: 3896, exploration: 0.01, score: 262
Run: 3897, exploration: 0.01, score: 284
Run: 3898, exploration: 0.01, score: 372
Run: 3899, exploration: 0.01, score: 342
Run: 3900, exploration: 0.01, score: 451
Run: 3901, exploration: 0.01, score: 315
Run: 3902, exploration: 0.01, score: 139
Run: 3903, exploration: 0.01, score: 128
Run: 3904, exploration: 0.01, score: 234
Run: 3905, exploration: 0.01, score: 251
Run: 3906, exploration: 0.01, score: 500
Run: 3907, exploration: 0.01, score: 165
Run: 3908, exploration: 0.01, score: 98
Run: 3909, exploration: 0.01, score: 246
Run: 3910, exploration: 0.01, score: 381
Run: 3911, exploration: 0.01, score: 106
Run: 3912, exploration: 0.01, score: 256
Run: 3913, exploration: 0.01, score: 216
Run: 3914, explor