## 1. Import libraries

In [1]:
import sys
sys.path.append('../')

%matplotlib inline
from collections import defaultdict
from typing import List
from sklearn.preprocessing import StandardScaler

import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns

import gym

import src.agents as agents
import src.episode as episode
import src.environment as environment
import src.aux_plots as aux_plots

## 2. Auxiliary functions

### 2.1. Reduce state space

In [2]:
def reduce_state(ob):
    """Shrink an observation into a coarser state representation.

    The reductions are applied to a copy, so the caller's observation is left
    untouched and calling this twice on the same input gives the same result.

    Args:
        ob: Indexable observation supporting ``.copy()`` and item assignment
            at indices 14, 16 and 108-117 (the notebook passes what
            ``env.reset()`` / ``env.step()`` return).

    Returns:
        A reduced copy of ``ob`` in which
        * index 16 is 1 if the original value differed from 255, else 0;
        * index 14 is integer-divided by 3;
        * each of indices 108-117 is 0 when its value is below 20 or above
          80, and integer-divided by 3 otherwise.
    """
    # Work on a copy: mutating the argument in place made a second call on
    # the same observation reduce the already-reduced values again.
    ob = ob.copy()

    # Only whether we were hit matters, not where
    ob[16] = 1 if ob[16] != 255 else 0

    # Reduce chicken y-position
    ob[14] = ob[14] // 3

    for b in range(108, 118):
        # The chicken is in the x-position ~49
        if ob[b] < 20 or ob[b] > 80:
            # We don't need to represent cars far from the chicken
            ob[b] = 0
        else:
            # Reduce the cars x-positions sample space
            ob[b] = ob[b] // 3

    return ob

### 2.2. Reward policy

In [3]:
def reward_policy(reward, ob, action):
    """Translate the environment reward into the shaped training reward.

    The shaped values are read from attributes stored on this function
    (``REWARD_IF_CROSS``, ``REWARD_IF_COLISION``, ``REWARD_IF_STILL``); they
    must be assigned before the first call.

    Args:
        reward: Reward reported by the environment for the step.
        ob: Observation in which ``ob[16] == 1`` marks a collision.
        action: Action that was taken; anything other than 1 is penalised.

    Returns:
        The first matching shaped reward (crossing, then collision, then
        standing still), or ``reward`` unchanged when no rule applies.
    """
    # A raw reward of 1 means the chicken crossed the road.
    if reward == 1:
        return reward_policy.REWARD_IF_CROSS

    # Collision!
    if ob[16] == 1:
        return reward_policy.REWARD_IF_COLISION

    # Don't incentivize staying still
    if action != 1:
        return reward_policy.REWARD_IF_STILL

    return reward

### 2.3. Print results

In [4]:
def print_result(i, scores, total_reward, score):
    """Print a one-line progress summary for run ``i``.

    Args:
        i: Index of the run being reported.
        scores: Scores of the runs so far.
        total_reward: Accumulated reward of the run being reported.
        score: Score of the run being reported.

    The line contains the mean of all ``scores`` and the mean of the last
    (up to) ten entries; both are shown as ``nan`` when ``scores`` is empty
    instead of raising ``ZeroDivisionError``.
    """
    recent = scores[-10:]  # the most recent (up to) ten runs
    mean_all = sum(scores) / len(scores) if len(scores) else float("nan")
    mean_recent = sum(recent) / len(recent) if len(recent) else float("nan")

    # The second mean covers scores[-10:], so the label says exactly that.
    print(
        f"Run [{i:4}] - Total reward: {total_reward:7.2f} "
        f"Mean scores: {mean_all:.2f} "
        f"Mean scores[-10:]: {mean_recent:5.2f} "
        f"Score: {score:2} "
    )

## 3. Running algorithms

### 3.1. Hyperparameters

In [5]:
# Indices of the observation entries that make up the agent's state.
RAM_mask = [
    14,  # Chicken Y
    16,  # Chicken Lane Collide
    *range(108, 118),  # Car X Coords (108 .. 117)
]

# Values handed to the agent constructors as gamma / available_actions / N0.
GAMMA = 0.99
AVAILABLE_ACTIONS = 2
N0 = 2.5

# Shaped rewards, stored as attributes on reward_policy and read by it on every call.
reward_policy.REWARD_IF_CROSS = 50  # returned when the environment reward is 1
reward_policy.REWARD_IF_COLISION = -1  # returned when ob[16] == 1 (collision flag)
reward_policy.REWARD_IF_STILL = -0.1  # returned when the action is not 1

### 3.2. Monte Carlo

In [None]:
# Create the environment; `env` is a notebook global that MonteCarloES reads directly.
env, initial_state = environment.get_env()

# Monte Carlo control agent configured from the shared hyperparameters.
mc_agent = agents.MonteCarloControl(gamma=GAMMA, available_actions=AVAILABLE_ACTIONS, N0=N0)

In [None]:
def MonteCarloES(agent, reduce_s, reward_p, RAM_mask: List[int], render: bool=False):
    """Generate one episode and update the agent's policy from it.

    Note that the environment is not a parameter: the notebook-level global
    ``env`` is used.

    Args:
        agent: Agent forwarded to ``episode.generate_episode`` and whose
            ``update_policy`` is called with the generated episode.
        reduce_s: State-reduction callable, forwarded as ``reduce_state``.
        reward_p: Reward-shaping callable, forwarded as ``reward_policy``.
        RAM_mask: Observation indices, forwarded unchanged.
        render: Forwarded unchanged to the episode generator.

    Returns:
        Whatever ``agent.update_policy`` returns (the notebook unpacks it as
        ``score, total_reward``).
    """
    played_episode = episode.generate_episode(
        env,
        agent=agent,
        reduce_state=reduce_s,
        reward_policy=reward_p,
        RAM_mask=RAM_mask,
        render=render,
    )
    return agent.update_policy(played_episode)

In [None]:
# %%time
# MonteCarloES(agent=mc_agent,reduce_s=reduce_state, reward_p=reward_policy, RAM_mask=RAM_mask, render=False)

In [None]:
%%time
scores = []
total_rewards = []


n_runs = 5

for i in range(n_runs):
    render = i % 201 == 200

    score, total_reward = MonteCarloES(agent=mc_agent,reduce_s=reduce_state, reward_p=reward_policy, RAM_mask=RAM_mask, render=render)

    scores.append(score)
    total_rewards.append(total_reward)

    print_result(i, scores, total_reward, score)

### 3.3. Q Learning

In [None]:
# Fresh environment and a tabular Q-learning agent sharing the hyperparameters above.
env, initial_state = environment.get_env()
ql_agent = agents.QLearning(gamma=GAMMA, available_actions=AVAILABLE_ACTIONS, N0=N0)

In [None]:
%%time
scores = []
total_rewards = []

n_runs = 1
render = False
for i in range(n_runs):
#     render = i % 200 == 0

    game_over = False
    state = env.reset()
    state = reduce_state(state)[RAM_mask].data.tobytes()  # Select useful bytes
    action = ql_agent.act(state)
    
    score = 0
    total_reward = 0

    while not game_over:
        if render:
            time.sleep(0.025)
            env.render()

        old_state = state
        ob, reward, game_over, _ = env.step(action)

        ob = reduce_state(ob)
        reward = reward_policy(reward, ob, action)

        total_reward += reward

        if reward == reward_policy.REWARD_IF_CROSS:
            score += 1

        state = ob[RAM_mask].data.tobytes()

        ql_agent.update_Q(old_state, state, action, reward)

        action = ql_agent.act(state)  # Next action

    scores.append(score)
    total_rewards.append(total_reward)

    print_result(i, scores, total_reward, score)

In [None]:
# Dump the learned table: each key is decoded from bytes back into uint8 values.
for key in ql_agent.Q:
    print(f'{np.frombuffer(key, dtype=np.uint8)}:{ql_agent.Q[key]}')

### 3.4. Q Learning Approximator

In [6]:
# Fresh environment and the linear-approximation Q-learning agent.
env, initial_state = environment.get_env()
# weights_length is the number of masked observation entries (12).
# NOTE(review): the stored feature/weight vectors below have 14 entries — presumably the agent appends action features; verify in src.agents.
ql_agent_app = agents.QLearningLinearApprox(gamma=GAMMA, available_actions=AVAILABLE_ACTIONS, N0=N0, weights_length=len(RAM_mask))
# Fit the agent's scaler before training — presumably on observations drawn from env; verify in src.agents.
ql_agent_app.trainScaler(env, RAM_mask)

In [7]:
# Sanity check: reset the game and display the feature vector built for the
# reduced, masked start state (second argument 1 — presumably the action; verify against createFeature).
state = env.reset()
ql_agent_app.createFeature(reduce_state(state)[RAM_mask].data.tobytes(), 1)

array([0.02723756, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 1.        , 1.        ])

In [8]:
%%time
scores = []
total_rewards = []

n_runs = 5
render = False
for i in range(n_runs):
#     render = i % 200 == 0

    game_over = False
    state = env.reset()
    state = reduce_state(state)[RAM_mask].data.tobytes()  # Select useful bytes
    action = ql_agent_app.act(state)
    
    score = 0
    total_reward = 0

    count = 0
    while not game_over:
        if render:
            time.sleep(0.025)
            env.render()

        old_state = state
        ob, reward, game_over, _ = env.step(action)

        ob = reduce_state(ob)
        reward = reward_policy(reward, ob, action)

        total_reward += reward

        if reward == reward_policy.REWARD_IF_CROSS:
            score += 1

        state = ob[RAM_mask].data.tobytes()
        print("Run ", count)
        print('State:',np.frombuffer(old_state, dtype=np.uint8, count=-1))
        ql_agent_app.update_W(old_state, state, action, reward)

        action = ql_agent_app.act(state)  # Next action
        count+=1
        print('------------------------------------\n')
    scores.append(score)
    total_rewards.append(total_reward)

    print_result(i, scores, total_reward, score)

Run  0
State: [2 0 0 0 0 0 0 0 0 0 0 0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:0.0 | Update: 0.0
Weight: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
------------------------------------

Run  1
State: [2 0 0 0 0 0 0 0 0 0 0 0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:0.0 | Update: -0.1
Weight: [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
------------------------------------

Run  2
State: [2 0 0 0 0 0 0 0 0 0 0 0]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:-0.10011128267950559 | Update: 0.00048200930014660054
Weight: [-0.00272376  0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.         -0.1       ]
------------------------------------

Run  3
State: [3 0 0 0 0 0 0 0 0 0 0 0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:-0.0991467276863477 | Update: -0.09852652342298991
Weight: [-0.00271063  0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.


Run  60
State: [30  0 12 15 20  0  0  0 23  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:-0.2169591612125239 | Update: -0.005621998714471027
Weight: [ 0.06182049  0.          0.05328884  0.06058784  0.0856496   0.06420442
 -0.08261476 -0.00353505  0.05596273  0.          0.          0.
  0.14817967 -0.44409856]
------------------------------------

Run  61
State: [31  0 12 15 20  0  7  0 22  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:-0.2307982302711704 | Update: 0.001993523487297727
Weight: [ 0.05952355  0.          0.05237937  0.05944978  0.08412819  0.06420442
 -0.08261476 -0.00353505  0.05421171  0.          0.          0.
  0.14255767 -0.44972056]
------------------------------------

Run  62
State: [32  0 12 15 20  0  8  0 22  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:-0.22586385265949055 | Update: 0.002361692607836141
Weight: [ 0.06036518  0.          0.05270186  0.05985333  0.08466767  0.06420442
 -0.08242472 -0.00353505  0.05480561  0.          0. 

  0.05761422 -0.59216749]
------------------------------------

Run  120
State: [34  0 24  0  0  7 15  0  0 12 22  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:-0.6435243098556603 | Update: 0.007275025803203827
Weight: [-0.0063686   0.01253153 -0.02750736  0.0736987   0.17062487  0.0620671
 -0.0620775  -0.08222912  0.1036184  -0.15497885 -0.10086852  0.
  0.05761422 -0.62818336]
------------------------------------

Run  121
State: [34  0 24  0  0  8 15  0  0 12 22  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:-0.6262640494304585 | Update: 0.00543747127916161
Weight: [-0.00299998  0.01253153 -0.02515359  0.0736987   0.17062487  0.06275613
 -0.06059137 -0.08222912  0.1036184  -0.1538     -0.09870258  0.
  0.06488924 -0.62090834]
------------------------------------

Run  122
State: [34  0 24  0  0  8 16  0  0 12 22  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:-0.6109270532513673 | Update: 0.007905835369191427
Weight: [-4.82223775e-04  1.25315298e-02 -2.33943484e-02  7.36986963e-0

Run  178
State: [48  0  0  0  7  0 21  0 15  0  7 16]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:-0.9811798609890893 | Update: 0.04097272207183811
Weight: [-0.21280351  0.01172914 -0.00994827  0.0736987   0.17854312 -0.00668334
 -0.00718992 -0.13222193 -0.0021484  -0.14135727 -0.15814389 -0.10816103
  0.13125884 -0.94939414]
------------------------------------

Run  179
State: [48  0  0  0  7  0 22  0 15  0  7 16]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:-0.9149517396538827 | Update: 0.009062561445610795
Weight: [-0.18601958  0.01172914 -0.00994827  0.0736987   0.18242389 -0.00668334
  0.00452789 -0.13222193  0.00617422 -0.14135727 -0.15426254 -0.09934457
  0.13125884 -0.90842142]
------------------------------------

Run  180
State: [49  0  0  0  8  0 23  0 14  0  7 16]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:-0.8934339779184367 | Update: 0.0065803017557013455
Weight: [-0.18009537  0.01172914 -0.00994827  0.0736987   0.18328226 -0.00668334
  0.00724311 -0.13222193  0.0080150


Run  241
State: [ 2  0  0  7  0 14  0 23  0 25  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:138.27696546366246 | Update: -1.2024227646352585
Weight: [ 4.24225091e+01  1.09983998e-02 -9.94826990e-03 -4.00454029e+00
  4.96524210e+01  2.07200238e+01  4.29073509e+01 -1.69677085e+01
 -1.12243916e-02 -1.77536338e+01 -1.46208355e-01  1.01732406e-01
 -6.85081908e-01  1.44551219e+02]
------------------------------------

Run  242
State: [ 2  0  0  7  0 15  0 23  0 25  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:136.99783416799164 | Update: -0.4510262482899918
Weight: [ 4.23897581e+01  1.09983998e-02 -9.94826990e-03 -4.11813002e+00
  4.96524210e+01  2.04922569e+01  4.29073509e+01 -1.73406952e+01
 -1.12243916e-02 -1.81595516e+01 -1.46208355e-01  1.01732406e-01
 -6.85081908e-01  1.43348796e+02]
------------------------------------

Run  243
State: [ 2  0  0  7  0 15  0 22  0 25  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:137.13386239178757 | Update: 0.4676730612151516
Weight: 

State: [12  0  8 23  0  0  0 14  7  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:106.60589717955278 | Update: 0.4539182793970298
Weight: [ 3.89845714e+01  5.19955998e-03 -3.37273268e-02 -1.19071455e+01
  4.96524210e+01  1.46557872e+01  3.43296164e+01 -2.30325618e+01
 -4.91207999e+00 -2.44550036e+01 -1.46208355e-01  1.01732406e-01
 -2.12793635e-01  1.07441695e+02]
------------------------------------

Run  305
State: [13  0  8 23  0  0  0 13  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:107.68759124859297 | Update: -0.6262720213043025
Weight: [ 3.90587532e+01  5.19955998e-03  1.52263812e-02 -1.17662525e+01
  4.96524210e+01  1.46557872e+01  3.43296164e+01 -2.29468553e+01
 -4.86905213e+00 -2.44550036e+01 -1.46208355e-01  1.01732406e-01
  2.41124645e-01  1.07895613e+02]
------------------------------------

Run  306
State: [13  0  8 23  0  0  0 12  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:108.47764906918297 | Update: 0.4365758227410197
Weight: [ 3.8947875

Run  359
State: [22  0 19  0 14 20  0 11  0  0 16  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:114.34804392245384 | Update: -0.9691660896717025
Weight: [ 3.60054365e+01 -5.69173415e-03 -2.53755345e+00 -1.22153058e+01
  4.84884045e+01  1.28833157e+01  2.69524539e+01 -2.42336900e+01
 -4.86905213e+00 -2.44550036e+01 -2.52884010e+00  1.01732406e-01
  1.10064960e+00  9.44065416e+01]
------------------------------------

Run  360
State: [22  0 19  0 14 21  0 11  0  0 16  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:112.44890072989269 | Update: -0.727169551863156
Weight: [ 3.57150616e+01 -5.69173415e-03 -2.78579230e+00 -1.22153058e+01
  4.83048136e+01  1.26210547e+01  2.69524539e+01 -2.43774702e+01
 -4.86905213e+00 -2.44550036e+01 -2.73868956e+00  1.01732406e-01
  1.31483507e-01  9.34373755e+01]
------------------------------------

Run  361
State: [22  0 19  0 14 21  0 10  0  0 15  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:112.95760539870757 | Update: 0.3820761556479795
Weight: [

Run  408
State: [28  0  0  0  0  0  0 18  9 23  0 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:54.87565966205814 | Update: 0.05670636333928769
Weight: [ 21.83792536  -0.09394317 -15.96272392 -12.21530578  35.29704119
  10.92942037  14.80539829 -28.30153485 -10.04476206 -27.89344519
  -6.42721668  -6.52428029   0.22305813  64.6711476 ]
------------------------------------

Run  409
State: [28  0  0  0  0  0  0 17  8 23  0 24]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:55.39435750776535 | Update: -0.1724325908946156
Weight: [ 21.85954896  -0.09394317 -15.96272392 -12.21530578  35.29704119
  10.92942037  14.80539829 -28.2877687  -10.03785094 -27.87583353
  -6.42721668  -6.50521466   0.27976449  64.72785397]
------------------------------------

Run  410
State: [28  0  0  0  0  0  0 16  8 23  0 24]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:55.49749477959258 | Update: -0.03674800131366851
Weight: [ 21.79379597  -0.09394317 -15.96272392 -12.21530578  35.29704119
  10.92942037  14.80539

Run  454
State: [35  0  0  7  0 14  0 24  0  8  0 15]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:62.31635284249304 | Update: 0.12744038922755152
Weight: [ 20.83948571  -0.09394317 -15.96272392 -11.69278065  35.29704119
  10.0005943   13.07851432 -25.52410419 -10.02940926 -27.47881243
  -6.42721668  -6.44722295  -3.84809372  63.2734527 ]
------------------------------------

Run  455
State: [35  0  0  7  0 15  0 23  0  7  0 15]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:62.841749451129246 | Update: 3.5633579329217184
Weight: [ 20.90023109  -0.09394317 -15.96272392 -11.68074169  35.29704119
  10.02473449  13.07851432 -25.48285393 -10.02940926 -27.46504548
  -6.42721668  -6.42151439  -3.84809372  63.40089309]
------------------------------------

Run  456
State: [35  0  0  7  0 15  0 22  0  7  0 15]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:68.60366179208395 | Update: 0.30426329184065537
Weight: [ 22.5987315   -0.09394317 -15.96272392 -11.34412058  35.29704119
  10.74793079  13.07851

Run  497
State: [57  0  0 19  7  0 22  0 15  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:89.00949183882287 | Update: 0.21600951599185692
Weight: [ 21.93613183  -0.09394317 -15.96272392 -11.42089983  35.63320055
   7.6385082   14.27550147 -24.06957897  -9.65914452 -26.87133727
  -6.42721668  -6.43957382   3.39104759  64.72234084]
------------------------------------

Run  498
State: [58  0  0 19  8  0 23  0 14  0  0  0]
Alpha:1.0, Action:1, Reward:50 | Max_value:72.95675220124234 | Update: 32.60821715639959
Weight: [ 22.10381362  -0.09394317 -15.96272392 -11.36551251  35.65366009
   7.6385082   14.34021988 -24.06957897  -9.6152674  -26.87133727
  -6.42721668  -6.43957382   3.6070571   64.93835036]
------------------------------------

Run  499
State: [ 2  0  0 19  8  0 24  0 14  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:146.25139868508458 | Update: 34.99309848573262
Weight: [ 4.78606910e+01 -9.39431705e-02 -1.59627239e+01 -3.00439151e+00
  3.91833945e+01  7.63850820


Run  543
State: [18  0  0  0 22  7 14  0  0  0 22  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:162.66925192710832 | Update: -0.5595284315590732
Weight: [ 4.74381949e+01 -9.39431705e-02 -1.59627239e+01  7.41195605e+00
  4.26516070e+01  7.55188733e+00  2.84811252e+01 -1.46255461e+01
  3.83001679e+00 -2.68713373e+01 -9.22306229e+00 -6.43957382e+00
  6.92252216e-01  1.33184904e+02]
------------------------------------

Run  544
State: [18  0  0  0 23  8 15  0  0  0 22  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:162.41821428158266 | Update: -0.5935798064234064
Weight: [ 4.73010332e+01 -9.39431705e-02 -1.59627239e+01  7.41195605e+00
  4.24850473e+01  7.49889345e+00  2.83744453e+01 -1.46255461e+01
  3.83001679e+00 -2.68713373e+01 -9.38964676e+00 -6.43957382e+00
  1.32723785e-01  1.32625375e+02]
------------------------------------

Run  545
State: [19  0  0  0 23  8 16  0  0  0 22  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:161.8955487422666 | Update: -1.334186786592312
Weight: [


Run  604
State: [36  0 15  0  0  0 25  0 14  9  7  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:116.77346159828036 | Update: -1.4303063131596616
Weight: [ 3.69742100e+01 -1.01365102e-01 -1.76240669e+01  7.41195605e+00
  2.82523301e+01  2.21181663e+00  1.32276051e+01 -7.88068677e+00
  2.33146836e+00 -3.07542556e+01 -1.66734729e+01 -6.43957382e+00
 -1.04350557e-01  1.02948130e+02]
------------------------------------

Run  605
State: [35  0 16  0  0  0 26  0 13  9  6  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:108.70804502800985 | Update: -5.538605434414279
Weight: [ 3.62729651e+01 -1.01365102e-01 -1.79132936e+01  7.41195605e+00
  2.82523301e+01  2.21181663e+00  1.27406361e+01 -7.88068677e+00
  2.06030498e+00 -3.09280806e+01 -1.68089658e+01 -6.43957382e+00
 -1.53465687e+00  1.01517823e+02]
------------------------------------

Run  606
State: [34  0 16  0  0  0  0 26 13  8  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:101.07852200641865 | Update: 5.696026007734261
Weight: [ 3


Run  669
State: [51  0  0  9 13 19  0 14  0  0  0 24]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:116.8922484672044 | Update: -0.2945952356508599
Weight: [ 32.09046212  -0.1013651  -21.96054198   8.16491299  28.86606306
   3.17935438  10.98378463  -6.85572234   1.2007894  -31.27684569
 -17.25868496  -3.44999537   0.33464443  89.1506595 ]
------------------------------------

Run  670
State: [52  0  0 10 13 19  0 13  0  0  0 24]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:116.46856415082004 | Update: -1.075016254970805
Weight: [ 31.88584873  -0.1013651  -21.96054198   8.12913202  28.81424345
   3.10362144  10.98378463  -6.91134633   1.2007894  -31.27684569
 -17.25868496  -3.54508134   0.33464443  88.85606426]
------------------------------------

Run  671
State: [52  0  0 10 13 20  0 13  0  0  0 23]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:114.83059892448871 | Update: -0.6694758546666293
Weight: [ 31.12454749  -0.1013651  -21.96054198   7.98405491  28.62514701
   2.82726212  10.98

Run  733
State: [15  0  0 25  0  0  0  7  0 20  0 12]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:165.48851957292237 | Update: 3.514420421509783
Weight: [ 4.35542069e+01 -1.01365102e-01 -2.19605420e+01  1.68330329e+01
  5.18225604e+01  6.92739447e-01  3.76802416e+00 -2.29426598e+01
  3.63411604e+01 -5.08492900e+01 -1.72586850e+01  1.94917526e+01
 -1.39254032e+00  1.59888233e+02]
------------------------------------

Run  734
State: [17  0  0 25  0  0  0  0  0 19  0 11]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:172.01848926102465 | Update: 0.301695210462384
Weight: [ 4.42721386e+01 -1.01365102e-01 -2.19605420e+01  1.80187404e+01
  5.18225604e+01  6.92739447e-01  3.76802416e+00 -2.26108723e+01
  3.63411604e+01 -4.99001625e+01 -1.72586850e+01  2.00589242e+01
  2.12188010e+00  1.63402654e+02]
------------------------------------

Run  735
State: [17  0  0 25  0  0  0  0  0 19  0 11]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:173.91835798194413 | Update: -0.21798409713352385
Weight: [ 4

Run  794
State: [30  0  0  0  0  0  0  0 24  0 12  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:193.25318980222204 | Update: -0.7234125781868102
Weight: [ 4.43916209e+01 -1.55819444e-01 -2.19605420e+01  1.56132406e+01
  5.18225604e+01  1.20323722e+00 -2.29464705e+00 -2.07616759e+01
  4.01479952e+01 -5.42173260e+01 -1.94482089e+01  1.78173008e+01
  7.24180791e+00  1.56775598e+02]
------------------------------------

Run  795
State: [32  0  0  0  0  0  0  0 24  0 12  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:191.90689547897585 | Update: -1.5942439764950507
Weight: [ 4.40960610e+01 -1.55819444e-01 -2.19605420e+01  1.56132406e+01
  5.18225604e+01  1.20323722e+00 -2.29464705e+00 -2.07616759e+01
  3.99128851e+01 -5.42173260e+01 -1.95656870e+01  1.78173008e+01
  6.51839533e+00  1.56052186e+02]
------------------------------------

Run  796
State: [33  0  0  0  0  0  0  0 23  0 11  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:188.04509674185795 | Update: -2.041600362331593
Weight: [ 


Run  853
State: [24  0 12  0 21  0  9  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:117.3717466902919 | Update: -1.7247805378507337
Weight: [ 22.53922197  -0.16757344 -25.52618803  15.61324064  44.2074623
   1.20323722  -7.85307522 -24.26305265  33.42944044 -54.217326
 -22.10440842  17.81730081  -0.58339204 102.98631012]
------------------------------------

Run  854
State: [24  0 13  0 21  0 10  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:115.9169169788799 | Update: 0.09538139724600114
Weight: [ 21.97547626  -0.16757344 -25.80520654  15.61324064  43.71736972
   1.20323722  -8.06447698 -24.26305265  33.42944044 -54.217326
 -22.10440842  17.81730081  -0.58339204 101.26152958]
------------------------------------

Run  855
State: [25  0 13  0 22  0 12  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:116.32376705065751 | Update: -0.38749271679120056
Weight: [ 22.00665174  -0.16757344 -25.78849083  15.61324064  43.74447213
   1.20323722  -8.0514873


Run  912
State: [35  0 24 17  0  0 14  0 19 12  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:185.0861136098025 | Update: 2.809522390904192
Weight: [ 47.79257088  -0.18845198  -9.03129914  30.45860052  29.54639913
   2.26175799 -18.12552791 -20.57649335  52.16202905 -37.36555951
 -22.10440842  17.81730081   5.71359152 149.56883268]
------------------------------------

Run  913
State: [35  0 24 17  0  0 15  0 18 12  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:189.095784237453 | Update: -2.1305070418382854
Weight: [ 49.13175011  -0.18845198  -8.12230377  31.10316337  29.54639913
   2.26175799 -17.58986351 -20.57649335  52.8848988  -36.91030466
 -22.10440842  17.81730081   5.71359152 152.37835508]
------------------------------------

Run  914
State: [35  0 24 17  0  0 16  0 18 12  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:183.37393777709082 | Update: 1.2850806727745976
Weight: [ 48.11622847  -0.18845198  -8.81160991  30.61438076  29.54639913
   2.26175799 -18.025081

Run  969
State: [49  0  0  0  7 11 23  0  0  0 20 16]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:161.28324290725112 | Update: 0.06469449431142493
Weight: [ 39.57196103  -0.22042744 -10.61334137  28.60222869  29.415001
   2.01096202 -20.16211441 -10.04474146  54.44331735 -36.56628678
 -25.64183052  19.57134233  -0.9879661  140.14529022]
------------------------------------

Run  970
State: [50  0  0  0  8 12 24  0  0  0 20 16]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:161.11961575746486 | Update: -1.9854998276647393
Weight: [ 39.61513297  -0.22042744 -10.61334137  28.60222869  29.4211286
   2.02059067 -20.14185031 -10.04474146  54.44331735 -36.56628678
 -25.62432048  19.58526322  -0.92327161 140.20998472]
------------------------------------

Run  971
State: [50  0  0  0  8 12 25  0  0  0 20 16]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:155.334513389959 | Update: -2.9751111839278224
Weight: [ 38.26312883  -0.22042744 -10.61334137  28.60222869  29.20620465
   1.6982192  -20.7908034


Run  1030
State: [29  0  0  0  0  0  0 18  9 24  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:104.33785266788179 | Update: -0.6927033138724568
Weight: [ 20.55299596  -0.42061001 -10.61334137  28.60222869  21.11639441
   5.5396778  -23.61772529 -11.99164811  42.93528807 -32.29754627
 -30.19939703  16.21655664  -0.33423989 104.34946535]
------------------------------------

Run  1031
State: [29  0  0  0  0  0  0 17  8 23  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:103.58367430882585 | Update: -0.9718400273822994
Weight: [ 20.27941654  -0.42061001 -10.61334137  28.60222869  21.11639441
   5.5396778  -23.61772529 -12.15981021  42.8508645  -32.52203748
 -30.19939703  16.21655664  -1.02694321 103.65676204]
------------------------------------

Run  1032
State: [29  0  0  0  0  0  0 16  8 23  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:102.33840090129499 | Update: 0.0349436474412812
Weight: [ 19.89559359  -0.42061001 -10.61334137  28.60222869  21.11639441
   5.5396778  -23

Run  1089
State: [40  0  8 10  0 20  0 12  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:95.81102072811152 | Update: 0.0785813115567322
Weight: [ 12.00578853  -0.42061001 -10.5665869   28.77408414  21.11639441
   5.57705064 -23.3815125  -13.59255153  42.29807507 -36.5921697
 -30.19939703  16.21655664  -0.61440872  86.79564528]
------------------------------------

Run  1090
State: [41  0  8 10  0 21  0 11  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:96.10692643833698 | Update: -0.8779209991075874
Weight: [ 12.04859579  -0.42061001 -10.55811214  28.78468895  21.11639441
   5.59831512 -23.3815125  -13.57983382  42.29807507 -36.5921697
 -30.19939703  16.21655664  -0.53582741  86.87422659]
------------------------------------

Run  1091
State: [41  0  8 10  0 21  0 10  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:95.56982340603247 | Update: 0.30930545820518773
Weight: [ 11.5583911   -0.42061001 -10.65279326  28.66621052  21.11639441
   5.34886701 -23.38151

Run  1151
State: [ 2  0 20 25 16  0  0  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:274.8324351343575 | Update: 0.21370039728253687
Weight: [ 45.13833687  -0.46968049  20.57335908  68.20580818  38.48132289
   7.49034214  -6.08897309  31.55638508  66.3425156  -36.5921697
 -30.19939703  16.21655664  -2.04156413 235.79310687]
------------------------------------

Run  1152
State: [ 2  0 20 26 16  0  0  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:275.8965771673526 | Update: -2.0595313507297988
Weight: [ 45.14415755  -0.46968049  20.63097643  68.27790717  38.52758765
   7.49034214  -6.08897309  31.55638508  66.3425156  -36.5921697
 -30.19939703  16.21655664  -1.82786373 236.00680727]
------------------------------------

Run  1153
State: [ 2  0 21 26 17  0  0  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:262.69640265721625 | Update: -11.424891751025143
Weight: [ 45.08806095  -0.46968049  20.07569092  67.55526116  38.08171239
   7.49034214  -6.088

Run  1213
State: [16  0  0  0  0  0  0  0 23 15 11 20]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:212.39085164832287 | Update: -0.8123411401059286
Weight: [ 39.95155207  -0.46968049  12.48861672  58.96030311  31.66908375
   5.19861576  -7.80276084  25.72692586  64.87875029 -38.75583179
 -38.61120254  13.98088914  -1.59307622 193.5970098 ]
------------------------------------

Run  1214
State: [17  0  0  0  0  0  6  0 23 15 11 19]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:210.37015951562432 | Update: -3.18510813107946
Weight: [ 39.77454257  -0.46968049  12.48861672  58.96030311  31.66908375
   5.19861576  -7.80276084  25.72692586  64.6257388  -38.92037143
 -38.73212885  13.76239091  -2.40541736 192.78466866]
------------------------------------

Run  1215
State: [17  0  0  0  0  0  7  0 22 15 11 19]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:206.70921012606027 | Update: -1.7426329527003759
Weight: [ 39.03712876  -0.46968049  12.48861672  58.96030311  31.66908375
   5.19861576  -8.

Run  1276
State: [40  0  0  0  0  8 16  0  0  0  0  7]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:144.30842760110633 | Update: -1.6785359561179973
Weight: [ 18.21215877  -0.46968049  12.48861672  58.96030311  31.66908375
   4.88168042  -9.94608444  21.68365114  57.88549681 -42.28289268
 -41.35885927   3.44570028  -0.75947014 135.83722863]
------------------------------------

Run  1277
State: [40  0  0  0  0  8 17  0  0  0  0  7]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:141.9357200999934 | Update: -0.7367276995378234
Weight: [ 17.29777438  -0.46968049  12.48861672  58.96030311  31.66908375
   4.69999247 -10.31183322  21.68365114  57.88549681 -42.28289268
 -41.35885927   3.28768169  -0.75947014 134.15869268]
------------------------------------

Run  1278
State: [40  0  0  0  0  9 18  0  0  0  0  7]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:143.3073339647431 | Update: 0.8519461365966094
Weight: [ 16.89644112  -0.46968049  12.48861672  58.96030311  31.66908375
   4.62024765 -10.4

Alpha:1.0, Action:0, Reward:-0.1 | Max_value:511.52930416687997 | Update: 6.260147343996721
Weight: [ 66.82435749  -0.46968049  12.48861672 112.25449456 102.87667771
  19.64813145  14.29568033  19.16896345 120.98473035 -42.28289268
 -41.35885927   3.14653767  10.61702039 402.86704816]
------------------------------------

Run  1338
State: [ 6  0  0 19 26  0 24  0 14  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:518.4286556433921 | Update: -6.618456897346164
Weight: [ 67.2506353   -0.46968049  12.48861672 113.85966817 104.99430514
  19.64813145  16.25653157  19.16896345 122.25632815 -42.28289268
 -41.35885927   3.14653767  10.61702039 409.1271955 ]
------------------------------------

Run  1339
State: [ 6  0  0 19 26  0 25  0 13  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:490.70504275969614 | Update: -17.137895006841973
Weight: [ 66.7098235   -0.46968049  12.48861672 112.16261988 102.66591858
  19.64813145  14.0933138   19.16896345 121.00157373 -42.28289268
 -41.35885

Run  1399
State: [33  0 17  0  0 17  0 19  0  0 18  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:279.29802869528294 | Update: -1.776032124794824
Weight: [ 36.58775437  -0.46968049  -2.47378385  73.19893093  88.77378604
  19.77340556  -0.6341032   19.9626421  114.76507912 -72.57084584
 -58.22119652   3.14653767   1.21318772 265.71018826]
------------------------------------

Run  1400
State: [34  0 17  0  0 17  0 18  0  0 17  0]
Alpha:1.0, Action:0, Reward:-1 | Max_value:275.23259714345426 | Update: -4.061693143198852
Weight: [ 35.78957055  -0.46968049  -2.88080565  73.19893093  88.77378604
  19.36489318  -0.6341032   19.50753588 114.76507912 -72.57084584
 -58.6538229    3.14653767  -0.5628444  263.93415613]
------------------------------------

Run  1401
State: [34  1 18  0  0 18  0 16  0  0 17  0]
Alpha:1.0, Action:0, Reward:-1 | Max_value:268.7073159016167 | Update: -4.398665874610515
Weight: [ 33.90885035  -0.46968049  -3.81164335  73.19893093  88.77378604
  18.43064658  -0.634103

Run  1456
State: [29  0  0  0 14  0  0 11  0  0  0 23]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:232.16085710668455 | Update: -2.421608571066855
Weight: [ 12.15985324  -0.69347814 -10.23219095  73.19893093  83.74206215
  21.90578389  -4.57601323   9.07666681 100.48344492 -72.57084584
 -63.21239864  -2.6505331   -1.142164   210.96827055]
------------------------------------

Run  1457
State: [29  0  0  0 14  0  0 11  0  0  0 23]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:228.87206179694834 | Update: -1.253145215481993
Weight: [ 11.20345206  -0.69347814 -10.23219095  73.19893093  83.28333234
  21.90578389  -4.57601323   8.71741002 100.48344492 -72.57084584
 -63.21239864  -3.39958388  -1.142164   208.54666198]
------------------------------------

Run  1458
State: [29  0  0  0 14  0  0 10  0  0  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:227.26428327122315 | Update: -1.0997042362160414
Weight: [ 10.70852914  -0.69347814 -10.23219095  73.19893093  83.04594671
  21.90578389  -4.

Run  1509
State: [50  0  0 10  0 20  0 11  0 21  0 13]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:110.78847861112125 | Update: 0.20385617741261797
Weight: [-29.74532598  -0.69079882 -10.23219095  71.83030344  62.19984522
  13.99754062 -21.04242152   1.63350116 100.48344492 -78.44151772
 -63.21239864 -19.05589905   0.58300997 141.48285413]
------------------------------------

Run  1510
State: [49  0  0 10  0 21  0 10  0 21  0 12]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:112.48108956577398 | Update: 0.22570077601776006
Weight: [-29.60651238  -0.69079882 -10.23219095  71.85781453  62.19984522
  14.05270508 -21.04242152   1.66374417 100.48344492 -78.38371025
 -63.21239864 -19.02025831   0.58300997 141.68671031]
------------------------------------

Run  1511
State: [48  0  0 11  0 21  0  9  0 21  0 12]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:114.70829854832039 | Update: 0.4798021983336298
Weight: [-29.4558977   -0.69079882 -10.23219095  71.88827362  62.19984522
  14.11683457 -21.

Run  1574
State: [48  0  0  0  0  0  0  0  0  0 26  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:54.09883153929675 | Update: 23.745228123554803
Weight: [-48.30443545  -0.82815011 -10.23219095  58.95452532  62.19984522
  23.86854606 -21.10263091   1.02488212  89.08202374 -75.12895137
 -64.08350076 -17.72353763 -24.94406385 108.88141815]
------------------------------------

Run  1575
State: [49  0  0  0  0  0  0  0  0  0 26  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:91.44980638786718 | Update: 0.5920528433635184
Weight: [-32.78214714  -0.82815011 -10.23219095  58.95452532  62.19984522
  23.86854606 -21.10263091   1.02488212  89.08202374 -75.12895137
 -55.72863718 -17.72353763  -1.19883573 132.62664628]
------------------------------------

Run  1576
State: [50  0  0  0  0  0  0  0  0  0 25  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:91.94029632861694 | Update: -0.7536922555150483
Weight: [-32.38705835  -0.82815011 -10.23219095  58.95452532  62.19984522
  23.86854606 -21.10263

Run  1633
State: [ 9  0 12  0 20  0  6  0 23  0 11  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:784.7642537892402 | Update: -9.00088424479577
Weight: [ 42.58194648  -0.82815011  26.09790692  58.95452532 150.01583115
 155.38774708 -14.35026668 135.60902634  63.34579475 -75.12895137
  79.91847574 -17.72353763  -6.92003198 705.32571332]
------------------------------------

Run  1634
State: [ 9  0 12  0 20  0  7  0 22  0 11  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:773.4232273406644 | Update: -8.039678873426169
Weight: [ 41.47871702  -0.82815011  24.64182988  58.95452532 147.58003936
 155.38774708 -15.08574297 135.60902634  60.5423825  -75.12895137
  78.57859072 -17.72353763  -6.92003198 696.32482907]
------------------------------------

Run  1635
State: [ 9  0 12  0 20  0  8  0 22  0 11  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:765.7914060137667 | Update: 1.5651472740536292
Weight: [ 40.49330156  -0.82815011  23.34124745  58.95452532 145.40436603
 155.38774708 -15.85216

Run  1695
State: [38  0 24  0  0  7 13  0  0 13  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:513.5608062520364 | Update: -3.9665039135452957
Weight: [-18.98198495  -0.89657302 -13.8435508   58.95452532 135.61801477
 155.38774708 -22.0436266  116.70203143  42.97981875 -97.07205625
  73.53365437 -17.72353763  -3.54716513 532.82004567]
------------------------------------

Run  1696
State: [37  0 24  0  0  7 14  0  0 12  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:507.8391426792965 | Update: -1.5545259739564585
Weight: [-21.03470463  -0.89657302 -15.126877    58.95452532 135.61801477
 155.01207284 -22.74586325 116.70203143  42.97981875 -97.76834933
  73.53365437 -17.72353763  -3.54716513 528.85354176]
------------------------------------

Run  1697
State: [36  0 24  0  0  7 15  0  0 12  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:507.70123216120646 | Update: 2.1033689947846597
Weight: [-21.8180222   -0.89657302 -15.62982972  58.95452532 135.61801477
 154.86484108 -23.04

State: [ 2  0  0 19  7  0 22  0 15  0  0 16]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:701.7349427059446 | Update: 110.02252524007451
Weight: [  7.52440066  -0.89657302  -9.72216065  70.30826797 147.17551231
 128.94691109  18.9628644  108.32475078  43.826342   -97.92232473
  73.53365437  -7.85591951 116.88162713 539.52912765]
------------------------------------

Run  1759
State: [ 2  0  0 19  7  0 23  0 15  0  0 16]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:841.9621608375768 | Update: -6.47616179630711
Weight: [ 10.5211455   -0.89657302  -9.72216065  98.51930574 157.59639816
 128.94691109  51.92660924 108.32475078  66.17476181 -97.92232473
  73.53365437  15.8185942  116.88162713 649.55165289]
------------------------------------

Run  1760
State: [ 2  0  0 19  8  0 24  0 14  0  0 16]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:827.9801254039639 | Update: -7.600259367836202
Weight: [ 10.34475067  -0.89657302  -9.72216065  96.85874355 156.98300247
 128.94691109  49.89809631 108.324

Run  1822
State: [18  0  0  0  0 16  0 20  0 24 18  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:867.2831783405509 | Update: -9.380865981379316
Weight: [-2.34894439e+01  1.28800821e-01 -9.72216065e+00  1.77163579e+02
  1.35774828e+02  1.21482889e+02  1.64468420e+01  2.14396716e+02
  1.27205042e+02 -8.54008956e+01  6.80461281e+00  6.78223101e+01
 -1.17931756e+01  8.15539015e+02]
------------------------------------

Run  1823
State: [16  0  0  0  0 17  0 19  0 24 18  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:851.8864348024956 | Update: 0.4169157386778579
Weight: [-2.57890508e+01  1.28800821e-01 -9.72216065e+00  1.77163579e+02
  1.35774828e+02  1.19452083e+02  1.64468420e+01  2.11866363e+02
  1.27205042e+02 -8.84410456e+01  4.51951337e+00  6.78223101e+01
 -1.17931756e+01  8.06158149e+02]
------------------------------------

Run  1824
State: [16  0  0  0  0 17  0 18  0 24 18  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:853.0281854342674 | Update: 3.4344985032253135
Weight: [-2

  2.49227742e+01  8.26668320e+02]
------------------------------------

Run  1877
State: [31  0  6  0  0  0  0 19  9  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:902.0351254302184 | Update: 6.352595954741446
Weight: [-1.11727746e+01  1.28800821e-01 -1.02337449e+01  1.77163579e+02
  1.35774828e+02  1.02954310e+02  2.44245265e+01  2.21479806e+02
  1.23153785e+02 -9.52589316e+01 -6.38162659e+00  6.78223101e+01
  1.85979570e+01  8.20343503e+02]
------------------------------------

Run  1878
State: [31  0  7  0  0  0  0 18  9  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:905.3708171711936 | Update: -13.740911437955788
Weight: [-8.49082206e+00  1.28800821e-01 -9.71991377e+00  1.77163579e+02
  1.35774828e+02  1.02954310e+02  2.44245265e+01  2.23107651e+02
  1.23928011e+02 -9.52589316e+01 -6.38162659e+00  6.78223101e+01
  1.85979570e+01  8.26696098e+02]
------------------------------------

Run  1879
State: [31  0  7  0  0  0  0 17  8  0  0  0]
Alpha:1.0, Action:1, Reward:-1

Run  1932
State: [40  0 17  8 11 16  0 20  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:787.1300911649142 | Update: -9.158701944123209
Weight: [-5.06843989e+01  6.49904384e-02 -2.18904281e+01  1.79184933e+02
  1.39010945e+02  1.07671197e+02  4.49322873e+01  1.96059968e+02
  1.18408699e+02 -9.52589316e+01 -6.38162659e+00  6.78223101e+01
 -8.63634688e+00  7.04715586e+02]
------------------------------------

Run  1933
State: [40  0 17  8 11 17  0 19  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:770.7628411954825 | Update: -10.41852842267565
Weight: [-5.56736123e+01  6.49904384e-02 -2.39893718e+01  1.78196134e+02
  1.37647770e+02  1.05688486e+02  4.49322873e+01  1.93589539e+02
  1.18408699e+02 -9.52589316e+01 -6.38162659e+00  6.78223101e+01
 -8.63634688e+00  6.95556884e+02]
------------------------------------

Run  1934
State: [40  0 17  8 11 17  0 18  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:757.8520569538998 | Update: -4.980286605807009
Weight: [

Run  2000
State: [ 8  0  0 24  0  0  0  9  0 20  0 23]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1857.2212454418202 | Update: -21.169732135644153
Weight: [ 2.75737861e+01  6.49904384e-02  2.06750136e+02  4.25437605e+02
  3.61902412e+02  8.94288154e+01  2.07600446e+02  1.85182765e+02
  3.32180412e+02 -7.14580794e+01 -6.38162659e+00  1.07929943e+02
 -7.43424026e+00  1.68235631e+03]
------------------------------------

Run  2001
State: [ 8  0  0 24  0  0  0  8  0 20  0 23]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1831.5552547019865 | Update: -16.59297351268674
Weight: [ 2.52673389e+01  6.49904384e-02  2.06750136e+02  4.18580979e+02
  3.61902412e+02  8.94288154e+01  2.07600446e+02  1.82613161e+02
  3.32180412e+02 -7.71753164e+01 -6.38162659e+00  1.01381732e+02
 -7.43424026e+00  1.66118658e+03]
------------------------------------

Run  2002
State: [ 8  0  0 25  0  0  0  7  0 20  0 22]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1794.0859271875504 | Update: -34.00925586664039
Weigh


Run  2063
State: [39  0  0  0  0  0  0  0 25  0 12 10]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1517.7446477404646 | Update: -15.177446477404601
Weight: [-3.36609735e+01  6.49904384e-02  2.06750136e+02  3.42132103e+02
  3.61902412e+02  5.86903345e+01  1.84814540e+02  1.43823582e+02
  3.32922550e+02 -1.30483183e+02 -7.05336525e+01  2.40717900e+01
  6.19002297e+01  1.36923115e+03]
------------------------------------

Run  2064
State: [39  0  0  0  0  0  0  0 25  0 12 10]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1480.1256904948823 | Update: 31.2533208399866
Weight: [-4.17222066e+01  6.49904384e-02  2.06750136e+02  3.42132103e+02
  3.61902412e+02  5.86903345e+01  1.84814540e+02  1.43823582e+02
  3.27784330e+02 -1.30483183e+02 -7.29983840e+01  2.20306246e+01
  4.67227832e+01  1.35405370e+03]
------------------------------------

Run  2065
State: [40  0  0  0  0  0  0  0 25  0 12 10]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1520.4583027962524 | Update: 26.48334623289088
Weight: 

Run  2123
State: [ 5  0  0  0 20  0  8  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1318.3841978313283 | Update: -6.499968626502778
Weight: [-4.22427187e+01  6.49904384e-02  2.06750136e+02  3.42132103e+02
  3.45970880e+02  5.86903345e+01  1.54391439e+02  1.41046046e+02
  2.94074271e+02 -1.30483183e+02 -8.45706520e+01  1.79610920e+01
 -1.32497537e+01  1.20403046e+03]
------------------------------------

Run  2124
State: [ 5  0  0  0 21  0  9  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1313.360426201146 | Update: -11.14065420984366
Weight: [-4.26853269e+01  6.49904384e-02  2.06750136e+02  3.42132103e+02
  3.44211878e+02  5.86903345e+01  1.53683276e+02  1.41046046e+02
  2.94074271e+02 -1.30483183e+02 -8.45706520e+01  1.79610920e+01
 -1.32497537e+01  1.19753049e+03]
------------------------------------

Run  2125
State: [ 5  0  0  0 21  0 10  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1302.5653607347633 | Update: 1.7068008183259735
Weight: 


Run  2190
State: [35  0 15 18  0  0 20  0 16 10  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1088.0391755006374 | Update: -4.0063781623046
Weight: [-1.06522807e+02 -1.32366159e-01  1.86448019e+02  3.18721295e+02
  2.84759881e+02  3.80091335e+01  8.85354445e+01  1.54127837e+02
  2.57550075e+02 -1.65686986e+02 -8.45706520e+01  1.79610920e+01
 -3.40399438e+00  9.62675199e+02]
------------------------------------

Run  2191
State: [34  0 15 19  0  0 21  0 15 10  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1084.841311727967 | Update: -8.280827945234705
Weight: [-1.08432476e+02 -1.32366159e-01  1.85637877e+02  3.17748081e+02
  2.84759881e+02  3.80091335e+01  8.74442198e+01  1.54127837e+02
  2.56682023e+02 -1.66227980e+02 -8.45706520e+01  1.79610920e+01
 -7.41037254e+00  9.58668821e+02]
------------------------------------

Run  2192
State: [33  0 15 19  0  0 22  0 15 10  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1074.2461499813799 | Update: -1.8379457015555545
Weight: [


Run  2251
State: [18  0  0  0 10 15  0 22  0  0 18 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:757.050467405907 | Update: -8.61885138146613
Weight: [-1.78354934e+02 -2.95972656e-01  1.34573556e+02  2.40382532e+02
  2.79221178e+02  3.21605335e+01  6.98895041e+01  7.77314164e+01
  2.41536137e+02 -1.71630153e+02 -1.01497571e+02 -1.16298548e+00
  3.94822083e+00  7.55615417e+02]
------------------------------------

Run  2252
State: [18  0  0  0 10 15  0 21  0  0 18 25]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:744.4026028451383 | Update: -4.3840914478511195
Weight: [-1.80467742e+02 -2.95972656e-01  1.34573556e+02  2.40382532e+02
  2.78054974e+02  3.04113066e+01  6.98895041e+01  7.51741238e+01
  2.41536137e+02 -1.71630153e+02 -1.03597050e+02 -4.06078861e+00
 -4.67063056e+00  7.46996565e+02]
------------------------------------

Run  2253
State: [18  0  0  0 11 16  0 20  0  0 18 25]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:737.386389409226 | Update: -8.470974777664651
Weight: [-1


Run  2312
State: [29  0  0  0  0  0  0 17  8 23  0 14]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:516.8061542406504 | Update: -2.75754234601456
Weight: [-2.22983456e+02 -3.51226573e-01  1.34573556e+02  2.40382532e+02
  2.14625148e+02  3.47688471e+01  1.22111690e+01  5.17292706e+01
  2.12496394e+02 -1.72066192e+02 -1.18042664e+02 -3.35080070e+01
 -3.82421427e+00  6.29987163e+02]
------------------------------------

Run  2313
State: [29  0  0  0  0  0  0 16  8 23  0 13]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:508.8137160932056 | Update: -5.543142824025381
Weight: [-2.24072532e+02 -3.51226573e-01  1.34573556e+02  2.40382532e+02
  2.14625148e+02  3.47688471e+01  1.22111690e+01  5.10970343e+01
  2.12197659e+02 -1.72922620e+02 -1.18042664e+02 -3.40272011e+01
 -3.82421427e+00  6.27229621e+02]
------------------------------------

Run  2314
State: [30  0  0  0  0  0  0 15  7 22  0 13]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:497.1849644020259 | Update: 0.18411244473082888
Weight: [-

Run  2374
State: [22  0  0 11  0 22  0  7  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:520.7587451940844 | Update: -8.39436846878914
Weight: [-2.48260788e+02 -3.44552774e-01  1.34573556e+02  2.33980471e+02
  2.14625148e+02  2.61100529e+01  2.28094160e+01  3.64383879e+01
  2.10012889e+02 -1.82007600e+02 -1.18042664e+02 -4.18783136e+01
 -8.84790564e+00  5.52281498e+02]
------------------------------------

Run  2375
State: [22  0  0 11  0 23  0  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:514.1085521786257 | Update: -1.7807892455153365
Weight: [-2.50775851e+02 -3.44552774e-01  1.34573556e+02  2.32734337e+02
  2.14625148e+02  2.36113409e+01  2.28094160e+01  3.56458968e+01
  2.10012889e+02 -1.82007600e+02 -1.18042664e+02 -4.18783136e+01
 -8.84790564e+00  5.43887129e+02]
------------------------------------

Run  2376
State: [22  0  0 12  0 24  0  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:511.9451396655111 | Update: -5.219451396655131
Weight: [

Run  2439
State: [23  0 12  0 20  0  0  0  0  0 25  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:416.11741001951725 | Update: -0.1017811861371456
Weight: [-2.70750269e+02 -3.44552774e-01  1.14444874e+02  1.95230700e+02
  1.80220895e+02  5.97702275e+01  2.64490493e+01  2.53148533e+01
  2.16270023e+02 -1.82007600e+02 -1.47655370e+02 -4.18783136e+01
 -1.66922651e+00  4.79436010e+02]
------------------------------------

Run  2440
State: [23  0 12  0 20  0  6  0  0  0 24  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:413.0170167831151 | Update: -5.428268127839431
Weight: [-2.70782150e+02 -3.44552774e-01  1.14428409e+02  1.95230700e+02
  1.80193352e+02  5.97702275e+01  2.64490493e+01  2.53148533e+01
  2.16270023e+02 -1.82007600e+02 -1.47689805e+02 -4.18783136e+01
 -1.66922651e+00  4.79334229e+02]
------------------------------------

Run  2441
State: [24  0 12  0 20  0  8  0  0  0 24  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:402.16159670330563 | Update: -0.6349995990911452
Weight:


Run  2500
State: [19  0 24  0  0  0 13  0 20 13 10  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:590.497579871393 | Update: -5.607686092398467
Weight: [-249.54634651    0.62818033  147.57605234  195.23070015  145.59732692
   67.37971483   -9.69112596   48.95039911  237.52640629 -144.75615251
 -132.35162206  -41.87831357   -9.50003325  587.63254109]
------------------------------------

Run  2501
State: [19  0 24  0  0  0 14  0 19 12  9  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:576.3357070552661 | Update: -3.0230519067313253
Weight: [-250.99737339    0.62818033  145.7617366   195.23070015  145.59732692
   67.37971483  -10.68392032   48.95039911  236.00765156 -145.74054408
 -133.1105025   -41.87831357   -9.50003325  582.02485499]
------------------------------------

Run  2502
State: [20  0 24  0  0  0 15  0 18 12  9  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:572.1982408484594 | Update: -5.97533180603034
Weight: [-251.77960861    0.62818033  144.78365569  195.23070015  14

Run  2557
State: [25  0  0  0  0  9 17  0  0  0  0 17]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:349.6139104242955 | Update: 0.9017308270233002
Weight: [-293.12059433    0.54936414  126.6021095   195.23070015  145.59732692
   67.40096661  -27.79028541   40.42245968  221.09642961 -153.37389215
 -138.53212434  -65.08659293   -0.78439003  458.90596264]
------------------------------------

Run  2558
State: [24  0  0  0  0  9 18  0  0  0  0 17]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:367.1909197726569 | Update: 12.69134149727887
Weight: [-2.92813583e+02  5.49364141e-01  1.26602110e+02  1.95230700e+02
  1.45597327e+02  6.75107723e+01 -2.75815202e+01  4.04224597e+01
  2.21096430e+02 -1.53373892e+02 -1.38532124e+02 -6.48804325e+01
  1.17340796e-01  4.59807693e+02]
------------------------------------

Run  2559
State: [23  0  0  0  6 10 19  0  0  0  0 17]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:388.4271648110038 | Update: 1.6838009805309753
Weight: [-2.88665409e+02  5.49364141e-01 


Run  2607
State: [ 3  0  0 17 22  0 13  0 19  0  0  7]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:866.7999104554958 | Update: 1.7152451595604816
Weight: [-219.72938876    1.24710322  126.6021095   216.17658012  176.56311019
  127.04966646  -18.94707681  109.57839524  194.52625592 -153.37389215
 -138.53212434  -10.57149387   -8.51026087  726.04923146]
------------------------------------

Run  2608
State: [ 3  0  0 17 23  0 15  0 19  0  0  7]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:866.3177859193922 | Update: -11.403374934134717
Weight: [-219.65931013    1.24710322  126.6021095   216.57009302  177.07370203
  127.04966646  -18.64340686  109.57839524  194.96757607 -153.37389215
 -138.53212434  -10.41001945   -6.79501571  727.76447662]
------------------------------------

Run  2609
State: [ 3  0  0 17 23  0 15  0 18  0  0  7]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:851.6130282725637 | Update: -2.006735640669717
Weight: [-220.12521025    1.24710322  126.6021095   213.95392172  1

Alpha:1.0, Action:1, Reward:0.0 | Max_value:358.63648955517124 | Update: 4.798627665019694
Weight: [-250.26113978    1.24710322  126.6021095   136.01749146  140.29794242
  127.04966646  -50.6974355    96.64204842  171.5267941  -186.05384308
 -141.45861077  -13.78363734   -6.47065264  518.26579948]
------------------------------------

Run  2654
State: [21  0  0  0  0  0  0  0  0 16 25  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:364.62283380152223 | Update: -3.746228338015271
Weight: [-248.88875937    1.24710322  126.6021095   136.01749146  140.29794242
  127.04966646  -50.6974355    96.64204842  171.5267941  -185.01708393
 -139.7701924   -13.78363734   -1.67202497  523.06442714]
------------------------------------

Run  2655
State: [21  0  0  0  0  0  0  0  0 16 25  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:359.0716458061116 | Update: -2.813567683923054
Weight: [-249.96015952    1.24710322  126.6021095   136.01749146  140.29794242
  127.04966646  -50.6974355    96.64204842  


Run  2704
State: [27  0 10  0  0 25  0  0  0  0 13  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:462.2325418084799 | Update: -6.466264379336451
Weight: [-251.08887425    1.25824315  122.29955803  136.01749146  140.29794242
  123.83999666  -32.47147471   94.31262307  171.5267941  -180.62096431
 -138.25372267  -13.78363734   -6.18596657  522.24954951]
------------------------------------

Run  2705
State: [28  0 10  0  0 26  0  0  0  0 13  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:510.7125852860364 | Update: 58.01891610900009
Weight: [-253.46656508    1.25824315  121.42784931  136.01749146  140.29794242
  121.65274398  -32.47147471   94.31262307  171.5267941  -180.62096431
 -139.39131454  -13.78363734   -6.18596657  515.78328513]
------------------------------------

Run  2706
State: [29  0 10  0  0 26  0  0 26  0 13  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:587.9483869914195 | Update: -57.26350007233043
Weight: [-231.34245532    1.25824315  129.2493042   136.01749146  140

Run  38
State: [16  0  8  9 13 19  0 14  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:548.2909074613611 | Update: -3.410409148900385
Weight: [-263.27030521    1.25824315  115.46130968  135.62005819  143.3427394
  124.38034057  -11.40471216   99.40360417  144.53071817 -180.62096431
 -142.53245065  -13.78363734   -4.98795472  498.60278527]
------------------------------------

Run  39
State: [16  0  8 10 13 20  0 13  0  0  0  0]
Alpha:1.0, Action:1, Reward:-1 | Max_value:543.4518902661221 | Update: -2.1620075503760745
Weight: [-264.01343493    1.25824315  115.09350741  135.20583649  142.74284497
  123.50361116  -11.40471216   98.75966792  144.53071817 -180.62096431
 -142.53245065  -13.78363734   -4.98795472  495.19237612]
------------------------------------

Run  40
State: [16  1  8 10 14 20  0 11  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:546.0841628347915 | Update: 6.937169287717438
Weight: [-264.48453736    1.25824315  114.86034146  134.91406621  142.3625456

 -142.53245065  -13.78363734   -3.48384506  486.04110451]
------------------------------------

Run  96
State: [ 2  0 20 25  0  0  0  0  0 19  0  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:489.28219339125434 | Update: -2.49641096695629
Weight: [-251.65333753    1.20034689  101.40588608  117.68278834  143.51540902
  134.59564895  -44.00434201   85.23891421  157.84518154 -202.26331967
 -142.53245065  -13.78363734   -3.48384506  480.9849242 ]
------------------------------------

Run  97
State: [ 2  0 20 25  0  0  0  0  0 19  0  0]
Alpha:0.3333333333333333, Action:0, Reward:-0.1 | Max_value:490.4706510488902 | Update: -0.22934226884007103
Weight: [-251.72133366    1.20034689  100.73281023  116.84054043  143.51540902
  134.59564895  -44.00434201   85.23891421  157.84518154 -202.90380697
 -142.53245065  -13.78363734   -3.48384506  478.48851324]
------------------------------------

Run  98
State: [ 2  0 20 26  0  0  0  0  0 18  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:491.540

Run  132
State: [19  0  0  0  0 15  0 21  0  7 18 25]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:452.75555056027144 | Update: 25.780307730719585
Weight: [-233.11993884    1.16896781   87.99984924   90.45007389  143.51540902
  148.31152706  -32.32489467  103.81684764  157.84518154 -213.9641897
 -139.6455109    16.97409788   25.22636387  471.69952416]
------------------------------------

Run  133
State: [18  0  0  0  0 16  0 21  0  7 18 25]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:508.12706202908214 | Update: 40.12605728979668
Weight: [-226.44910904    1.16896781   87.99984924   90.45007389  143.51540902
  153.54373322  -32.32489467  111.11840912  157.84518154 -211.52734991
 -133.36564644   25.64187193   25.22636387  497.47983189]
------------------------------------

Run  134
State: [17  0  0  0  0 16  0 19  0  0 18 25]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:565.7331120248806 | Update: 22.962147170101503
Weight: [-216.61268697    1.16896781   87.99984924   90.45007389  143.

Alpha:1.0, Action:0, Reward:-0.1 | Max_value:516.5598095175994 | Update: -8.212090581674772
Weight: [-214.19536413    1.16896781   87.99984924   90.45007389  143.51540902
  154.41358004  -33.05684143  126.47922643  141.99476672 -207.73450252
 -121.32610643   42.63995629   -8.89978322  544.57608074]
------------------------------------

Run  169
State: [19  0  0  0  0  0 11  0 20  0 10 18]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:505.40865228826516 | Update: -6.084923175377014
Weight: [-216.32029837    1.16896781   87.99984924   90.45007389  143.51540902
  154.41358004  -34.17521265  126.47922643  139.6594436  -207.73450252
 -122.43743735   40.54156339   -8.89978322  536.36399016]
------------------------------------

Run  170
State: [19  0  0  0  0  0 13  0 20  0 10 18]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:494.1944397746871 | Update: 0.3117053081206791
Weight: [-217.89481359    1.16896781   87.99984924   90.45007389  143.51540902
  154.41358004  -35.0867617   126.47922643  1


Run  220
State: [34  0  0  0 22  0 12  0  0  0  0  8]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:502.905083105463 | Update: 6.593169276456706
Weight: [-212.27228302    0.95578207   87.99984924   90.45007389  136.66745706
  154.41358004  -13.19078544  126.70899206  151.88007386 -207.73450252
 -117.54392972   47.11665072   -9.185031    545.87654205]
------------------------------------

Run  221
State: [34  0  0  0 22  6 13  0  0  0  0  7]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:510.8286074637396 | Update: 3.1517188590783007
Weight: [-209.21939198    0.95578207   87.99984924   90.45007389  138.63010272
  154.41358004  -12.11330914  126.70899206  151.88007386 -207.73450252
 -117.54392972   47.82600581   -9.185031    552.46971132]
------------------------------------

Run  222
State: [35  0  0  0 22  7 14  0  0  0  0  7]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:516.8448870545847 | Update: -3.5373387520634196
Weight: [-207.76002488    0.95578207   87.99984924   90.45007389  139.5

Alpha:1.0, Action:1, Reward:50 | Max_value:559.5078349541265 | Update: 216.48849204089902
Weight: [-218.39362801    0.95578207   87.99984924   93.91966825  138.89336016
  143.08051742  -14.29756361  128.81617041  141.72739397 -177.89729349
 -117.54392972   46.53203156   -4.66501079  541.1101793 ]
------------------------------------

Run  280
State: [ 2  0  0 17  0  0 15  0 18 12  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1032.0130762878282 | Update: -9.952725640184326
Weight: [ -44.44340538    0.95578207   87.99984924  143.58662948  138.89336016
  143.08051742   26.97819895  128.81617041  197.42831411 -142.81751022
 -117.54392972   46.53203156  211.82348125  757.59867134]
------------------------------------

Run  281
State: [ 2  0  0 17  0  0 16  0 18 12  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1010.2807111190805 | Update: -10.102807111190828
Weight: [ -44.71449332    0.95578207   87.99984924  141.30326729  138.89336016
  143.08051742   24.94506709  128.81617041  19


Run  337
State: [25  0 14  0  0  8 17  0  0  0 22  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1390.9191848390876 | Update: -0.4718295276572917
Weight: [-5.44191970e+01  8.11314964e-01  6.12234443e+01  2.67714323e+02
  1.38893360e+02  1.41723354e+02  1.98988261e+02  1.97897514e+02
  3.48695651e+02 -4.00740023e+01 -1.23695442e+02  4.65320316e+01
 -8.06874814e+00  1.36794139e+03]
------------------------------------

Run  338
State: [24  0 14  0  0  9 18  0  0  0 22  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1391.97907762959 | Update: -3.7033586817685773
Weight: [-5.45798405e+01  8.11314964e-01  6.11343949e+01  2.67714323e+02
  1.38893360e+02  1.41672282e+02  1.98879025e+02  1.97897514e+02
  3.48695651e+02 -4.00740023e+01 -1.23835916e+02  4.65320316e+01
 -8.54057767e+00  1.36746957e+03]
------------------------------------

Run  339
State: [24  0 14  0  0  9 18  0  0  0 21  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1401.889330331866 | Update: 0.6130605801247384
Weight: [-5

Run  397
State: [35  0 25  0 25  0 21  0 15  0  7  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1078.5866927483112 | Update: 2.182350851986257
Weight: [-1.64276005e+02  8.11314964e-01 -2.47549636e+01  2.67714323e+02
  7.27153273e+01  1.04737883e+02  1.65735041e+02  1.65819570e+02
  3.07870271e+02 -4.00740023e+01 -1.86356007e+02  4.65320316e+01
 -1.32820967e+01  1.04866784e+03]
------------------------------------

Run  398
State: [36  0 26  0 25  0 22  0 15  0  7  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1083.4331313696505 | Update: -9.584383772112915
Weight: [-1.63235772e+02  8.11314964e-01 -2.40194639e+01  2.67714323e+02
  7.34535537e+01  1.04737883e+02  1.66359172e+02  1.65819570e+02
  3.08313563e+02 -4.00740023e+01 -1.86149273e+02  4.65320316e+01
 -1.10997458e+01  1.05085019e+03]
------------------------------------

Run  399
State: [36  0 26  0 26  0 24  0 14  0  7  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1067.833238684127 | Update: -10.778332386841157
Weight: [-1

  1.55405583e+02  2.33068992e+03]
------------------------------------

Run  450
State: [ 3  0  0  0  0 12 25  0  0  9  0 16]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:3003.232626370568 | Update: 44.892019247213284
Weight: [-1.64986960e+01  8.11314964e-01 -4.53381499e+01  2.67714323e+02
  5.21855190e+01  2.45043245e+02  4.93405015e+02  1.30951101e+02
  2.98654752e+02  1.67177798e+02 -1.82550334e+02  3.76638384e+02
  1.55405583e+02  2.46444209e+03]
------------------------------------

Run  451
State: [ 5  0  0  6  0 13 26 26  0  8  0 16]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:2918.8368917508737 | Update: -212.8548994617595
Weight: [-1.46645726e+01  8.11314964e-01 -4.53381499e+01  2.67714323e+02
  5.21855190e+01  2.52332043e+02  5.08689169e+02  1.30951101e+02
  2.98654752e+02  1.72633524e+02 -1.82550334e+02  3.86298195e+02
  2.00297603e+02  2.50933411e+03]
------------------------------------

Run  452
State: [ 6  0  0  7  0 13  0 25  0  8  0 15]
Alpha:1.0, Action:1, Reward:0.0 |

Run  504
State: [27  0  0 20  8  0 25  0 13  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2227.524174915176 | Update: -153.59123443753924
Weight: [-6.06562849e+01  8.11314964e-01 -4.53381499e+01  2.26219255e+02
  4.67819019e+01  1.67201572e+02  3.91624170e+02  4.23567719e+00
  3.26092708e+02  1.40802533e+02 -1.82550334e+02  3.16928344e+02
 -2.04478476e+01  2.12418140e+03]
------------------------------------

Run  505
State: [27  0  0 20  9  0  0 26 13  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2031.0856188334697 | Update: -24.51769038759153
Weight: [-1.17132861e+02  8.11314964e-01 -4.53381499e+01  1.84763934e+02
  3.01561457e+01  1.67201572e+02  3.39331755e+02  4.23567719e+00
  2.99054137e+02  1.40802533e+02 -1.82550334e+02  3.16928344e+02
 -2.04478476e+01  1.97059016e+03]
------------------------------------

Run  506
State: [27  0  0 20  9  0  0 25 12  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1997.5619348740297 | Update: -20.016795310227053
Weight: 

Run  567
State: [53  0  0  0  0 15  0 21  0 25 18  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1294.9918089629064 | Update: -16.01753411163645
Weight: [-3.32320169e+02  8.11314964e-01 -4.53381499e+01  5.80397070e+01
 -9.16544840e+01  1.52452963e+02  2.79746177e+02 -2.94343498e+01
  2.69028429e+02  1.30347823e+02 -2.06740548e+02  3.16928344e+02
 -1.95519237e+01  1.52157915e+03]
------------------------------------

Run  568
State: [53  0  6  0  0 16  0 20  0 24 18  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1267.8648035127064 | Update: -10.912947136849425
Weight: [-3.43881549e+02  8.11314964e-01 -4.53381499e+01  5.80397070e+01
 -9.16544840e+01  1.49202147e+02  2.79746177e+02 -3.39708748e+01
  2.69028429e+02  1.24940572e+02 -2.10642284e+02  3.16928344e+02
 -1.95519237e+01  1.50556162e+03]
------------------------------------

Run  569
State: [53  0  7  0  0 17  0 19  0 24 18  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1243.798975694471 | Update: 2.821270963447887
Weight: [-3


Run  626
State: [ 2  0 18  0  0  0  0 13  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:2023.5955426066957 | Update: 3.485496111264638
Weight: [-1.08930234e+02  9.06894539e-01 -1.12164603e+01  5.80397070e+01
 -9.16544840e+01  4.96298229e+02  1.93239840e+02 -1.24310606e+02
  8.81427927e+01  3.28299392e+02 -4.48269965e+01  3.16928344e+02
 -2.05195590e+01  2.04787753e+03]
------------------------------------

Run  627
State: [ 2  0 19  0  0  0  0 11  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2029.0592271326002 | Update: -18.722284011653755
Weight: [-1.08835297e+02  9.06894539e-01 -1.03706849e+01  5.80397070e+01
 -9.16544840e+01  4.96298229e+02  1.93239840e+02 -1.23699500e+02
  8.81427927e+01  3.28299392e+02 -4.48269965e+01  3.16928344e+02
 -1.70340629e+01  2.05136303e+03]
------------------------------------

Run  628
State: [ 2  0 19  0  0  0  0 10  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2010.4259284827137 | Update: -18.498490986385605
Weight: 


Run  689
State: [22  0  0 12 16 24  0  0  0  0  0 22]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:2056.6787723429397 | Update: 2.3571715317044664
Weight: [-1.33014399e+02  9.06894539e-01 -2.33465283e+01  4.40962575e+01
 -1.00754993e+02  4.76815743e+02  2.28698623e+02 -1.25122222e+02
  8.81427927e+01  3.28299392e+02 -4.48269965e+01  3.04499184e+02
 -2.06411931e+01  1.86163659e+03]
------------------------------------

Run  690
State: [23  0  0 12 16 25  0  0  0  0  0 22]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2055.0340778239506 | Update: -25.51098033804169
Weight: [-1.32308160e+02  9.06894539e-01 -2.33465283e+01  4.44779875e+01
 -1.00244680e+02  4.77581177e+02  2.28698623e+02 -1.25122222e+02
  8.81427927e+01  3.28299392e+02 -4.48269965e+01  3.05196602e+02
 -1.82840216e+01  1.86399377e+03]
------------------------------------

Run  691
State: [23  0  0 13 17 25  0  0  0  0  0 21]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2026.319318488366 | Update: -14.018161079657375
Weight: [


Run  750
State: [38  0  0  0  0  0  0  0  0 16 25  9]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1320.1267736331902 | Update: -10.197851309030739
Weight: [-2.99790443e+02  1.08518588e+00 -2.33465283e+01 -4.99219324e+01
 -1.52874820e+02  4.02836635e+02  2.39822819e+02 -1.71663653e+02
 -1.37882437e+01  2.66622306e+02 -4.06552860e+01  1.94313309e+02
  3.10341643e+00  1.40479962e+03]
------------------------------------

Run  751
State: [38  0  0  0  0  0  0  0  0 16 25  9]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:1301.834506062906 | Update: -6.792732838714301
Weight: [-3.05067970e+02  1.08518588e+00 -2.33465283e+01 -4.99219324e+01
 -1.52874820e+02  4.02836635e+02  2.39822819e+02 -1.71663653e+02
 -1.37882437e+01  2.64419027e+02 -4.41054390e+01  1.93078981e+02
  3.10341643e+00  1.39460177e+03]
------------------------------------

Run  752
State: [38  0  0  0  0  0  0  0  0 15 25  9]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1307.1335234236537 | Update: 2.013928727657458
Weight: [-


Run  810
State: [49  0  0  0  0  0 10  0 21  0 10  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:805.6530432417701 | Update: -5.470338159158814
Weight: [-447.11905755    1.22644187  -23.34652835  -49.92193242 -152.87481979
  305.65406473  178.39085525 -189.38997264  -18.96063959  234.17492989
 -122.14779509  188.96611824   -9.75821219 1098.96613222]
------------------------------------

Run  811
State: [49  0  0  0  0  0 11  0 20  0 10  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:799.5328578067938 | Update: -5.676041672319911
Weight: [-450.76952445    1.22644187  -23.34652835  -49.92193242 -152.87481979
  305.65406473  177.64587214 -189.38997264  -20.51627358  234.17492989
 -122.88808843  188.96611824   -9.75821219 1093.49579406]
------------------------------------

Run  812
State: [49  0  0  0  0  0 12  0 20  0 10  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:788.8591874255562 | Update: 0.05887821213161715
Weight: [-454.55726146    1.22644187  -23.34652835  -49.92193242 -152

Alpha:1.0, Action:0, Reward:-0.1 | Max_value:3921.527452220786 | Update: -37.6634833230537
Weight: [-1.30855144e+02  1.22644187e+00  1.50771073e+02 -4.99219324e+01
  2.42582809e+02  2.97126342e+02  1.21289413e+02  5.44271267e+02
  2.46230383e+02  2.34174930e+02 -1.35101779e+02  1.88966118e+02
 -3.60481079e+01  3.78415376e+03]
------------------------------------

Run  871
State: [21  0 14  0 24  9 19  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:3876.085812421666 | Update: 0.8505756975528129
Weight: [-1.41626688e+02  1.22644187e+00  1.43662761e+02 -4.99219324e+01
  2.30351957e+02  2.92539979e+02  1.12056787e+02  5.44271267e+02
  2.46230383e+02  2.34174930e+02 -1.35101779e+02  1.88966118e+02
 -3.60481079e+01  3.74649028e+03]
------------------------------------

Run  872
State: [23  0 15  0 24 10 20  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:3887.346943425943 | Update: -28.83523115166372
Weight: [-1.41383428e+02  1.22644187e+00  1.43823292e+02 -4.99219324

Alpha:1.0, Action:1, Reward:0.0 | Max_value:2922.2503136937253 | Update: 3.81409406110879
Weight: [-2.99250793e+02 -1.22713891e+00 -6.14728740e+01 -1.77986110e+02
  2.61346173e+02  1.85802921e+02  9.74980798e+01  4.49331208e+02
  1.24373919e+02  4.74475439e+01 -1.35101779e+02  1.88966118e+02
 -3.36772970e+01  3.01187722e+03]
------------------------------------

Run  924
State: [21  0 25 18  0  0 20  0 16 10  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2925.360743721793 | Update: -31.566025422118855
Weight: [-2.98159984e+02 -1.22713891e+00 -6.01874413e+01 -1.77059605e+02
  2.61346173e+02  1.85802921e+02  9.85369318e+01  4.49331208e+02
  1.25200309e+02  4.80140765e+01 -1.35101779e+02  1.88966118e+02
 -2.98632029e+01  3.01569131e+03]
------------------------------------

Run  925
State: [21  0 26 19  0  0 22  0 15 10  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2880.977140453797 | Update: -28.909771404537878
Weight: [-3.07187689e+02 -1.22713891e+00 -7.08258788e+01 -1.847275


Run  972
State: [43  0  0  0  0  8 16  0  0  0 22 18]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:2158.037989273479 | Update: -5.330927287731811
Weight: [-5.11411227e+02 -1.22713891e+00 -5.83424409e+01 -3.16586768e+02
  2.61346173e+02  1.84144461e+02  9.74352917e+01  3.22228593e+02
  7.87368714e+01  3.36972328e+01 -1.53566472e+02  1.90548944e+01
 -2.34705120e+01  2.46468977e+03]
------------------------------------

Run  973
State: [44  0  0  0  0  8 16  0  0  0 22 17]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:2146.5407537433994 | Update: 2.0617717898851424
Weight: [-5.14533058e+02 -1.22713891e+00 -5.83424409e+01 -3.16586768e+02
  2.61346173e+02  1.83567432e+02  9.62736960e+01  3.22228593e+02
  7.87368714e+01  3.36972328e+01 -1.55153611e+02  1.77644041e+01
 -2.88014393e+01  2.45935884e+03]
------------------------------------

Run  974
State: [45  0  0  0  0  9 17  0  0  0 22 17]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:2151.0986152425685 | Update: 6.5459118684880195
Weight: [-5.


Run  1020
State: [52  0  0  0 21  0 10  0 21  0 10  8]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1580.6813070711744 | Update: -5.893571071802626
Weight: [-7.54122344e+02 -1.41757359e+00 -5.83424409e+01 -3.16586768e+02
  1.87433673e+02  9.00859578e+01  8.54076180e+01  2.56848402e+02
  3.81119653e+01  3.36972328e+01 -2.32635624e+02 -4.29043534e+01
 -1.95364374e+01  2.08472621e+03]
------------------------------------

Run  1021
State: [53  0  0  0 21  0 11  0 20  0 10  8]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1563.8876455315208 | Update: 3.1297442311772556
Weight: [-7.58296032e+02 -1.41757359e+00 -5.83424409e+01 -3.16586768e+02
  1.85759028e+02  9.00859578e+01  8.46049965e+01  2.56848402e+02
  3.64359740e+01  3.36972328e+01 -2.33433192e+02 -4.35384392e+01
 -2.54300084e+01  2.07883264e+03]
------------------------------------

Run  1022
State: [54  0  0  0 22  0 12  0 20  0 10  8]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1570.501767884503 | Update: -14.646431554828496
Weight:

Run  1065
State: [14  0  0  0  0  0  0  0  0 15  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:3852.516660507461 | Update: 20.04773952981168
Weight: [-2.78446121e+02 -1.41757359e+00 -5.83424409e+01 -3.16586768e+02
  7.32876785e+02  9.00859578e+01  6.15470590e+02  3.27554965e+02
  3.79647968e+02  2.02826878e+02 -9.08383647e+01 -4.92547721e+01
 -4.22411217e+01  3.80585053e+03]
------------------------------------

Run  1066
State: [14  0  0  0  0  0  7  0  0 15  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:3872.223763149219 | Update: 1.6269767922240135
Weight: [-2.74623761e+02 -1.41757359e+00 -5.83424409e+01 -3.16586768e+02
  7.32876785e+02  9.00859578e+01  6.15470590e+02  3.27554965e+02
  3.79647968e+02  2.06887546e+02 -9.08383647e+01 -4.92547721e+01
 -4.22411217e+01  3.82589827e+03]
------------------------------------

Run  1067
State: [16  0  0  0  0  0  8  0  0 14  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:3882.381486662716 | Update: -30.539858889939296
Weight: [-

Run  1124
State: [23  0 13 16  0  0 11  0 20  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2956.2033024340294 | Update: -22.991696959971705
Weight: [-4.75288092e+02 -9.34300804e-02 -1.03977540e+02 -3.93710346e+02
  7.32876785e+02 -1.98599710e+01  4.89796256e+02  1.92074484e+02
  3.40239825e+02  1.47489346e+02 -9.08383647e+01 -4.92547721e+01
 -3.09290884e+01  3.03612028e+03]
------------------------------------

Run  1125
State: [23  0 13 16  0  0 12  0 20  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:2929.028664976032 | Update: 3.7391215954266954
Weight: [-4.82489825e+02 -9.34300804e-02 -1.08006865e+02 -3.98674839e+02
  7.32876785e+02 -1.98599710e+01  4.86351995e+02  1.92074484e+02
  3.34012881e+02  1.47489346e+02 -9.08383647e+01 -4.92547721e+01
 -3.09290884e+01  3.01312858e+03]
------------------------------------

Run  1126
State: [23  0 13 16  0  0 13  0 19  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2935.0549531470438 | Update: -28.1881547038015
Weight: 

Run  1189
State: [38  0 25  0 25 11 21  0  0  0 21  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:2586.0734758770395 | Update: -2.657487338982264
Weight: [-6.33799762e+02 -1.09379317e+00 -1.97443645e+02 -4.43543360e+02
  6.62009836e+02 -2.64229048e+01  4.05098955e+02  1.45777184e+02
  3.11041066e+02  1.47489346e+02 -9.08707441e+01 -4.92547721e+01
 -2.03480816e+01  2.66772498e+03]
------------------------------------

Run  1190
State: [38  0 26  0 25 11 22  0  0  0 21  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:2579.681226551024 | Update: -4.695062330431483
Weight: [-6.35175048e+02 -1.09379317e+00 -1.98339276e+02 -4.43543360e+02
  6.61110885e+02 -2.68184257e+01  4.04338939e+02  1.45777184e+02
  3.11041066e+02  1.47489346e+02 -9.16259757e+01 -4.92547721e+01
 -2.30055690e+01  2.66506749e+03]
------------------------------------

Run  1191
State: [39  0 26  0 25 11 23  0  0  0 20  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:2562.6201372715655 | Update: -6.981283162276668
Weight: [-

Run  1249
State: [36  0  0  0  0  0  0 25 12  8  0 15]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1992.8205797301346 | Update: -31.146150316151306
Weight: [-8.16368691e+02 -1.32968480e+00 -2.92453861e+02 -4.43543360e+02
  5.66489007e+02 -2.86655766e+01  2.51430797e+02  1.87783281e+02
  2.74882377e+02  6.94229999e+01 -1.54258612e+02 -1.33271253e+02
 -5.04673003e+01  2.31558644e+03]
------------------------------------

Run  1250
State: [37  0  0  0  0  0  0 25 12  8  0 15]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1934.2687635853977 | Update: 17.407746175159218
Weight: [-8.31638902e+02 -1.32968480e+00 -2.92453861e+02 -4.43543360e+02
  5.66489007e+02 -2.86655766e+01  2.51430797e+02  1.77281747e+02
  2.69821106e+02  6.60583862e+01 -1.54258612e+02 -1.39554369e+02
 -5.04673003e+01  2.28444029e+03]
------------------------------------

Run  1251
State: [38  0  0  0  0  0  0 24 12  8  0 15]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1938.7307759045038 | Update: -7.08398908777167
Weight: 

Run  1301
State: [ 2  0  0  0  8 11 23  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:4677.5576762226865 | Update: -34.58448986174608
Weight: [-3.24825008e+02 -1.32968480e+00 -2.92453861e+02 -4.43543360e+02
  7.15007957e+02  1.96749606e+02  6.99706444e+02  1.37196143e+02
  2.54923422e+02  5.94672942e+01 -1.54258612e+02 -1.86378206e+02
  7.81858169e+02  3.56650839e+03]
------------------------------------

Run  1302
State: [ 2  0  0  0  8 12 24  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:4612.9628929642295 | Update: 710.4255401212686
Weight: [-3.25767005e+02 -1.32968480e+00 -2.92453861e+02 -4.43543360e+02
  7.11264297e+02  1.91602305e+02  6.88873625e+02  1.37196143e+02
  2.54923422e+02  5.94672942e+01 -1.54258612e+02 -1.86378206e+02
  7.47273679e+02  3.53192390e+03]
------------------------------------

Run  1303
State: [ 2  0  0  0  8 12 25  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:5410.80594429744 | Update: -73.32585179234593
Weight: [-3

 -3.02459871e+01  3.46433690e+03]
------------------------------------

Run  1350
State: [ 3  0  0 18 24  0 20  0 16  0  0  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:3751.843887373264 | Update: -10.469791971762788
Weight: [-3.49144391e+02 -3.11478203e+00 -2.92453861e+02 -5.45948200e+02
  6.41891990e+02  1.83344713e+02  7.43952962e+02  1.07813810e+02
  1.57888349e+02  5.94672942e+01 -1.54258612e+02 -1.86378206e+02
 -3.02459871e+01  3.43676005e+03]
------------------------------------

Run  1351
State: [ 3  0  0 18 25  0 21  0 15  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:3741.0030465030873 | Update: -34.81935278385754
Weight: [-3.49572148e+02 -3.11478203e+00 -2.92453861e+02 -5.48491481e+02
  6.38492027e+02  1.83344713e+02  7.41101285e+02  1.07813810e+02
  1.55619885e+02  5.94672942e+01 -1.54258612e+02 -1.86378206e+02
 -3.02459871e+01  3.42629026e+03]
------------------------------------

Run  1352
State: [ 3  0  0 19 25  0 22  0 15  0  0  0]
Alpha:1.0, Action:1, Reward

Run  1408
State: [27  0 15  0  0 10 21  0  0 10 21  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:2448.1449351356464 | Update: 13.010973223977999
Weight: [-444.88542166   -3.11478203 -324.11685642 -714.40417469  440.4251977
  172.82324356  516.87249013 -119.69346001   37.60037615 -195.97085306
 -131.58348686 -186.37820556  -32.39310122 2564.82708127]
------------------------------------

Run  1409
State: [28  0 15  0  0 11 22  0  0 10 20  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2485.291351858149 | Update: -5.788717548425666
Weight: [-440.10119542   -3.11478203 -321.48586776 -714.40417469  440.4251977
  174.58365943  520.59350437 -119.69346001   37.60037615 -194.21393858
 -127.88589692 -186.37820556  -19.382128   2577.83805449]
------------------------------------

Run  1410
State: [28  0 15  0  0 12 24  0  0  9 20  0]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:2471.174695471088 | Update: -5.676660824275132
Weight: [-442.30858278   -3.11478203 -322.65642203 -714.40417469  440.


Run  1467
State: [20  0  0  0 10  0  0 22 11  0  0 25]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:1694.6525273225848 | Update: -14.868954322842
Weight: [-677.07292442   -2.82081991 -482.95916066 -714.40417469  425.83138988
   71.46046839  397.31609844 -145.95592471  -15.44080649 -228.30512319
 -261.67847543 -199.09609258  -35.82487181 1931.82046468]
------------------------------------

Run  1468
State: [20  0  0  0 10  0  0 21 10  0  0 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1673.6645413002402 | Update: 17.57477788533879
Weight: [-681.12286438   -2.82081991 -482.95916066 -714.40417469  423.81949439
   71.46046839  397.31609844 -150.36768048  -17.65567049 -228.30512319
 -261.67847543 -204.09528567  -35.82487181 1916.95151036]
------------------------------------

Run  1469
State: [21  0  0  0 11  0  0 20 10  0  0 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:1691.5315753706802 | Update: -3.74754163485386
Weight: [-676.33592417   -2.82081991 -482.95916066 -714.40417469  426

 -261.67847543 -312.5129168    -3.29565773 1460.39359726]
------------------------------------

Run  1529
State: [49  0  0 10  0 20  0 12  0 21  0 13]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:578.3900465473658 | Update: -10.97804053458458
Weight: [-911.11401139   -2.82081991 -482.95916066 -718.16765763  222.04322101
   20.76140375  229.20252549 -181.76116763  -20.05055223 -250.83249623
 -261.67847543 -314.15101721  -12.66518525 1451.02406975]
------------------------------------

Run  1530
State: [51  0  0 10  0 21  0 11  0 21  0 12]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:555.0624135215426 | Update: -10.411864373455956
Weight: [-918.4398791    -2.82081991 -482.95916066 -719.6491816   222.04322101
   17.79069334  229.20252549 -183.53786897  -20.05055223 -253.94553812
 -261.67847543 -316.0703384   -23.64322578 1440.04602922]
------------------------------------

Run  1531
State: [51  0  0 11  0 21  0  9  0 21  0 12]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:530.8420030157102 |

Run  1587
State: [19  0  0 24  0  0  0  8  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:5402.202353948398 | Update: 5.634590017594746
Weight: [-3.41401731e+02 -2.82081991e+00 -4.82959161e+02  7.45433927e+01
  2.22043221e+02  1.18622779e+01  6.70630364e+02 -3.46547381e+02
  1.28827079e+03  6.19668315e+02 -2.61678475e+02  2.42896165e+02
 -5.39524814e+01  5.49808499e+03]
------------------------------------

Run  1588
State: [20  0  0 25  0  0  0  6  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:5432.263017923139 | Update: 17.359302592624772
Weight: [-3.39943742e+02 -2.82081991e+00 -4.82959161e+02  7.63683698e+01
  2.22043221e+02  1.18622779e+01  6.70630364e+02 -3.45939441e+02
  1.28827079e+03  6.19668315e+02 -2.61678475e+02  2.42896165e+02
 -4.83178914e+01  5.50371958e+03]
------------------------------------

Run  1589
State: [21  0  0 25  0  0  0  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:5452.9505417164355 | Update: -54.62950541716509
Weight: [-3


Run  1645
State: [27  0 10  0 17 26  0  0  0  0 13  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:4107.010721642061 | Update: 448.2202856576155
Weight: [-7.44689019e+02 -3.21121214e+00 -5.22096696e+02 -9.22060665e+00
  1.20856294e+02 -1.58163558e+02  4.79725741e+02 -3.42690483e+02
  1.28827079e+03  6.19668315e+02 -5.64613196e+02  2.42896165e+02
 -5.43580261e+01  4.14345961e+03]
------------------------------------

Run  1646
State: [29  0 10  0 18 26  0  0 26  0 13  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:5091.961893209671 | Update: -75.74134954749934
Weight: [-5.79875271e+02 -3.21121214e+00 -4.61672700e+02 -9.22060665e+00
  2.23957897e+02 -4.85832007e-01  4.79725741e+02 -3.42690483e+02
  1.28827079e+03  6.19668315e+02 -4.85759051e+02  2.42896165e+02
  3.93862260e+02  4.59167989e+03]
------------------------------------

Run  1647
State: [30  0 11  0 18  0  0  0 25  0 12  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:4902.754004619472 | Update: -57.33212020649535
Weight: [-6.

Run  1707
State: [ 7  0 23  0  0  0  9  0  0 14  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:6983.268429289295 | Update: -54.17861035929491
Weight: [-2.41412675e+02 -3.21121214e+00  1.11749023e+02 -9.22060665e+00
  1.35502230e+02 -2.71305961e+01  3.77716860e+02 -2.48626310e+02
  1.22035195e+03  1.17711228e+03 -5.11453235e+02  2.42896165e+02
  1.06101016e+01  6.68705484e+03]
------------------------------------

Run  1708
State: [ 7  0 23  0  0  0 10  0  0 14  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:6925.6017808663 | Update: -53.692378528725385
Weight: [-2.46577600e+02 -3.21121214e+00  9.49504025e+01 -9.22060665e+00
  1.35502230e+02 -2.71305961e+01  3.71076332e+02 -2.48626310e+02
  1.22035195e+03  1.16687000e+03 -5.11453235e+02  2.42896165e+02
  1.06101016e+01  6.63287623e+03]
------------------------------------

Run  1709
State: [ 7  0 23  0  0  0 11  0  0 14  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:6853.6352990576725 | Update: -67.63627392067428
Weight: [


Run  1769
State: [35  0  0 17  0  0 16  0 18  0  0 17]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:5149.69205375984 | Update: -63.95265407268289
Weight: [-5.64606151e+02 -3.21121214e+00 -1.42958778e+02 -1.05201846e+02
  1.35502230e+02 -1.47306303e+02  1.91137408e+02 -3.71540651e+02
  1.10464195e+03  1.03082906e+03 -5.11453235e+02 -2.63415471e+01
 -7.94372700e+01  5.15042332e+03]
------------------------------------

Run  1770
State: [35  0  0 17  0  0 17  0 17  0  0 17]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:5058.480981926487 | Update: -49.889303264593764
Weight: [-5.95089647e+02 -3.21121214e+00 -1.42958778e+02 -1.19873915e+02
  1.35502230e+02 -1.47306303e+02  1.77202285e+02 -3.71540651e+02
  1.08905342e+03  1.03082906e+03 -5.11453235e+02 -4.09628802e+01
 -7.94372700e+01  5.08647067e+03]
------------------------------------

Run  1771
State: [35  0  0 18  0  0 18  0 17  0  0 17]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:4988.649301449425 | Update: -47.73054429453623
Weight: 

Run  1832
State: [47  0  0  0 26 13 25  0  0 26 20  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:3730.8178426732097 | Update: -213.82097118697675
Weight: [-9.02579684e+02 -3.94600793e+00 -1.42958778e+02 -3.21973623e+02
  4.47648942e+01 -1.17395188e+02  1.06485817e+02 -4.40808569e+02
  9.82713270e+02  1.03082906e+03 -4.42911629e+02 -2.31538879e+02
 -4.86527895e+01  4.26032195e+03]
------------------------------------

Run  1833
State: [49  0  0  0  0 13  0 26  0 26 19  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:3367.5056947158964 | Update: 218.49059965161223
Weight: [-1.03944277e+03 -3.94600793e+00 -1.42958778e+02 -3.21973623e+02
 -3.04577569e+01 -1.55004825e+02  3.36872950e+01 -4.40808569e+02
  9.82713270e+02  9.55759389e+02 -5.00783825e+02 -2.31538879e+02
 -2.62473761e+02  4.04650098e+03]
------------------------------------

Run  1834
State: [50  0  0  0  0 14  0 25  0 26 19  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:3750.738000434267 | Update: -46.63734363231788
Weight: 

Run  1892
State: [11  0  0  0  0  0  0 21 10  7  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:6629.996713296731 | Update: -26.341070533396305
Weight: [-4.40788901e+02 -3.94600793e+00 -1.42958778e+02 -3.21973623e+02
 -3.04577569e+01  6.66735120e+02 -1.30851407e+02 -4.36020951e+02
  1.01120262e+03  1.64899656e+03  3.44684455e+01 -2.31538879e+02
 -7.39115785e+01  6.56067052e+03]
------------------------------------

Run  1893
State: [12  0  6  0  0  0  0 20 10  6  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:6600.313659470498 | Update: -66.10313659470557
Weight: [-4.44734967e+02 -3.94600793e+00 -1.42958778e+02 -3.21973623e+02
 -3.04577569e+01  6.66735120e+02 -1.30851407e+02 -4.43481333e+02
  1.00763559e+03  1.64650671e+03  3.44684455e+01 -2.31538879e+02
 -1.00252649e+02  6.53432945e+03]
------------------------------------

Run  1894
State: [12  0  6  0  0  0  0 20 10  6  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:6377.085521755802 | Update: -111.98966900277719
Weight: [

Run  1955
State: [41  0 19 10 13 20  0 12  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:4369.2284421513705 | Update: 7.911152550461338
Weight: [-8.67943557e+02 -3.94600793e+00 -3.80468784e+02 -3.39126423e+02
 -7.00059597e+01  5.87061606e+02 -2.85317067e+02 -5.58546978e+02
  9.68348403e+02  1.63207764e+03  3.44684455e+01 -2.31538879e+02
 -5.31822518e+01  4.94250751e+03]
------------------------------------

Run  1956
State: [43  0 19 10 14 21  0 10  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:4392.359655413764 | Update: -33.098031508120584
Weight: [-8.63526207e+02 -3.94600793e+00 -3.78442449e+02 -3.38058786e+02
 -6.86143799e+01  5.89202402e+02 -2.85317067e+02 -5.57266626e+02
  9.68348403e+02  1.63207764e+03  3.44684455e+01 -2.31538879e+02
 -4.52710993e+01  4.95041867e+03]
------------------------------------

Run  1957
State: [43  0 19 11 14 22  0  9  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:4323.06215777178 | Update: -15.445107425635797
Weight: [-

Run  2021
State: [14  0  0  0  0  0  0  0  0 17 26 21]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:5444.656524816226 | Update: -61.87322936586497
Weight: [-5.45325267e+02 -3.94600793e+00  7.75290317e+01 -2.88492921e+02
  3.82352280e+02  5.58600183e+02  8.57529428e+01 -9.00149232e+02
  1.11811713e+03  1.19334741e+03  5.13782354e+01 -5.80647320e+02
  6.31633746e+01  5.36486236e+03]
------------------------------------

Run  2022
State: [15  0  0  0  0  0  0  0  0 17 26 21]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:5310.688477121707 | Update: -43.872795765269075
Weight: [-5.57122197e+02 -3.94600793e+00  7.75290317e+01 -2.88492921e+02
  3.82352280e+02  5.58600183e+02  8.57529428e+01 -9.00149232e+02
  1.11811713e+03  1.17914400e+03  2.96078659e+01 -5.98121691e+02
  1.29014519e+00  5.30298913e+03]
------------------------------------

Run  2023
State: [15  0  0  0  0  0  0  0  0 17 26 20]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:5237.930471242609 | Update: -67.16730721311797
Weight: [

Run  2082
State: [31  0  0  0  0  0  8  0 22  0 11  8]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:3856.0471167057653 | Update: 0.21783601467677727
Weight: [-9.44062532e+02 -3.96659009e+00  7.75290317e+01 -2.88492921e+02
  3.82352280e+02  4.46416470e+02 -9.83768934e+01 -8.14582322e+02
  9.54642378e+02  1.04692008e+03 -2.76069947e+02 -8.38662863e+02
 -6.21665276e+01  4.13564205e+03]
------------------------------------

Run  2083
State: [32  0  0  0  0  0  9  0 21  0 10  8]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:3854.9938741210303 | Update: -39.98937044537388
Weight: [-9.43970566e+02 -3.96659009e+00  7.75290317e+01 -2.88492921e+02
  3.82352280e+02  4.46416470e+02 -9.83531605e+01 -8.14582322e+02
  9.54707275e+02  1.04692008e+03 -2.76037520e+02 -8.38639426e+02
 -6.19486916e+01  4.13585989e+03]
------------------------------------

Run  2084
State: [32  0  0  0  0  0 10  0 21  0 10  8]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:3786.7261872346603 | Update: -53.554010829634535
Weigh


Run  2147
State: [45  0  0  0 24  9 18  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2649.1215359840166 | Update: -28.323312377104685
Weight: [-1288.02619512    -3.96659009    77.52903169  -288.49292069
   274.92030657   446.9973455   -127.1861905   -728.20697583
   857.77832473  1046.92007908  -309.46362071  -861.38585707
   -36.06089659  3327.68230158]
------------------------------------

Run  2148
State: [45  0  0  0 24  9 19  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2609.131852262035 | Update: -22.016654216295592
Weight: [-1305.38399663    -3.96659009    77.52903169  -288.49292069
   265.72258468   443.54835464  -134.12921666  -728.20697583
   857.77832473  1046.92007908  -309.46362071  -861.38585707
   -36.06089659  3299.3589892 ]
------------------------------------

Run  2149
State: [45  0  0  0 24 10 20  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:2558.2429758543076 | Update: -5.888548846622143
Weight: [-1318.87679397    -3.966

Run  2194
State: [ 4  0 13 17  0  0 14  0 19 12  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:11086.420700457877 | Update: 107.02395382262512
Weight: [-2.71251150e+02 -5.77251557e+00  9.50379365e+02  8.05137728e+02
  2.58854496e+02  1.99389010e+03 -5.27373431e+01 -6.46369338e+02
  1.53427182e+03  2.58618862e+03 -3.09463621e+02 -8.61385857e+02
  2.22977004e+02  9.72817044e+03]
------------------------------------

Run  2195
State: [ 3  0 14 17  0  0 15  0 18 12  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:11219.720735177978 | Update: -109.0228154278193
Weight: [-2.65421008e+02 -5.77251557e+00  9.69135454e+02  8.29691249e+02
  2.58854496e+02  1.99389010e+03 -3.23321231e+01 -6.46369338e+02
  1.56180831e+03  2.60353077e+03 -3.09463621e+02 -8.61385857e+02
  2.22977004e+02  9.83519440e+03]
------------------------------------

Run  2196
State: [ 2  0 14 17  0  0 16  0 18 12  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:10932.147458448953 | Update: -154.91316862536223
Weight:

Run  2246
State: [ 8  0 23  0  0  0 11  0  0  0 23  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:6855.128909829502 | Update: -69.15567558395924
Weight: [-3.62176613e+02 -6.92192978e+00  3.02111314e+02  1.18641019e+02
  2.58854496e+02  1.99389010e+03 -3.36092708e+02 -8.13242916e+02
  1.22806704e+03  2.39215840e+03 -4.59529117e+02 -8.61385857e+02
 -7.68251068e+01  6.99479921e+03]
------------------------------------

Run  2247
State: [ 8  0 24  0  0  0 12  0  0  0 23  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:6944.195268780652 | Update: 181.75642523802344
Weight: [-3.69711140e+02 -6.92192978e+00  2.80668903e+02  1.18641019e+02
  2.58854496e+02  1.99389010e+03 -3.46452542e+02 -8.13242916e+02
  1.22806704e+03  2.39215840e+03 -4.81054225e+02 -8.61385857e+02
 -7.68251068e+01  6.92564354e+03]
------------------------------------

Run  2248
State: [ 9  0 24  0  0  7 14  0  0  0 23  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:7276.587140681586 | Update: -71.7867515858743
Weight: [-3.

Run  2304
State: [40  0  0  0 24  0 18  0 17  0  8 17]
Alpha:1.0, Action:0, Reward:-1 | Max_value:4860.828298490012 | Update: -56.69110606196591
Weight: [ -854.31263318    -6.92192978   156.51116494   118.64101898
    -7.51677667  1703.57243531  -513.20734625  -852.04461872
   889.04442093  2392.15839866  -748.97372206 -1075.05515543
   -90.7960708   5583.75172944]
------------------------------------

Run  2305
State: [40  1  0  0 24  0 19  0 17  0  8 17]
Alpha:1.0, Action:0, Reward:-1 | Max_value:4764.121806960799 | Update: -55.62682347887403
Weight: [ -885.19517823    -6.92192978   156.51116494   118.64101898
   -25.92666533  1703.57243531  -527.10430249  -852.04461872
   875.99360078  2392.15839866  -755.1112656  -1088.01629918
   -90.7960708   5527.06062338]
------------------------------------

Run  2306
State: [39  1  0  0 24  0 20  0 16  0  8 17]
Alpha:1.0, Action:1, Reward:-1 | Max_value:4676.269156522668 | Update: 43.03337923572417
Weight: [ -915.49795411    -7.67461838   156


Run  2360
State: [44  0  0  0  0  8 16  0  0 12  0  7]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:4399.016855907885 | Update: -4.962449551877398
Weight: [-1185.98849437    -8.16691465   156.51116494   118.64101898
   134.90847315  1686.32573377  -377.13060443  -985.27712252
   882.02273871  2087.06705971  -724.51361893 -1176.9405365
   -65.68138331  4808.60045288]
------------------------------------

Run  2361
State: [45  0  0  0  0  9 17  0  0 11  0  7]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:4386.24450938768 | Update: -49.47079860211943
Weight: [-1188.96212446    -8.16691465   156.51116494   118.64101898
   134.90847315  1685.78858871  -378.21190975  -985.27712252
   882.02273871  2086.26294466  -724.51361893 -1177.40770507
   -70.64383286  4803.63800333]
------------------------------------

Run  2362
State: [46  0  0  0  0  9 18  0  0 11  0  6]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:4330.19309177271 | Update: -25.981007440966096
Weight: [-1219.28005802    -8.16691465 

 -3.10791030e+01  1.36662581e+04]
------------------------------------

Run  2415
State: [ 9  0  0 18  0  0 18  0 17  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:14349.825619534628 | Update: -20.171475921004458
Weight: [-3.48725195e+02 -8.16691465e+00  1.56511165e+02  1.67708286e+03
  1.34908473e+02  3.15986529e+03 -4.86403710e+02 -9.25481693e+02
  2.34525896e+03  2.01085236e+03 -7.24513619e+02 -1.17695967e+03
 -1.28075992e+02  1.35692612e+04]
------------------------------------

Run  2416
State: [10  0  0 18  0  0 18  0 17  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:14293.628244049149 | Update: -26.90658225668085
Weight: [-3.51197593e+02 -8.16691465e+00  1.56511165e+02  1.67218288e+03
  1.34908473e+02  3.15986529e+03 -4.91348439e+02 -9.25481693e+02
  2.34061530e+03  2.01085236e+03 -7.24513619e+02 -1.17695967e+03
 -1.48247467e+02  1.35490898e+04]
------------------------------------

Run  2417
State: [11  0  0 18  6  0 19  0 16  0  0  0]
Alpha:1.0, Action:1, Reward:


Run  2477
State: [40  0 16  0 26 13 26 26  0 26 19  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:10187.019331503008 | Update: 488.0799952217767
Weight: [-1.34446894e+03 -8.16691465e+00 -1.58162207e+02  7.85587281e+02
 -4.60827257e+02  3.10137097e+03 -7.99537280e+02 -1.10861230e+03
  2.12282034e+03  1.83800844e+03 -9.69916755e+02 -1.17695967e+03
 -1.29775711e+02  1.03859047e+04]
------------------------------------

Run  2478
State: [40  0 16  0  0 13  0 25  0 26 19  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:11380.132071937276 | Update: -99.85626048860104
Weight: [-1.07858680e+03 -8.16691465e+00 -5.28863165e+01  7.85587281e+02
 -2.89119725e+02  3.18722088e+03 -6.26716241e+02 -9.37463950e+02
  2.12282034e+03  2.00936676e+03 -8.44419486e+02 -1.17695967e+03
  3.58304284e+02  1.08739847e+04]
------------------------------------

Run  2479
State: [41  0 16  0  0 14  0 24  0 25 19  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:11125.83596243379 | Update: 160.18711624489515
Weight: [

Run  2538
State: [50  0  0  0  0  0  0 21 10  7  0 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:9840.971921535107 | Update: -138.04880155298633
Weight: [-1.29606554e+03 -9.93241124e+00 -5.85703842e+02  7.85587281e+02
 -2.89119725e+02  2.88315056e+03 -6.31871322e+02 -1.06619933e+02
  1.93233559e+03  1.73484723e+03 -1.12325343e+03 -4.12060268e+02
  3.41181228e+02  1.01650550e+04]
------------------------------------

Run  2539
State: [51  0  0  0  0  0  0 20 10  6  0 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:9287.463847090035 | Update: -275.25446094132167
Weight: [-1.39006834e+03 -9.93241124e+00 -5.85703842e+02  7.85587281e+02
 -2.89119725e+02  2.88315056e+03 -6.31871322e+02 -1.45718451e+02
  1.91364140e+03  1.72179840e+03 -1.12325343e+03 -4.58474602e+02
  2.03132426e+02  1.00270062e+04]
------------------------------------

Run  2540
State: [52  0  0  0  0  0  0 19  9  0  0 25]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:8621.97080981899 | Update: -83.35309618742212
Weight: [-

Run  2572
State: [ 6  0  0  0  0  0 12  0  0  0  0 18]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:16561.623783728526 | Update: 86.56877165307742
Weight: [-5.10281356e+02 -9.93241124e+00 -5.85703842e+02  7.85587281e+02
 -2.89119725e+02  2.88315056e+03 -5.11565662e+02 -1.74708677e+02
  1.82821289e+03  1.69949729e+03 -1.12325343e+03  1.11354775e+03
  4.11443138e+02  1.57537304e+04]
------------------------------------

Run  2573
State: [ 7  0  0  0  0  7 14  0  0  0  0 18]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:16729.578808788272 | Update: 316.98891132138124
Weight: [-5.03207591e+02 -9.93241124e+00 -5.85703842e+02  7.85587281e+02
 -2.89119725e+02  2.88315056e+03 -4.97418324e+02 -1.74708677e+02
  1.82821289e+03  1.69949729e+03 -1.12325343e+03  1.13450398e+03
  4.98011910e+02  1.58402992e+04]
------------------------------------

Run  2574
State: [ 8  0  0  0  0  7 15  0  0  0  0 18]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:17117.090304273264 | Update: 360.20586946230105
Weight: 

Alpha:1.0, Action:0, Reward:-0.1 | Max_value:15245.600105515055 | Update: -140.93727689720436
Weight: [ -852.17309125   -15.42822442  -585.70384203   596.47267963
  -593.93432383  2291.9915402   -385.98691959  -513.91827443
  1703.16598564  1699.4972937  -1123.25342862   959.79461335
  -142.49615363 14696.97338482]
------------------------------------

Run  2619
State: [ 5  0  0 15 20  0  0  0 23  0  0  9]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:15041.40679471942 | Update: -39.45750785893142
Weight: [ -863.68945274   -15.42822442  -585.70384203   569.84472768
  -630.16733825  2291.9915402   -385.98691959  -513.91827443
  1659.26971091  1699.4972937  -1123.25342862   942.73583755
  -142.49615363 14556.03610793]
------------------------------------

Run  2620
State: [ 5  0  0 15 20  0  6  0 23  0  0  9]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:14977.093546378956 | Update: 16.422161479431452
Weight: [ -866.37626807   -15.42822442  -585.70384203   561.85734001
  -640.84520844  2291.

Run  2677
State: [28  0  0  0  0  0  7  0  0 15 24  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:10310.942193528595 | Update: -129.7438530633026
Weight: [-1403.63750287   -15.42822442  -585.70384203  -310.3886971
  -857.65353542  2291.9915402   -409.13015364  -783.83747515
  1162.27721824  1139.78390106 -1250.44446959   789.59778076
  -123.93041313 11086.98934544]
------------------------------------

Run  2678
State: [28  0  0  0  0  0  9  0  0 14 24  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:10116.632111226612 | Update: -2.765739985199616
Weight: [-1453.1121818    -15.42822442  -585.70384203  -310.3886971
  -857.65353542  2291.9915402   -421.49865513  -783.83747515
  1162.27721824  1113.5042926  -1292.58380688   789.59778076
  -123.93041313 10957.24549238]
------------------------------------

Run  2679
State: [29  0  0  0  0  0 10  0  0 14 24  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:10089.939832262091 | Update: 2.723504192015753
Weight: [-1454.16682982   -15.42822442 

Run  0
State: [2 0 0 0 0 0 0 0 0 0 0 0]
Alpha:0.3333333333333333, Action:1, Reward:0.0 | Max_value:9650.35033909952 | Update: -7.664180885980992
Weight: [-2111.98505759   -15.42822442  -655.83641682  -310.3886971
  -857.65353542  1879.66873527  -601.70206115  -786.78320973
  1035.63183984  1025.42500777 -1581.75157517   789.59778076
  -102.27361779  9736.63831027]
------------------------------------

Run  1
State: [3 0 0 0 0 0 0 0 0 0 0 0]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:9613.912129281705 | Update: -7.4834113212000375
Weight: [-2112.19381116   -15.42822442  -655.83641682  -310.3886971
  -857.65353542  1879.66873527  -601.70206115  -786.78320973
  1035.63183984  1025.42500777 -1581.75157517   789.59778076
  -109.93779868  9728.97412939]
------------------------------------

Run  2
State: [4 0 0 0 0 0 0 0 0 0 0 0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:9577.642398582931 | Update: -7.124877884531088
Weight: [-2112.49955593   -15.42822442  -655.83641682  -310.3886971
  -8


Run  64
State: [ 5  1 13 17 22  0 14  0 19  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:7714.666056983733 | Update: -48.963988327859624
Weight: [-2245.09798645   -10.68743882  -794.27222044  -459.67128471
 -1074.30445953  1617.62368104  -647.97215811  -560.90622383
   691.54576129  1025.42500777 -1581.75157517   789.59778076
   -40.89548899  8390.46669712]
------------------------------------

Run  65
State: [ 4  0 14 17 23  0 15  0 18  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:7673.479174856307 | Update: 34.793979574824334
Weight: [-2248.43213505   -11.34997235  -802.85322378  -470.90464168
 -1088.87999291  1617.62368104  -657.3076472   -560.90622383
   678.94768193  1025.42500777 -1581.75157517   789.59778076
   -89.85947732  8341.50270879]
------------------------------------

Run  66
State: [ 3  0 14 17 23  0 16  0 18  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:7730.838457249542 | Update: -64.97707743667252
Weight: [-2246.53672902   -11.34997235  -79


Run  124
State: [18  0 25  0  0 10 21  0  0 10 21  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:5539.752669385448 | Update: -57.997416610826804
Weight: [-2391.19896649   -11.34997235 -1212.88836283  -906.72001318
  -918.8677951   1582.38606993  -555.0618737   -670.18798109
   604.13304561   692.18119488 -1521.62783911   789.59778076
   -95.62581679  6820.80195719]
------------------------------------

Run  125
State: [18  0 26  0  0 11 22  0  0 10 21  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:5463.328883824966 | Update: -50.94430335839388
Weight: [-2405.41633814   -11.34997235 -1232.43475424  -906.72001318
  -918.8677951   1574.53888042  -571.64858167  -670.18798109
   604.13304561   684.34961306 -1538.11013143   789.59778076
   -95.62581679  6762.80454058]
------------------------------------

Run  126
State: [18  0 26  0  0 11 23  0  0  9 20  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:5405.609020833962 | Update: 54.778111157351304
Weight: [-2417.9047236    -11.34997235 

Alpha:1.0, Action:1, Reward:0.0 | Max_value:6357.8871334594405 | Update: 19.34048483904735
Weight: [-2327.23340541   -12.73992443  -936.85486983  -906.72001318
  -945.6945887   1491.08323542  -324.79302205  -362.06722974
   411.7384638    796.67684675 -1373.90299005   856.46831866
   -78.03623419  6993.05025392]
------------------------------------

Run  185
State: [20  0  0  0 10  0  0 22 11  0  0 15]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:6369.629529583453 | Update: -76.17958081934557
Weight: [-2321.96552977   -12.73992443  -936.85486983  -906.72001318
  -943.07765724  1491.08323542  -324.79302205  -356.06788875
   414.61940236   796.67684675 -1373.90299005   860.36987688
   -58.69574935  7012.39073875]
------------------------------------

Run  186
State: [20  0  0  0 10  0  0 21 10  0  0 14]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:6267.778671652422 | Update: -70.57084449164995
Weight: [-2342.71498678   -12.73992443  -936.85486983  -906.72001318
  -953.38539992  1491.0832

Run  244
State: [45  0  0  9  0 18  0 16  0 23  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:4721.428683080583 | Update: 8.982279556655158
Weight: [-2685.53256358   -12.73992443  -936.85486983  -902.74633315
 -1067.45815673  1488.12354977  -239.34052294  -532.38571787
   357.32205597   840.38974439 -1373.90299005   679.08637445
   -76.80346018  5988.95005154]
------------------------------------

Run  245
State: [46  0  0  9  0 19  0 15  0 22  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:4730.761105476358 | Update: -52.42172803383528
Weight: [-2680.02781811   -12.73992443  -936.85486983  -901.65536285
 -1067.45815673  1490.3111329   -239.34052294  -530.44744505
   357.32205597   843.17942989 -1373.90299005   679.08637445
   -67.82118063  5997.9323311 ]
------------------------------------

Run  246
State: [46  0  0 10  0 19  0 14  0 22  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:4637.146744686977 | Update: 11.782748686109699
Weight: [-2712.86813404   -12.73992443  -93


Run  302
State: [ 5  0  8 23  0  0  0 13  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:6328.625059241358 | Update: 73.65976314211275
Weight: [-1361.6563525    -12.73992443  -977.40900604  -616.44270704
 -1067.45815673  1248.82183806   421.12240933  -636.94611624
   682.42912274   892.8918592  -1373.90299005   679.08637445
  -138.30940274  6831.13352236]
------------------------------------

Run  303
State: [ 6  0  8 23  0  0  0 11  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:6404.5534555150025 | Update: -72.15608868200707
Weight: [-1356.64057245   -12.73992443  -969.46502428  -593.57924428
 -1067.45815673  1248.82183806   421.12240933  -624.03148457
   682.42912274   892.8918592  -1373.90299005   679.08637445
   -64.6496396   6904.79328551]
------------------------------------

Run  304
State: [ 6  0  8 24  0  0  0 11  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:6312.238675304425 | Update: -8.468283359636189
Weight: [-1362.53663926   -12.73992443  -

Run  364
State: [14  0 20  0 15 23  0  0  0  0 14  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:4766.9763884989225 | Update: -48.48675796172665
Weight: [-1649.77778014   -12.09704588 -1214.93503976  -624.9742807
 -1146.00624569  1093.06572636   179.2389408   -679.56647596
   682.42912274   892.8918592  -1568.05485036   679.08637445
   -53.47179177  5599.33560649]
------------------------------------

Run  365
State: [14  0 20  0 16 24  0  0  0  0 14  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:4704.431655117226 | Update: -47.14431655117278
Weight: [-1659.02240609   -12.09704588 -1228.00791376  -624.9742807
 -1155.84724939  1077.97686479   179.2389408   -679.56647596
   682.42912274   892.8918592  -1577.24115514   679.08637445
   -53.47179177  5550.84884852]
------------------------------------

Run  366
State: [14  0 20  0 16 24  0  0  0  0 14  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:4657.6514984441255 | Update: -16.14918182788324
Weight: [-1668.01107827   -12.09704588 -

Run  424
State: [26  0  0  0  0  0  0  0  0 17  0 21]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:5343.1530373194 | Update: -53.53153037319407
Weight: [-1709.39424341   -12.21830105 -1032.87536986  -624.9742807
  -938.43615271  1052.30485778   510.69130378  -414.74704161
   603.75278303   992.00528741 -1574.74488506   864.04616144
   -77.60742578  5476.68231297]
------------------------------------

Run  425
State: [26  0  0  0  0  0  0  0  0 17  0 21]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:5275.81909831684 | Update: -26.429095491584576
Weight: [-1728.34912908   -12.21830105 -1032.87536986  -624.9742807
  -938.43615271  1052.30485778   510.69130378  -414.74704161
   603.75278303   979.71677194 -1574.74488506   848.92767099
   -77.60742578  5423.1507826 ]
------------------------------------

Run  426
State: [26  0  0  0  0  0  0  0  0 17  0 21]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:5194.446070505163 | Update: -22.466568832790472
Weight: [-1737.70736113   -12.21830105 -1032

Run  485
State: [53  0  0 15  0  0  0  0 23  0  0  9]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:2504.7180908000455 | Update: 32.42549428773191
Weight: [-2413.39496825   -12.21830105 -1032.87536986  -727.24850853
  -938.43615271   807.58079701   323.19571075  -510.65680433
   613.45727225   854.53673595 -1574.74488506   589.1711535
   -67.83713923  4141.89729745]
------------------------------------

Run  486
State: [54  0  0 15  0  0  7  0 22  0  0  9]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2555.5932644931822 | Update: -29.230814883276707
Weight: [-2389.99039986   -12.21830105 -1032.87536986  -720.68461214
  -938.43615271   807.58079701   323.19571075  -510.65680433
   623.5565053    854.53673595 -1574.74488506   593.09587271
   -35.41164494  4174.32279174]
------------------------------------

Run  487
State: [54  0  0 15  0  0  8  0 22  0  0  8]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2510.4405055907973 | Update: -20.84088334936814
Weight: [-2411.48715176   -12.21830105 


Run  551
State: [18  0  0  0 23  8 15  0  0  0 22  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:12488.489948933438 | Update: -49.96892836314146
Weight: [-1.23275209e+03 -1.22183010e+01 -1.03287537e+03  1.45998043e+03
 -4.65083233e+02  7.94924521e+02  2.75417471e+03  4.85571149e+02
  2.79029289e+03  8.54536736e+02 -2.00789031e+03  7.20180016e+02
 -7.73097108e+01  1.28095378e+04]
------------------------------------

Run  552
State: [18  0  0  0 23  8 17  0  0  0 22  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:12447.893355030288 | Update: -22.576696266620274
Weight: [-1.24500137e+03 -1.22183010e+01 -1.03287537e+03  1.45998043e+03
 -4.80634038e+02  7.89515789e+02  2.74396711e+03  4.85571149e+02
  2.79029289e+03  8.54536736e+02 -2.02276721e+03  7.20180016e+02
 -7.73097108e+01  1.27595689e+04]
------------------------------------

Run  553
State: [19  0  0  0 24  9 18  0  0  0 22  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:12482.785482146564 | Update: -60.16527202109137
Weight: 


Run  612
State: [26  0 15  0  0  0 24  0 14  9  7  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:10425.44416667554 | Update: -104.35444166675552
Weight: [-1898.65852443   -19.35366209 -1324.33296096  1459.98042936
  -614.90998673   587.63211877  2384.56337862   106.36431916
  2279.32936919   202.11018745 -2454.66382657   720.18001629
   -98.20420617 10361.99193685]
------------------------------------

Run  613
State: [26  0 15  0  0  0 24  0 14  9  7  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:10302.98892331358 | Update: -43.251678107353655
Weight: [-1935.60920559   -19.35366209 -1345.43479151  1459.98042936
  -614.90998673   587.63211877  2350.45552152   106.36431916
  2259.54542274   189.42799648 -2464.54932672   720.18001629
   -98.20420617 10257.63749518]
------------------------------------

Run  614
State: [26  0 16  0  0  0 25  0 13  9  6  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:10485.705596902824 | Update: 233.85284313801458
Weight: [-1950.9241164    -19.3536620

Run  670
State: [17  0  0  7  9 14  0 25  0  0  0 26]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:8471.202109009133 | Update: -81.08063553191278
Weight: [-2478.08471521   -18.31410139 -1722.50724949  1436.62180988
  -614.89474069   579.13762255  2037.22638768  -276.66981022
  2140.19274007   152.83022897 -2449.65836961   619.3093493
  -208.09684331  8747.39283646]
------------------------------------

Run  671
State: [17  0  0  7  9 14  0 24  0  0  0 26]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:8357.824426996112 | Update: -88.0318058567209
Weight: [-2496.85644212   -18.31410139 -1722.50724949  1428.96233429
  -624.76854791   563.77905643  2037.22638768  -304.00773265
  2140.19274007   152.83022897 -2449.65836961   590.95824576
  -208.09684331  8666.31220092]
------------------------------------

Run  672
State: [17  0  0  7 10 14  0 23  0  0  0 26]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:8239.945850752641 | Update: -82.63064532354383
Weight: [-2517.23749868   -18.31410139 -17

Run  724
State: [ 8  0  0 21  0  0  0 21 10 24  0 14]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:10996.67319176365 | Update: 76.13341000439686
Weight: [-2412.60552816   -15.16213729 -1722.50724949  1745.14192337
  -313.12632964   535.77271479  1816.38030571  -520.15196927
  2690.44243534    33.92817858 -2449.65836961  1036.57399352
  -179.08496544 10334.75829657]
------------------------------------

Run  725
State: [ 8  0  0 21  0  0  0 20 10 24  0 14]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:11067.884023896397 | Update: -140.62731569678
Weight: [-2404.31077568   -15.16213729 -1722.50724949  1766.71829579
  -313.12632964   535.77271479  1816.38030571  -498.58927967
  2700.75221148    58.60148567 -2449.65836961  1050.9085016
  -102.95155543 10410.89170657]
------------------------------------

Run  726
State: [ 8  0  0 21  0  0  0 19  9 24  0 14]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:10905.801326581819 | Update: -85.85339240896246
Weight: [-2419.63215398   -15.16213729 -172

Run  786
State: [31  0  0  0  0 20  0 13  0  0 16  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:5821.814438670197 | Update: -12.108565876390458
Weight: [-2.98027713e+03 -1.39141341e+01 -1.72250725e+03  9.46866446e+02
 -3.13126330e+02  4.94521601e+02  1.41750925e+03 -4.16841000e+02
  2.63442136e+03 -6.55380571e+02 -2.72866445e+03  6.60241238e+02
 -2.69591801e+00  7.56391494e+03]
------------------------------------

Run  787
State: [30  0  0  0  0 20  0 12  0  0 16  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:5858.7738315564075 | Update: -2.9373281133985074
Weight: [-2.98538915e+03 -1.39141341e+01 -1.72250725e+03  9.46866446e+02
 -3.13126330e+02  4.91244965e+02  1.41750925e+03 -4.18963972e+02
  2.63442136e+03 -6.55380571e+02 -2.73128626e+03  6.60241238e+02
 -2.69591801e+00  7.55180637e+03]
------------------------------------

Run  788
State: [29  0  0  0  0 21  0 11  0  0 16  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:5901.259038453093 | Update: -12.782005279650548
Weight: [

------------------------------------

Run  840
State: [28  0  7  0 12  0  0 15  7  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:5915.809136143426 | Update: -81.22090849648612
Weight: [-3149.9535545    -13.029721   -1721.53528007   946.86644574
  -329.55517877   607.72900028  1265.41056269  -422.94280462
  2525.25929614  -655.38057069 -2759.81708832   660.24123848
   -82.71567232  7201.08252444]
------------------------------------

Run  841
State: [28  0  8  0 13  0  0 14  7  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:5583.0803462387685 | Update: -288.6497976278524
Weight: [-3180.92518264   -13.029721   -1729.19977986   946.86644574
  -342.7430312    607.72900028  1265.41056269  -439.37393554
  2517.56019834  -655.38057069 -2759.81708832   660.24123848
   -82.71567232  7119.86161595]
------------------------------------

Run  842
State: [28  0  8  0 13  0  0 13  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:5238.61105792534 | Update: -44.48914814601994
We


Run  892
State: [53  0 18 10  0 19  0 14  0 22  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:2913.8333601424656 | Update: -124.93453635059768
Weight: [-3638.2527728    -13.029721   -1870.10347781   973.69719272
  -633.83708817   585.77516466  1157.31663219  -429.16489627
  2490.1984874   -553.88048981 -2759.81708832   660.24123848
    -9.00975274  6062.08833817]
------------------------------------

Run  893
State: [55  0 19 10  0 20  0 12  0 22  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:2651.0880071522633 | Update: 70.74334960933629
Weight: [-3728.42993015   -13.029721   -1900.41955106   956.8368514
  -633.83708817   553.65767432  1157.31663219  -452.75440515
  2490.1984874   -590.9951963  -2759.81708832   660.24123848
  -133.94428909  5937.15380182]
------------------------------------

Run  894
State: [56  0 19 10  0 20  0 11  0 21  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:2794.7134416762406 | Update: -14.34521005039096
Weight: [-3675.44083901   -13.029721   


Run  949
State: [17  0  0 23  0  0  0 11  0  0  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:23599.45768330835 | Update: 45.1202162960908
Weight: [-1.46506243e+03 -1.30297210e+01  2.95088015e+03  3.53936758e+03
 -6.33837088e+02  5.12982360e+03  7.05560368e+02 -5.86401574e+02
  4.07822513e+03  4.06285602e+03 -2.75981709e+03 -3.94458624e+02
 -2.45393512e+02  2.30133413e+04]
------------------------------------

Run  950
State: [18  0  0 24  0  0  0 10  0  0  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:23644.90282186702 | Update: -48.16743589503312
Weight: [-1.45461623e+03 -1.30297210e+01  2.95088015e+03  3.55337257e+03
 -6.33837088e+02  5.12982360e+03  7.05560368e+02 -5.79707782e+02
  4.07822513e+03  4.06285602e+03 -2.75981709e+03 -3.80502060e+02
 -2.00273296e+02  2.30584615e+04]
------------------------------------

Run  951
State: [19  0  0 24  0  0  0  9  0  0  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:23571.16512884055 | Update: 0.6641842302233272
Weight: [-1.4664


Run  1008
State: [21  0  0  0 15 23  0  7  0  0 15 12]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:17060.99033128788 | Update: 105.72385587660392
Weight: [-3.00405032e+03 -1.49340254e+01  2.95088015e+03  2.60182267e+03
 -9.92295472e+02  4.53528116e+03  2.97598416e+02 -1.00529060e+03
  4.07822513e+03  4.06285602e+03 -3.81823157e+03 -1.64519466e+03
 -1.48541016e+02  1.77178681e+04]
------------------------------------

Run  1009
State: [22  0  0  0 15 23  0  0  0  0 14 11]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:17245.629648562353 | Update: -123.88395305530867
Weight: [-2.97381389e+03 -1.49340254e+01  2.95088015e+03  2.60182267e+03
 -9.70837472e+02  4.56818195e+03  2.97598416e+02 -9.95309479e+02
  4.07822513e+03  4.06285602e+03 -3.79677037e+03 -1.62813251e+03
 -4.28171598e+01  1.78235919e+04]
------------------------------------

Run  1010
State: [22  0  0  0 16 24  0  0  0  0 14 11]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:17044.499311507057 | Update: -168.63303821234513
Weight


Run  1055
State: [39  0  0  0  0  0  0 16  8 23  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:14408.795182304813 | Update: -110.31220871572623
Weight: [-3.92526306e+03 -1.49340254e+01  2.95088015e+03  2.60182267e+03
 -1.26925082e+03  4.13635037e+03  4.18333054e+02 -1.23756529e+03
  3.03910809e+03  3.78218523e+03 -4.10098148e+03 -1.89744669e+03
 -1.99535626e+02  1.54225455e+04]
------------------------------------

Run  1056
State: [41  0  0  0  0  0  0 14  7 22  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:14225.98151630882 | Update: -166.34093201601172
Weight: [-3.98385344e+03 -1.49340254e+01  2.95088015e+03  2.60182267e+03
 -1.26925082e+03  4.13635037e+03  4.18333054e+02 -1.26136939e+03
  3.02715755e+03  3.74792484e+03 -4.10098148e+03 -1.89744669e+03
 -3.09847835e+02  1.53122333e+04]
------------------------------------

Run  1057
State: [41  0  0  0  0  0  0 13  6 22  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:13709.128286444893 | Update: -414.37451827335644
Weig

Run  1098
State: [ 2  0  0  7  0 13  0 25  0  8  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:34458.492001985906 | Update: 3327.0619159092894
Weight: [-1.66119003e+03 -1.49340254e+01  2.95088015e+03  2.73697879e+03
 -1.26925082e+03  6.51972685e+03  5.20346889e+03 -7.89841443e+02
  2.97772167e+03  5.27018803e+03 -4.10098148e+03 -1.89744669e+03
  3.57288095e+03  2.91236515e+04]
------------------------------------

Run  1099
State: [ 2  0  0  7  0 14  0 24  0  8  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:38346.12958446666 | Update: 3204.9559478218434
Weight: [-1.57056899e+03 -1.49340254e+01  2.95088015e+03  3.05127763e+03
 -1.26925082e+03  7.10493414e+03  5.20346889e+03  3.31942568e+02
  2.97772167e+03  5.62959932e+03 -4.10098148e+03 -1.89744669e+03
  3.57288095e+03  3.24507134e+04]
------------------------------------

Run  1100
State: [ 2  0  0  7  0 15  0 23  0  7  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:42068.12437756152 | Update: -412.35306740619126
Weight: 

Run  1153
State: [ 2  0 17 21 10  0  0 22 11  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:33450.608686655876 | Update: -275.11756428940134
Weight: [-2230.4704223    -55.49208853  1816.03572698  2193.03927348
 -1398.70284971  7265.63421868  4690.81493299  2137.39706923
   714.35227654  6396.62786091 -4100.98147801 -1897.44669472
   -97.88869325 31960.82151439]
------------------------------------

Run  1154
State: [ 2  0 17 21 10  0  0 21 10  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:33057.88002549097 | Update: -377.83381504351564
Weight: [-2237.96395273   -55.49208853  1752.98571512  2115.07036304
 -1435.92858696  7265.63421868  4690.81493299  2055.76715262
   673.37104964  6396.62786091 -4100.98147801 -1897.44669472
  -373.00625754 31685.7039501 ]
------------------------------------

Run  1155
State: [ 2  0 17 21 11  0  0 20 10  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:32578.562986183362 | Update: 39.61096647298473
Weight: [-2248.25522293   -55.492088

  1311.09088444 28617.25116294]
------------------------------------

Run  1205
State: [20  1  0  0  0 15  0 23  0 25 19 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:33546.20864401224 | Update: -311.0104624261221
Weight: [-1916.14535531   -47.22626406  1171.31262183   543.33795351
 -1723.40639654  7624.14737465  5089.49019598  1820.11672465
   507.99990432  6701.77888026 -3459.14111708 -1577.90925428
  1311.09088444 29778.81644445]
------------------------------------

Run  1206
State: [19  0  0  0  0 15  0 21  0 25 18 25]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:32789.28023095045 | Update: 689.3143773514166
Weight: [-2000.85700835   -51.43455811  1171.31262183   543.33795351
 -1723.40639654  7561.02668288  5089.49019598  1723.64252394
   507.99990432  6596.7869675  -3539.10949159 -1682.47621459
  1000.08042201 29467.80598203]
------------------------------------

Run  1207
State: [18  0  0  0  0 16  0 20  0 24 18 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:33824.32160590280

 -3.74130099e+02  2.49278210e+04]
------------------------------------

Run  1260
State: [34  0  0  0  0  0  0 21 10  7  0 14]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:23880.32641025991 | Update: -368.7252454372938
Weight: [-2.89163202e+03 -5.14345581e+01  1.17131262e+03  5.43337954e+02
 -1.72340640e+03  5.94639450e+03  4.43983259e+03  1.19336921e+03
  2.41054223e+01  5.50581085e+03 -4.45499184e+03 -2.97765316e+03
 -2.53909642e+02  2.50480415e+04]
------------------------------------

Run  1261
State: [35  0  0  0  0  0  0 20 10  6  0 14]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:22921.800670001758 | Update: -686.9113866169719
Weight: [-3062.36599674   -51.43455811  1171.31262183   543.33795351
 -1723.40639654  5946.39450054  4439.83258557  1088.93794818
   -25.82633223  5470.95772674 -4454.99183739 -3047.07728214
  -253.90964243 24679.31621548]
------------------------------------

Run  1262
State: [35  0  0  0  0  0  0 19  9  0  0 14]
Alpha:1.0, Action:0, Reward:-0.1 | Max_val

Run  1308
State: [41  0  0  6  9 13 26 26  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:20915.64663570561 | Update: -1799.8192687866904
Weight: [-4.19271283e+03 -5.53205147e+01  1.17131262e+03  5.56362096e+02
 -1.76881542e+03  5.85832196e+03  4.33254589e+03  6.97274086e+02
 -2.11053053e+02  5.41530419e+03 -4.45499184e+03 -3.40431712e+03
 -2.41521939e+00  2.22111370e+04]
------------------------------------

Run  1309
State: [42  0  0  7  9 13  0 25  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:18307.044466592233 | Update: -108.18929089161975
Weight: [-5197.67778351   -55.3205147   1171.31262183   410.62705073
 -1987.99313928  5541.74613479  3695.2597143     66.15603938
  -211.05305303  5415.30418519 -4454.99183739 -3404.31712368
 -1802.23448818 20411.31771024]
------------------------------------

Run  1310
State: [42  0  0  7  9 14  0 25  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:18117.106837523294 | Update: -209.14908802683567
Weight: [-5259.5608


Run  1358
State: [ 2  0  0 19 25  0 22  0 15  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:22816.431951986146 | Update: -173.69827703014016
Weight: [-2967.58891656   -55.3205147   1171.31262183   722.76816355
 -1572.09201054  4604.56442696  4006.73115049   177.26109623
   255.87193309  5415.30418519 -4454.99183739 -3404.31712368
  -131.93642819 21936.73601222]
------------------------------------

Run  1359
State: [ 2  0  0 19 25  0 23  0 15  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:22584.85402563746 | Update: -228.9356999334741
Weight: [-2972.32003335   -55.3205147   1171.31262183   678.22993247
 -1630.84913169  4604.56442696  3954.68957381   177.26109623
   220.58932158  5415.30418519 -4454.99183739 -3404.31712368
  -131.93642819 21763.03773519]
------------------------------------

Run  1360
State: [ 2  0  0 19 25  0 23  0 14  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:22325.30528913155 | Update: -49.234078671608586
Weight: [-2978.5556826    -55.3205


Run  1408
State: [24  0 14  0  0  9 18  0  0 11 22  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:17166.394531571124 | Update: -110.89316342715756
Weight: [-3.57722134e+03 -5.53205147e+01  7.13411726e+02 -6.08953174e+02
 -1.61314125e+03  4.58631427e+03  3.94055614e+03 -8.87898253e+02
  1.40942674e+01  3.66792997e+03 -4.49799752e+03 -3.40431712e+03
 -1.65643814e+02  1.74099802e+04]
------------------------------------

Run  1409
State: [24  0 14  0  0  9 18  0  0 11 21  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:17138.81660642916 | Update: 113.50512090248958
Weight: [-3.61346684e+03 -5.53205147e+01  6.92482617e+02 -6.08953174e+02
 -1.61314125e+03  4.57281057e+03  3.91337238e+03 -8.87898253e+02
  1.40942674e+01  3.65145827e+03 -4.53101297e+03 -3.40431712e+03
 -1.65643814e+02  1.72990870e+04]
------------------------------------

Run  1410
State: [25  0 14  0  0 10 20  0  0 11 21  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:17303.879032111272 | Update: -159.3756851284088
Weight


Run  1452
State: [48  0 23  0  0  0  8  0 22  0 11  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:9788.941685865753 | Update: -53.246870274686444
Weight: [-5395.22854253   -55.3205147   -183.93372496  -608.9531736
 -1613.1412546   3594.9628961   3285.40145062 -1060.04134925
  -546.32162703  3511.31323713 -5245.79381671 -3404.31712368
  -240.14435639 13913.81325887]
------------------------------------

Run  1453
State: [48  0 23  0  0  0  9  0 22  0 11  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:9824.579529597637 | Update: 265.2729286976992
Weight: [-5430.03609491   -55.3205147   -200.44345057  -608.9531736
 -1613.1412546   3594.9628961   3279.6002708  -1060.04134925
  -562.18482412  3511.31323713 -5253.72022501 -3404.31712368
  -240.14435639 13860.56638859]
------------------------------------

Run  1454
State: [48  0 23  0  0  0 10  0 21  0 10  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:10214.498707797142 | Update: -173.73382424950432
Weight: [-5256.62681634   -55.3205147 

Run  1507
State: [11  0  0  0 21  0  9  0  0  0  0 19]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:36662.915165523904 | Update: -292.1569552830988
Weight: [-2272.76402817   -55.3205147   2002.17944177  -608.9531736
  1068.55759241  3594.9628961   5475.76346995  4370.69590186
  2462.09785546  3511.31323713 -4690.96483911  4310.68680866
  -307.50183537 34852.55246025]
------------------------------------

Run  1508
State: [11  0  0  0 21  0 10  0  0  0  0 19]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:36390.74495619663 | Update: -289.92292005486524
Weight: [-2316.53105819   -55.3205147   2002.17944177  -608.9531736
   985.54183886  3594.9628961   5439.95456885  4370.69590186
  2462.09785546  3511.31323713 -4690.96483911  4236.03345769
  -307.50183537 34560.39550497]
------------------------------------

Run  1509
State: [11  0  0  0 21  0 11  0  0  0  0 19]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:36075.85713429506 | Update: -23.462106145751022
Weight: [-2359.96341508   -55.3205147


Run  1559
State: [ 5  0  0 15  0  0  8  0 22 14  0  8]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:36411.305889305266 | Update: 130.32847778819269
Weight: [-1802.9243691    -73.5940223   2002.17944177  -973.53923072
  1162.89292999  2986.0208494   5719.86158458  5077.46711757
  1978.96971622  3422.0918883  -4690.96483911  4355.90641291
  -416.54506733 34324.92473127]
------------------------------------

Run  1560
State: [ 5  0  0 15  0  0  9  0 22 14  0  8]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:36605.03649283351 | Update: -328.16717596643866
Weight: [-1794.04979563   -73.5940223   2002.17944177  -947.15682259
  1162.89292999  2986.0208494   5734.06070899  5077.46711757
  2017.79690585  3446.73004419 -4690.96483911  4369.9283749
  -286.21658954 34455.25320905]
------------------------------------

Run  1561
State: [ 5  0  0 16  0  0 10  0 21 14  0  8]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:36289.738361542 | Update: -285.45531710031355
Weight: [-1816.39597631   -73.5940223

Alpha:1.0, Action:0, Reward:-0.1 | Max_value:21850.92033589578 | Update: -218.6092033589557
Weight: [-3573.54173914   -73.5940223   2002.17944177 -3425.9758737
  1162.89292999  2986.0208494   4764.89293607  3805.46067104
  1024.86344191  2899.22890317 -4690.96483911  4144.91343135
  -298.33188403 25920.24142127]
------------------------------------

Run  1606
State: [25  0  0 26  0  0  0  0  0  0 26  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:22782.008418781228 | Update: 500.5483511452894
Weight: [-3647.97149805   -73.5940223   2002.17944177 -3502.6812213
  1162.89292999  2986.0208494   4764.89293607  3805.46067104
  1024.86344191  2899.22890317 -4767.88345452  4144.91343135
  -298.33188403 25701.63221791]
------------------------------------

Run  1607
State: [25  0  0  0  0  0  0  0  0  0 26  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:23355.18873339511 | Update: 17.42001199749211
Weight: [-3477.55006783   -73.5940223   2002.17944177 -3327.04938753
  1162.89292999  2986.02084


Run  1657
State: [14  0 10  0 17 26  0  0  0  0 13  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:21476.60022026493 | Update: -90.89319372250611
Weight: [-4449.04273916   -82.60361203  1787.83549888 -3327.04938753
   678.13426775  2229.903427    4429.5216472   3337.41450024
  1024.86344191  2899.22890317 -5620.79519126  4144.91343135
  -184.46333685 22192.85643083]
------------------------------------

Run  1658
State: [15  0 10  0 17 26  0  0  0  0 13  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:20921.284766666486 | Update: -374.2157411860753
Weight: [-4466.37269919   -82.60361203  1775.58230524 -3327.04938753
   657.22661518  2197.92845275  4429.5216472   3337.41450024
  1024.86344191  2899.22890317 -5636.78577743  4144.91343135
  -275.35653057 22101.96323711]
------------------------------------

Run  1659
State: [16  0 11  0 18  0  0  0 26  0 13  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:20490.383636351682 | Update: -205.00383636351398
Weight: [-4542.81811951   -82.60361

Alpha:1.0, Action:0, Reward:-0.1 | Max_value:21282.07633563731 | Update: -190.94290706457468
Weight: [-4567.84278977   -85.53400093  1630.2995023  -3327.04938753
   953.63580014  2066.28450675  4929.778763    3329.24943826
  1186.61513474  3055.59017029 -5648.45745839  4144.91343135
  -212.83548595 21527.92676159]
------------------------------------

Run  1714
State: [23  0 22  0  0  0  0  0  0 17  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:21005.636717478985 | Update: -250.825125052168
Weight: [-4627.65220114   -85.53400093  1576.24389406 -3327.04938753
   953.63580014  2066.28450675  4929.778763    3329.24943826
  1186.61513474  3011.75796799 -5648.45745839  4144.91343135
  -212.83548595 21336.98385452]
------------------------------------

Run  1715
State: [23  0 22  0  0  0  0  0  0 16  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:20695.699667897086 | Update: -207.05699667896988
Weight: [-4706.218634     -85.53400093  1501.85439202 -3327.04938753
   953.63580014  206

Run  1777
State: [52  0  0 15  0  0  0  0 23  0  0 20]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:12549.157647123255 | Update: 276.12929558540236
Weight: [-6873.81947116   -85.53400093   984.55850701 -3860.06647519
   953.63580014  1426.40255172  4626.26452051  1833.98721145
   926.67422607  2673.75971279 -5648.45745839  2860.12943585
  -186.28936433 16925.17656081]
------------------------------------

Run  1778
State: [54  0  0 15  0  0  7  0 23  0  0 19]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:13074.35566270513 | Update: -172.40363553270254
Weight: [-6678.27119557   -85.53400093   984.55850701 -3804.16959142
   953.63580014  1426.40255172  4626.26452051  1833.98721145
  1012.67735943  2673.75971279 -5648.45745839  2934.40089506
    89.83993126 17201.30585639]
------------------------------------

Run  1779
State: [55  0  0 15  0  0  8  0 22  0  0 19]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:12679.017953129038 | Update: -128.90658723634624
Weight: [-6805.05925111   -85.53400

Run  1842
State: [ 8  0  0  0 24  9 19  0  0  0 21  6]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:39391.602277990845 | Update: -890.321404197879
Weight: [-3202.26591511   -85.53400093   984.55850701   830.08324023
   517.50873709  1338.28961258 11612.7350042   -689.02487068
  7139.7937071   2673.75971279 -6116.34528082  8334.92162386
  -253.4966593  37966.58265094]
------------------------------------

Run  1843
State: [ 8  0  0  0 24 10 20  0  0  0 21  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:38405.60242672331 | Update: -226.05402296776447
Weight: [-3299.26663631   -85.53400093   984.55850701   830.08324023
   228.38546792  1229.87325419 11382.36142505  -689.02487068
  7139.7937071   2673.75971279 -6369.36582038  8263.07976835
  -253.4966593  37076.26124674]
------------------------------------

Run  1844
State: [ 8  0  0  0 25 10 21  0  0  0 21  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:38282.428908263224 | Update: -212.5243949705182
Weight: [-3323.89527396   -85.53400

Run  1904
State: [21  0  0  0  0  0 26  0 13  9  6  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:32693.150602381564 | Update: -2801.32876272919
Weight: [-4424.28186213   -87.58557503   984.55850701   830.08324023
 -1454.04930515   964.92084091  9298.83011705   321.24807162
  6255.54344755  1751.46331062 -7273.2259727   8263.07976835
  -227.64535038 32644.42037377]
------------------------------------

Run  1905
State: [21  0  0  0  0  0  0 25 12  8  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:29541.445986658004 | Update: -299.84706274104246
Weight: [-5225.44606666   -87.58557503   984.55850701   830.08324023
 -1454.04930515   964.92084091  8306.92602307   321.24807162
  5762.39078081  1411.01794231 -7500.68595407  8263.07976835
 -3028.97411311 29843.09161104]
------------------------------------

Run  1906
State: [21  0  0  0  0  0  0 24 12  8  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:29073.974429868373 | Update: 2639.276344383972
Weight: [-5311.2006331    -87.5855750


Run  1964
State: [47  0 18  9 12 19  0 15  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:20972.720446177333 | Update: -4.800106381677324
Weight: [-7.28470110e+03 -8.75855750e+01  1.80915973e+01  6.92279165e+02
 -1.72271419e+03  4.79448880e+02  6.91163384e+03 -2.42102312e+01
  5.89328952e+03  1.64728225e+03 -7.50068595e+03  8.26307977e+03
 -3.27119094e+02  2.58306110e+04]
------------------------------------

Run  1965
State: [48  0 18  9 13 19  0 14  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:20974.2554155714 | Update: 131.5113218312108
Weight: [-7.28777356e+03 -8.75855750e+01  1.69268242e+01  6.91696154e+02
 -1.72349358e+03  4.78214895e+02  6.91163384e+03 -2.51813010e+01
  5.89328952e+03  1.64728225e+03 -7.50068595e+03  8.26307977e+03
 -3.31919201e+02  2.58258109e+04]
------------------------------------

Run  1966
State: [48  0 18 10 13 19  0 14  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:21196.908615388602 | Update: -204.4734606545535
Weight: [-

Run  2018
State: [13  0  0 23  0  0  0 12  0 21  0 23]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:46224.23405951277 | Update: -479.2605560371594
Weight: [-2516.30220643   -87.58557503  4484.64636147  4500.66099101
  2472.51454711  1020.46789721  9128.15235235  1254.42932192
 10334.08867532  -718.57670825 -7500.68595407  6930.82022373
  -504.39706395 43146.58609845]
------------------------------------

Run  2019
State: [13  0  0 23  0  0  0 11  0 21  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:45595.31106523217 | Update: 55.87770891495893
Weight: [-2601.15247113   -87.58557503  4484.64636147  4351.90194792
  2472.51454711  1020.46789721  9128.15235235  1176.86512729
 10334.08867532  -854.48057773 -7500.68595407  6782.57558393
  -504.39706395 42667.32554241]
------------------------------------

Run  2020
State: [14  0  0 24  0  0  0 10  0 21  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:45629.70069571284 | Update: -47.72700050700223
Weight: [-2591.25965118   -87.58557503  


Run  2078
State: [16  0  0  0  0 24  0  0  0  0 14 11]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:32641.245716872865 | Update: -165.19702961635812
Weight: [-4734.36684872   -94.25360675  4484.64636147  3439.85651964
  2472.51454711  -286.88346541  7627.85300805   565.07685051
 10334.08867532 -1810.41021782 -8302.75403104  5026.34119472
  -396.33650819 34599.3729933 ]
------------------------------------

Run  2079
State: [16  0  0  0  0 25  0  0  0  0 14 11]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:32553.298216010255 | Update: -212.84939889160887
Weight: [-4770.36335726   -94.25360675  4484.64636147  3439.85651964
  2472.51454711  -340.52719536  7627.85300805   565.07685051
 10334.08867532 -1810.41021782 -8334.05227261  5001.90270321
  -396.33650819 34434.17596369]
------------------------------------

Run  2080
State: [16  0  0  0  0 25  0  0  0  0 13 11]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:32221.809227721234 | Update: -394.74514039134374
Weight: [-4816.74333892   -94.

Run  2137
State: [23  0  0  0 19  0  0  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:35158.11955346445 | Update: -351.68119553464203
Weight: [-5509.85052353   -90.32237821  4484.64636147  3439.85651964
   994.99754155  1617.83726169  5652.6373454   -238.003143
 12419.00411467 -1810.41021782 -6572.4130743   6631.50433761
  -353.21321305 36628.18051939]
------------------------------------

Run  2138
State: [23  0  0  0 19  0  0  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:34672.15202231079 | Update: -70.04595628863171
Weight: [-5620.00829593   -90.32237821  4484.64636147  3439.85651964
   904.58519565  1617.83726169  5652.6373454   -238.003143
 12419.00411467 -1810.41021782 -6572.4130743   6631.50433761
  -353.21321305 36276.49932385]
------------------------------------

Run  2139
State: [24  0  0  0 19  0  0  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:34525.46487980122 | Update: 13.164209572591062
Weight: [-5641.94892456   -90.32237821  44

State: [33  0 14 18  0  0 19  0 16 11  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:46504.07317241308 | Update: -408.29351494397997
Weight: [-3405.80084949   -90.28156794  5517.18962211  4769.5167029
  -324.80354481  1776.85866621  4174.23554832   -65.32600228
 14346.41133204  -398.46988525 -6572.4130743   6631.50433761
  -351.30378398 41648.6841341 ]
------------------------------------

Run  2205
State: [33  0 14 18  0  0 20  0 16 11  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:45859.59500794232 | Update: -516.8273790048333
Weight: [-3589.29599692   -90.28156794  5440.13149471  4670.33562137
  -324.80354481  1776.85866621  4068.58826416   -65.32600228
 14257.94736357  -459.11642228 -6572.4130743   6631.50433761
  -351.30378398 41240.39061916]
------------------------------------

Run  2206
State: [33  0 15 18  0  0 21  0 15 10  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:45229.61251158262 | Update: -337.5712665391329
Weight: [-3821.56840056   -90.28156794  5342.58


Run  2266
State: [ 2  0  0  0  9 14  0 24  0  0 19 26]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:56163.789135399464 | Update: -13.901356538917753
Weight: [-3173.11796167   -90.28156794 10226.84850577  1773.41998132
  1051.33576582  4711.62051449  8275.69194681 -3170.87074841
 13440.82578419  -816.22053116  -356.1834582   3732.04656629
  -520.40194188 55015.33404696]
------------------------------------

Run  2267
State: [ 3  0  0  0 10 15  0 23  0  0 19 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:56149.63463995778 | Update: -22.718112794427725
Weight: [-3173.49660067   -90.28156794 10226.84850577  1773.41998132
  1049.64289164  4708.98727288  8275.69194681 -3175.37037815
 13440.82578419  -816.22053116  -359.75783597  3727.18574106
  -534.30329842 55001.43269042]
------------------------------------

Run  2268
State: [ 4  0  0  0 10 15  0 22  0  0 18 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:56119.121116758106 | Update: -4.481016480072867
Weight: [-3174.42477952   -90.28156

Run  2324
State: [16  0  0  0  0  0  0 20 10 24  0 14]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:46887.43370424991 | Update: -158.7935854123425
Weight: [-3951.78361317   -89.87902682 10226.84850577  1773.41998132
 -1512.17348318  4576.11123475  5728.43376852 -4231.95646472
 11730.8345805  -1909.92886107 -1105.05326616  2013.73072519
  -465.679464   47696.90159638]
------------------------------------

Run  2325
State: [17  0  0  0  0  0  0 19  9 24  0 14]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:46768.864309319426 | Update: -383.6501939036825
Weight: [-3986.38480827   -89.87902682 10226.84850577  1773.41998132
 -1512.17348318  4576.11123475  5728.43376852 -4274.78875574
 11709.33119167 -1961.39066867 -1105.05326616  1983.83284024
  -624.47304941 47538.10801097]
------------------------------------

Run  2326
State: [17  0  0  0  0  0  0 18  9 23  0 14]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:46183.91780127475 | Update: -560.8914986000891
Weight: [-4075.2072086    -89.8790268


Run  2379
State: [17  0  0  9  0 18  0 17  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:33743.671545228695 | Update: -181.5062745814139
Weight: [-7323.35195047   -99.6332755  10226.84850577  1347.69944387
 -1512.17348318  3516.87575212  3988.91152529 -5784.56873338
 11407.06603106 -4126.90603522 -1105.05326616  1028.14190463
  -198.11945529 35589.1926454 ]
------------------------------------

Run  2380
State: [17  0  0  9  0 18  0 15  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:33555.829273263786 | Update: -112.18369590923976
Weight: [-7365.37414475   -99.6332755  10226.84850577  1325.65404276
 -1512.17348318  3472.67093076  3988.91152529 -5826.18363943
 11407.06603106 -4126.90603522 -1105.05326616  1028.14190463
  -198.11945529 35407.68637081]
------------------------------------

Run  2381
State: [18  0  0  9  0 19  0 14  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:33567.52010817495 | Update: -192.5699758304254
Weight: [-7391.34682848   -99.6332

Run  2439
State: [29  0  9 24 14  0  0  9  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:27172.507491011213 | Update: -219.2493915736668
Weight: [-9082.51903076  -105.86194956 10117.50786641    82.55954065
 -1936.96079416  3681.59348411  2921.55357231 -5841.60112379
  9969.53342333 -4126.90603522 -1105.05326616  1028.14190463
  -178.36575148 30528.72983833]
------------------------------------

Run  2440
State: [29  0  9 24 15  0  0  8  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:26960.674173872598 | Update: -190.56341789454018
Weight: [-9.16911039e+03 -1.05861950e+02  1.00909068e+04  1.15472550e+01
 -1.97849361e+03  3.68159348e+03  2.92155357e+03 -5.86821384e+03
  9.96953342e+03 -4.12690604e+03 -1.10505327e+03  1.02814190e+03
 -1.78365751e+02  3.03094804e+04]
------------------------------------

Run  2441
State: [29  0  9 24 15  0  0  7  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:26707.12224228693 | Update: -267.84833741599505
Weight: [-9244.3723

Run  2498
State: [39  0 20  0  0 24  0  0  0 19 14  0]
Alpha:0.3333333333333333, Action:1, Reward:0.0 | Max_value:23347.909860747102 | Update: -77.82636620248982
Weight: [-11196.66056046   -104.05413813   9565.82393335   -157.08181189
  -3831.29124557   3665.827226     1512.69146512  -5654.98106313
   9969.53342333  -4140.29237878  -2087.038485     1028.14190463
    297.86180157  26685.11305487]
------------------------------------

Run  2499
State: [39  0 20  0  0 24  0  0  0 19 14  0]
Alpha:0.25, Action:1, Reward:0.0 | Max_value:23253.956622691483 | Update: -31.776157280090047
Weight: [-11237.99666264   -104.05413813   9544.84059023   -157.08181189
  -3831.29124557   3640.55500057   1512.69146512  -5654.98106313
   9969.53342333  -4160.2597639   -2101.78347415   1028.14190463
    220.03543537  26607.28668867]
------------------------------------

Run  2500
State: [39  0 20  0  0 25  0  0  0 18 14  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:23329.599678938368 | Update: 111.945162

Run  2561
State: [49  0  0  0  0  0  0  0  0  0  0 20]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:14442.374079673915 | Update: -170.67297535304715
Weight: [-12938.80029563   -104.00374442   8041.97592364   -157.08181189
  -3831.29124557   4110.86804643    -92.99885269  -5461.14657652
   8381.10474158  -4278.07390098  -2406.47069263    311.76356174
   -149.96142295  23169.05082287]
------------------------------------

Run  2562
State: [50  0  0  0  0  0  0  0  0  0  0 20]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:14181.798942039964 | Update: -141.91798942040077
Weight: [-13052.69381198   -104.00374442   8041.97592364   -157.08181189
  -3831.29124557   4110.86804643    -92.99885269  -5461.14657652
   8381.10474158  -4278.07390098  -2406.47069263    265.85705661
   -320.6343983   22998.37784752]
------------------------------------

Run  2563
State: [50  0  0  0  0  0  0  0  0  0  0 20]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:13953.148282554754 | Update: -75.14631200176154
Weigh


Run  2625
State: [16  0  0 16 21  0 11  0 20  0  0  8]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:68428.03324491647 | Update: -160.8553648697707
Weight: [-5840.94654686  -104.00374442  8041.97592364 -1780.71163852
 -2134.45090844  8974.78931285 13055.4875047  -5546.85157439
  5942.73419491 -4278.07390098 -2406.47069263 10512.01978242
   374.60852043 65472.00339581]
------------------------------------

Run  2626
State: [16  0  0 16 22  0 12  0 20  0  0  8]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:68250.22543663475 | Update: -653.112676838864
Weight: [-5875.99700466  -104.00374442  8041.97592364 -1815.44440573
 -2180.15760508  8974.78931285 13031.39064274 -5546.85157439
  5899.1690163  -4278.07390098 -2406.47069263 10494.71345113
   374.60852043 65311.14803094]
------------------------------------

Run  2627
State: [17  0  0 17 22  0 14  0 19  0  0  7]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:67026.85186149235 | Update: -670.3685186149232
Weight: [-6018.31055655  -104.00374442


Run  2690
State: [34  0  0  0  0 13 26 26  0  8 20  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:46119.94732919385 | Update: -3615.0016784121544
Weight: [-10305.85628219   -108.64117497   8041.97592364  -3943.74377488
  -4377.24514891   8441.84385796  10317.50146324  -5448.80861036
   4250.39394776  -5076.47707114  -2829.60754353  10098.99463846
   -273.45433374  52406.00113809]
------------------------------------

Run  2691
State: [34  0  0  0  0 14  0 25  0  8 19  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:40978.84257737018 | Update: -319.3053696506904
Weight: [-11979.7411459    -108.64117497   8041.97592364  -3943.74377488
  -4377.24514891   7805.98994622   9037.48927321  -6716.43188725
   4250.39394776  -5466.99351914  -3808.03395478  10098.99463846
  -3888.45601215  48790.99945967]
------------------------------------

Run  2692
State: [34  0  0  0  0 14  0 24  0  8 19  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:40791.057441337005 | Update: -68.44429467766895
Weight:

   -519.99009782  42572.28122943]
------------------------------------

Run  20
State: [10  0  0  0  7 11 21  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:43354.718595757586 | Update: 4.431314375637157
Weight: [-14517.8078145    -108.64117497   7203.18452067  -3943.74377488
  -4421.44717761   7086.94224747   8495.97387549  -6232.12641625
   2659.23402269  -5135.75971273  -4224.76173547  10098.99463846
   -519.99009782  42348.12399428]
------------------------------------

Run  21
State: [11  0  0  0  7 11 22  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:43415.658154099234 | Update: -378.35635564154654
Weight: [-14517.2043236    -108.64117497   7203.18452067  -3943.74377488
  -4421.02746154   7087.60177168   8497.24118913  -6232.12641625
   2659.23402269  -5135.75971273  -4224.76173547  10098.99463846
   -515.55878345  42352.55530865]
------------------------------------

Run  22
State: [11  0  0  0  8 11 23  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | 


Run  81
State: [ 2  0 17 21  0  0  0 21 10 25  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:31844.054048227157 | Update: -177.04408233935828
Weight: [-15056.00742027   -102.23950327   6045.10631082  -5147.71721021
  -6395.26671074   6380.43237305   5945.6219049   -5405.41413126
    950.12467415  -5079.84473824  -4224.76173547  10098.99463846
   -152.27135313  35303.27620861]
------------------------------------

Run  82
State: [ 2  0 17 21  0  0  0 20 10 24  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:31668.255467543077 | Update: -255.74630966700715
Weight: [-15060.82966862   -102.23950327   6004.53226925  -5197.89188457
  -6395.26671074   6380.43237305   5945.6219049   -5455.55698697
    926.14985343  -5139.6118493   -4224.76173547  10098.99463846
   -152.27135313  35126.23212627]
------------------------------------

Run  83
State: [ 2  0 17 21  0  0  0 19  9 24  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:31403.492153271738 | Update: -87.85968401904029
Weight: [-

Run  142
State: [12  0  0  0  0 20  0 13  0  0 16 23]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:35499.400279822905 | Update: -117.35580239701812
Weight: [-14764.00849861   -100.77012481   4045.67078019  -5358.7943131
  -6395.26671074   6618.60050954   4417.01682465  -3981.20770768
    838.50862896  -5711.85080624  -3766.61446924   9404.74201183
    184.04451791  34487.96711784]
------------------------------------

Run  143
State: [12  0  0  0  0 20  0 12  0  0 16 23]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:35493.348815596306 | Update: -27.896215481050604
Weight: [-14783.18741099   -100.77012481   4045.67078019  -5358.7943131
  -6395.26671074   6586.84346672   4417.01682465  -4001.78348607
    838.50862896  -5711.85080624  -3792.02502819   9368.44157267
    184.04451791  34370.61131544]
------------------------------------

Run  144
State: [12  0  0  0  0 21  0 11  0  0 16 23]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:35617.37877559392 | Update: -12.766428446375357
Weight: [

Run  200
State: [30  0  0  0 14  0  0 11  0  0  0 12]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:19965.463970271947 | Update: 440.4410585125588
Weight: [-16617.89309983    -97.21289487   4045.67078019  -5358.7943131
  -6509.29684299   5363.15658728   3069.98732097  -3449.81994086
   -964.65743076  -5711.85080624  -5000.76786295   7427.25079364
   -367.25331885  27028.30860979]
------------------------------------

Run  201
State: [29  0  0  0 14  0  0 10  0  0  0 12]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:20814.45974568928 | Update: 20.06421386418515
Weight: [-16437.94502101    -97.21289487   4045.67078019  -5358.7943131
  -6425.86327368   5363.15658728   3069.98732097  -3384.47848044
   -964.65743076  -5711.85080624  -5000.76786295   7498.33097375
     73.18773966  27468.7496683 ]
------------------------------------

Run  202
State: [28  0  0  0 15  0  0  8  0  0  0 12]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:21082.95266536682 | Update: 106.079243291686
Weight: [-16430.020

   -394.25736732  24843.60630806]
------------------------------------

Run  255
State: [49  0  0 11  0 22  0  8  0 20  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:12703.26378404013 | Update: 197.9358749379844
Weight: [-17288.44333042    -97.21289487   4045.67078019  -5043.38970594
  -7825.30328494   5469.9457472    2325.56559588  -2734.7970178
   -964.65743076  -4655.01446975  -5000.76786295   7090.90100369
   -324.96851278  24912.8951626 ]
------------------------------------

Run  256
State: [49  0  0 11  0 22  0  8  0 20  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:13139.386954482625 | Update: -20.09142203603915
Weight: [-17156.35673169    -97.21289487   4045.67078019  -5014.0063732
  -7825.30328494   5528.8643837    2325.56559588  -2713.44087303
   -964.65743076  -4601.55860604  -5000.76786295   7090.90100369
   -127.03263784  25110.83103754]
------------------------------------

Run  257
State: [49  0  0 11  0 23  0  7  0 20  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | 


Run  312
State: [11  0  9 25  0  0  0  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:129908.10658245736 | Update: 1276.971941459502
Weight: [-4.53163883e+03 -9.72128949e+01  5.43313991e+03  2.07610757e+04
 -7.82530328e+03  4.66105163e+03  1.66816643e+04  1.17641291e+04
  1.72280374e+04  5.14458226e+03 -5.00076786e+03  7.09090100e+03
 -2.50280955e+03  1.22850097e+05]
------------------------------------

Run  313
State: [11  0 10 25  0  0  0  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:131606.85909825814 | Update: -1089.2867437672103
Weight: [-4.34034005e+03 -9.72128949e+01  5.58807215e+03  2.11919050e+04
 -7.82530328e+03  4.66105163e+03  1.66816643e+04  1.17641291e+04
  1.72280374e+04  5.14458226e+03 -5.00076786e+03  7.09090100e+03
 -1.22583761e+03  1.24127068e+05]
------------------------------------

Run  314
State: [12  0 10 26  0  0  0  0  0  0  0  0]
Alpha:0.3333333333333333, Action:1, Reward:0.0 | Max_value:130280.8251523966 | Update: -46.10103829570


Run  351
State: [32  1 18  0 12 18  0 17  0  0 17  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:102675.22834343616 | Update: -32.33544526254991
Weight: [-8.04040015e+03 -9.72128949e+01  2.85920672e+03  1.68988927e+04
 -7.91015984e+03  4.37024788e+03  1.47683812e+04  1.03613671e+04
  1.72280374e+04  5.14458226e+03 -9.68528005e+03  7.09090100e+03
 -1.16308403e+03  1.05728024e+05]
------------------------------------

Run  352
State: [31  0 18  0 12 18  0 15  0  0 17  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:102791.89310576385 | Update: 330.3356879180501
Weight: [-8.05449197e+03 -9.76504270e+01  2.85136034e+03  1.68988927e+04
 -7.91541015e+03  4.36237277e+03  1.47683812e+04  1.03539534e+04
  1.72280374e+04  5.14458226e+03 -9.69271909e+03  7.09090100e+03
 -1.19541947e+03  1.05695688e+05]
------------------------------------

Run  353
State: [29  0 18  0 13 19  0 14  0  0 16  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:103283.31194769725 | Update: -140.3866221004282
Weight: [-7.

Run  407
State: [20  0  0  0  0  0  0 12  0 21  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:96172.41460952445 | Update: 250.42046265186218
Weight: [-11364.78637349   -111.76988731   3113.69323453  16898.89266624
  -7431.83509328   3932.68663447  16167.9819738    7454.38033461
  14867.94904981   2505.79759549 -10186.91534481   5355.11357559
  -1467.45472536  95949.77777302]
------------------------------------

Run  408
State: [21  0  0  0  0  0  0 11  0 21  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:96237.52372115212 | Update: -0.26877343974774703
Weight: [-11296.57795637   -111.76988731   3113.69323453  16898.89266624
  -7431.83509328   3932.68663447  16167.9819738    7494.90873041
  14867.94904981   2576.80930184 -10186.91534481   5432.5735101
  -1217.03426271  96200.19823567]
------------------------------------

Run  409
State: [22  0  0  0  0  0  0 10  0 21  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:96136.09754479036 | Update: 154.86039446829818
Weight: [-1129

Alpha:1.0, Action:1, Reward:0.0 | Max_value:71258.74692211307 | Update: -991.1909075728909
Weight: [-21556.13951398   -111.76988731   3113.69323453  15443.334427
  -7431.83509328   1172.73296116  14205.52575718   2518.88337435
  14285.34556497   -182.93739541 -10186.91534481    639.23161482
   2616.21249137  75187.42466172]
------------------------------------

Run  472
State: [49  0  0 14  0  0  0  0 25  0  0 10]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:68177.13299087518 | Update: -1179.6902651286364
Weight: [-22204.08237438   -111.76988731   3113.69323453  15269.4404602
  -7431.83509328   1172.73296116  14205.52575718   2518.88337435
  13936.36229258   -182.93739541 -10186.91534481    505.929578
   1625.0215838   74196.23375414]
------------------------------------

Run  473
State: [50  0  0 14  0  0  0  0 24  0  0  9]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:65090.58476506936 | Update: -205.67452897738985
Weight: [-22991.31346486   -111.76988731   3113.69323453  15046.5559517

Run  531
State: [20  0  0  0 21  0 10  0  0  0 24  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:126559.90875050821 | Update: -555.0908985183632
Weight: [-1.03115410e+04 -1.11769887e+02  3.11369323e+03  2.42410530e+04
 -6.79426588e+03  1.17273296e+03  3.15509716e+04  3.16490217e+03
  2.93888900e+04 -1.82937395e+02 -1.04204147e+04  3.22163004e+03
  1.39910296e+02  1.29676122e+05]
------------------------------------

Run  532
State: [20  0  0  0 21  0 11  0  0  0 23  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:126043.47280063808 | Update: -1068.3404050971294
Weight: [-1.04627342e+04 -1.11769887e+02  3.11369323e+03  2.42410530e+04
 -6.95199373e+03  1.17273296e+03  3.14753761e+04  3.16490217e+03
  2.93888900e+04 -1.82937395e+02 -1.06007019e+04  3.22163004e+03
  1.39910296e+02  1.29121032e+05]
------------------------------------

Run  533
State: [21  0  0  0 22  0 12  0  0  0 23  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:125053.65468926693 | Update: -728.9615988177393
Weight: [

Run  570
State: [47  0 10  0  0 26  0  0  0 18 13  0]
Alpha:1.0, Action:1, Reward:-1 | Max_value:108113.22317885366 | Update: 10554.620493213966
Weight: [-1.78102684e+04 -1.11769887e+02  2.12455515e+03  2.42410530e+04
 -1.07479843e+04 -2.04963551e+03  2.81922735e+04  3.29387142e+03
  2.93888900e+04 -3.29548181e+03 -1.45880475e+04  3.22163004e+03
 -1.48893715e+03  1.13167510e+05]
------------------------------------

Run  571
State: [48  1 10  0  0 26  0  0 26 17 13  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:134027.57817089607 | Update: -1876.079488362593
Weight: [-1.10544395e+04 -1.11769887e+02  3.54740986e+03  2.42410530e+04
 -1.07479843e+04  1.66333468e+03  2.81922735e+04  3.29387142e+03
  2.93888900e+04 -7.30076628e+02 -1.27312025e+04  3.22163004e+03
  9.06568334e+03  1.23722130e+05]
------------------------------------

Run  572
State: [48  0 11  0  0  0  0  0 26 17 13  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:129184.62356810924 | Update: -1153.9476016800472
Weight: [-12

Run  625
State: [40  0 21  0  0  0  0  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:105291.9138420781 | Update: 0.6317342050169827
Weight: [-1.28770073e+04 -1.54716132e+01 -1.45511445e+01  2.42410530e+04
 -1.07479843e+04  1.00335574e+03  2.40883480e+04 -3.25982225e+03
  3.03002773e+04  2.92824035e+02 -1.16271756e+04  3.22163004e+03
 -1.22891999e+03  1.12486167e+05]
------------------------------------

Run  626
State: [41  0 21  0  0  0  0  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:105117.23018587203 | Update: 1.5577721646113787
Weight: [-1.28766632e+04 -1.54716132e+01 -1.43723017e+01  2.42410530e+04
 -1.07479843e+04  1.00335574e+03  2.40883480e+04 -3.25982225e+03
  3.03002773e+04  2.92824035e+02 -1.16271756e+04  3.22163004e+03
 -1.22828825e+03  1.12486799e+05]
------------------------------------

Run  627
State: [42  0 22  0  0  0  0  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:104944.06369363924 | Update: 1.9372618505731225
Weight: [-1.2

Run  684
State: [ 8  0  0 15 20  0  7  0 22  0  0 19]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:234509.12382995983 | Update: 441.1860364754393
Weight: [-5.44266135e+03 -2.84156891e+01 -4.75341731e+03  3.74632281e+04
  6.15517005e+03  2.71654192e+04  2.00663841e+04  1.30374080e+04
  3.02109363e+04  2.92824035e+02 -1.16271756e+04  3.36934563e+04
  2.43897895e+03  2.03543522e+05]
------------------------------------

Run  685
State: [ 7  0  0 15 20  0  8  0 22  0  0 19]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:235879.7883179355 | Update: -1556.8363068876497
Weight: [-5.39459403e+03 -2.84156891e+01 -4.75341731e+03  3.75525374e+04
  6.27456246e+03  2.71654192e+04  2.01084425e+04  1.30374080e+04
  3.03423735e+04  2.92824035e+02 -1.16271756e+04  3.38061903e+04
  2.43897895e+03  2.03984708e+05]
------------------------------------

Run  686
State: [ 6  0  0 16 21  0 10  0 21  0  0 19]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:232656.60780849453 | Update: -1979.53827905652
Weight: [-5.5


Run  735
State: [20  0  0  0  0  0  0  0  0 15 25  9]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:137195.95665943052 | Update: -1372.059566594311
Weight: [-1.32768563e+04 -2.84156891e+01 -4.75341731e+03  2.15741208e+04
 -1.73503575e+03  2.71654192e+04  1.38476667e+04  6.81875729e+03
  2.32551847e+04 -1.16921292e+04 -1.68336982e+04  2.30214557e+04
 -1.39306965e+03  1.46089219e+05]
------------------------------------

Run  736
State: [20  0  0  0  0  0  0  0  0 15 25  9]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:137042.85665904536 | Update: 91.83083852859272
Weight: [-1.36505718e+04 -2.84156891e+01 -4.75341731e+03  2.15741208e+04
 -1.73503575e+03  2.71654192e+04  1.38476667e+04  6.81875729e+03
  2.32551847e+04 -1.19700398e+04 -1.72978955e+04  2.28553843e+04
 -1.39306965e+03  1.44717159e+05]
------------------------------------

Run  737
State: [20  0  0  0  0  0  7  0  0 15 24  9]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:137199.14962935157 | Update: -1329.6456392338441
Weight: 

Run  788
State: [23  0  0  0  0  0  7  0 23  0 11  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:110988.48843743475 | Update: -397.0726866231562
Weight: [-2.31683623e+04 -2.84156891e+01 -4.75341731e+03  2.15741208e+04
 -1.73503575e+03  2.24332298e+04  1.04010098e+04  3.90650595e+03
  1.80490741e+04 -1.40541290e+04 -2.41467924e+04  2.20692649e+04
 -1.13110545e+03  1.15645291e+05]
------------------------------------

Run  789
State: [24  0  0  0  0  0  8  0 22  0 11  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:110092.52483362275 | Update: 18.78150453271519
Weight: [-2.32927381e+04 -2.84156891e+01 -4.75341731e+03  2.15741208e+04
 -1.73503575e+03  2.24332298e+04  1.03631568e+04  3.90650595e+03
  1.79254020e+04 -1.40541290e+04 -2.42059012e+04  2.20692649e+04
 -1.52817814e+03  1.15248219e+05]
------------------------------------

Run  790
State: [26  0  0  0  0  0  9  0 21  0 10  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:110256.85982364813 | Update: -961.5091120819561
Weight: [-2.


Run  843
State: [36  0 12  0 20  0  8  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:79766.47166052733 | Update: -625.6405662162433
Weight: [-2.96769688e+04 -2.55488728e+01 -5.94250170e+03  2.15741208e+04
 -4.54660997e+03  2.24332298e+04  8.57811185e+03  1.70704882e+03
  1.55534927e+04 -1.40541290e+04 -2.48600278e+04  2.20692649e+04
 -1.19914770e+03  9.54013903e+04]
------------------------------------

Run  844
State: [36  0 12  0 21  0 10  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:78951.10016351574 | Update: -755.1919610087207
Weight: [-2.99837054e+04 -2.55488728e+01 -6.04371185e+03  2.15741208e+04
 -4.71591891e+03  2.24332298e+04  8.50994909e+03  1.70704882e+03
  1.55534927e+04 -1.40541290e+04 -2.48600278e+04  2.20692649e+04
 -1.19914770e+03  9.47757498e+04]
------------------------------------

Run  845
State: [36  0 13  0 21  0 11  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:77916.5911610189 | Update: 419.98178394991555
Weight: [-3.0

Run  901
State: [ 2  0 24 16  0  0 12  0 20 13  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:224868.09806520777 | Update: -2248.680980652076
Weight: [-1.48509265e+04 -2.55488728e+01  1.76962117e+04  3.85524539e+04
 -6.71376756e+03  1.87888012e+04  1.77445976e+04  3.91636989e+02
  3.81717858e+04  2.22766470e+02 -2.48600278e+04  2.20692649e+04
  3.05900732e+04  1.67355405e+05]
------------------------------------

Run  902
State: [ 2  0 24 16  0  0 12  0 20 13  0  0]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:220212.90808212626 | Update: -861.8820923912281
Weight: [-1.49121750e+04 -2.55488728e+01  1.69686714e+04  3.80669064e+04
 -6.71376756e+03  1.87888012e+04  1.73771111e+04  3.91636989e+02
  3.75627655e+04 -1.71974350e+02 -2.48600278e+04  2.20692649e+04
  2.83413923e+04  1.65106724e+05]
------------------------------------

Run  903
State: [ 2  0 24 17  0  0 14  0 19 13  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:218481.21367030044 | Update: -1945.7132769337622
Weight: [-1


Run  952
State: [25  0  0  0  0  0 10  0  0  0 23 19]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:184434.67437376117 | Update: -1731.6556903306919
Weight: [-2.00787350e+04 -2.55488728e+01  3.90356787e+04  3.81518285e+04
 -6.71376756e+03  1.87888012e+04  3.16842627e+04 -1.66987234e+04
  5.25258130e+04  1.10540174e+04 -2.82253782e+04  3.36510424e+03
 -1.29899305e+03  1.94768575e+05]
------------------------------------

Run  953
State: [26  0  0  0  0  0 11  0  0  0 23 18]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:182612.33504196096 | Update: -1397.9398924578854
Weight: [-2.06683109e+04 -2.55488728e+01  3.90356787e+04  3.81518285e+04
 -6.71376756e+03  1.87888012e+04  3.14484356e+04 -1.66987234e+04
  5.25258130e+04  1.10540174e+04 -2.87643661e+04  2.92262324e+03
 -1.29899305e+03  1.93036919e+05]
------------------------------------

Run  954
State: [26  0  0  0  0  0 12  0  0  0 23 18]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:182738.28008813286 | Update: 123.25058818783145
Weight

State: [29  0  0  0 21  0 11  0 20  0 10  8]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:162258.6307816429 | Update: 909.2769575690618
Weight: [-2.90568670e+04 -3.67280839e+01  3.90356787e+04  3.81518285e+04
 -1.05864870e+04  1.70941032e+04  2.36693347e+04 -1.89049164e+04
  4.91408426e+04  1.10540174e+04 -3.55674735e+04 -2.50630681e+03
 -2.74848340e+03  1.65187371e+05]
------------------------------------

Run  1008
State: [30  0  0  0 22  0 12  0 20  0 10  8]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:163831.8724049295 | Update: 525.0858039213927
Weight: [-2.86977530e+04 -3.67280839e+01  3.90356787e+04  3.81518285e+04
 -1.03281180e+04  1.70941032e+04  2.38055485e+04 -1.89049164e+04
  4.93871062e+04  1.10540174e+04 -3.54444223e+04 -2.40847825e+03
 -1.83920644e+03  1.66096648e+05]
------------------------------------

Run  1009
State: [30  0  0  0 22  0 13  0 20  0 10  8]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:164400.1607822098 | Update: -1804.4800142179593
Weight: [-2.84832222e+

Run  1070
State: [46  0  0  0  0 11 21  0  0 10  0  0]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:115466.88263137813 | Update: -135.54793398124457
Weight: [-4.03616880e+04 -4.97375593e+01  3.90356787e+04  3.81518285e+04
 -1.54811493e+04  1.70492284e+04  1.91163261e+04 -2.19694705e+04
  4.49326758e+04  2.56693258e+03 -3.69220795e+04 -2.81858530e+03
 -9.12574908e+02  1.32429832e+05]
------------------------------------

Run  1071
State: [47  0  0  0  0 11 23  0  0 10  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:115715.16675543164 | Update: -701.4499119851098
Weight: [-4.04466039e+04 -4.97375593e+01  3.90356787e+04  3.81518285e+04
 -1.54811493e+04  1.70290544e+04  1.90775607e+04 -2.19694705e+04
  4.49326758e+04  2.54862909e+03 -3.69220795e+04 -2.81858530e+03
 -1.04812284e+03  1.32294284e+05]
------------------------------------

Run  1072
State: [47  0  0  0  0 12 24  0  0  9  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:114882.87194374451 | Update: -892.1113653746143
Weight

Run  1120
State: [ 2  0 14 18  0  0 18  0 17  0  0  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:361640.55167829926 | Update: -2749.4838288481114
Weight: [-1.10871042e+04 -2.10963614e+01  6.48136539e+04  7.02792616e+04
 -1.54811493e+04  6.63105154e+04  1.47591351e+04 -2.24502527e+04
  6.10426276e+04  1.73680054e+03 -3.69220795e+04 -2.81858530e+03
 -3.41887414e+03  3.16850054e+05]
------------------------------------

Run  1121
State: [ 2  0 14 18  6  0 19  0 16  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:359358.2259215223 | Update: -2552.766672357626
Weight: [-1.11619934e+04 -2.10963614e+01  6.42947378e+04  6.96113677e+04
 -1.54811493e+04  6.63105154e+04  1.40851413e+04 -2.24502527e+04
  6.04096709e+04  1.73680054e+03 -3.69220795e+04 -2.81858530e+03
 -3.41887414e+03  3.14100570e+05]
------------------------------------

Run  1122
State: [ 2  0 15 18  7  0 21  0 16  0  0  0]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:356347.63046832895 | Update: -12.05137241462944
Weight:


Run  1166
State: [18  0 23  0 20  0  7  0  0  0 24  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:211869.30427375686 | Update: -2052.9952165239665
Weight: [-2.13564615e+04 -2.10963614e+01  4.24065154e+04  4.48295292e+04
 -3.09726195e+04  6.63105154e+04  4.83147005e+03 -2.62174949e+04
  5.33168936e+04  1.73680054e+03 -4.67399020e+04 -2.81858530e+03
 -7.26012341e+02  2.26991945e+05]
------------------------------------

Run  1167
State: [18  0 23  0 20  0  8  0  0  0 24  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:208743.86276482948 | Update: -2451.0093149877794
Weight: [-2.18597286e+04 -2.10963614e+01  4.17699637e+04  4.48295292e+04
 -3.15281948e+04  6.63105154e+04  4.63575770e+03 -2.62174949e+04
  5.33168936e+04  1.73680054e+03 -4.74066916e+04 -2.81858530e+03
 -7.26012341e+02  2.24938950e+05]
------------------------------------

Run  1168
State: [18  0 23  0 21  0  9  0  0  0 24  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:205489.67657076623 | Update: -1269.3885581911309
Wei


Run  1222
State: [19  0  0  0  0  0  7  0 22 15 11 19]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:180258.31962470931 | Update: -44.54453797711176
Weight: [-3.20422869e+04  1.27727209e+02  3.31848872e+04  4.48295292e+04
 -3.98666100e+04  5.98858083e+04 -2.31365899e+03 -2.32506709e+04
  5.05197160e+04 -6.68311077e+01 -5.66090911e+04 -9.01541895e+03
 -2.22502181e+03  1.84930408e+05]
------------------------------------

Run  1223
State: [20  0  0  0  0  0  8  0 22 14 11 19]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:180219.71344346536 | Update: -1783.2019128036045
Weight: [-3.20538131e+04  1.27727209e+02  3.31848872e+04  4.48295292e+04
 -3.98666100e+04  5.98858083e+04 -2.31790543e+03 -2.32506709e+04
  5.05064453e+04 -7.58536007e+01 -5.66157220e+04 -9.02680119e+03
 -2.26956635e+03  1.84885864e+05]
------------------------------------

Run  1224
State: [20  0  0  0  0  0 10  0 21 14 10 19]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:176318.92054619375 | Update: -1085.3060929799685
Weigh

Run  1277
State: [28  0  0  0  0  0  8  0  0  0  0  8]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:132986.0746787033 | Update: -812.4751712428115
Weight: [-4.29227328e+04  1.39342392e+02  3.31848872e+04  4.48295292e+04
 -3.98666100e+04  5.98858083e+04 -6.87142594e+03 -2.48925019e+04
  4.51422942e+04 -3.33530312e+03 -5.88203815e+04 -1.56806788e+04
 -1.19551993e+03  1.52467463e+05]
------------------------------------

Run  1278
State: [29  0  0  0  0  0  9  0  0  0  0  8]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:130758.65071948458 | Update: -571.9251304156642
Weight: [-4.32325506e+04  1.39342392e+02  3.31848872e+04  4.48295292e+04
 -3.98666100e+04  5.98858083e+04 -6.95994409e+03 -2.48925019e+04
  4.51422942e+04 -3.33530312e+03 -5.88203815e+04 -1.57680925e+04
 -2.00799510e+03  1.51654987e+05]
------------------------------------

Run  1279
State: [31  0  0  0  0  0 10  0  0  0  0  8]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:129979.45746883332 | Update: 1184.3863539811719
Weight: [-

Run  1342
State: [ 2  0  0 18 24  0 20  0 16  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:219465.05966052908 | Update: -2551.0623949786823
Weight: [-2.37143412e+04  1.39342392e+02  3.31848872e+04  4.96720969e+04
 -3.28535889e+04  5.44994214e+04 -3.34865425e+01 -2.14116792e+04
  4.29606115e+04 -3.33530312e+03 -5.88203815e+04 -1.56436702e+04
  4.12855781e+04  1.68485495e+05]
------------------------------------

Run  1343
State: [ 2  0  0 19 25  0 21  0 15  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:213601.0006602707 | Update: 36588.486945877026
Weight: [-2.37838259e+04  1.39342392e+02  3.31848872e+04  4.90524026e+04
 -3.36820217e+04  5.44994214e+04 -7.28324173e+02 -2.14116792e+04
  4.24078790e+04 -3.33530312e+03 -5.88203815e+04 -1.56436702e+04
  3.87345157e+04  1.65934432e+05]
------------------------------------

Run  1344
State: [ 2  0  0 19 25  0 22  0 15  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:260911.3433238289 | Update: -3151.4465865095844
Weight: 

Run  1403
State: [28  0 16  0  0 14  0 24  0  8 19  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:144320.13219054593 | Update: 9137.732944719173
Weight: [-3.78407116e+04  1.39342392e+02  2.28691282e+04  5.37361419e+04
 -9.33231967e+03  5.10912243e+04  1.44699540e+04 -2.84694251e+04
  5.60508359e+04 -2.36672330e+04 -7.40795471e+04 -1.56436702e+04
 -9.70145466e+03  1.74079101e+05]
------------------------------------

Run  1404
State: [29  0 16  0  0 15  0 23  0  7 19  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:158407.10816671516 | Update: 161.40940494736424
Weight: [-3.43562582e+04  1.39342392e+02  2.48400817e+04  5.37361419e+04
 -9.33231967e+03  5.28221243e+04  1.44699540e+04 -2.55116983e+04
  5.60508359e+04 -2.26801145e+04 -7.17300131e+04 -1.56436702e+04
 -5.63721713e+02  1.83216834e+05]
------------------------------------

Run  1405
State: [30  0 17  0  0 15  0 22  0  7 18  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:159227.10991589093 | Update: -598.4462014297605
Weight: [-


Run  1459
State: [37  0  0  0 10  0  0 22 11  0  0 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:109023.65187807349 | Update: -1793.6589336061443
Weight: [-4.99830968e+04  1.37786482e+02  1.17158188e+04  5.37361419e+04
 -1.10980739e+04  4.99587754e+04  5.94993570e+03 -2.84918819e+04
  4.16132494e+04 -2.26611158e+04 -7.59189623e+04 -1.84892483e+04
 -8.37246370e+02  1.45723742e+05]
------------------------------------

Run  1460
State: [39  0  0  0 10  0  0 21 10  0  0 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:106565.43051296558 | Update: 1803.2421716379322
Weight: [-5.08869123e+04  1.37786482e+02  1.17158188e+04  5.37361419e+04
 -1.13407711e+04  4.99587754e+04  5.94993570e+03 -2.90240770e+04
  4.13460679e+04 -2.26611158e+04 -7.59189623e+04 -1.90923067e+04
 -2.63090530e+03  1.43930083e+05]
------------------------------------

Run  1461
State: [39  0  0  0 11  0  0 20 10  0  0 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:107749.75586514696 | Update: -1788.4328919956024
Weight

------------------------------------

Run  1512
State: [ 2  0  0  7  0 15  0 22  0 25  0 15]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:277140.6365674574 | Update: -1997.280385264312
Weight: [-1.95793742e+04  1.37786482e+02  1.17158188e+04  5.73624630e+04
  5.56267107e+03  6.77966775e+04  2.38927365e+04 -1.81502016e+04
  4.08286848e+04 -9.89269057e+03 -7.59189623e+04  3.94444065e+01
  2.66200285e+04  2.39818296e+05]
------------------------------------

Run  1513
State: [ 2  0  0  8  0 15  0 22  0 25  0 15]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:273869.4059387755 | Update: 23196.13199795634
Weight: [-1.96337752e+04  1.37786482e+02  1.17158188e+04  5.71737851e+04
  5.56267107e+03  6.73913222e+04  2.38927365e+04 -1.87428131e+04
  4.08286848e+04 -1.05669389e+04 -7.59189623e+04 -3.63467194e+02
  2.46227481e+04  2.37821016e+05]
------------------------------------

Run  1514
State: [ 2  0  0  8  0 16  0 21  0 24  0 14]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:303900.2908995842 | 


Run  1571
State: [ 6  0  0 22  0  0  0 16  8  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:264814.99857702415 | Update: -2255.794529505598
Weight: [-2.38425740e+04  1.33196535e+02  1.17158188e+04  4.89033423e+04
  5.56267107e+03  7.76479821e+04  1.65457886e+04  4.60346910e+03
  1.51698428e+04  6.77407243e+03 -7.59189623e+04  1.07331001e+04
 -3.28258245e+03  2.49214721e+05]
------------------------------------

Run  1572
State: [ 6  0  0 23  0  0  0 15  7  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:262159.098304154 | Update: 605.4705019854009
Weight: [-2.40269010e+04  1.33196535e+02  1.17158188e+04  4.82336025e+04
  5.56267107e+03  7.76479821e+04  1.65457886e+04  4.11669454e+03
  1.49254640e+04  6.77407243e+03 -7.59189623e+04  1.07331001e+04
 -3.28258245e+03  2.46958927e+05]
------------------------------------

Run  1573
State: [ 6  0  0 23  0  0  0 14  7  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:262528.9695147407 | Update: -2951.9329584225197
Weight: [

Run  1632
State: [25  0  9  0 15 22  0  8  0  0 15  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:188144.03730170496 | Update: -68.32104145636549
Weight: [-3.52826338e+04  1.32879759e+02  1.11270318e+04  3.26028887e+04
  4.57858752e+03  7.61176536e+04  1.48116796e+04  7.28218061e+02
  1.45816790e+04  6.77407243e+03 -8.36763644e+04  1.07331001e+04
 -1.34243427e+03  1.91656269e+05]
------------------------------------

Run  1633
State: [24  0  9  0 15 22  0  7  0  0 15  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:190629.68167678802 | Update: 668.5563971456431
Weight: [-3.53058950e+04  1.32879759e+02  1.11187426e+04  3.26028887e+04
  4.56472090e+03  7.60973168e+04  1.48116796e+04  7.20846613e+02
  1.45816790e+04  6.77407243e+03 -8.36902331e+04  1.07331001e+04
 -1.41075531e+03  1.91587948e+05]
------------------------------------

Run  1634
State: [23  0  9  0 15 23  0  0  0  0 14  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:191555.30918631563 | Update: -441.1972113864613
Weight: [

Alpha:1.0, Action:0, Reward:-0.1 | Max_value:127850.5235881688 | Update: -1278.605235881696
Weight: [-4.97216934e+04  1.17819926e+02  3.33863248e+03  3.26028887e+04
 -1.34953051e+03  6.80694505e+04  1.53996926e+04 -1.97664632e+03
  2.63872748e+03  2.15803505e+03 -8.83591192e+04  1.07331001e+04
 -1.99776642e+03  1.47372442e+05]
------------------------------------

Run  1691
State: [31  0 21  0  0  0  0  0  0 18  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:125456.56733671944 | Update: 33.756410343397874
Weight: [-5.02614977e+04  1.17819926e+02  2.97666154e+03  3.26028887e+04
 -1.34953051e+03  6.80694505e+04  1.53996926e+04 -1.97664632e+03
  2.63872748e+03  1.84725737e+03 -8.83591192e+04  1.07331001e+04
 -1.99776642e+03  1.46093836e+05]
------------------------------------

Run  1692
State: [32  0 21  0  0  0  0  0  0 17  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:125501.12336804284 | Update: -1255.1112336804363
Weight: [-5.02472464e+04  1.17819926e+02  2.98621792e+03  3.26

Weight: [-38361.73084668    242.63276787   8245.03957714  31709.64641059
  -1349.53050883  68655.98238517  17080.13432222  -4419.97912642
   2638.72747919   5977.1208873  -88359.11921237   7994.40513992
   -811.05599914 166777.78996719]
------------------------------------

Run  1738
State: [40  0  0 11  0 22  0  8  0  0  0 23]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:172780.72413728412 | Update: -845.4298700651852
Weight: [-38765.80310118    242.63276787   8245.03957714  31599.53370967
  -1349.53050883  68445.22361094  17080.13432222  -4510.01420358
   2638.72747919   5977.1208873  -88359.11921237   7764.96563894
   -811.05599914 166036.03429247]
------------------------------------

Run  1739
State: [40  0  0 11  0 23  0  7  0  0  0 22]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:171029.32167720422 | Update: -1371.3964185732184
Weight: [-39226.3519925     242.63276787   8245.03957714  31474.0307043
  -1349.53050883  68193.5684968   17080.13432222  -4601.23123388
   2638.72747919 

Run  1798
State: [ 6  0  0 25 16  0  0  0  0  0  0 11]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:319506.26008481364 | Update: -1049.1126316355076
Weight: [-1.53454699e+04  2.42632768e+02  8.24503958e+03  6.64162670e+04
  1.46162705e+04  5.86964055e+04  3.97552555e+04  1.91312731e+04
  2.37816672e+04  5.97712089e+03 -8.83591192e+04  3.25804592e+04
 -2.35493653e+03  2.90577280e+05]
------------------------------------

Run  1799
State: [ 7  0  0 25 16  0  0  0  0  0  0 11]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:318257.426022055 | Update: -3182.6742602205486
Weight: [-1.54311957e+04  2.42632768e+02  8.24503958e+03  6.60623137e+04
  1.43891444e+04  5.86964055e+04  3.97552555e+04  1.91312731e+04
  2.37816672e+04  5.97712089e+03 -8.83591192e+04  3.24252583e+04
 -3.40404916e+03  2.89528167e+05]
------------------------------------

Run  1800
State: [ 7  0  0 25 16  0  0  0  0  0  0 11]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:315527.1449165433 | Update: -1046.4774475981249
Weight:

Alpha:1.0, Action:0, Reward:-0.1 | Max_value:236576.642470715 | Update: 5728.621860881802
Weight: [-2.72362048e+04  1.21118032e+02  8.24503958e+03  5.72547959e+04
  6.47728056e+03  4.91956434e+04  3.41122996e+04  1.32329956e+04
  2.37816672e+04 -6.83150964e+03 -8.99625429e+04  3.03991595e+04
 -1.99317267e+03  2.33856063e+05]
------------------------------------

Run  1860
State: [15  0  0  0  0 26  0  0 26 17 13  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:225718.97524426636 | Update: -20272.5771525804
Weight: [-2.61439691e+04  1.21118032e+02  8.24503958e+03  5.72547959e+04
  6.47728056e+03  5.12108937e+04  3.41122996e+04  1.32329956e+04
  2.37816672e+04 -5.43911128e+03 -8.89547225e+04  3.03991595e+04
 -1.99317267e+03  2.39584685e+05]
------------------------------------

Run  1861
State: [15  0  0  0  0  0  0  0 26 17 13  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:201418.295102453 | Update: -493.7972285443975
Weight: [-3.02852852e+04  1.21118032e+02  8.24503958e+03  5.7254795


Run  1910
State: [27  0 10  0  0  0  0  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:172896.85808367425 | Update: -1729.0685808367562
Weight: [-4.13780948e+04  8.39512740e+01  7.21119638e+03  5.72547959e+04
  6.47728056e+03  4.40792802e+04  3.14860937e+04  7.53199531e+03
  1.06286055e+04 -1.35839510e+04 -9.47872037e+04  3.03991595e+04
 -1.62336790e+03  1.87139742e+05]
------------------------------------

Run  1911
State: [27  0 10  0  0  0  0  0  0  0  0  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:170902.5817785255 | Update: -854.5629088926362
Weight: [-4.20138855e+04  8.39512740e+01  6.97810289e+03  5.72547959e+04
  6.47728056e+03  4.40792802e+04  3.14860937e+04  7.53199531e+03
  1.06286055e+04 -1.35839510e+04 -9.47872037e+04  3.03991595e+04
 -1.62336790e+03  1.85410674e+05]
------------------------------------

Run  1912
State: [27  0 10  0  0  0  0  0  0  0  0  0]
Alpha:0.3333333333333333, Action:0, Reward:-0.1 | Max_value:169916.9444363451 | Update: -566.42314

State: [44  0 20 12 16 24  0  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:151399.40229671355 | Update: -1514.0940229671542
Weight: [-5.40760319e+04  8.39512740e+01  1.88986343e+03  5.52916484e+04
  4.06502281e+03  4.02809620e+04  2.75333843e+04  2.44368755e+03
  1.06286055e+04 -1.35839510e+04 -9.47872037e+04  3.03991595e+04
 -9.08611209e+02  1.60379168e+05]
------------------------------------

Run  1964
State: [44  0 20 12 16 24  0  0  0  0  0  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:149499.60357242948 | Update: -478.36865618619777
Weight: [-5.49833168e+04  8.39512740e+01  1.48163733e+03  5.50464498e+04
  3.73723121e+03  3.97892967e+04  2.75333843e+04  2.44368755e+03
  1.06286055e+04 -1.35839510e+04 -9.47872037e+04  3.03991595e+04
 -9.08611209e+02  1.58865074e+05]
------------------------------------

Run  1965
State: [44  0 20 12 16 25  0  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:149536.4077830626 | Update: -686.2373097890522
Weight: [-5.526

Run  2012
State: [ 7  0  0 24  0  0  0 10  0 21  0 23]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:343882.4931108548 | Update: -1899.4946973913466
Weight: [-2.27446672e+04  8.39512740e+01  1.68973536e+04  8.96096868e+04
  1.94382325e+04  3.45026626e+04  4.58879356e+04  2.67049953e+04
  2.47704656e+04  1.57351266e+04 -9.47872037e+04  5.89034694e+04
  1.17402103e+04  2.79363566e+05]
------------------------------------

Run  2013
State: [ 7  0  0 24  0  0  0  9  0 21  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:338315.0448841923 | Update: -4569.504415884672
Weight: [-2.29257488e+04  8.39512740e+01  1.68973536e+04  8.89944629e+04
  1.94382325e+04  3.45026626e+04  4.58879356e+04  2.64488146e+04
  2.47704656e+04  1.51964870e+04 -9.47872037e+04  5.83159187e+04
  9.84071556e+03  2.77464071e+05]
------------------------------------

Run  2014
State: [ 9  0  0 24  0  0  0  8  0 20  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:327544.6853152872 | Update: -3527.0546453850693
Weight: [-

Run  2065
State: [29  0  0  0  0 21  0 10  0  0 16 12]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:196068.4753365749 | Update: 2193.1016393270693
Weight: [-3.60809840e+04  9.70781297e+01  1.68973536e+04  6.09524330e+04
  1.94382325e+04  3.14558214e+04  4.07430593e+04  1.89480225e+04
  2.47704656e+04  1.05557139e+03 -9.74517004e+04  3.99557143e+04
 -2.34360700e+03  2.11667696e+05]
------------------------------------

Run  2066
State: [28  0  0  0  0 21  0 10  0  0 15 12]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:199356.8676561116 | Update: -1448.5182656509278
Weight: [-3.52148304e+04  9.70781297e+01  1.68973536e+04  6.09524330e+04
  1.94382325e+04  3.20789584e+04  4.07430593e+04  1.92438013e+04
  2.47704656e+04  1.05557139e+03 -9.69768373e+04  4.03096460e+04
 -1.50505358e+02  2.13860798e+05]
------------------------------------

Run  2067
State: [27  0  0  0  0 22  0  8  0  0 15 12]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:197694.4430829908 | Update: -147.7175984129426
Weight: [-3

Run  2120
State: [30  0  0  0 16  0  0  0  0  0  0  0]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:150845.37613906036 | Update: 376.0576924449706
Weight: [-5.02893647e+04  8.10391962e+01  1.68973536e+04  6.09524330e+04
  1.56525007e+04  2.89082708e+04  3.74453231e+04  1.18635254e+04
  1.43781919e+04  1.05557139e+03 -1.00772975e+05  3.77622490e+04
 -2.94544887e+03  1.68687982e+05]
------------------------------------

Run  2121
State: [31  0  0  0 16  0  0  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:150621.13765689518 | Update: 380.3925126498507
Weight: [-5.01357213e+04  8.10391962e+01  1.68973536e+04  6.09524330e+04
  1.57339148e+04  2.89082708e+04  3.74453231e+04  1.18635254e+04
  1.43781919e+04  1.05557139e+03 -1.00772975e+05  3.77622490e+04
 -2.56939118e+03  1.69064040e+05]
------------------------------------

Run  2122
State: [32  0  0  0 16  0  0  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:151303.35387323087 | Update: -1299.12604197755
Weight: [-4

Run  2174
State: [53  0  9 11  0 23  0  7  0 20  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:118495.29651842956 | Update: -1582.7334238012263
Weight: [-6.37971974e+04  8.10391962e+01  1.54125846e+04  5.90356695e+04
  9.32871063e+03  2.48683258e+04  3.23268681e+04  1.32419978e+04
  1.43781919e+04 -4.12910637e+03 -1.00772975e+05  3.77622490e+04
 -1.90522459e+03  1.46433824e+05]
------------------------------------

Run  2175
State: [53  0  9 12  0 23  0  0  0 19  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:116298.68344530651 | Update: -628.0902742219332
Weight: [-6.49396069e+04  8.10391962e+01  1.52205549e+04  5.88007147e+04
  9.32871063e+03  2.43757862e+04  3.23268681e+04  1.30925759e+04
  1.43781919e+04 -4.55654977e+03 -1.00772975e+05  3.77622490e+04
 -1.90522459e+03  1.44851091e+05]
------------------------------------

Run  2176
State: [53  0 10 12  0 24  0  0  0 19  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:114712.10369964162 | Update: 258.4015149172483
Weight:

Alpha:1.0, Action:0, Reward:-0.1 | Max_value:476340.74244309764 | Update: -4692.933373626845
Weight: [ -16052.87644448    -474.95862444   63798.32214078  118678.15537937
    9328.7106301    22143.208263     85164.8665974    -5232.83079111
   84030.87259496   42356.51424874 -100772.97453988   37762.24902261
   -4222.80617638  417482.68658037]
------------------------------------

Run  2216
State: [ 2  0 18 23  0  0  0 14  7  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:469645.91676236794 | Update: -5745.079716634878
Weight: [ -16180.70048636    -474.95862444   62659.55526249  117221.50248119
    9328.7106301    22143.208263     85164.8665974    -6182.21936744
   83586.01974041   42356.51424874 -100772.97453988   37762.24902261
   -4222.80617638  412789.75320674]
------------------------------------

Run  2217
State: [ 2  0 18 23  0  0  0 13  6  0  0  0]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:456947.13094616414 | Update: -3084.944921160437
Weight: [ -16337.18242458    -474.


Run  2259
State: [20  0  0  0  9 13  0 25  0  0 19 26]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:282766.4997396768 | Update: -2643.481675251096
Weight: [ -32647.60029965    -474.95862444   31860.46078814   91224.46211377
    3891.07051649   13612.71682568   67295.79996979  -23291.84563531
   82790.77892598   42356.51424874 -117476.5605674    22326.40258342
    4610.22308146  314248.89028145]
------------------------------------

Run  2260
State: [20  0  0  0  9 14  0 25  0  0 19 26]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:276683.7989014348 | Update: -474.0437361021759
Weight: [ -33367.62013681    -474.95862444   31860.46078814   91224.46211377
    3569.1535968    13147.74659514   67295.79996979  -24183.14719605
   82790.77892598   42356.51424874 -118156.26417278   21402.06815786
    1966.74140621  311605.4086062 ]
------------------------------------

Run  2261
State: [20  0  0  0  9 14  0 24  0  0 19 26]
Alpha:1.0, Action:0, Reward:-1 | Max_value:276555.58385970816 | Update: -

Run  2318
State: [30  0  0  0  0  0  0 18  9 23  0 14]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:243588.83845019955 | Update: 704.0133599041728
Weight: [ -48733.80195902    -490.18817015   31860.46078814   91224.46211377
  -10710.61634141   10147.72591075   59424.26812469  -33094.88049772
   68705.74153527   35220.34345342 -125864.33691934    6313.74754595
   -4287.64733036  252180.68989312]
------------------------------------

Run  2319
State: [31  0  0  0  0  0  0 17  8 23  0 14]
Alpha:1.0, Action:1, Reward:-1 | Max_value:244246.56461515155 | Update: 831.1022001641686
Weight: [ -48446.16789511    -490.18817015   31860.46078814   91224.46211377
  -10710.61634141   10147.72591075   59424.26812469  -32923.97274644
   68791.54352583   35438.99350139 -125864.33691934    6446.30019524
   -3583.63397046  252884.70325303]
------------------------------------

Run  2320
State: [32  1  0  0  0  0  0 16  8 23  0 13]
Alpha:1.0, Action:0, Reward:-1 | Max_value:245552.6685193677 | Update: -2292

Run  2374
State: [42  0  0  9  0 19  0 15  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:169933.08077461165 | Update: -30.68443758614012
Weight: [ -70619.03932868    -545.29044895   31860.46078814   88916.24408166
  -10710.61634141    5227.33403567   48896.4332809   -34759.31810602
   67671.89588479   26098.02813962 -125864.33691934    2542.21489629
   -2160.55276869  203546.08802595]
------------------------------------

Run  2375
State: [42  0  0 10  0 19  0 14  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:169466.1657429706 | Update: 43.40245598476031
Weight: [ -70636.59048037    -545.29044895   31860.46078814   88912.51720921
  -10710.61634141    5219.44584753   48896.4332809   -34765.52562106
   67671.89588479   26098.02813962 -125864.33691934    2542.21489629
   -2160.55276869  203515.40358836]
------------------------------------

Run  2376
State: [43  0  0 10  0 20  0 13  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:169036.4714877336 | Update: -6

Run  2423
State: [ 2  0  7 22 12  0  0 17  8  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:452721.95716558094 | Update: 44325.99619296956
Weight: [ -28971.6830767     -545.29044895   40275.54161468  130596.91174496
   11166.76004561   10108.36691823   35069.71179605   19679.57639307
   81294.81870459   26098.02813962 -125864.33691934    2542.21489629
   49118.72993138  346951.05241009]
------------------------------------

Run  2424
State: [ 2  0  7 22 12  0  0 16  8  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:503696.2955094557 | Update: 42513.28919172607
Weight: [ -27764.35121358    -545.29044895   44458.41268263  143757.1889841
   18363.97942506   10108.36691823   35069.71179605   29842.43258821
   86096.82190324   26098.02813962 -125864.33691934    2542.21489629
   49118.72993138  391277.04860306]
------------------------------------

Run  2425
State: [ 2  0  7 22 12  0  0 15  7  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:555890.8540091431 | Update: -34


Run  2483
State: [14  0 18  0  0 19  0 13  0 22 16  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:337541.63237035164 | Update: -2688.085489920748
Weight: [ -37905.62754473    -530.8225728    35840.66094459  125605.80292279
    8052.98711067    4875.19687817   28827.5525795    28021.76170124
   88677.01626201   18337.078803   -137833.08303833    2542.21489629
   -1414.36548692  353615.12888536]
------------------------------------

Run  2484
State: [13  0 19  0  0 20  0 12  0 22 16  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:333732.37700614484 | Update: -3574.2521784914425
Weight: [ -38418.14572346    -530.8225728    35188.3817669   125605.80292279
    8052.98711067    4184.15849696   28827.5525795    27550.46458434
   88677.01626201   17538.52055884 -138415.12289845    2542.21489629
   -1414.36548692  350927.04339544]
------------------------------------

Run  2485
State: [13  0 19  0  0 20  0 12  0 21 16  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:328667.3371395698 | Updat

Run  2538
State: [26  0  0  0  0  0  0 11  0  0  0 23]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:240093.20657284986 | Update: 2981.7621402141085
Weight: [ -63681.94252674    -530.8225728    23496.86712526  125605.80292279
    8052.98711067    8020.618769     22257.94009125   11594.00790573
   70026.7598526     8674.51629077 -143467.54579005  -12293.88275525
  -10255.36277054  266615.85357596]
------------------------------------

Run  2539
State: [28  0  0  0  0  0  0 10  0  0  0 23]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:243660.1938703664 | Update: -2599.0337899695733
Weight: [ -62626.13560212    -530.8225728    23496.86712526  125605.80292279
    8052.98711067    8020.618769     22257.94009125   12036.36605811
   70026.7598526     8674.51629077 -143467.54579005  -11371.5655546
   -7273.60063033  269597.61571618]
------------------------------------

Run  2540
State: [28  0  0  0  0  0  0  9  0  0  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:240392.01719770284 | Update: 


Run  2598
State: [57  0  0 12 16 24  0  0  0  0  0 11]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:176557.92505702542 | Update: 226.615709822363
Weight: [ -85818.18731457    -530.8225728    23496.86712526  123717.00752221
    5778.48111626    4039.00616474   17970.24910089    5575.74054967
   70026.7598526     8674.51629077 -143467.54579005  -22036.90820272
   -3160.93385944  225007.00918489]
------------------------------------

Run  2599
State: [58  0  0 12 16 24  0  0  0  0  0 11]
Alpha:1.0, Action:1, Reward:50 | Max_value:242339.77491265934 | Update: 65931.78122532365
Weight: [ -85642.27225042    -530.8225728    23496.86712526  123753.70659673
    5827.54195696    4112.59412192   17970.24910089    5575.74054967
   70026.7598526     8674.51629077 -143467.54579005  -22003.38371313
   -2934.31814962  225233.62489471]
------------------------------------

Run  2600
State: [ 2  0  0 12 16 25  0  0  0  0  0 11]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:386463.69819187594 | Update: -35

Run  2660
State: [22  0  0  0  0  0  7  0  0 15 24  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:585382.2128697841 | Update: 1499.9033991340548
Weight: [-4.01187638e+04 -5.30822573e+02  2.34968671e+04  1.71012986e+05
  1.19224300e+05  6.39178458e+04  1.02729563e+04 -1.84491145e+04
  1.90892762e+05 -3.08556237e+04 -1.32141422e+05  5.02684880e+04
 -6.79716868e+03  6.45034287e+05]
------------------------------------

Run  2661
State: [22  0  0  0  0  0  8  0  0 14 24  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:587389.8429950619 | Update: -5732.147946321755
Weight: [-3.96693731e+04 -5.30822573e+02  2.34968671e+04  1.71012986e+05
  1.19224300e+05  6.39178458e+04  1.04159423e+04 -1.84491145e+04
  1.90892762e+05 -3.05518184e+04 -1.31654270e+05  5.02684880e+04
 -5.29726528e+03  6.46534190e+05]
------------------------------------

Run  2662
State: [22  0  0  0  0  0  9  0  0 14 24  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:580390.4022471735 | Update: -5670.658491231152
Weight: [-

Run  2713
State: [28  0 12  0  0  0  9  0 21  0 10  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:605801.3622202856 | Update: -5783.483430359745
Weight: [-6.09959582e+04 -5.30822573e+02  2.64393826e+04  1.71012986e+05
  1.19224300e+05  5.86925914e+04 -6.00617814e+03 -2.59616328e+04
  1.86932449e+05 -4.07905795e+04 -1.50578834e+05  5.02684880e+04
 -8.34064280e+03  5.92463602e+05]
------------------------------------

Run [   3] - Total reward:  397.30 Mean scores: 11.00 Means Scores[:-10]: 11.00 Score: 12 
Run  0
State: [2 0 0 0 0 0 0 0 0 0 0 0]
Alpha:0.125, Action:0, Reward:-0.1 | Max_value:584958.6685793751 | Update: -731.2108357242105
Weight: [-6.32013497e+04 -5.30822573e+02  2.55037859e+04  1.71012986e+05
  1.19224300e+05  5.86925914e+04 -6.71504433e+03 -2.59616328e+04
  1.85287764e+05 -4.07905795e+04 -1.51361505e+05  5.02684880e+04
 -8.34064280e+03  5.86680119e+05]
------------------------------------

Run  1
State: [2 0 0 0 0 0 0 0 0 0 0 0]
Alpha:0.1111111111111111, Action:0, Re


Run  38
State: [20  0  8 10 13 19  0 13  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:595221.8633650192 | Update: -1854.205161951948
Weight: [-6.65553488e+04 -5.30822573e+02  2.44962887e+04  1.68248955e+05
  1.17171681e+05  5.55143562e+04 -7.29772021e+03 -3.05990677e+04
  1.85287764e+05 -4.07905795e+04 -1.51361505e+05  5.02684880e+04
 -3.84061052e+03  5.58227824e+05]
------------------------------------

Run  39
State: [21  0  8 10 13 20  0 12  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:594758.388781577 | Update: 1576.7106898113852
Weight: [-6.70603890e+04 -5.30822573e+02  2.42963183e+04  1.67998724e+05
  1.16845525e+05  5.50376872e+04 -7.29772021e+03 -3.09241621e+04
  1.85287764e+05 -4.07905795e+04 -1.51361505e+05  5.02684880e+04
 -5.69481568e+03  5.56373619e+05]
------------------------------------

Run  40
State: [22  0  8 10 14 21  0 11  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:596235.370253956 | Update: -2337.7622563100886
Weight: [-6.660945

Run  92
State: [31  0 19 24  0  0  0 10  0 21  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:387967.4199400955 | Update: -2413.6658752073417
Weight: [-123084.57862836    -451.13836654   -3638.52099724  133336.1283223
   83027.64671038   57769.34232208  -36102.44206303  -41513.88372595
  147561.01302283  -67110.83035078 -151361.5052818    50268.48801329
   -4807.44084539  420840.88385791]
------------------------------------

Run  93
State: [31  0 19 24  0  0  0  9  0 20  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:385558.81027284666 | Update: -2784.5147492592223
Weight: [-124103.58524996    -451.13836654   -4256.74901711  132554.37048326
   83027.64671038   57769.34232208  -36102.44206303  -41839.40959312
  147561.01302283  -67795.27334936 -151361.5052818    50268.48801329
   -4807.44084539  418427.2179827 ]
------------------------------------

Run  94
State: [31  0 20 24  0  0  0  7  0 20  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:381629.13536897453 | Update: 1061

Run  146
State: [56  0  0  0  0 22  0  8  0  0 15 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:266697.1442489732 | Update: -2092.920839934959
Weight: [-140435.7320049     -451.13836654  -19319.55125855  103893.16099904
   83027.64671038   65039.75404816  -25450.36136311  -22961.45915821
  147561.01302283  -83773.34019584 -157925.17046877   63781.92478763
   -2154.71641618  370827.38053593]
------------------------------------

Run  147
State: [57  0  0  0  0 23  0  7  0  0 15 22]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:263184.91346199776 | Update: -2319.227422516531
Weight: [-142031.90144391    -451.13836654  -19319.55125855  103893.16099904
   83027.64671038   64416.76419236  -25450.36136311  -23187.27330368
  147561.01302283  -83773.34019584 -158350.01861726   63134.54354141
   -4247.63725611  368734.45969599]
------------------------------------

Run  148
State: [57  0  0  0  0 23  0  6  0  0 15 22]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:262970.56929178705 | Update: 14

Run  203
State: [11  0  0  0 16  0  0  0  0  0  0 11]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:664985.0674108915 | Update: -4335.77252096741
Weight: [-5.29338881e+04 -4.51138367e+02 -1.93195513e+04  1.03893161e+05
  6.44740369e+04  8.47322902e+04 -2.29381094e+04 -4.54199741e+04
  2.41948471e+05 -8.37733402e+04 -9.93325843e+04  1.44439003e+05
 -5.91720905e+03  6.35274876e+05]
------------------------------------

Run  204
State: [ 9  0  0  0 17  0  0  0  0  0  0 11]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:659054.7325565402 | Update: -7794.793589401641
Weight: [-5.35834153e+04 -4.51138367e+02 -1.93195513e+04  1.03893161e+05
  6.35353701e+04  8.47322902e+04 -2.29381094e+04 -4.54199741e+04
  2.41948471e+05 -8.37733402e+04 -9.93325843e+04  1.43797589e+05
 -5.91720905e+03  6.30939103e+05]
------------------------------------

Run  205
State: [ 8  0  0  0 17  0  0  0  0  0  0 10]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:651331.0877120306 | Update: -5770.658821478253
Weight: [-5.4


Run  263
State: [24  0  0 13  0  0  0  0 26 17  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:540712.8002419323 | Update: -8299.539119970752
Weight: [-7.42299825e+04 -4.51138367e+02 -1.93195513e+04  1.02292871e+05
  2.35406378e+04  7.07880155e+04 -4.80642887e+04 -4.41312856e+04
  2.40875283e+05 -9.02881151e+04 -9.93325843e+04  1.34137086e+05
 -5.47611135e+03  4.85838730e+05]
------------------------------------

Run  264
State: [25  0  0 14  0  0  0  0 25 17  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:531032.856115233 | Update: -4065.511734322994
Weight: [-7.69426926e+04 -4.51138367e+02 -1.93195513e+04  1.00836805e+05
  2.35406378e+04  7.07880155e+04 -4.80642887e+04 -4.41312856e+04
  2.37953141e+05 -9.21933289e+04 -9.93325843e+04  1.34137086e+05
 -5.47611135e+03  4.77539191e+05]
------------------------------------

Run  265
State: [25  0  0 14  0  0  0  0 25 16  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:521412.98555687675 | Update: -4008.3924447148456
Weight: [-7


Run  320
State: [24  0 12  0  0  0  0  0  0  0 25  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:300826.20270188607 | Update: -3008.362027018855
Weight: [-115747.50049848    -588.50703767  -24483.70227717   75449.92168952
   23540.63783853   70788.01550829  -54830.74288717  -41763.54925383
  219620.88725388 -102794.61650301 -106132.26615036  134137.08570483
   -3062.57510966  378525.92634427]
------------------------------------

Run  321
State: [24  0 12  0  0  0  0  0  0  0 25  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:291706.6769307928 | Update: -5221.198530340684
Weight: [-116730.78569713    -588.50703767  -24970.36634014   75449.92168952
   23540.63783853   70788.01550829  -54830.74288717  -41763.54925383
  219620.88725388 -102794.61650301 -107150.05989006  134137.08570483
   -3062.57510966  375517.56431725]
------------------------------------

Run  322
State: [25  0 12  0  0  0  7  0  0  0 24  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:282087.6842150185 | Update: 235

Run  379
State: [39  0 23  0 21  0  9  0 22  0 11  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:335133.10045403114 | Update: -32.52982198994141
Weight: [-119219.17766181    -593.86454488  -26279.93093561   75449.92168952
   22816.55398258   81518.90171535  -55222.52317467  -32364.72499218
  195751.15212594 -102794.61650301 -108306.08090617  134137.08570483
   -8503.2102087   369877.0612112 ]
------------------------------------

Run  380
State: [41  0 23  0 21  0 10  0 21  0 10  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:334329.1078859659 | Update: -4095.498468319827
Weight: [-119236.4553032     -593.86454488  -26290.01713283   75449.92168952
   22807.31070551   81518.90171535  -55226.51026845  -32364.72499218
  195741.46090888 -102794.61650301 -108310.92334385  134137.08570483
   -8535.74003069  369844.53138921]
------------------------------------

Run  381
State: [41  0 23  0 21  0 11  0 21  0 10  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:324269.28377156844 | Update: 181


Run  440
State: [ 2  0  0  0  0  8 15  0  0 12  0 18]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:534880.4003435164 | Update: -2874.0032128627063
Weight: [ -18155.92725471    -593.86454488   -2947.37687949   75449.92168952
   46915.95184499   80599.56743812  -29305.7006641    30962.40648012
  227030.53966076  -50984.912654   -103702.11935602  178415.61954441
   -4667.80983103  498107.73885276]
------------------------------------

Run  441
State: [ 2  0  0  0  0  8 16  0  0 12  0 18]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:528801.5885936989 | Update: -8085.3107030481915
Weight: [ -18234.20808203    -593.86454488   -2947.37687949   75449.92168952
   46915.95184499   80288.47981828  -29892.79888284   30962.40648012
  227030.53966076  -51450.61597563 -103702.11935602  177719.89194793
   -4667.80983103  495233.7356399 ]
------------------------------------

Run  442
State: [ 2  0  0  0  0  8 17  0  0 12  0 17]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:520652.40201034356 | Update: 

Run  503
State: [26  0  0 19  8  0 23  0 14  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:352950.0727694547 | Update: -4134.932795860514
Weight: [ -57439.83632914    -593.86454488   -2947.37687949   49544.20110038
   44335.01582446   74272.96000494  -44448.94192887   13416.41656839
  182821.0564627   -55730.17505235 -103702.11935602  156290.04030116
  -16527.60073824  335654.01743689]
------------------------------------

Run  504
State: [26  0  0 19  8  0 24  0 14  0  0  0]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:345979.2869154324 | Update: 5821.3186200606
Weight: [ -58903.96742794    -593.86454488   -2947.37687949   48483.95689114
   43887.4226736    74272.96000494  -45744.11720607   13416.41656839
  182037.13880985  -55730.17505235 -103702.11935602  156290.04030116
  -16527.60073824  331519.08464103]
------------------------------------

Run  505
State: [27  0  0 19  8  0 25  0 14  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:351463.86522097525 | Update: -5917.


Run  562
State: [17  0  0  0  0 15  0 22  0 25 18  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:226785.7670652132 | Update: 958.3891601899231
Weight: [ -86445.26235097    -409.26592648   -2947.37687949   22946.07267287
   22534.29119789   76888.22645135  -40907.66541637   12285.7865231
  179672.28136823  -43759.60231657 -105879.09799785  156290.04030116
     821.94646891  264886.87159202]
------------------------------------

Run  563
State: [16  0  6  0  0 16  0 21  0 24 18  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:228912.06574717083 | Update: -1453.9253149039869
Weight: [ -86223.37682345    -409.26592648   -2947.37687949   22946.07267287
   22534.29119789   77082.73497785  -40907.66541637   12570.14941874
  179672.28136823  -43436.06621969 -105645.6425212   156290.04030116
     821.94646891  265845.26075221]
------------------------------------

Run  564
State: [15  0  6  0  0 16  0 19  0 24 18  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:228161.29649506242 | Update: 362


Run  629
State: [24  0 19  0  0  0  0 11  0  0  0  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:255606.8343722226 | Update: -1530.4226369813405
Weight: [-8.45738125e+04 -4.24012274e+02  2.74119494e+02  2.29460727e+04
  2.25342912e+04  8.12548574e+04 -3.43262713e+04  1.87100567e+04
  1.68283804e+05 -3.81904823e+04 -1.06477437e+05  1.56290040e+05
 -3.98611355e+03  2.80908585e+05]
------------------------------------

Run  630
State: [24  0 19  0  0  0  0  9  0  0  0  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:253535.6744463355 | Update: -1392.3665498393384
Weight: [-8.50740322e+04 -4.24012274e+02 -1.17877661e+02  2.29460727e+04
  2.25342912e+04  8.12548574e+04 -3.43262713e+04  1.84830114e+04
  1.68283804e+05 -3.81904823e+04 -1.06477437e+05  1.56290040e+05
 -3.98611355e+03  2.79378162e+05]
------------------------------------

Run  631
State: [24  0 19  0  0  0  0  8  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:251631.58324348787 | Update: -2769.8096847499837
Weight:


Run  689
State: [57  0  0 12 16 25  0  0  0  0  0 21]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:446252.1043997729 | Update: 1957.0798323016497
Weight: [ -28575.65590633    -424.01227375   24781.43622207   23815.39927338
   34301.39002613   98683.72974325   -3707.23940857   31727.08494828
  168283.80380377  -38190.48230974 -106477.43712634  181326.78254496
   -5955.474009    372096.55524763]
------------------------------------

Run  690
State: [57  0  0 12 17 25  0  0  0  0  0 21]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:450242.9972083955 | Update: -4176.855865605874
Weight: [ -27056.43279282    -424.01227375   24781.43622207   24132.33678247
   34725.08519694   99345.72372621   -3707.23940857   31727.08494828
  168283.80380377  -38190.48230974 -106477.43712634  181879.50525895
   -3998.39417669  374053.63507993]
------------------------------------

Run  691
State: [57  0  0 13 17 25  0  0  0  0  0 21]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:443310.87640759884 | Update: 477


Run  748
State: [20  0  0  0  0  0  0  0  0 16 25  9]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:247741.95575413958 | Update: -2477.519557541411
Weight: [-18842.41587902   -424.01227375  24781.43622207  -2669.89822361
  20215.9979405  109068.68977281 -17235.93288036  20159.95808761
 138461.348368   -55208.40401537 -93935.75917002 162966.92844865
  -3682.44570037 276857.41571019]
------------------------------------

Run  749
State: [20  0  0  0  0  0  0  0  0 16 25  9]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:245397.8331833369 | Update: -850.6765469749953
Weight: [-19517.2316892    -424.01227375  24781.43622207  -2669.89822361
  20215.9979405  109068.68977281 -17235.93288036  20159.95808761
 138461.348368   -55743.68019144 -94773.95745633 162667.05426631
  -3682.44570037 274379.89615265]
------------------------------------

Run  750
State: [20  0  0  0  0  0  0  0  0 15 25  9]
Alpha:0.3333333333333333, Action:0, Reward:-0.1 | Max_value:244336.9870262712 | Update: -814.489956754


Run  811
State: [27  1  0  0  0  0 17  0 18  0  9  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:263637.6531576557 | Update: 1572.2256792777916
Weight: [-24820.46628478   -579.10262405  24781.43622207  -2669.89822361
  20215.9979405  111614.08425734 -15106.0222721   24391.0153464
 129089.98144429 -53670.24846062 -96091.8149926  161611.37771719
  -4310.44547825 256609.03926951]
------------------------------------

Run  812
State: [26  0  0  0  0  0 17  0 17  0  8  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:265407.0437265282 | Update: -2854.936105559231
Weight: [-24242.34785898   -557.82878036  24781.43622207  -2669.89822361
  20215.9979405  111614.08425734 -14742.02673311  24391.0153464
 129473.21307192 -53670.24846062 -95900.32456751 161611.37771719
  -2738.21979898 258181.26494878]
------------------------------------

Run  813
State: [26  0  0  0  0  0 18  0 17  0  8  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:261981.52285183588 | Update: 252.5558911206317
Weight: [-25253

Run  871
State: [41  0 15  0 26 12 24  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:221743.1819537528 | Update: -18117.18804492356
Weight: [-31368.88214738   -557.82878036  24491.86013258  -2669.89822361
  18824.59208687 113231.50697032  -9221.81798234  14734.50386417
 124680.5841481  -53670.24846062 -96060.65653124 161611.37771719
  -2277.11197854 230489.91033556]
------------------------------------

Run  872
State: [43  0 16  6 26 13 25  0  0 26  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:197477.2491325227 | Update: 23760.786127858126
Weight: [-41484.97508703   -557.82878036  20828.32817386  -2669.89822361
  12450.92860395 110289.94819866 -15143.35281274  14734.50386417
 124680.5841481  -53670.24846062 -96060.65653124 161611.37771719
 -20394.30002347 212372.72229063]
------------------------------------

Run  873
State: [44  0 16  7  0 13  0 26  0 26  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:239044.91871087565 | Update: 2127.291984584648
Weight: [-2757

Run  937
State: [12  0  0 23  0  0  0 11  0  0  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:430581.65861146897 | Update: 583.8336127023213
Weight: [-24748.38464897   -557.82878036  97190.04392992  33051.80850169
  20810.02133363 107443.48770097  43847.62887045 -19532.36584552
 166843.77741675 -15392.2818995  -96060.65653124 124122.87185674
  -4443.60478273 388425.17261581]
------------------------------------

Run  938
State: [12  0  0 24  0  0  0 11  0  0  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:431161.52397639246 | Update: -598.8095521196956
Weight: [-24652.97143994   -557.82878036  97190.04392992  33233.02627239
  20810.02133363 107443.48770097  43847.62887045 -19445.75143899
 166843.77741675 -15392.2818995  -96060.65653124 124303.46298306
  -3859.77117003 389009.00622851]
------------------------------------

Run  939
State: [14  0  0 24  0  0  0  9  0  0  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:430339.54514919146 | Update: 81.56790075189201
Weight: [-2475

Alpha:1.0, Action:0, Reward:-0.1 | Max_value:312778.66287297226 | Update: -1669.1849987838068
Weight: [ -51587.16666591    -605.2445559    97190.04392992   13413.33258775
   16819.1701669   101089.10306729   36363.26481372  -22334.08397195
  166843.77741675  -15392.2818995  -107789.58498922  100066.86100515
   -1100.11772579  295034.28338883]
------------------------------------

Run  1000
State: [23  0  0  0 17 25  0  0  0  0 13 11]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:310591.91402546805 | Update: -3088.34439339448
Weight: [ -52110.0086703     -605.2445559    97190.04392992   13413.33258775
   16435.21688496  100524.49123725   36363.26481372  -22334.08397195
  166843.77741675  -15392.2818995  -108105.82890371   99819.92942586
   -1100.11772579  293365.09839005]
------------------------------------

Run  1001
State: [23  0  0  0 17 26  0  0  0  0 13 10]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:365468.55955531466 | Update: 56401.363655788125
Weight: [ -53077.37668169    -605.


Run  1061
State: [34  0  0  0  0  0  9  0  0 14  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:165681.3858261619 | Update: -1656.9138582616288
Weight: [ -78845.93594282    -621.5097368    97190.04392992   13413.33258775
     530.25805141  102469.32465208   23618.05091132  -28423.98954497
  134909.24528337  -31776.77962593 -112930.80584271   97419.90600849
   -2082.28454452  205302.5782722 ]
------------------------------------

Run  1062
State: [34  0  0  0  0  0  9  0  0 14  0  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:164222.8725894122 | Update: -502.2854421314987
Weight: [ -79613.15080869    -621.5097368    97190.04392992   13413.33258775
     530.25805141  102469.32465208   23414.96738578  -28423.98954497
  134909.24528337  -32090.01356085 -112930.80584271   97419.90600849
   -2082.28454452  203645.66441394]
------------------------------------

Run  1063
State: [34  0  0  0  0  0 11  0  0 14  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:162932.92421498866 | Update


Run  1122
State: [ 2  0 14 17  0  0 16  0 18  0  0  0]
Alpha:0.3333333333333333, Action:1, Reward:0.0 | Max_value:396114.2374784798 | Update: -1178.099888316957
Weight: [ -29444.95375021    -621.5097368   105065.13922293   23947.58729158
     530.25805141  104556.28895833   31342.6428566   -27472.64962582
  144302.55923382  -28017.25626995 -112930.80584271   97419.90600849
   60312.38549229  268850.38940992]
------------------------------------

Run  1123
State: [ 2  0 14 17  0  0 17  0 18  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:391893.08093229926 | Update: -5549.614214228583
Weight: [ -29477.04231347    -621.5097368   104842.79385627   23677.3066825
     530.25805141  104556.28895833   31085.93784237  -27472.64962582
  144015.39617342  -28017.25626995 -112930.80584271   97419.90600849
   59134.28560397  267672.2895216 ]
------------------------------------

Run  1124
State: [ 2  0 14 18  0  0 17  0 17  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:380079.7608035

Alpha:1.0, Action:0, Reward:-0.1 | Max_value:279106.1565062545 | Update: -469.1100759576075
Weight: [ -33506.9827045     -773.71469784   94712.22325779   12243.07106913
  -16138.50374411  103729.36516213   54960.9952839   -39497.00922049
  160291.24149566  -28017.25626995 -116276.9549399    97419.90600849
    -875.14157778  255205.61236072]
------------------------------------

Run  1183
State: [11  0 26  0 25 11 23  0  0  0 20  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:280353.311123463 | Update: -875.0320282936445
Weight: [ -33577.25847377    -773.71469784   94547.79894231   12243.07106913
  -16297.19016309  103659.54627243   54820.44565722  -39497.00922049
  160291.24149566  -28017.25626995 -116410.27138492   97419.90600849
    -875.14157778  254736.50228476]
------------------------------------

Run  1184
State: [11  0 26  0 26 12 24  0  0  0 20  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:279072.1801351501 | Update: -2790.8218013514997
Weight: [ -33708.34401653    -773.71

Run  1244
State: [19  0  0  0  0  0  0 22 11  7  0 15]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:202481.5537459808 | Update: -4337.022520397964
Weight: [ -49828.46335448    -765.87284268   81404.88424651   12243.07106913
  -29476.98886218  102417.12044332   28965.82699659  -43232.18292911
  133065.77957745  -38806.43360908 -122487.84000989   81289.05247886
  -15454.70919666  197962.82316241]
------------------------------------

Run  1245
State: [19  0  0  0  0  0  0 21 10  7  0 14]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:198328.691673044 | Update: 14601.392301294138
Weight: [ -50950.69740067    -765.87284268   81404.88424651   12243.07106913
  -29476.98886218  102417.12044332   28965.82699659  -44519.01747776
  132419.74120311  -39216.3832945  -122487.84000989   80414.14442944
  -15454.70919666  193625.80064201]
------------------------------------

Run  1246
State: [19  0  0  0  0  0  0 20 10  6  0 14]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:217068.12894662676 | Update: -

    -909.57783064  162770.55858174]
------------------------------------

Run  1307
State: [42  0  0  9 12 19  0 15  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:138544.46185414115 | Update: -1222.2353037499124
Weight: [ -65411.21801727    -765.87284268   81404.88424651   12137.64637933
  -32271.50284568   98119.62149653   18615.94374817  -44485.78363769
  132303.46554283  -37919.09873386 -122487.84000989   79311.94870043
    -343.29851151  163336.83790087]
------------------------------------

Run  1308
State: [42  0  0  9 13 19  0 14  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:138830.38079531552 | Update: 700.0300371363992
Weight: [ -66110.3228049     -765.87284268   81404.88424651   11989.1960429
  -32469.95739538   97805.41589967   18615.94374817  -44733.0439635
  132303.46554283  -37919.09873386 -122487.84000989   79311.94870043
    -343.29851151  162114.60259712]
------------------------------------

Run  1309
State: [42  0  0 10 13 20  0 13  0  0  0  0]

Run  1370
State: [10  0  9 25  0  0  0  0  0 19  0  0]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:209748.59095267468 | Update: 319.18145414472383
Weight: [ -31552.7846639     -765.87284268   79171.81475702   19898.9879288
  -11352.13991901   98477.09374488   27250.0338981   -50178.70509036
  144068.11146086  -57280.10946038 -122487.84000989   79311.94870043
   -2098.25423513  211784.73578038]
------------------------------------

Run  1371
State: [11  0 10 25  0  0  0  0  0 19  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:211179.2097770717 | Update: 709.6439281699422
Weight: [ -31509.3160481     -765.87284268   79210.54035136   20006.67449042
  -11352.13991901   98477.09374488   27250.0338981   -50178.70509036
  144068.11146086  -57198.21923052 -122487.84000989   79311.94870043
   -1779.07278099  212103.91723453]
------------------------------------

Run  1372
State: [11  0 10 26  0  0  0  0  0 18  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:211190.59671839382 | Update: -18


Run  1432
State: [23  0 22  0  0  0  0  0 25  0 12  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:514160.76427506225 | Update: 20528.853772438713
Weight: [ 26963.15559746   -767.85264074 126998.19550668   4149.41463015
 -11352.13991901 125770.47561091  27288.41139342 -37156.98269412
 175045.03302771 -60542.42666849 -79142.51057668  79311.94870043
  48569.82668624 375442.62074643]
------------------------------------

Run  1433
State: [23  0 22  0  0  0  0  0 24  0 12  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:540325.7671598225 | Update: -6387.095584684401
Weight: [ 33393.44766341   -767.85264074 133086.62548107   4149.41463015
 -11352.13991901 125770.47561091  27288.41139342 -37156.98269412
 181994.93474641 -60542.42666849 -75808.74071811  79311.94870043
  48569.82668624 395971.47451887]
------------------------------------

Run  1434
State: [24  0 22  0  0  0  0  0 23  0 11  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:525534.925070185 | Update: 36927.28185085667
Weight: [ 

Run  1489
State: [37  0  0  0 21  0 10  0  0  0  0 19]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:369382.5404467234 | Update: -4254.41260342102
Weight: [ 22941.14798971   -617.12906912 144350.23404263   4149.41463015
 -33730.57928593 125770.47561091  13864.36376936 -61926.81399329
 196578.43277193 -60542.42666849 -62701.57634698  35818.41156848
  -3242.17053103 356926.94761938]
------------------------------------

Run  1490
State: [37  0  0  0 22  0 12  0  0  0  0 18]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:363330.1828799238 | Update: -3633.401828799222
Weight: [ 20797.37155562   -617.12906912 144350.23404263   4149.41463015
 -34939.46123034 125770.47561091  13284.9726144  -61926.81399329
 196578.43277193 -60542.42666849 -62701.57634698  34731.30358935
  -3242.17053103 352672.53501596]
------------------------------------

Run  1491
State: [37  0  0  0 22  0 12  0  0  0  0 18]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:370227.06981548993 | Update: 4191.199139039847
Weight: [ 

Run  1549
State: [ 8  0  0 18  0  0 18  0 17 11  0  7]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:339000.82498377276 | Update: -196.71271676092874
Weight: [ -5807.48891882   -617.12906912 144350.23404263   3274.72823744
 -47811.86207892 122565.40414712  -2141.05491715 -56144.72654444
 173793.02683563 -54189.47823647 -62701.57634698  25755.0118182
  -3539.66629853 305325.03985544]
------------------------------------

Run  1550
State: [ 8  0  0 18  0  0 18  0 17 11  0  6]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:334114.11995366734 | Update: -4252.731690384448
Weight: [ -5828.92081443   -617.12906912 144350.23404263   3226.94354509
 -47811.86207892 122565.40414712  -2189.27602633 -56144.72654444
 173747.74173773 -54218.69727773 -62701.57634698  25736.49314139
  -3736.37901529 305128.32713868]
------------------------------------

Run  1551
State: [10  0  0 18  0  0 20  0 16 11  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:329698.10845470306 | Update: -2556.418323488557
Weight: 

Run  1611
State: [17  0  0  0  9 14  0 24  0  0 19  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:179301.10168861764 | Update: 679.624117698404
Weight: [-26145.04937636   -627.01025945 144350.23404263 -18408.61763533
 -49605.39377046 119032.84085068 -20464.72948732 -61932.61852731
 163027.29438745 -59555.59379912 -67274.92522345  25393.33152114
    698.12350817 203719.22131824]
------------------------------------

Run  1612
State: [17  0  0  0 10 15  0 23  0  0 19  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:182740.31750838208 | Update: 745.2640936404641
Weight: [-25987.7033189    -627.01025945 144350.23404263 -18408.61763533
 -49522.63075963 119161.5775321  -20464.72948732 -61712.63590334
 163027.29438745 -59555.59379912 -67100.17728878  25393.33152114
    698.12350817 204398.84543594]
------------------------------------

Run  1613
State: [17  0  0  0 10 15  0 21  0  0 18  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:197717.41997910923 | Update: 12756.245394657773
Weight: [-


Run  1673
State: [30  0 18  0  0  0  0 13  6 22  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:201917.90013661943 | Update: -10313.376247781067
Weight: [-31726.8603674    -674.08719174 142423.99117172 -18408.61763533
 -52340.22745786 115092.31046026 -18749.64646992 -45628.16600843
 145294.12176719 -45740.2948432  -70026.08573043  25393.33152114
  -3078.91890849 201476.38332774]
------------------------------------

Run  1674
State: [29  0 18  0  0  0  0 13  0 22  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:191290.91921540035 | Update: 1272.0353665387956
Weight: [-35940.5280217    -674.08719174 139921.39197858 -18408.61763533
 -52340.22745786 115092.31046026 -18749.64646992 -47436.39141993
 144456.15640852 -48804.12285711 -70026.08573043  25393.33152114
 -13392.29515627 191163.00707996]
------------------------------------

Run  1675
State: [29  0 19  0  0  0  0 12  0 21  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:194103.06169697264 | Update: -821.7499468811147
Weigh

Alpha:1.0, Action:0, Reward:-0.1 | Max_value:216597.4842111917 | Update: 238.30003175351885
Weight: [-34158.83116013   -709.64291911 122988.39903671 -16912.76222127
 -52340.22745786 115371.42539818 -36544.88887151 -43845.25281139
 144456.15640852 -47896.09717049 -70026.08573043  21821.97247756
   2404.37487387 193044.72316444]
------------------------------------

Run  1732
State: [16  0  0 10  0 20  0 12  0  0  0 23]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:220570.41762065026 | Update: 1935.528764323899
Weight: [-34106.90547389   -709.64291911 122988.39903671 -16880.60282013
 -52340.22745786 115435.91052936 -36544.88887151 -43806.68600298
 144456.15640852 -47896.09717049 -70026.08573043  21895.68332634
   2404.37487387 193283.02319619]
------------------------------------

Run  1733
State: [14  0  0 10  0 21  0 10  0  0  0 23]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:222773.58049774 | Update: -2452.0202243840904
Weight: [-33685.15286822   -709.64291911 122988.39903671 -16619.39

Run  1786
State: [13  0  0 23 13  0  0 12  0  0  0 13]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:91958.34084053333 | Update: -788.9150766902676
Weight: [-42044.7913591    -754.98404755 122988.39903671 -33264.60471453
 -57549.58451309 114301.59317236 -42813.06109311 -49334.46960059
 114470.66221258 -47896.09717049 -70026.08573043   7130.68043681
 -10302.89519258 127246.07858688]
------------------------------------

Run  1787
State: [13  0  0 24 14  0  0 11  0  0  0 12]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:91090.48137721125 | Update: 9484.574740635653
Weight: [-42184.46413676   -754.98404755 122988.39903671 -33509.47830241
 -57688.35547476 114301.59317236 -42813.06109311 -49462.14871331
 114470.66221258 -47896.09717049 -70026.08573043   6992.75223192
 -10302.89519258 126457.16351019]
------------------------------------

Run  1788
State: [14  0  0 24 14  0  0 10  0  0  0 12]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:102763.798966721 | Update: -112.84167010457895
Weight: [-40


Run  1846
State: [12  0  0  0  0 23  0  7  0 20 15  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:420310.81584808906 | Update: -4268.711586098478
Weight: [ 11866.00518005   -685.11116871 122988.39903671 -34513.8286232
 -50150.93917731 163679.23042057 -33258.98071857   4864.27563854
 114470.66221258  31682.34585224 -11379.59537817   6445.70678254
  11146.86167633 349648.52026058]
------------------------------------

Run  1847
State: [12  0  0  0  0 23  0  6  0 20 15  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:410118.60601704055 | Update: -4708.720754602458
Weight: [ 11168.3895204    -685.11116871 122988.39903671 -34513.8286232
 -50150.93917731 162350.82652447 -33258.98071857   4461.27734515
 114470.66221258  30529.50953678 -12246.1136808    6445.70678254
   6878.15009023 345379.80867448]
------------------------------------

Run  1848
State: [12  0  0  0  0 23  0  0  0 19 14  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:401788.86279623886 | Update: 328.2600085585145
Weight: [ 


Run  1905
State: [31  0 11  0  0  0  0  0  0  0  0  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:272447.6326947314 | Update: -1362.288163473655
Weight: [ -9501.67732325   -685.11116871 120584.84051271 -34513.8286232
 -50150.93917731 152877.50207326 -38194.19904319   7653.42176527
 103543.47762544  16627.26729517 -22349.70527309   6445.70678254
  -2868.04162349 258577.59648038]
------------------------------------

Run  1906
State: [31  0 11  0  0  0  0  0  0  0  0  0]
Alpha:0.3333333333333333, Action:0, Reward:-0.1 | Max_value:270812.5769014541 | Update: -902.741923004835
Weight: [-10076.8110538    -685.11116871 120382.82738085 -34513.8286232
 -50150.93917731 152877.50207326 -38194.19904319   7653.42176527
 103543.47762544  16627.26729517 -22349.70527309   6445.70678254
  -2868.04162349 257215.30831691]
------------------------------------

Run  1907
State: [31  0 11  0  0  0  0  0  0  0  0  0]
Alpha:0.25, Action:0, Reward:-0.1 | Max_value:269729.0811693326 | Update: -674.347702923

Run  1963
State: [56  0 22 14 19  0  0  0 25  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:334310.8965408236 | Update: -4634.905797583051
Weight: [ -5293.02860907   -685.11116871 121808.85391397 -33602.82559444
 -49921.37081608 155503.03380576 -36057.67351236  19494.24817037
  90070.67789412  16627.26729517 -22349.70527309   6445.70678254
  24220.26618439 267983.22407496]
------------------------------------

Run  1964
State: [57  0 22 14 19  0  0  0 24  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:319588.98947640636 | Update: 15070.68227802799
Weight: [ -8827.84695748   -685.11116871 120434.2374985  -34478.52045357
 -51112.94065691 155503.03380576 -36057.67351236  19494.24817037
  88501.56253455  16627.26729517 -22349.70527309   6445.70678254
  19585.36038681 263348.31827737]
------------------------------------

Run  1965
State: [58  0 22 14 19  0  0  0 23  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:344273.3176998135 | Update: 12130.148740793753
Weight: [ 


Run  2020
State: [18  0  0  0  0  0  6  0  0 15 24 19]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:242338.63410333643 | Update: 6488.442539928015
Weight: [-5.21880039e+01 -6.85111169e+02  1.14921113e+05 -4.63652404e+04
 -5.65583942e+04  1.55503034e+05 -4.31719964e+04  1.05126775e+04
  8.30970914e+04  9.14311956e+03 -1.04466028e+04  2.93609544e+03
  9.49987075e+03  2.37757905e+05]
------------------------------------

Run  2021
State: [18  0  0  0  0  0  7  0  0 15 24 19]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:249339.1725841533 | Update: 5703.726843901648
Weight: [  1538.37592782   -685.11116871 114921.11286812 -46365.24038995
 -56558.3941529  155503.03380576 -42641.81567241  10512.677536
  83097.09144529  10457.35314425  -8339.23003188   4594.05349954
   9499.87074766 244246.34730912]
------------------------------------

Run  2022
State: [18  0  0  0  0  0  9  0  0 14 24 19]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:256111.46146157716 | Update: -3094.438648282143
Weight: [  

   5928.2911241  331636.42094911]
------------------------------------

Run  2076
State: [29  0  0  0  0  0 12  0 20  0 10  8]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:370560.6885351361 | Update: -5761.691296311852
Weight: [ 36261.50107021   -629.86394039 114921.11286812 -46365.24038995
 -56558.3941529  160868.63691131 -36873.55482447  26828.45686897
  80750.16127472  17425.89588166  12891.68129613  21263.11624252
   5928.2911241  332490.96287561]
------------------------------------

Run  2077
State: [29  0  0  0  0  0 13  0 19  0  9  7]
Alpha:1.0, Action:0, Reward:-1 | Max_value:356893.55651644716 | Update: -3926.847580826492
Weight: [ 3.39859523e+04 -6.29863940e+02  1.14921113e+05 -4.63652404e+04
 -5.65583942e+04  1.60868637e+05 -3.78151484e+04  2.68284569e+04
  7.91896966e+04  1.74258959e+04  1.21119595e+04  2.06432194e+04
  1.66599828e+02  3.26729272e+05]
------------------------------------

Run  2078
State: [29  1  0  0  0  0 14  0 19  0  9  7]
Alpha:1.0, Action:0, Reward:-1 


Run  2134
State: [33  0  0  0 24  9 18  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:215828.82454351286 | Update: -600.6503362156509
Weight: [ 1.48463999e+02 -8.32243684e+02  1.14921113e+05 -4.63652404e+04
 -6.89429339e+04  1.60280298e+05 -4.48572722e+04  1.87187763e+04
  6.71771600e+04  1.74258959e+04  8.16739292e+03  1.78307832e+04
 -1.51642317e+03  2.28071311e+05]
------------------------------------

Run  2135
State: [33  0  0  0 24 10 19  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:214382.62026076947 | Update: -2756.824359837512
Weight: [-1.21480092e+02 -8.32243684e+02  1.14921113e+05 -4.63652404e+04
 -6.91379893e+04  1.60207156e+05 -4.50045125e+04  1.87187763e+04
  6.71771600e+04  1.74258959e+04  8.16739292e+03  1.78307832e+04
 -1.51642317e+03  2.27470660e+05]
------------------------------------

Run  2136
State: [33  0  0  0 24 10 20  0  0  0  0  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:211125.864402419 | Update: -1518.9861602167366
Weight

  -4969.74650533 316159.13194207]
------------------------------------

Run  2193
State: [57  0 15 19  0  0 24  0 14  9  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:342750.1472681508 | Update: 1542.2450326457038
Weight: [ 27767.34201422   -832.24368393 106874.27180479 -47395.14379134
 -56112.78680223 178801.82386331 -43222.65139652  68411.24062782
  33658.14551686  60875.38179484   8167.39292438  17830.78318366
  -4969.74650533 312084.39574359]
------------------------------------

Run  2194
State: [57  0 15 19  0  0 24  0 14  9  0  0]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:345605.49470340344 | Update: -26.113569018867565
Weight: [ 28964.54115838   -832.24368393 107186.13388577 -46999.69447068
 -56112.78680223 178801.82386331 -42718.57440316  68411.24062782
  33950.53070564  61062.81076967   8167.39292438  17830.78318366
  -3427.50147268 313626.64077624]
------------------------------------

Run  2195
State: [59  0 16 20  0  0 26  0 13  9  0  0]
Alpha:1.0, Action:1, Reward:5

Run  2253
State: [19  0  0  0 10 15  0 23  0  0 19 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:312603.9956333853 | Update: -3198.6374271507375
Weight: [ 33743.8360937   -1271.90955292  94082.40208158 -55026.3904821
 -58743.00101721 175115.11702403 -41063.33403688  43977.50114708
  31862.16678871  60563.51453203   6931.19346758  11947.25933128
  -1666.98123929 258579.61674785]
------------------------------------

Run  2254
State: [18  0  0  0 10 15  0 21  0  0 18 25]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:310081.503266894 | Update: -594.3135012541898
Weight: [ 32916.16692544  -1271.90955292  94082.40208158 -55026.3904821
 -59175.80376006 174465.94205563 -41063.33403688  42985.2966188
  31862.16678871  60563.51453203   6108.74577978  10871.82350749
  -4865.61866644 255380.9793207 ]
------------------------------------

Run  2255
State: [17  0  0  0 10 16  0 21  0  0 18 25]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:306863.3402967162 | Update: -606.0218996981857
Weight: [ 3277


Run  2313
State: [22  0  0  0  0  0  0 16  8 23  0 13]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:332260.24356647785 | Update: -5710.729222271591
Weight: [ 38039.40758705  -1284.24040303  94082.40208158 -55026.3904821
 -56320.42072031 147093.97136832 -17941.53059678  50368.973904
  23633.32962492  90904.27021315  -4782.04263973   5818.39188056
  15731.72465464 264840.22403243]
------------------------------------

Run  2314
State: [23  0  0  0  0  0  0 14  7 22  0 13]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:319804.81612100976 | Update: -2703.299747637473
Weight: [ 36328.39812574  -1284.24040303  94082.40208158 -55026.3904821
 -56320.42072031 147093.97136832 -17941.53059678  49136.66409587
  23014.66468003  89130.65157255  -4782.04263973   4819.96929556
  10020.99543237 259129.49481015]
------------------------------------

Run  2315
State: [24  0  0  0  0  0  0 14  7 22  0 13]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:312715.58339260117 | Update: 3226.4401701177703
Weight: [ 3


Run  2375
State: [52  0  0 11  0 22  0  9  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:312307.8048366906 | Update: -2052.8744203145034
Weight: [ 34889.78827333  -1284.24040303  94082.40208158 -51630.69982927
 -56320.42072031 150979.35168    -14821.09341268  52786.33727939
  22642.72414494  82890.00352493  -4782.04263973  -1227.9128101
  -2018.88302469 244864.29835183]
------------------------------------

Run  2376
State: [53  0  0 11  0 22  0  7  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:306940.26159552846 | Update: -1012.3120391384
Weight: [ 33435.99087004  -1284.24040303  94082.40208158 -51935.44646588
 -56320.42072031 150368.28225268 -14821.09341268  52537.1572419
  22642.72414494  82890.00352493  -4782.04263973  -1227.9128101
  -4071.757445   242811.42393152]
------------------------------------

Run  2377
State: [55  0  0 11  0 23  0  0  0  0  0  0]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:307037.49740745197 | Update: 3786.6281355238752
Weight: [ 327

State: [ 9  0 11  0 18  0  0  0  0  0 26  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:222109.19518007763 | Update: -1110.595975900389
Weight: [ 16629.81607705  -1387.79821565  93848.29532616 -56087.26474783
 -55189.67952927 137580.09580899 -13198.09953726  47476.42589571
  21397.10184102  82890.00352493   3069.53906893  -1227.9128101
  -1721.6469557  218515.87662917]
------------------------------------

Run  2436
State: [ 9  0 11  0 18  0  0  0  0  0 26  0]
Alpha:0.3333333333333333, Action:0, Reward:-0.1 | Max_value:220754.11965259074 | Update: -735.8803988419725
Weight: [ 16493.69142995  -1387.79821565  93683.60552165 -56087.26474783
 -55460.17100945 137580.09580899 -13198.09953726  47476.42589571
  21397.10184102  82890.00352493   2678.77096113  -1227.9128101
  -1721.6469557  217405.28065327]
------------------------------------

Run  2437
State: [ 9  0 11  0 18  0  0  0  0  0 26  0]
Alpha:0.25, Action:0, Reward:-0.1 | Max_value:219070.6515388828 | Update: -744.100547073358
Weig

Run  2494
State: [16  0 23  0  0  0  7  0 22 15 11  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:285362.14236241305 | Update: -3865.9911591811106
Weight: [ 21501.4703949   -1387.79821565  94355.38127344 -56087.26474783
 -39554.25143241 121009.40991824   5415.55665406  51259.01161744
  19969.7093357   80426.2431893    7436.15029042  -1227.9128101
  -1778.56541138 228570.51392161]
------------------------------------

Run  2495
State: [16  0 23  0  0  0  8  0 22 14 11  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:280045.0047860667 | Update: -3079.488765228074
Weight: [ 20659.0691478   -1387.79821565  93156.69198104 -56087.26474783
 -39554.25143241 121009.40991824   5047.01112167  51259.01161744
  18817.96127263  79643.18694733   6860.65309685  -1227.9128101
  -1778.56541138 224704.52276243]
------------------------------------

Run  2496
State: [16  0 23  0  0  0  9  0 21 14 10  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:276113.29817163385 | Update: -2697.0689150989638
Weight:


Run  2557
State: [38  0  0  0  0  9 17  0  0  0  0 17]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:149913.07148236412 | Update: -1510.975010515016
Weight: [-6.36477510e+03 -1.38779822e+03  7.84413150e+04 -5.60872647e+04
 -3.95542514e+04  1.21009518e+05 -5.20991052e+03  4.29619538e+04
  5.73489051e+03  7.15550442e+04  2.23907635e+03 -1.90111947e+04
  5.92073574e+01  1.44035743e+05]
------------------------------------

Run  2558
State: [38  0  0  0  0  9 18  0  0  0  0 17]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:145985.11214273365 | Update: -3285.0289774685807
Weight: [-7.14672520e+03 -1.38779822e+03  7.84413150e+04 -5.60872647e+04
 -3.95542514e+04  1.20825523e+05 -5.55972554e+03  4.29619538e+04
  5.73489051e+03  7.15550442e+04  2.23907635e+03 -1.93566450e+04
  5.92073574e+01  1.42524768e+05]
------------------------------------

Run  2559
State: [39  0  0  0  6 10 20  0  0  0  0 17]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:140550.38998846407 | Update: 1077.9506052098295
Weight


Run  2614
State: [ 5  0  0 18 24  0 18  0 17  0  0  6]
Alpha:1.0, Action:1, Reward:0.0 | Max_value:131363.25102800215 | Update: -1481.8034751114756
Weight: [-4.55016931e+03 -1.38779822e+03  7.84413150e+04 -5.24184975e+04
 -3.48554418e+04  1.15994968e+05  1.47263442e+02  3.82616412e+04
  7.99076530e+03  7.15550442e+04  2.23907635e+03 -1.84780115e+04
  6.10575776e+02  1.54898332e+05]
------------------------------------

Run  2615
State: [ 6  0  0 18 24  0 19  0 16  0  0  6]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:130339.10932042956 | Update: 194.24447055776545
Weight: [ -4651.07107324  -1387.79821565  78441.31495162 -52778.45143022
 -35336.64313758 115994.96825884   -215.97798106  38261.64120886
   7649.64035412  71555.04417956   2239.07635419 -18597.58128012
   -871.22769935 153416.52856622]
------------------------------------

Run  2616
State: [ 6  0  0 18 24  0 20  0 16  0  0  0]
Alpha:0.5, Action:0, Reward:-0.1 | Max_value:130005.70837072353 | Update: -941.9271788653641
Weigh

Run  2673
State: [19  0  0  0  0 11 22  0  0 10 21  0]
Alpha:0.5, Action:1, Reward:0.0 | Max_value:238439.21406742808 | Update: -1199.3967227506218
Weight: [-5.17818859e+03 -1.43333362e+03  7.84413150e+04 -3.59729189e+04
 -1.02320499e+04  1.15309076e+05  2.33724175e+04  5.64540493e+04
  2.36373314e+04  8.20238742e+04 -1.95114787e+02 -1.85819073e+04
 -7.72254119e+02  2.05380915e+05]
------------------------------------

Run  2674
State: [19  0  0  0  0 11 23  0  0  9 20  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:238779.59686608883 | Update: -516.7461893930158
Weight: [ -5488.54063891  -1433.33362212  78441.31495162 -35972.91893406
 -10232.04985166 115130.5668856   23013.06738148  56454.04928355
  23637.33143616  81861.91569952   -535.97147625 -18581.90729641
  -1971.65084136 204181.51874873]
------------------------------------

Run  2675
State: [19  0  0  0  0 12 24  0  0  9 20  0]
Alpha:1.0, Action:0, Reward:-0.1 | Max_value:238428.5765015686 | Update: -2073.1844515080156
Weight

In [None]:
# Re-print the per-run summary line (run index, total reward, mean scores, score).
# NOTE(review): `i`, `scores`, `total_reward` and `score` are not defined in this
# cell; presumably they are left over from the training loop -- this cell fails
# on a fresh kernel unless that loop has run first.
print_result(i, scores, total_reward, score)

In [None]:
# Display `scores` (defined outside this cell; presumably the per-run score
# collection that print_result averages -- TODO confirm).
scores

In [None]:
# Manually invoke one weight update on the agent with the current transition
# variables (all defined outside this cell).
# NOTE(review): update_W is implemented outside this notebook chunk; it presumably
# mutates the agent's weights, so re-running this cell may not be idempotent -- confirm.
ql_agent_app.update_W(old_state, state, action, reward)

In [None]:
# Display the current transition variables as a tuple for inspection
# (all four names are defined outside this cell).
old_state, state, action, reward

In [None]:
# Reinterpret the raw buffer of `state` as a flat uint8 vector (reads the whole
# buffer), then display it as a single-row 2-D array (1 x n).
teste = np.frombuffer(state, dtype=np.uint8)
teste[np.newaxis, :]

In [None]:
from sklearn.preprocessing import normalize

In [None]:
# L1-normalize the single observation row so its absolute values sum to 1.
# axis=1 normalizes per sample (row). The previous axis=0 normalized each feature
# column independently, which on a one-row array divides every entry by its own
# magnitude and collapses all non-zero values to 1.
normalize(teste.reshape(1, -1), norm='l1', axis=1)

In [None]:
# Draw 10000 random observations from the environment's observation space and
# keep only the RAM positions listed in RAM_mask (one row per sample).
observation_examples = np.array(
    [env.observation_space.sample()[RAM_mask] for _ in range(10000)]
)

# Fit a scaler that rescales each feature to unit variance without centering
# (with_mean=False); the fitted estimator is the cell's displayed output.
scaler = StandardScaler(with_mean=False)
scaler.fit(observation_examples)

In [None]:
# Sanity-check the fitted scaler on a hand-written row of 12 values
# (one per RAM_mask entry). Requires `scaler` from the fitting cell.
scaler.transform([[1,0,1,0,0,1,0,1,0,0,1,1]])

In [None]:
# Scratch check of np.append: with no axis given, both inputs are flattened and
# joined into one 1-D array, here [1, 2, 3, 1, 2].
np.append([1,2,3],[1,2])