In [1]:
import torch
import numpy as np
import random
import os
from datetime import date

from backpropamine_A2C import BP_RNetwork, Standard_RNetwork, Standard_FFNetwork
from BP_A2C.BP_A2C_agent import A2C_Agent

%matplotlib inline

In [2]:
# Environment specific parameters
env_name = 'CartPole-v0'
n_runs = 10
n_evaluations = 100
max_steps = 200
num_training_episodes = 10000
num_evaluation_episodes = 100
max_reward = 200

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
# A2C hyperparameters
entropy_coef = 0.03 
value_pred_coef = 0.1 
gammaR = 0.99
max_grad_norm = 4.0
batch_size = 128
print_every = 10
save_every = 50

In [4]:
# Adam hyperparameters
learning_rate = 1e-4 # For Adam optimizer
l2_coef = 0

In [5]:
selection_method = "evaluation"

In [6]:
training_seeds = np.load('rstdp_cartpole_stuff/seeds/training_seeds.npy')
evaluation_seeds = np.load('rstdp_cartpole_stuff/seeds/evaluation_seeds.npy')

In [7]:
# Create Results Directory
dirs = os.listdir('./BP_A2C/training_results/')
if not any('a2c_result' in d for d in dirs):
    result_id = 1
else:
    results = [d for d in dirs if 'a2c_result' in d]
    result_id = len(results) + 1

# Get today's date and add it to the results directory
d = date.today()
result_dir = 'BP_A2C/training_results/BP_RNN_POLELENGTH0.1_a2c_result_' + str(result_id) + "_{}_entropycoef_{}_valuepredcoef_{}_batchsize_{}_maxsteps_{}_\
maxgradnorm_{}_gammaR_{}_l2coef_{}_learningrate_{}_numtrainepisodes_{}_selectionmethod_{}".format(
    str(d.year) + str(d.month) + str(d.day), entropy_coef, value_pred_coef, batch_size, max_steps, max_grad_norm, gammaR,
    l2_coef, learning_rate, num_training_episodes, selection_method)
os.mkdir(result_dir)
print('Created Directory {} to store the results in'.format(result_dir))

Created Directory BP_A2C/training_results/BP_RNN_POLELENGTH0.1_a2c_result_11_20231024_entropycoef_0.03_valuepredcoef_0.1_batchsize_128_maxsteps_200_maxgradnorm_4.0_gammaR_0.99_l2coef_0_learningrate_0.0001_numtrainepisodes_10000_selectionmethod_evaluation to store the results in


In [9]:
smoothed_scores_dqn_all = []
dqn_completion_after = []

for i_run in range(1, n_runs):
    print("Run # {}".format(i_run))
    seed = int(training_seeds[i_run])
    
    torch.manual_seed(seed)
    random.seed(seed)

    agent_net = BP_RNetwork(4, 64, 2, seed)
    
    # optimizer = torch.optim.Adam(agent_net.parameters(), lr=1.0*learning_rate, eps=1e-4, weight_decay=l2_coef)
    optimizer = torch.optim.Adam(agent_net.parameters(), lr = learning_rate)
    agent = A2C_Agent(env_name, seed, agent_net, entropy_coef, value_pred_coef, gammaR,
                      max_grad_norm, max_steps, batch_size, num_training_episodes, optimizer, print_every,
                      save_every, i_run, result_dir, selection_method, num_evaluation_episodes, evaluation_seeds, max_reward)

    smoothed_scores, scores, best_average, best_average_after = agent.train_agent()

    # # policy_net = QNetwork(architecture, seed).to(device)
    # # target_net = QNetwork(architecture, seed).to(device)
    # policy_net = BP_RNetwork(4, 64, 2, seed).to(device)
    # target_net = BP_RNetwork(4, 64, 2, seed).to(device)

    # target_net.load_state_dict(policy_net.state_dict())

    # # optimizer = optim.Adam(policy_net.parameters(), lr=learning_rate)
    # optimizer = optim.Adam(policy_net.parameters(), lr = learning_rate, weight_decay = l2_coef) 
    # agent = Agent(env_name, policy_net, target_net, architecture, batch_size,
    #               replay_memory_size, discount_factor, eps_start, eps_end, eps_decay,
    #               update_every, target_update_frequency, optimizer, learning_rate,
    #               num_episodes, max_steps, i_run, result_dir, seed, tau)
    
    # smoothed_scores, scores, best_average, best_average_after = agent.train_agent()

    # np.save(result_dir + '/scores_{}'.format(i_run), scores)
    # np.save(result_dir + '/smoothed_scores_DQN_{}'.format(i_run), smoothed_scores)

    # # save smoothed scores in list to plot later
    # dqn_completion_after.append(best_average_after)
    # smoothed_scores_dqn_all.append(smoothed_scores)
    # print("")

Run # 1
Batch size larger than 1 not implemented yet. Program will continue with batch size set to 1.




Episode 10	Average evaluation: 7.18
Episode 20	Average evaluation: 7.18
Episode 30	Average evaluation: 7.18
Episode 40	Average evaluation: 7.18
Episode 50	Average evaluation: 7.18
Episode 60	Average evaluation: 7.18
Episode 70	Average evaluation: 7.18
Episode 80	Average evaluation: 7.18
Episode 90	Average evaluation: 7.27
Episode 100	Average evaluation: 8.0
Episode 110	Average evaluation: 8.66
Episode 120	Average evaluation: 9.4
Episode 130	Average evaluation: 10.52
Episode 140	Average evaluation: 12.97
Episode 150	Average evaluation: 13.41
Episode 160	Average evaluation: 15.25
Episode 170	Average evaluation: 15.06
Episode 180	Average evaluation: 12.54
Episode 190	Average evaluation: 11.58
Episode 200	Average evaluation: 11.33
Episode 210	Average evaluation: 11.35
Episode 220	Average evaluation: 11.12
Episode 230	Average evaluation: 10.72
Episode 240	Average evaluation: 11.52
Episode 250	Average evaluation: 11.08
Episode 260	Average evaluation: 11.41
Episode 270	Average evaluation: 11.

KeyboardInterrupt: 

In [None]:
Episode 20	Average evaluation: 5.15
Episode 30	Average evaluation: 8.29
Episode 40	Average evaluation: 17.67
Episode 50	Average evaluation: 17.81
Episode 60	Average evaluation: 20.37
Episode 70	Average evaluation: 34.45
Episode 80	Average evaluation: 44.51
Episode 90	Average evaluation: 47.92
Episode 100	Average evaluation: 47.21
Episode 110	Average evaluation: 45.87
Episode 120	Average evaluation: 52.43
Episode 130	Average evaluation: 65.67
Episode 140	Average evaluation: 85.97
Episode 150	Average evaluation: 98.05
Episode 160	Average evaluation: 119.18
Episode 170	Average evaluation: 130.96
Episode 180	Average evaluation: 134.98
Episode 190	Average evaluation: 129.95
Episode 200	Average evaluation: 127.62
Episode 210	Average evaluation: 128.75
Episode 220	Average evaluation: 132.38
Episode 230	Average evaluation: 139.57
Episode 240	Average evaluation: 160.61
Episode 250	Average evaluation: 190.35
Episode 260	Average evaluation: 199.86
Episode 270	Average evaluation: 200.0
Episode 280	Average evaluation: 200.0
Episode 290	Average evaluation: 200.0
Episode 300	Average evaluation: 200.0
Episode 310	Average evaluation: 200.0
Episode 320	Average evaluation: 200.0
Episode 330	Average evaluation: 200.0
Episode 340	Average evaluation: 200.0
Episode 350	Average evaluation: 200.0
Episode 360	Average evaluation: 200.0
Episode 370	Average evaluation: 199.91
Episode 380	Average evaluation: 198.54
Episode 390	Average evaluation: 198.91
Episode 400	Average evaluation: 199.78
Episode 410	Average evaluation: 200.0
Episode 420	Average evaluation: 200.0
Episode 430	Average evaluation: 200.0
Episode 440	Average evaluation: 200.0
Episode 450	Average evaluation: 200.0
Episode 460	Average evaluation: 200.0
Episode 470	Average evaluation: 200.0
Episode 480	Average evaluation: 200.0
Episode 490	Average evaluation: 200.0
Episode 500	Average evaluation: 200.0
Episode 510	Average evaluation: 200.0
Episode 520	Average evaluation: 200.0
Episode 530	Average evaluation: 200.0
Episode 540	Average evaluation: 200.0
Episode 550	Average evaluation: 200.0
Episode 560	Average evaluation: 200.0
Episode 570	Average evaluation: 200.0
Episode 580	Average evaluation: 200.0
Episode 590	Average evaluation: 200.0
Episode 600	Average evaluation: 200.0
Episode 610	Average evaluation: 200.0
Episode 620	Average evaluation: 200.0
Episode 630	Average evaluation: 200.0
Episode 640	Average evaluation: 200.0
Episode 650	Average evaluation: 200.0
Episode 660	Average evaluation: 200.0
Episode 670	Average evaluation: 200.0
Episode 680	Average evaluation: 200.0
Episode 690	Average evaluation: 200.0
Episode 700	Average evaluation: 200.0
Episode 710	Average evaluation: 200.0
Episode 720	Average evaluation: 200.0
Episode 730	Average evaluation: 200.0
Episode 740	Average evaluation: 200.0
Episode 750	Average evaluation: 200.0
Episode 760	Average evaluation: 200.0
Episode 770	Average evaluation: 200.0
Episode 780	Average evaluation: 200.0
Episode 790	Average evaluation: 200.0
Episode 800	Average evaluation: 200.0
Best evaluation:  200.0  reached at episode  270 . Model saved in folder best.
Run # 1
Batch size larger than 1 not implemented yet. Program will continue with batch size set to 1.
Episode 10	Average evaluation: 8.09
Episode 20	Average evaluation: 9.22
Episode 30	Average evaluation: 12.2
Episode 40	Average evaluation: 16.48
Episode 50	Average evaluation: 17.8
Episode 60	Average evaluation: 16.56
Episode 70	Average evaluation: 22.35
Episode 80	Average evaluation: 34.38
Episode 90	Average evaluation: 39.36
Episode 100	Average evaluation: 58.52
Episode 110	Average evaluation: 75.76
Episode 120	Average evaluation: 82.82
Episode 130	Average evaluation: 112.31
Episode 140	Average evaluation: 118.02
Episode 150	Average evaluation: 121.6
Episode 160	Average evaluation: 118.47
Episode 170	Average evaluation: 123.13
Episode 180	Average evaluation: 137.94
Episode 190	Average evaluation: 136.79
Episode 200	Average evaluation: 141.03
Episode 210	Average evaluation: 122.75
Episode 220	Average evaluation: 151.41
Episode 230	Average evaluation: 173.54
Episode 240	Average evaluation: 186.4
Episode 250	Average evaluation: 197.94
Episode 260	Average evaluation: 200.0
Episode 270	Average evaluation: 200.0
Episode 280	Average evaluation: 198.29
Episode 290	Average evaluation: 200.0
Episode 300	Average evaluation: 198.29
Episode 310	Average evaluation: 199.16
Episode 320	Average evaluation: 200.0
Episode 330	Average evaluation: 200.0
Episode 340	Average evaluation: 200.0
Episode 350	Average evaluation: 200.0
Episode 360	Average evaluation: 200.0
Episode 370	Average evaluation: 200.0
Episode 380	Average evaluation: 200.0
Episode 390	Average evaluation: 200.0
Episode 400	Average evaluation: 200.0
Episode 410	Average evaluation: 200.0
Episode 420	Average evaluation: 200.0
Episode 430	Average evaluation: 200.0
Episode 440	Average evaluation: 200.0
Episode 450	Average evaluation: 200.0
Episode 460	Average evaluation: 200.0
Episode 470	Average evaluation: 200.0
Episode 480	Average evaluation: 200.0
Episode 490	Average evaluation: 200.0
Episode 500	Average evaluation: 200.0
Episode 510	Average evaluation: 200.0
Episode 520	Average evaluation: 200.0
Episode 530	Average evaluation: 200.0
Episode 540	Average evaluation: 200.0
Episode 550	Average evaluation: 200.0
Episode 560	Average evaluation: 200.0
Episode 570	Average evaluation: 200.0
Episode 580	Average evaluation: 200.0
Episode 590	Average evaluation: 200.0
Episode 600	Average evaluation: 200.0
Episode 610	Average evaluation: 200.0
Episode 620	Average evaluation: 200.0
Episode 630	Average evaluation: 200.0
Episode 640	Average evaluation: 200.0
Episode 650	Average evaluation: 200.0
Episode 660	Average evaluation: 200.0
Episode 670	Average evaluation: 200.0
Episode 680	Average evaluation: 198.08
Episode 690	Average evaluation: 190.5
Episode 700	Average evaluation: 199.91
Episode 710	Average evaluation: 200.0
Episode 720	Average evaluation: 200.0
Episode 730	Average evaluation: 200.0
Episode 740	Average evaluation: 200.0
Episode 750	Average evaluation: 200.0
Episode 760	Average evaluation: 200.0
Episode 770	Average evaluation: 200.0
Episode 780	Average evaluation: 200.0
Episode 790	Average evaluation: 200.0
Episode 800	Average evaluation: 200.0
Best evaluation:  200.0  reached at episode  260 . Model saved in folder best.
Run # 2
Batch size larger than 1 not implemented yet. Program will continue with batch size set to 1.
Episode 10	Average evaluation: 5.13
Episode 20	Average evaluation: 5.13
Episode 30	Average evaluation: 29.91
Episode 40	Average evaluation: 76.09
Episode 50	Average evaluation: 81.99
Episode 60	Average evaluation: 111.94
Episode 70	Average evaluation: 162.72
Episode 80	Average evaluation: 174.56
Episode 90	Average evaluation: 178.9
Episode 100	Average evaluation: 185.43
Episode 110	Average evaluation: 192.56
Episode 120	Average evaluation: 187.66
Episode 130	Average evaluation: 195.6
Episode 140	Average evaluation: 200.0
Episode 150	Average evaluation: 200.0
Episode 160	Average evaluation: 197.28
Episode 170	Average evaluation: 200.0
Episode 180	Average evaluation: 200.0
Episode 190	Average evaluation: 200.0
Episode 200	Average evaluation: 200.0
Episode 210	Average evaluation: 200.0
Episode 220	Average evaluation: 200.0
Episode 230	Average evaluation: 200.0
Episode 240	Average evaluation: 200.0
Episode 250	Average evaluation: 200.0
Episode 260	Average evaluation: 200.0
Episode 270	Average evaluation: 200.0
Episode 280	Average evaluation: 200.0
Episode 290	Average evaluation: 200.0
Episode 300	Average evaluation: 200.0
Episode 310	Average evaluation: 200.0
Episode 320	Average evaluation: 200.0
Episode 330	Average evaluation: 200.0
Episode 340	Average evaluation: 200.0
Episode 350	Average evaluation: 200.0
Episode 360	Average evaluation: 200.0
Episode 370	Average evaluation: 200.0
Episode 380	Average evaluation: 200.0
Episode 390	Average evaluation: 200.0
Episode 400	Average evaluation: 200.0
Episode 410	Average evaluation: 200.0
Episode 420	Average evaluation: 200.0
Episode 430	Average evaluation: 200.0
Episode 440	Average evaluation: 200.0
Episode 450	Average evaluation: 200.0
Episode 460	Average evaluation: 199.97
Episode 470	Average evaluation: 200.0
Episode 480	Average evaluation: 200.0
Episode 490	Average evaluation: 200.0
Episode 500	Average evaluation: 200.0
Episode 510	Average evaluation: 200.0
Episode 520	Average evaluation: 200.0
Episode 530	Average evaluation: 200.0
Episode 540	Average evaluation: 199.89
Episode 550	Average evaluation: 192.36
Episode 560	Average evaluation: 181.53
Episode 570	Average evaluation: 167.08
Episode 580	Average evaluation: 156.57
Episode 590	Average evaluation: 173.72
Episode 600	Average evaluation: 199.97
Episode 610	Average evaluation: 200.0
Episode 620	Average evaluation: 200.0
Episode 630	Average evaluation: 200.0
Episode 640	Average evaluation: 200.0
Episode 650	Average evaluation: 200.0
Episode 660	Average evaluation: 200.0
Episode 670	Average evaluation: 200.0
Episode 680	Average evaluation: 200.0
Episode 690	Average evaluation: 200.0
Episode 700	Average evaluation: 199.89
Episode 710	Average evaluation: 194.79
Episode 720	Average evaluation: 192.26
Episode 730	Average evaluation: 194.1
Episode 740	Average evaluation: 195.72
Episode 750	Average evaluation: 199.56
Episode 760	Average evaluation: 200.0
Episode 770	Average evaluation: 200.0
Episode 780	Average evaluation: 200.0
Episode 790	Average evaluation: 200.0
Episode 800	Average evaluation: 200.0
Best evaluation:  200.0  reached at episode  140 . Model saved in folder best.
Run # 3
Batch size larger than 1 not implemented yet. Program will continue with batch size set to 1.
Episode 10	Average evaluation: 62.26
Episode 20	Average evaluation: 106.35
Episode 30	Average evaluation: 109.95
Episode 40	Average evaluation: 143.07
Episode 50	Average evaluation: 192.73
Episode 60	Average evaluation: 183.61
Episode 70	Average evaluation: 198.8
Episode 80	Average evaluation: 199.06
Episode 90	Average evaluation: 190.79
Episode 100	Average evaluation: 184.59
Episode 110	Average evaluation: 194.15
Episode 120	Average evaluation: 199.59
Episode 130	Average evaluation: 200.0
Episode 140	Average evaluation: 199.88
Episode 150	Average evaluation: 199.83
Episode 160	Average evaluation: 199.7
Episode 170	Average evaluation: 200.0
Episode 180	Average evaluation: 200.0
Episode 190	Average evaluation: 200.0
Episode 200	Average evaluation: 200.0
Episode 210	Average evaluation: 200.0
Episode 220	Average evaluation: 200.0
Episode 230	Average evaluation: 200.0
Episode 240	Average evaluation: 200.0
Episode 250	Average evaluation: 200.0
Episode 260	Average evaluation: 200.0
Episode 270	Average evaluation: 200.0
Episode 280	Average evaluation: 200.0
Episode 290	Average evaluation: 200.0
Episode 300	Average evaluation: 200.0
Episode 310	Average evaluation: 200.0
Episode 320	Average evaluation: 200.0
Episode 330	Average evaluation: 200.0
Episode 340	Average evaluation: 200.0
Episode 350	Average evaluation: 200.0
Episode 360	Average evaluation: 200.0
Episode 370	Average evaluation: 200.0
Episode 380	Average evaluation: 200.0
Episode 390	Average evaluation: 200.0
Episode 400	Average evaluation: 200.0
Episode 410	Average evaluation: 200.0
Episode 420	Average evaluation: 200.0
Episode 430	Average evaluation: 200.0
Episode 440	Average evaluation: 200.0
Episode 450	Average evaluation: 199.71
Episode 460	Average evaluation: 199.86
Episode 470	Average evaluation: 199.99
Episode 480	Average evaluation: 199.77
Episode 490	Average evaluation: 200.0
Episode 500	Average evaluation: 197.61
Episode 510	Average evaluation: 198.47
Episode 520	Average evaluation: 198.77
Episode 530	Average evaluation: 199.0
Episode 540	Average evaluation: 199.49
Episode 550	Average evaluation: 199.99
Episode 560	Average evaluation: 200.0
Episode 570	Average evaluation: 200.0
Episode 580	Average evaluation: 200.0
Episode 590	Average evaluation: 200.0
Episode 600	Average evaluation: 200.0
Episode 610	Average evaluation: 200.0
Episode 620	Average evaluation: 200.0
Episode 630	Average evaluation: 200.0
Episode 640	Average evaluation: 200.0
Episode 650	Average evaluation: 200.0
Episode 660	Average evaluation: 200.0
Episode 670	Average evaluation: 200.0
Episode 680	Average evaluation: 200.0
Episode 690	Average evaluation: 200.0
Episode 700	Average evaluation: 200.0
Episode 710	Average evaluation: 200.0
Episode 720	Average evaluation: 200.0
Episode 730	Average evaluation: 199.58
Episode 740	Average evaluation: 197.24
Episode 750	Average evaluation: 192.51
Episode 760	Average evaluation: 196.22
Episode 770	Average evaluation: 198.17
Episode 780	Average evaluation: 196.84
Episode 790	Average evaluation: 192.21
Episode 800	Average evaluation: 194.86
Best evaluation:  200.0  reached at episode  130 . Model saved in folder best.
Run # 4
Batch size larger than 1 not implemented yet. Program will continue with batch size set to 1.
Episode 10	Average evaluation: 19.24
Episode 20	Average evaluation: 56.16
Episode 30	Average evaluation: 69.71
Episode 40	Average evaluation: 57.29
Episode 50	Average evaluation: 83.49
Episode 60	Average evaluation: 118.19
Episode 70	Average evaluation: 112.27
Episode 80	Average evaluation: 134.25
Episode 90	Average evaluation: 123.48
Episode 100	Average evaluation: 83.98
Episode 110	Average evaluation: 87.15
Episode 120	Average evaluation: 101.65
Episode 130	Average evaluation: 96.79
Episode 140	Average evaluation: 98.02
Episode 150	Average evaluation: 102.06
Episode 160	Average evaluation: 121.76
Episode 170	Average evaluation: 156.83
Episode 180	Average evaluation: 165.94
Episode 190	Average evaluation: 121.68
Episode 200	Average evaluation: 137.05
Episode 210	Average evaluation: 191.07
Episode 220	Average evaluation: 199.45
Episode 230	Average evaluation: 199.2
Episode 240	Average evaluation: 200.0
Episode 250	Average evaluation: 200.0
Episode 260	Average evaluation: 200.0
Episode 270	Average evaluation: 200.0
Episode 280	Average evaluation: 200.0
Episode 290	Average evaluation: 200.0
Episode 300	Average evaluation: 200.0
Episode 310	Average evaluation: 200.0
Episode 320	Average evaluation: 200.0
Episode 330	Average evaluation: 195.68
Episode 340	Average evaluation: 196.48
Episode 350	Average evaluation: 200.0
Episode 360	Average evaluation: 200.0
Episode 370	Average evaluation: 200.0
Episode 380	Average evaluation: 200.0
Episode 390	Average evaluation: 200.0
Episode 400	Average evaluation: 200.0
Episode 410	Average evaluation: 200.0
Episode 420	Average evaluation: 200.0
Episode 430	Average evaluation: 200.0
Episode 440	Average evaluation: 200.0
Episode 450	Average evaluation: 200.0
Episode 460	Average evaluation: 200.0
Episode 470	Average evaluation: 200.0
Episode 480	Average evaluation: 200.0
Episode 490	Average evaluation: 200.0
Episode 500	Average evaluation: 200.0
Episode 510	Average evaluation: 200.0
Episode 520	Average evaluation: 200.0
Episode 530	Average evaluation: 200.0
Episode 540	Average evaluation: 200.0
Episode 550	Average evaluation: 200.0
Episode 560	Average evaluation: 200.0
Episode 570	Average evaluation: 200.0
Episode 580	Average evaluation: 200.0
Episode 590	Average evaluation: 200.0
Episode 600	Average evaluation: 200.0
Episode 610	Average evaluation: 200.0
Episode 620	Average evaluation: 200.0
Episode 630	Average evaluation: 200.0
Episode 640	Average evaluation: 200.0
Episode 650	Average evaluation: 200.0
Episode 660	Average evaluation: 200.0
Episode 670	Average evaluation: 200.0
Episode 680	Average evaluation: 200.0
Episode 690	Average evaluation: 200.0
Episode 700	Average evaluation: 200.0
Episode 710	Average evaluation: 200.0
Episode 720	Average evaluation: 200.0
Episode 730	Average evaluation: 200.0
Episode 740	Average evaluation: 200.0
Episode 750	Average evaluation: 200.0
Episode 760	Average evaluation: 200.0
Episode 770	Average evaluation: 200.0
Episode 780	Average evaluation: 200.0
Episode 790	Average evaluation: 200.0
Episode 800	Average evaluation: 200.0
Best evaluation:  200.0  reached at episode  240 . Model saved in folder best.
Run # 5
Batch size larger than 1 not implemented yet. Program will continue with batch size set to 1.
Episode 10	Average evaluation: 5.15
Episode 20	Average evaluation: 34.08
Episode 30	Average evaluation: 44.71
Episode 40	Average evaluation: 47.5
Episode 50	Average evaluation: 36.61
Episode 60	Average evaluation: 33.34
Episode 70	Average evaluation: 30.53
Episode 80	Average evaluation: 31.64
Episode 90	Average evaluation: 23.93
Episode 100	Average evaluation: 23.26
Episode 110	Average evaluation: 24.4
Episode 120	Average evaluation: 24.52
Episode 130	Average evaluation: 31.69
Episode 140	Average evaluation: 46.38
Episode 150	Average evaluation: 47.75
Episode 160	Average evaluation: 56.04
Episode 170	Average evaluation: 60.99
Episode 180	Average evaluation: 73.46
Episode 190	Average evaluation: 71.43
Episode 200	Average evaluation: 88.23
Episode 210	Average evaluation: 105.02
Episode 220	Average evaluation: 99.34
Episode 230	Average evaluation: 104.19
Episode 240	Average evaluation: 111.9
Episode 250	Average evaluation: 128.4
Episode 260	Average evaluation: 139.91
Episode 270	Average evaluation: 140.92
Episode 280	Average evaluation: 140.77
Episode 290	Average evaluation: 140.68
Episode 300	Average evaluation: 145.06
Episode 310	Average evaluation: 154.79
Episode 320	Average evaluation: 148.87
Episode 330	Average evaluation: 139.44
Episode 340	Average evaluation: 146.93
Episode 350	Average evaluation: 154.91
Episode 360	Average evaluation: 177.09
Episode 370	Average evaluation: 198.4
Episode 380	Average evaluation: 200.0
Episode 390	Average evaluation: 199.37
Episode 400	Average evaluation: 199.66
Episode 410	Average evaluation: 199.94
Episode 420	Average evaluation: 200.0
Episode 430	Average evaluation: 200.0
Episode 440	Average evaluation: 199.23
Episode 450	Average evaluation: 199.34
Episode 460	Average evaluation: 199.81
Episode 470	Average evaluation: 198.78
Episode 480	Average evaluation: 199.8
Episode 490	Average evaluation: 200.0
Episode 500	Average evaluation: 200.0
Episode 510	Average evaluation: 200.0
Episode 520	Average evaluation: 200.0
Episode 530	Average evaluation: 200.0
Episode 540	Average evaluation: 200.0
Episode 550	Average evaluation: 200.0
Episode 560	Average evaluation: 200.0
Episode 570	Average evaluation: 199.54
Episode 580	Average evaluation: 200.0
Episode 590	Average evaluation: 200.0
Episode 600	Average evaluation: 200.0
Episode 610	Average evaluation: 200.0
Episode 620	Average evaluation: 200.0
Episode 630	Average evaluation: 200.0
Episode 640	Average evaluation: 200.0
Episode 650	Average evaluation: 200.0
Episode 660	Average evaluation: 200.0
Episode 670	Average evaluation: 200.0
Episode 680	Average evaluation: 200.0
Episode 690	Average evaluation: 200.0
Episode 700	Average evaluation: 200.0
Episode 710	Average evaluation: 200.0
Episode 720	Average evaluation: 200.0
Episode 730	Average evaluation: 200.0
Episode 740	Average evaluation: 200.0
Episode 750	Average evaluation: 200.0
Episode 760	Average evaluation: 200.0
Episode 770	Average evaluation: 200.0
Episode 780	Average evaluation: 200.0
Episode 790	Average evaluation: 200.0
Episode 800	Average evaluation: 200.0
Best evaluation:  200.0  reached at episode  380 . Model saved in folder best.
Run # 6
Batch size larger than 1 not implemented yet. Program will continue with batch size set to 1.
Episode 10	Average evaluation: 12.04
Episode 20	Average evaluation: 78.5
Episode 30	Average evaluation: 155.05
Episode 40	Average evaluation: 196.97
Episode 50	Average evaluation: 199.99
Episode 60	Average evaluation: 195.32
Episode 70	Average evaluation: 188.88
Episode 80	Average evaluation: 194.91
Episode 90	Average evaluation: 198.4
Episode 100	Average evaluation: 194.65
Episode 110	Average evaluation: 189.73
Episode 120	Average evaluation: 196.91
Episode 130	Average evaluation: 198.23
Episode 140	Average evaluation: 190.02
Episode 150	Average evaluation: 175.94
Episode 160	Average evaluation: 172.23
Episode 170	Average evaluation: 168.36
Episode 180	Average evaluation: 154.83
Episode 190	Average evaluation: 152.99
Episode 200	Average evaluation: 147.55
Episode 210	Average evaluation: 157.77
Episode 220	Average evaluation: 165.73
Episode 230	Average evaluation: 169.79
Episode 240	Average evaluation: 173.47
Episode 250	Average evaluation: 179.23
Episode 260	Average evaluation: 185.51
Episode 270	Average evaluation: 191.76
Episode 280	Average evaluation: 196.26
Episode 290	Average evaluation: 199.61
Episode 300	Average evaluation: 199.76
Episode 310	Average evaluation: 200.0
Episode 320	Average evaluation: 199.94
Episode 330	Average evaluation: 199.86
Episode 340	Average evaluation: 199.69
Episode 350	Average evaluation: 194.57
Episode 360	Average evaluation: 179.61
Episode 370	Average evaluation: 170.58
Episode 380	Average evaluation: 161.81
Episode 390	Average evaluation: 168.45
Episode 400	Average evaluation: 180.38
Episode 410	Average evaluation: 171.83
Episode 420	Average evaluation: 166.48
Episode 430	Average evaluation: 163.02
Episode 440	Average evaluation: 176.05
Episode 450	Average evaluation: 192.52
Episode 460	Average evaluation: 190.57
Episode 470	Average evaluation: 181.07
Episode 480	Average evaluation: 183.13
Episode 490	Average evaluation: 173.32
Episode 500	Average evaluation: 172.22
Episode 510	Average evaluation: 186.39
Episode 520	Average evaluation: 196.87
Episode 530	Average evaluation: 195.51
Episode 540	Average evaluation: 199.04
Episode 550	Average evaluation: 199.89
Episode 560	Average evaluation: 199.83
Episode 570	Average evaluation: 199.26
Episode 580	Average evaluation: 196.0
Episode 590	Average evaluation: 192.57
Episode 600	Average evaluation: 182.84
Episode 610	Average evaluation: 190.91
Episode 620	Average evaluation: 194.68
Episode 630	Average evaluation: 194.93
Episode 640	Average evaluation: 198.32
Episode 650	Average evaluation: 199.49
Episode 660	Average evaluation: 199.9
Episode 670	Average evaluation: 199.9
Episode 680	Average evaluation: 199.98
Episode 690	Average evaluation: 200.0
Episode 700	Average evaluation: 199.99
Episode 710	Average evaluation: 199.76
Episode 720	Average evaluation: 199.77
Episode 730	Average evaluation: 200.0
Episode 740	Average evaluation: 200.0
Episode 750	Average evaluation: 200.0
Episode 760	Average evaluation: 200.0
Episode 770	Average evaluation: 200.0
Episode 780	Average evaluation: 200.0
Episode 790	Average evaluation: 200.0
Episode 800	Average evaluation: 200.0
Best evaluation:  200.0  reached at episode  310 . Model saved in folder best.
Run # 7
Batch size larger than 1 not implemented yet. Program will continue with batch size set to 1.
Episode 10	Average evaluation: 198.94
Episode 20	Average evaluation: 199.54
Episode 30	Average evaluation: 199.83
Episode 40	Average evaluation: 200.0
Episode 50	Average evaluation: 200.0
Episode 60	Average evaluation: 200.0
Episode 70	Average evaluation: 200.0
Episode 80	Average evaluation: 200.0
Episode 90	Average evaluation: 200.0
Episode 100	Average evaluation: 200.0
Episode 110	Average evaluation: 200.0
Episode 120	Average evaluation: 200.0
Episode 130	Average evaluation: 200.0
Episode 140	Average evaluation: 200.0
Episode 150	Average evaluation: 200.0
Episode 160	Average evaluation: 200.0
Episode 170	Average evaluation: 200.0
Episode 180	Average evaluation: 200.0
Episode 190	Average evaluation: 200.0
Episode 200	Average evaluation: 200.0
Episode 210	Average evaluation: 200.0
Episode 220	Average evaluation: 200.0
Episode 230	Average evaluation: 200.0
Episode 240	Average evaluation: 200.0
Episode 250	Average evaluation: 200.0
Episode 260	Average evaluation: 200.0
Episode 270	Average evaluation: 200.0
Episode 280	Average evaluation: 200.0
Episode 290	Average evaluation: 200.0
Episode 300	Average evaluation: 200.0
Episode 310	Average evaluation: 200.0
Episode 320	Average evaluation: 200.0
Episode 330	Average evaluation: 200.0
Episode 340	Average evaluation: 200.0
Episode 350	Average evaluation: 200.0
Episode 360	Average evaluation: 200.0
Episode 370	Average evaluation: 200.0
Episode 380	Average evaluation: 200.0
Episode 390	Average evaluation: 200.0
Episode 400	Average evaluation: 200.0
Episode 410	Average evaluation: 200.0
Episode 420	Average evaluation: 200.0
Episode 430	Average evaluation: 200.0
Episode 440	Average evaluation: 200.0
Episode 450	Average evaluation: 200.0
Episode 460	Average evaluation: 200.0
Episode 470	Average evaluation: 200.0
Episode 480	Average evaluation: 200.0
Episode 490	Average evaluation: 200.0
Episode 500	Average evaluation: 200.0
Episode 510	Average evaluation: 200.0
Episode 520	Average evaluation: 200.0
Episode 530	Average evaluation: 200.0
Episode 540	Average evaluation: 200.0
Episode 550	Average evaluation: 200.0
Episode 560	Average evaluation: 200.0
Episode 570	Average evaluation: 200.0
Episode 580	Average evaluation: 200.0
Episode 590	Average evaluation: 200.0
Episode 600	Average evaluation: 200.0
Episode 610	Average evaluation: 200.0
Episode 620	Average evaluation: 200.0
Episode 630	Average evaluation: 200.0
Episode 640	Average evaluation: 200.0
Episode 650	Average evaluation: 200.0
Episode 660	Average evaluation: 199.81
Episode 670	Average evaluation: 200.0
Episode 680	Average evaluation: 200.0
Episode 690	Average evaluation: 200.0
Episode 700	Average evaluation: 200.0
Episode 710	Average evaluation: 200.0
Episode 720	Average evaluation: 200.0
Episode 730	Average evaluation: 200.0
Episode 740	Average evaluation: 200.0
Episode 750	Average evaluation: 200.0
Episode 760	Average evaluation: 200.0
Episode 770	Average evaluation: 200.0
Episode 780	Average evaluation: 200.0
Episode 790	Average evaluation: 200.0
Episode 800	Average evaluation: 200.0
Best evaluation:  200.0  reached at episode  40 . Model saved in folder best.
Run # 8
Batch size larger than 1 not implemented yet. Program will continue with batch size set to 1.
Episode 10	Average evaluation: 200.0
Episode 20	Average evaluation: 200.0
Episode 30	Average evaluation: 200.0
Episode 40	Average evaluation: 200.0
Episode 50	Average evaluation: 200.0
Episode 60	Average evaluation: 200.0
Episode 70	Average evaluation: 200.0
Episode 80	Average evaluation: 200.0
Episode 90	Average evaluation: 200.0
Episode 100	Average evaluation: 200.0
Episode 110	Average evaluation: 200.0
Episode 120	Average evaluation: 200.0
Episode 130	Average evaluation: 200.0
Episode 140	Average evaluation: 200.0
Episode 150	Average evaluation: 200.0
Episode 160	Average evaluation: 200.0
Episode 170	Average evaluation: 200.0
Episode 180	Average evaluation: 200.0
Episode 190	Average evaluation: 200.0
Episode 200	Average evaluation: 200.0
Episode 210	Average evaluation: 200.0
Episode 220	Average evaluation: 200.0
Episode 230	Average evaluation: 200.0
Episode 240	Average evaluation: 200.0
Episode 250	Average evaluation: 200.0
Episode 260	Average evaluation: 200.0
Episode 270	Average evaluation: 200.0
Episode 280	Average evaluation: 200.0
Episode 290	Average evaluation: 199.99
Episode 300	Average evaluation: 195.39
Episode 310	Average evaluation: 175.38
Episode 320	Average evaluation: 168.51
Episode 330	Average evaluation: 186.54
Episode 340	Average evaluation: 197.99
Episode 350	Average evaluation: 199.95
Episode 360	Average evaluation: 200.0
Episode 370	Average evaluation: 200.0
Episode 380	Average evaluation: 200.0
Episode 390	Average evaluation: 200.0
Episode 400	Average evaluation: 200.0
Episode 410	Average evaluation: 200.0
Episode 420	Average evaluation: 200.0
Episode 430	Average evaluation: 200.0
Episode 440	Average evaluation: 200.0
Episode 450	Average evaluation: 200.0
Episode 460	Average evaluation: 200.0
Episode 470	Average evaluation: 199.75
Episode 480	Average evaluation: 196.06
Episode 490	Average evaluation: 199.87
Episode 500	Average evaluation: 191.27
Episode 510	Average evaluation: 195.11
Episode 520	Average evaluation: 198.39
Episode 530	Average evaluation: 193.69
Episode 540	Average evaluation: 198.24
Episode 550	Average evaluation: 198.83
Episode 560	Average evaluation: 198.28
Episode 570	Average evaluation: 196.57
Episode 580	Average evaluation: 198.99
Episode 590	Average evaluation: 200.0
Episode 600	Average evaluation: 200.0
Episode 610	Average evaluation: 200.0
Episode 620	Average evaluation: 200.0
Episode 630	Average evaluation: 200.0
Episode 640	Average evaluation: 200.0
Episode 650	Average evaluation: 200.0
Episode 660	Average evaluation: 200.0
Episode 670	Average evaluation: 200.0
Episode 680	Average evaluation: 200.0
Episode 690	Average evaluation: 199.3
Episode 700	Average evaluation: 195.97
Episode 710	Average evaluation: 198.52
Episode 720	Average evaluation: 199.14
Episode 730	Average evaluation: 199.87
Episode 740	Average evaluation: 200.0
Episode 750	Average evaluation: 200.0
Episode 760	Average evaluation: 200.0
Episode 770	Average evaluation: 200.0
Episode 780	Average evaluation: 199.86
Episode 790	Average evaluation: 199.81
Episode 800	Average evaluation: 199.98
Best evaluation:  200.0  reached at episode  10 . Model saved in folder best.
Run # 9
Batch size larger than 1 not implemented yet. Program will continue with batch size set to 1.
Episode 10	Average evaluation: 148.11
Episode 20	Average evaluation: 198.52
Episode 30	Average evaluation: 200.0
Episode 40	Average evaluation: 200.0
Episode 50	Average evaluation: 200.0
Episode 60	Average evaluation: 200.0
Episode 70	Average evaluation: 199.28
Episode 80	Average evaluation: 189.9
Episode 90	Average evaluation: 179.98
Episode 100	Average evaluation: 172.94
Episode 110	Average evaluation: 174.96
Episode 120	Average evaluation: 184.36
Episode 130	Average evaluation: 180.21
Episode 140	Average evaluation: 185.77
Episode 150	Average evaluation: 191.11
Episode 160	Average evaluation: 199.52
Episode 170	Average evaluation: 200.0
Episode 180	Average evaluation: 200.0
Episode 190	Average evaluation: 200.0
Episode 200	Average evaluation: 200.0
Episode 210	Average evaluation: 199.82
Episode 220	Average evaluation: 199.69
Episode 230	Average evaluation: 199.45
Episode 240	Average evaluation: 198.91
Episode 250	Average evaluation: 197.34
Episode 260	Average evaluation: 200.0
Episode 270	Average evaluation: 200.0
Episode 280	Average evaluation: 200.0
Episode 290	Average evaluation: 200.0
Episode 300	Average evaluation: 200.0
Episode 310	Average evaluation: 200.0
Episode 320	Average evaluation: 200.0
Episode 330	Average evaluation: 200.0
Episode 340	Average evaluation: 200.0
Episode 350	Average evaluation: 200.0
Episode 360	Average evaluation: 200.0
Episode 370	Average evaluation: 200.0
Episode 380	Average evaluation: 200.0
Episode 390	Average evaluation: 200.0
Episode 400	Average evaluation: 200.0
Episode 410	Average evaluation: 200.0
Episode 420	Average evaluation: 199.85
Episode 430	Average evaluation: 199.81
Episode 440	Average evaluation: 200.0
Episode 450	Average evaluation: 200.0
Episode 460	Average evaluation: 200.0
Episode 470	Average evaluation: 200.0
Episode 480	Average evaluation: 200.0
Episode 490	Average evaluation: 200.0
Episode 500	Average evaluation: 200.0
Episode 510	Average evaluation: 200.0
Episode 520	Average evaluation: 200.0
Episode 530	Average evaluation: 200.0
Episode 540	Average evaluation: 200.0
Episode 550	Average evaluation: 200.0
Episode 560	Average evaluation: 200.0
Episode 570	Average evaluation: 200.0
Episode 580	Average evaluation: 200.0
Episode 590	Average evaluation: 200.0
Episode 600	Average evaluation: 200.0
Episode 610	Average evaluation: 200.0
Episode 620	Average evaluation: 200.0
Episode 630	Average evaluation: 200.0
Episode 640	Average evaluation: 200.0
Episode 650	Average evaluation: 200.0
Episode 660	Average evaluation: 200.0
Episode 670	Average evaluation: 200.0
Episode 680	Average evaluation: 200.0
Episode 690	Average evaluation: 200.0
Episode 700	Average evaluation: 200.0
Episode 710	Average evaluation: 200.0
Episode 720	Average evaluation: 200.0
Episode 730	Average evaluation: 200.0
Episode 740	Average evaluation: 200.0
Episode 750	Average evaluation: 199.73
Episode 760	Average evaluation: 199.82
Episode 770	Average evaluation: 200.0
Episode 780	Average evaluation: 200.0
Episode 790	Average evaluation: 200.0
Episode 800	Average evaluation: 200.0
Best evaluation:  200.0  reached at episode  30 . Model saved in folder best.