In [11]:
import torch
import numpy as np
import random
import os
from datetime import date

from backpropamine_A2C import BP_RNetwork, Standard_RNetwork, Standard_FFNetwork
from BP_A2C.BP_A2C_agent import A2C_Agent

%matplotlib inline

In [12]:
# Environment-specific settings for the CartPole experiment.
env_name = 'CartPole-v0'
max_steps = 200                  # step limit handed to the agent
max_reward = 200                 # reward ceiling handed to the agent

# Experiment sizing.
n_runs = 10                      # number of independent training runs
n_evaluations = 100
num_training_episodes = 2000     # training episodes per run
num_evaluation_episodes = 100    # evaluation episodes handed to the agent

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [13]:
# Hyperparameters handed to the A2C agent.
gammaR = 0.99            # discount factor -- presumably for returns; confirm in A2C_Agent
entropy_coef = 0.03      # entropy coefficient
value_pred_coef = 0.1    # value-prediction coefficient
max_grad_norm = 4.0      # gradient-norm limit
# NOTE(review): the saved training log prints "Batch size larger than 1 not
# implemented yet", so the effective batch size appears to be 1 -- confirm.
batch_size = 128

# Reporting / checkpoint cadence.
print_every = 10
save_every = 50

In [14]:
# Settings for the Adam optimizer.
l2_coef = 0             # L2 coefficient; recorded in the results directory name
learning_rate = 0.0001  # learning rate given to torch.optim.Adam

In [15]:
# Labels for how the agent is selected and trained.
selection_method, training_method = "evaluation", "range"

# Lower and upper bounds forwarded to the agent's range-based training.
range_min, range_max = 0.9, 1.1

In [16]:
# Load the pre-generated seed arrays: one for the training runs and one that
# is handed to the agent for its evaluation episodes.
training_seeds, evaluation_seeds = map(
    np.load,
    (
        'rstdp_cartpole_stuff/seeds/training_seeds.npy',
        'rstdp_cartpole_stuff/seeds/evaluation_seeds.npy',
    ),
)

In [8]:
# Create Results Directory
# Make sure the results root exists so a fresh checkout does not fail on listdir.
os.makedirs('./BP_A2C/training_results/', exist_ok=True)
dirs = os.listdir('./BP_A2C/training_results/')

# Number this result after the existing 'a2c_result' entries (1 when none exist).
result_id = sum('a2c_result' in entry for entry in dirs) + 1

# Get today's date and add it to the results directory.
# A zero-padded YYYYMMDD stamp is used: concatenating the raw year/month/day
# numbers is ambiguous (2023-01-11 and 2023-11-01 would both become "2023111").
d = date.today()
date_stamp = d.strftime('%Y%m%d')

# Encode every hyperparameter in the directory name so results are self-describing.
name_template = (
    "_{}_entropycoef_{}_valuepredcoef_{}_batchsize_{}_maxsteps_{}_"
    "maxgradnorm_{}_gammaR_{}_l2coef_{}_learningrate_{}_numtrainepisodes_{}"
    "_selectionmethod_{}_trainingmethod_{}"
)
result_dir = 'BP_A2C/training_results/Standard_MLP_a2c_result_' + str(result_id) + name_template.format(
    date_stamp, entropy_coef, value_pred_coef, batch_size, max_steps, max_grad_norm, gammaR,
    l2_coef, learning_rate, num_training_episodes, selection_method, training_method)
if training_method == "range":
    result_dir += "_rangemin_{}_rangemax_{}".format(range_min, range_max)

# os.mkdir (not makedirs with exist_ok) so an existing result directory is never silently reused.
os.mkdir(result_dir)
print('Created Directory {} to store the results in'.format(result_dir))

Created Directory BP_A2C/training_results/Standard_MLP_a2c_result_12_20231024_entropycoef_0.03_valuepredcoef_0.1_batchsize_128_maxsteps_200_maxgradnorm_4.0_gammaR_0.99_l2coef_0_learningrate_0.0001_numtrainepisodes_10000_selectionmethod_evaluation_trainingmethod_range_rangemin_0.9_rangemax_1.1 to store the results in


In [17]:
# Per-run results, collected so they remain available after the loop finishes.
smoothed_scores_dqn_all = []   # one smoothed score series per run
dqn_completion_after = []      # per run: the `best_average_after` value returned by training

for i_run in range(n_runs):
    print("Run # {}".format(i_run))
    seed = int(training_seeds[i_run])

    # Seed every RNG used in this notebook so each run is reproducible.
    torch.manual_seed(seed)
    random.seed(seed)
    # np.random.seed only accepts values in [0, 2**32 - 1], hence the modulo.
    np.random.seed(seed % (2 ** 32))

    # Positional sizes are presumably (inputs, hidden, hidden, outputs) -- TODO confirm
    # against Standard_FFNetwork's signature.
    agent_net = Standard_FFNetwork(4, 64, 64, 2, seed)

    # Pass l2_coef as weight decay so the value recorded in the results directory
    # name is the one actually used (0 matches Adam's default, i.e. no decay).
    optimizer = torch.optim.Adam(agent_net.parameters(), lr=learning_rate, weight_decay=l2_coef)
    agent = A2C_Agent(env_name, seed, agent_net, entropy_coef, value_pred_coef, gammaR,
                      max_grad_norm, max_steps, batch_size, num_training_episodes, optimizer, print_every,
                      save_every, i_run, result_dir, selection_method, num_evaluation_episodes, evaluation_seeds, max_reward)

    # NOTE(review): range-based training is always used here, regardless of the
    # value of `training_method` -- confirm this is intended.
    smoothed_scores, scores, best_average, best_average_after = agent.train_agent_on_range(range_min, range_max)

    # Keep this run's results; previously they were overwritten by the next run.
    smoothed_scores_dqn_all.append(smoothed_scores)
    dqn_completion_after.append(best_average_after)


Run # 0
Batch size larger than 1 not implemented yet. Program will continue with batch size set to 1.
Episode 10	Average evaluation: 9.95
Episode 20	Average evaluation: 10.79
Episode 30	Average evaluation: 9.65
Episode 40	Average evaluation: 17.96
Episode 50	Average evaluation: 40.47
Episode 60	Average evaluation: 56.19
Episode 70	Average evaluation: 81.72
Episode 80	Average evaluation: 49.6
Episode 90	Average evaluation: 78.09
Episode 100	Average evaluation: 77.51
Episode 110	Average evaluation: 52.38
Episode 120	Average evaluation: 43.67
Episode 130	Average evaluation: 70.29
Episode 140	Average evaluation: 81.49
Episode 150	Average evaluation: 81.0
Episode 160	Average evaluation: 96.68
Episode 170	Average evaluation: 67.26
Episode 180	Average evaluation: 51.35
Episode 190	Average evaluation: 47.34
Episode 200	Average evaluation: 54.67
Episode 210	Average evaluation: 51.27
Episode 220	Average evaluation: 48.25
Episode 230	Average evaluation: 40.85
Episode 240	Average evaluation: 33.49