In [1]:
import torch
import numpy as np
import random
import os
from datetime import date

from BP_A2C.backpropamine_A2C import BP_RNetwork, Standard_RNetwork, Standard_FFNetwork
from BP_A2C.BP_A2C_agent import A2C_Agent

%matplotlib inline

In [2]:
# --- Environment / experiment configuration ---------------------------------
# Gym environment id; handed to A2C_Agent in the training cell.
env_name = 'CartPole-v0'

# Per-episode limits that are forwarded to the agent.
max_steps, max_reward = 200, 200

# Episode budgets for training and for the evaluation phase.
num_training_episodes, num_evaluation_episodes = 20000, 100

# Experiment-level counts (not consumed by the cells shown in this notebook excerpt).
n_runs, n_evaluations = 10, 100

# Run on the first CUDA device when one is visible, otherwise on the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
# --- A2C hyperparameters (all forwarded to A2C_Agent in the training cell) ---
# Loss-term weights.
entropy_coef, value_pred_coef = 0.03, 0.1

gammaR = 0.99        # presumably the reward discount factor -- confirm in A2C_Agent
max_grad_norm = 4.0  # presumably the gradient-clipping threshold -- confirm in A2C_Agent

# NOTE(review): the recorded training output states "Batch size larger than 1
# not implemented yet. Program will continue with batch size set to 1.", so
# this value is apparently not honoured by the agent.
batch_size = 128

# Reporting / checkpointing intervals.
print_every, save_every = 10, 50

In [4]:
# --- Optimizer (Adam) hyperparameters ----------------------------------------
# Step size handed to torch.optim.Adam, and the L2 coefficient that is recorded
# in the results directory name.
learning_rate, l2_coef = 1e-4, 0

In [5]:
# --- Model-selection / training-mode switches --------------------------------
# `selection_method` is passed to A2C_Agent; `training_method` and the range
# bounds are only written into the results directory name by this notebook.
selection_method, training_method = "evaluation", "range"

# Bounds appended to the directory name when training_method == "range".
range_min, range_max = 0.7, 9.0

In [6]:
# Load the pre-generated seed arrays (paths are relative to the working directory).
seed_dir = 'rstdp_cartpole_stuff/seeds'
training_seeds = np.load(seed_dir + '/training_seeds.npy')
evaluation_seeds = np.load(seed_dir + '/evaluation_seeds.npy')

In [7]:
# Create Results Directory
# Each execution of this cell creates a new directory under `results_root`
# whose name encodes a running id, today's date and the hyperparameters.
results_root = 'BP_A2C/training_results'

# A fresh checkout may not contain the results root yet; os.listdir would
# raise FileNotFoundError in that case, so create it on demand.
os.makedirs(results_root, exist_ok=True)

# The id is one more than the number of result directories already present
# (this evaluates to 1 when there are none).
existing_results = [entry for entry in os.listdir(results_root) if 'a2c_result' in entry]
result_id = len(existing_results) + 1

# Zero-padded YYYYMMDD date tag. Concatenating year/month/day without padding
# is ambiguous (2023-01-11 and 2023-11-01 would both become "2023111") and
# does not sort chronologically.
date_tag = date.today().strftime('%Y%m%d')

hyperparameter_tag = (
    "_{}_entropycoef_{}_valuepredcoef_{}_batchsize_{}_maxsteps_{}_"
    "maxgradnorm_{}_gammaR_{}_l2coef_{}_learningrate_{}_numtrainepisodes_{}_"
    "selectionmethod_{}_trainingmethod_{}"
).format(
    date_tag, entropy_coef, value_pred_coef, batch_size, max_steps, max_grad_norm, gammaR,
    l2_coef, learning_rate, num_training_episodes, selection_method, training_method)

result_dir = results_root + '/TESTBP_RNN_a2c_result_' + str(result_id) + hyperparameter_tag
if training_method == "range":
    result_dir += "_rangemin_{}_rangemax_{}".format(range_min, range_max)

# os.mkdir (not makedirs with exist_ok) on purpose: an already existing run
# directory must raise instead of being silently reused and overwritten.
os.mkdir(result_dir)
print('Created Directory {} to store the results in'.format(result_dir))

Created Directory BP_A2C/training_results/TESTBP_RNN_a2c_result_21_20231031_entropycoef_0.03_valuepredcoef_0.1_batchsize_128_maxsteps_200_maxgradnorm_4.0_gammaR_0.99_l2coef_0_learningrate_0.0001_numtrainepisodes_20000_selectionmethod_evaluation_trainingmethod_range_rangemin_0.7_rangemax_9.0 to store the results in


In [8]:
# Train one backpropamine recurrent A2C agent for a single, fixed run index.
#
# NOTE(review): the recorded output of this cell ends with
# "TypeError: can't convert cuda:0 device type tensor to numpy". The offending
# conversion is not in this notebook (presumably inside A2C_Agent); it needs a
# `.cpu()` call there before the run can complete on a CUDA device.
smoothed_scores_dqn_all = []
dqn_completion_after = []
best_average_after_all = []

# Debug setup: a single hard-coded run instead of a loop such as
# `for i_run in [6, 7]:`.
i_run = 2
print("Run # {}".format(i_run))
# TODO (original author: "REMOVE THIS 2 LATER"): temporary seed modification;
# note that the code divides the stored seed by 3.
seed = int(training_seeds[i_run]/3)

# Seed every random number generator available in this notebook. NumPy's
# global generator only accepts seeds in [0, 2**32 - 1], hence the modulo.
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed % (2**32))

# Constructor arguments are (4, 64, 2, seed) -- presumably input size, hidden
# size and number of actions for CartPole; confirm against BP_RNetwork.
agent_net = BP_RNetwork(4, 64, 2, seed).to(device)

# Pass the configured L2 coefficient to the optimizer: it is recorded in the
# results directory name, so it has to be the value actually used. With
# l2_coef = 0 this equals Adam's default (no weight decay).
optimizer = torch.optim.Adam(agent_net.parameters(), lr=learning_rate, weight_decay=l2_coef)
agent = A2C_Agent(env_name, seed, agent_net, entropy_coef, value_pred_coef, gammaR,
                    max_grad_norm, max_steps, batch_size, num_training_episodes, optimizer, print_every,
                    save_every, i_run, result_dir, selection_method, num_evaluation_episodes, evaluation_seeds, max_reward)

smoothed_scores, scores, best_average, best_average_after = agent.train_agent()
best_average_after_all.append(best_average_after)



Run # 2
Batch size larger than 1 not implemented yet. Program will continue with batch size set to 1.
0
0
0




TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [None]:
# Write one "<index>: <value>" line per collected run, followed by the mean and
# standard deviation over all collected values (no trailing newline).
summary_path = f"{result_dir}/best_average_after.txt"
with open(summary_path, 'w') as summary_file:
    summary_file.writelines(
        f"{run_idx}: {episode_count}\n"
        for run_idx, episode_count in enumerate(best_average_after_all)
    )

    mean_value = np.mean(best_average_after_all)
    std_value = np.std(best_average_after_all)
    summary_file.write(f"Average: {mean_value}, std dev: {std_value}")