In [1]:
import torch
import numpy as np
from collections import deque
import matplotlib.pyplot as plt
%matplotlib inline

from IPython import display
from unityagents import UnityEnvironment

from utils import create_next_id_folder
from ddpg_agent_multi import SharedAgent

In [2]:
# Create simulation environment from the bundled Reacher build
env = UnityEnvironment(file_name='Reacher_Windows_x86_64_20/Reacher.exe')
# Work with the first brain the environment exposes; step/reset results are
# indexed by this name in the cells below.
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

INFO:unityagents:
'Academy' started successfully!
Unity Academy name: Academy
        Number of Brains: 1
        Number of External Brains : 1
        Lesson number : 0
        Reset Parameters :
		goal_size -> 5.0
		goal_speed -> 1.0
Unity brain name: ReacherBrain
        Number of Visual Observations (per agent): 0
        Vector Observation space type: continuous
        Vector Observation space size (per agent): 33
        Number of stacked Vector Observation: 1
        Vector Action space type: continuous
        Vector Action space size (per agent): 4
        Vector Action descriptions: , , , 


In [3]:
# Begin a fresh episode in training mode and keep the info for our brain
env_info = env.reset(train_mode=True)[brain_name]

# How many agents the environment reports
num_agents = len(env_info.agents)
print('Number of agents:', num_agents)

# Length of the action vector expected per agent
action_size = brain.vector_action_space_size
print('Size of each action:', action_size)

# Observations come back with one row per agent; the row length is the state size
states = env_info.vector_observations
state_size = states.shape[1]
print(f'There are {states.shape[0]} agents. Each observes a state with length: {state_size}')
print('The state for the first agent looks like:', states[0])

Number of agents: 20
Size of each action: 4
There are 20 agents. Each observes a state with length: 33
The state for the first agent looks like: [ 0.00000000e+00 -4.00000000e+00  0.00000000e+00  1.00000000e+00
 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08  0.00000000e+00
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00 -1.00000000e+01  0.00000000e+00
  1.00000000e+00 -0.00000000e+00 -0.00000000e+00 -4.37113883e-08
  0.00000000e+00  0.00000000e+00  0.00000000e+00  0.00000000e+00
  0.00000000e+00  0.00000000e+00  5.75471878e+00 -1.00000000e+00
  5.55726624e+00  0.00000000e+00  1.00000000e+00  0.00000000e+00
 -1.68164849e-01]


In [4]:
# Shape of the observation batch: (number of agents, observation length)
states.shape

(20, 33)

In [5]:
# Observation length per agent (the value stored in `state_size` above)
states.shape[1]

33

In [6]:
# Number of reward entries in the latest env_info (one per agent)
len(env_info.rewards)

20

In [7]:
# Build the shared agent (defined in ddpg_agent_multi). It is only instantiated
# here; the rollout below drives the environment with random actions instead.
agent = SharedAgent(state_size=state_size, action_size=action_size, random_seed=0)

# Baseline: play one episode with random actions to see what an untrained
# policy scores.
env_info = env.reset(train_mode=False)[brain_name]
states = env_info.vector_observations       # current observation of every agent
scores = np.zeros(num_agents)               # running reward total per agent
steps = 0
episode_finished = False
while not episode_finished:
    steps += 1
    # Gaussian noise per agent, limited to the valid action range [-1, 1]
    actions = np.clip(np.random.randn(num_agents, action_size), -1, 1)
    env_info = env.step(actions)[brain_name]    # send all actions to the environment
    next_states = env_info.vector_observations
    rewards = env_info.rewards
    dones = env_info.local_done
    scores += rewards                           # accumulate this step's rewards
    states = next_states
    # Stop as soon as any agent reports that its episode is over
    episode_finished = np.any(dones)
print('Total score (averaged over agents) this episode: {}'.format(np.mean(scores)))

Total score (averaged over agents) this episode: 0.1769999960437417


In [8]:
# Per-agent cumulative reward from the random-action episode above
scores

array([0.43999999, 0.31999999, 0.        , 0.        , 0.34999999,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.47999999, 0.22999999, 0.        , 0.26999999,
       0.38999999, 0.38999999, 0.        , 0.46999999, 0.2       ])

In [9]:
# Average reward per environment step for each agent in the random-action episode
scores / steps

array([0.00043956, 0.00031968, 0.        , 0.        , 0.00034965,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.00047952, 0.00022977, 0.        , 0.00026973,
       0.00038961, 0.00038961, 0.        , 0.00046953, 0.0001998 ])

In [10]:
import os
import json

def ddpg(env,
        state_size,
        action_size,
        random_seed,
        num_agents,
        n_episodes=2000,
        max_t=1000,
        actor_model_save_path='actor.pth',
        critic_model_save_path='critic.pth',
        BUFFER_SIZE = int(1e4),  # replay buffer size
        BATCH_SIZE = 128,        # minibatch size
        GAMMA = 0.99,            # discount factor
        TAU = 1e-3,              # for soft update of target parameters
        LR_ACTOR = 1e-4,         # learning rate of the actor
        LR_CRITIC = 1e-3,        # learning rate of the critic
        WEIGHT_DECAY = 0,        # L2 weight decay
        learn_every=20,
        learn_iterations=10,
        continue_from=None,
        extra_episodes=0,
        save_every=100,
        ):
    """Train one shared DDPG agent on a multi-agent Unity environment.

    Every agent in the environment acts through the same ``SharedAgent`` and
    feeds its experience to it. Learning is triggered every ``learn_every``
    environment steps via ``agent.learn_multiple``.

    Params
    ======
        env: Unity environment; its first brain (``env.brain_names[0]``) is used
        state_size (int): length of one agent's observation vector
        action_size (int): length of one agent's action vector
        random_seed (int): seed forwarded to ``SharedAgent``
        num_agents (int): number of agents stepped in parallel
        n_episodes (int): number of training episodes for a fresh run
        max_t (int): maximum number of timesteps per episode
        actor_model_save_path (str): file the actor weights are saved to
        critic_model_save_path (str): file the critic weights are saved to
        BUFFER_SIZE, BATCH_SIZE, GAMMA, TAU, LR_ACTOR, LR_CRITIC, WEIGHT_DECAY:
            hyperparameters forwarded unchanged to ``SharedAgent``
        learn_every (int): environment steps between learning phases
        learn_iterations (int): ``num_updates`` passed to ``learn_multiple``
        continue_from (str | None): folder holding ``actor.pth``, ``critic.pth``
            and ``scores.json`` of an earlier run to resume from
        extra_episodes (int): when resuming and > 0, the number of additional
            episodes to run (instead of ``n_episodes``)
        save_every (int): checkpoint interval in episodes; falsy disables it

    Returns
    =======
        list of np.ndarray: per-episode score vectors (one entry per agent),
        including the episodes loaded from ``continue_from``.
    """
    # Resolve the brain from the environment that was passed in instead of
    # relying on a notebook-level global.
    brain_name = env.brain_names[0]

    scores_agents = []                 # per-agent score vector of every episode
    scores_window = deque(maxlen=100)  # last 100 episodes, for the running mean
    agent = SharedAgent(state_size=state_size, action_size=action_size,
                    random_seed=random_seed,
                    BUFFER_SIZE=BUFFER_SIZE,
                    BATCH_SIZE=BATCH_SIZE,
                    GAMMA=GAMMA,
                    TAU=TAU,
                    LR_ACTOR=LR_ACTOR,
                    LR_CRITIC=LR_CRITIC,
                    WEIGHT_DECAY=WEIGHT_DECAY,
                  )

    if continue_from is not None:
        # NOTE(review): only the local actor/critic weights are restored; whether
        # SharedAgent keeps target networks that also need loading is not visible here.
        agent.actor_local.load_state_dict(torch.load(os.path.join(continue_from, 'actor.pth')))
        agent.critic_local.load_state_dict(torch.load(os.path.join(continue_from, 'critic.pth')))
        with open(os.path.join(continue_from, "scores.json"), 'r') as file:
            # Convert back to arrays so the history has the same element type as
            # freshly recorded episodes (callers call .tolist() on each entry).
            scores_agents = [np.array(score) for score in json.load(file)]
        scores_window.extend(scores_agents[-100:])

    # Episode numbering continues after any loaded history.
    first_episode = len(scores_agents) + 1
    if continue_from is not None and extra_episodes > 0:
        episodes_to_run = extra_episodes
    else:
        episodes_to_run = n_episodes

    for i_episode in range(first_episode, first_episode + episodes_to_run):

        env_info = env.reset(train_mode=True)[brain_name]
        states = env_info.vector_observations                  # get the current state
        scores = np.zeros(num_agents)
        t_to_learn = 0
        for _ in range(max_t):
            # The shared policy is queried once per agent observation
            actions = [agent.act(state) for state in states]
            actions = np.clip(actions, -1, 1)                  # all actions between -1 and 1
            env_info = env.step(actions)[brain_name]           # send all actions to the environment
            next_states = env_info.vector_observations         # get next state (for each agent)
            rewards = env_info.rewards                         # get reward (for each agent)
            dones = env_info.local_done                        # see if episode finished

            scores += rewards

            # Hand every agent's transition to the shared agent
            for i in range(num_agents):
                agent.step(states[i], actions[i], rewards[i], next_states[i], dones[i])
            t_to_learn += 1

            if t_to_learn >= learn_every:
                # Run `learn_iterations` updates every `learn_every` steps
                agent.learn_multiple(num_updates=learn_iterations)
                t_to_learn = 0

            states = next_states                               # roll over states to next time step
            if np.any(dones):                                  # exit loop if episode finished
                break

        scores_window.append(scores)                      # save most recent score
        scores_agents.append(scores)                      # save most recent score

        # Per-agent mean over the window, then the mean across agents
        column_means_np = np.array(scores_window).mean(axis=0)
        column_means_list = list(column_means_np)
        mean_of_column_means_np = column_means_np.mean()

        print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, mean_of_column_means_np), column_means_list, end="")
        if i_episode % 100 == 0:
            print('\rEpisode {}\tAverage Score: {:.2f}'.format(i_episode, mean_of_column_means_np), column_means_list)

        if save_every and i_episode % save_every == 0:
            torch.save(agent.actor_local.state_dict(), actor_model_save_path)
            torch.save(agent.critic_local.state_dict(), critic_model_save_path)
            # Checkpoint the full score history next to the actor weights
            scores_path = os.path.join(os.path.dirname(actor_model_save_path), 'scores.json')
            with open(scores_path, 'w') as f:
                json.dump([episode_scores.tolist() for episode_scores in scores_agents],
                          f, indent=4)  # indent makes it human-readable

    torch.save(agent.actor_local.state_dict(), actor_model_save_path)
    torch.save(agent.critic_local.state_dict(), critic_model_save_path)
    return scores_agents

In [12]:
import os
import json
random_seed = 0
base_path = 'ddpg_trials'

# Hyperparameter sets to train; each entry gets its own trial folder, and the
# exact settings used are written to parameters.json inside that folder.
# Variants previously kept here as commented-out code (all 200 episodes,
# learn_iterations=10):
#   - BUFFER_SIZE=1e4 baseline
#   - the same with LR_ACTOR=1e-3
#   - BUFFER_SIZE=1e10, BATCH_SIZE=256, GAMMA=0.95, LR_ACTOR=5e-5,
#     LR_CRITIC=5e-4, WEIGHT_DECAY=0.01
combinations = [
    dict(
        n_episodes=500,
        max_t=1000,
        BUFFER_SIZE=int(1e6),
        BATCH_SIZE=128,
        GAMMA=0.99,
        TAU=1e-3,
        LR_ACTOR=1e-4,
        LR_CRITIC=1e-3,
        WEIGHT_DECAY=0,
        learn_every=20,
        learn_iterations=20,
    ),
]

for properties in combinations:
    new_path = create_next_id_folder(base_path)

    # Every key in `properties` is a keyword argument of ddpg
    scores = ddpg(
        env, state_size, action_size, random_seed,
        num_agents=num_agents,
        actor_model_save_path=os.path.join(new_path, 'actor.pth'),
        critic_model_save_path=os.path.join(new_path, 'critic.pth'),
        **properties
    )

    # Store the settings alongside the trained weights
    with open(os.path.join(new_path, 'parameters.json'), 'w') as json_file:
        json.dump(properties, json_file,  indent=4)

    # One list per episode, one value per agent
    list_of_python_lists = [arr.tolist() for arr in scores]

    with open(os.path.join(new_path, 'scores.json'), 'w') as f:
        json.dump(list_of_python_lists, f, indent=4)

    # Transpose to (agents, episodes) for the plotting cell that follows
    all_scores = np.array(list_of_python_lists).T

Successfully created directory: 'ddpg_trials\18'
Episode 100	Average Score: 4.02 [3.9242999122850595, 4.045799909569323, 4.053899909388274, 3.9599999114871025, 4.080299908798188, 3.8404999141581357, 3.8681999135389926, 4.086499908659607, 4.140199907459318, 3.9788999110646546, 3.9945999107137324, 4.139299907479435, 3.9139999125152825, 4.016399910226465, 4.105999908223748, 4.011399910338223, 4.256099904868751, 3.9427999118715524, 4.13379990760237, 3.920599912367761]]6]]]95]77]]6]]6]4]]]32]09]64]]]
Episode 200	Average Score: 17.58 [17.27519961386919, 17.86619960065931, 17.339099612440915, 17.44279961012304, 18.009299597460778, 17.82019960168749, 17.940499598998578, 17.318399612903594, 17.017899619620295, 17.937799599058927, 17.55759960755706, 17.642299605663865, 17.523099608328195, 17.992899597827346, 16.943899621274323, 17.096199617870152, 17.614099606294186, 17.590599606819453, 17.423499610554426, 18.178099593687804]]3]3]
Episode 300	Average Score: 22.41 [22.884499488491564, 21.94999950

In [None]:
# Plot the per-agent learning curves from the last trial (`all_scores` holds
# one row per agent) together with the mean across agents.
fig, ax = plt.subplots(figsize=(12, 7))

for agent_number, agent_scores in enumerate(all_scores, start=1):
    ax.plot(np.arange(len(agent_scores)), agent_scores, label=f'Agent {agent_number}')

# Mean over the agent axis gives one value per episode
column_means_np = all_scores.mean(axis=0)
ax.plot(np.arange(len(column_means_np)), column_means_np, label='Mean over Agents')

ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
ax.set_title("Scores of All 20 Agents Over Episodes")

# Many lines share the axes, so use a compact multi-column legend
ax.legend(loc='best', ncol=4, fontsize='small')
ax.grid(True, linestyle='--', alpha=0.7)

# Keep labels and title from overlapping
fig.tight_layout()
plt.show()

In [None]:
import os
import json
random_seed = 0
base_path = 'ddpg_trials'

# Reference settings; each trial below lists only what it changes.
baseline = dict(
    n_episodes=200,
    max_t=1000,
    BUFFER_SIZE=int(1e6),
    BATCH_SIZE=128,
    GAMMA=0.99,
    TAU=1e-3,
    LR_ACTOR=1e-4,
    LR_CRITIC=1e-3,
    WEIGHT_DECAY=0,
    learn_every=20,
    learn_iterations=10,
)

# Variants previously kept here as commented-out code:
#   - GAMMA=0.95, TAU=1e-2, LR_ACTOR=1e-5, LR_CRITIC=1e-5
#   - baseline with BUFFER_SIZE=1e4
combinations = [
    {**baseline, "GAMMA": 0.95, "TAU": 1e-2, "LR_CRITIC": 1e-4},
    dict(baseline),
    {**baseline, "BATCH_SIZE": 256, "LR_CRITIC": 1e-4},
]

for properties in combinations:
    new_path = create_next_id_folder(base_path)

    # Every key in `properties` is a keyword argument of ddpg
    scores = ddpg(
        env, state_size, action_size, random_seed,
        num_agents=num_agents,
        actor_model_save_path=os.path.join(new_path, 'actor.pth'),
        critic_model_save_path=os.path.join(new_path, 'critic.pth'),
        **properties
    )

    # Store the settings alongside the trained weights
    with open(os.path.join(new_path, 'parameters.json'), 'w') as json_file:
        json.dump(properties, json_file,  indent=4)

    # One list per episode, one value per agent
    list_of_python_lists = [arr.tolist() for arr in scores]

    with open(os.path.join(new_path, 'scores.json'), 'w') as f:
        json.dump(list_of_python_lists, f, indent=4)

    # Transpose to (agents, episodes) so each row is one agent's curve
    all_scores = np.array(list_of_python_lists).T

    fig, ax = plt.subplots(figsize=(12, 7))

    for agent_number, agent_scores in enumerate(all_scores, start=1):
        ax.plot(np.arange(len(agent_scores)), agent_scores, label=f'Agent {agent_number}')

    # Mean over the agent axis gives one value per episode
    column_means_np = all_scores.mean(axis=0)
    ax.plot(np.arange(len(column_means_np)), column_means_np, label='Mean over Agents')

    ax.set_ylabel('Score')
    ax.set_xlabel('Episode #')
    ax.set_title("Scores of All 20 Agents Over Episodes")

    # Many lines share the axes, so use a compact multi-column legend
    ax.legend(loc='best', ncol=4, fontsize='small')
    ax.grid(True, linestyle='--', alpha=0.7)

    # Keep labels and title from overlapping
    fig.tight_layout()
    plt.show()

In [None]:
import os
import json
random_seed = 0
base_path = 'ddpg_trials'

# Reference settings; each trial below lists only what it changes.
baseline = dict(
    n_episodes=200,
    max_t=1000,
    BUFFER_SIZE=int(1e6),
    BATCH_SIZE=128,
    GAMMA=0.99,
    TAU=1e-3,
    LR_ACTOR=1e-4,
    LR_CRITIC=1e-3,
    WEIGHT_DECAY=0,
    learn_every=20,
    learn_iterations=10,
)

# Variant previously kept here as commented-out code:
#   - LR_ACTOR=5e-4, LR_CRITIC=5e-3
combinations = [
    {**baseline, "GAMMA": 0.98},
    {**baseline, "BUFFER_SIZE": int(1e8)},
    {**baseline, "n_episodes": 500},
]

for properties in combinations:
    new_path = create_next_id_folder(base_path)

    # Every key in `properties` is a keyword argument of ddpg
    scores = ddpg(
        env, state_size, action_size, random_seed,
        num_agents=num_agents,
        actor_model_save_path=os.path.join(new_path, 'actor.pth'),
        critic_model_save_path=os.path.join(new_path, 'critic.pth'),
        **properties
    )

    # Store the settings alongside the trained weights
    with open(os.path.join(new_path, 'parameters.json'), 'w') as json_file:
        json.dump(properties, json_file,  indent=4)

    # One list per episode, one value per agent
    list_of_python_lists = [arr.tolist() for arr in scores]

    with open(os.path.join(new_path, 'scores.json'), 'w') as f:
        json.dump(list_of_python_lists, f, indent=4)

    # Transpose to (agents, episodes) so each row is one agent's curve
    all_scores = np.array(list_of_python_lists).T

    fig, ax = plt.subplots(figsize=(12, 7))

    for agent_number, agent_scores in enumerate(all_scores, start=1):
        ax.plot(np.arange(len(agent_scores)), agent_scores, label=f'Agent {agent_number}')

    # Mean over the agent axis gives one value per episode
    column_means_np = all_scores.mean(axis=0)
    ax.plot(np.arange(len(column_means_np)), column_means_np, label='Mean over Agents')

    ax.set_ylabel('Score')
    ax.set_xlabel('Episode #')
    ax.set_title("Scores of All 20 Agents Over Episodes")

    # Many lines share the axes, so use a compact multi-column legend
    ax.legend(loc='best', ncol=4, fontsize='small')
    ax.grid(True, linestyle='--', alpha=0.7)

    # Keep labels and title from overlapping
    fig.tight_layout()
    plt.show()

In [None]:
import os
import json
random_seed = 0
extra_episodes = 300

base_path = 'ddpg_trials'
# Trial folder whose weights, scores and parameters are used as the starting point
checkpoint_path = os.path.join(base_path, str(16))

# Load the hyperparameters of the run being resumed. They must end up in
# `properties`, because the ddpg call below reads every setting from it.
with open(os.path.join(checkpoint_path, "parameters.json"), 'r') as file:
    properties = json.load(file)

new_path = create_next_id_folder(base_path)
scores = ddpg(env,
    state_size,
    action_size,
    random_seed,
    num_agents=num_agents,
    n_episodes=properties["n_episodes"],
    max_t=properties["max_t"],
    actor_model_save_path=os.path.join(new_path, 'actor.pth'),
    critic_model_save_path=os.path.join(new_path, 'critic.pth'),
    BUFFER_SIZE=properties["BUFFER_SIZE"],
    BATCH_SIZE=properties["BATCH_SIZE"],
    GAMMA=properties["GAMMA"],
    TAU=properties["TAU"],
    LR_ACTOR=properties["LR_ACTOR"],
    LR_CRITIC=properties["LR_CRITIC"],
    WEIGHT_DECAY=properties["WEIGHT_DECAY"],
    learn_every=properties["learn_every"],
    learn_iterations=properties["learn_iterations"],
    continue_from=checkpoint_path,
    extra_episodes=extra_episodes,
    )

# Record the resumed run's episode budget plus the extra episodes requested here.
# Update the dict before opening the file so a failure cannot leave an empty file.
properties["n_episodes"] += extra_episodes
with open(os.path.join(new_path, 'parameters.json'), 'w') as json_file:
    json.dump(properties, json_file,  indent=4)

# Entries may be numpy arrays (new episodes) or plain lists (history loaded
# from scores.json); np.asarray handles both.
list_of_python_lists = [np.asarray(arr).tolist() for arr in scores]

with open(os.path.join(new_path, 'scores.json'), 'w') as f:
    json.dump(list_of_python_lists, f, indent=4) # indent makes it human-readable

# Transpose to (agents, episodes) so each row is one agent's curve
all_scores = np.array(list_of_python_lists).T

fig, ax = plt.subplots(figsize=(12, 7))

for agent_number, agent_scores in enumerate(all_scores, start=1):
    ax.plot(np.arange(len(agent_scores)), agent_scores, label=f'Agent {agent_number}')

# Mean over the agent axis gives one value per episode
column_means_np = all_scores.mean(axis=0)
ax.plot(np.arange(len(column_means_np)), column_means_np, label='Mean over Agents')

ax.set_ylabel('Score')
ax.set_xlabel('Episode #')
ax.set_title("Scores of All 20 Agents Over Episodes")

# Many lines share the axes, so use a compact multi-column legend
ax.legend(loc='best', ncol=4, fontsize='small')
ax.grid(True, linestyle='--', alpha=0.7)

# Keep labels and title from overlapping
fig.tight_layout()
plt.show()

In [None]:
import os
import json
random_seed = 0
base_path = 'ddpg_trials'

# Single longer run: 500 episodes with GAMMA=0.98 and a 1e6 replay buffer
combinations = [
    dict(
        n_episodes=500,
        max_t=1000,
        BUFFER_SIZE=int(1e6),
        BATCH_SIZE=128,
        GAMMA=0.98,
        TAU=1e-3,
        LR_ACTOR=1e-4,
        LR_CRITIC=1e-3,
        WEIGHT_DECAY=0,
        learn_every=20,
        learn_iterations=10,
    ),
]

for properties in combinations:
    new_path = create_next_id_folder(base_path)

    # Every key in `properties` is a keyword argument of ddpg
    scores = ddpg(
        env, state_size, action_size, random_seed,
        num_agents=num_agents,
        actor_model_save_path=os.path.join(new_path, 'actor.pth'),
        critic_model_save_path=os.path.join(new_path, 'critic.pth'),
        **properties
    )

    # Store the settings alongside the trained weights
    with open(os.path.join(new_path, 'parameters.json'), 'w') as json_file:
        json.dump(properties, json_file,  indent=4)

    # One list per episode, one value per agent
    list_of_python_lists = [arr.tolist() for arr in scores]

    with open(os.path.join(new_path, 'scores.json'), 'w') as f:
        json.dump(list_of_python_lists, f, indent=4)

    # Transpose to (agents, episodes) so each row is one agent's curve
    all_scores = np.array(list_of_python_lists).T

    fig, ax = plt.subplots(figsize=(12, 7))

    for agent_number, agent_scores in enumerate(all_scores, start=1):
        ax.plot(np.arange(len(agent_scores)), agent_scores, label=f'Agent {agent_number}')

    # Mean over the agent axis gives one value per episode
    column_means_np = all_scores.mean(axis=0)
    ax.plot(np.arange(len(column_means_np)), column_means_np, label='Mean over Agents')

    ax.set_ylabel('Score')
    ax.set_xlabel('Episode #')
    ax.set_title("Scores of All 20 Agents Over Episodes")

    # Many lines share the axes, so use a compact multi-column legend
    ax.legend(loc='best', ncol=4, fontsize='small')
    ax.grid(True, linestyle='--', alpha=0.7)

    # Keep labels and title from overlapping
    fig.tight_layout()
    plt.show()

In [None]:
import os
import json
random_seed = 0
base_path = 'ddpg_trials'

# Long run: 1000 episodes, 1e8 replay buffer, batch size 256, GAMMA=0.97
combinations = [
    dict(
        n_episodes=1000,
        max_t=1000,
        BUFFER_SIZE=int(1e8),
        BATCH_SIZE=256,
        GAMMA=0.97,
        TAU=1e-3,
        LR_ACTOR=1e-4,
        LR_CRITIC=1e-3,
        WEIGHT_DECAY=0,
        learn_every=20,
        learn_iterations=10,
    ),
]

for properties in combinations:
    new_path = create_next_id_folder(base_path)

    # Every key in `properties` is a keyword argument of ddpg; checkpoints are
    # written every 200 episodes for this run.
    scores = ddpg(
        env, state_size, action_size, random_seed,
        num_agents=num_agents,
        actor_model_save_path=os.path.join(new_path, 'actor.pth'),
        critic_model_save_path=os.path.join(new_path, 'critic.pth'),
        save_every=200,
        **properties
    )

    # Store the settings alongside the trained weights
    with open(os.path.join(new_path, 'parameters.json'), 'w') as json_file:
        json.dump(properties, json_file,  indent=4)

    # One list per episode, one value per agent
    list_of_python_lists = [arr.tolist() for arr in scores]

    with open(os.path.join(new_path, 'scores.json'), 'w') as f:
        json.dump(list_of_python_lists, f, indent=4)

    # Transpose to (agents, episodes) so each row is one agent's curve
    all_scores = np.array(list_of_python_lists).T

    fig, ax = plt.subplots(figsize=(12, 7))

    for agent_number, agent_scores in enumerate(all_scores, start=1):
        ax.plot(np.arange(len(agent_scores)), agent_scores, label=f'Agent {agent_number}')

    # Mean over the agent axis gives one value per episode
    column_means_np = all_scores.mean(axis=0)
    ax.plot(np.arange(len(column_means_np)), column_means_np, label='Mean over Agents')

    ax.set_ylabel('Score')
    ax.set_xlabel('Episode #')
    ax.set_title("Scores of All 20 Agents Over Episodes")

    # Many lines share the axes, so use a compact multi-column legend
    ax.legend(loc='best', ncol=4, fontsize='small')
    ax.grid(True, linestyle='--', alpha=0.7)

    # Keep labels and title from overlapping
    fig.tight_layout()
    plt.show()