<a href="https://colab.research.google.com/github/Mattia-Colbertaldo/PrimacyRL/blob/main/MultirunDopamineCartpole.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Reproducibility Notebook for PrimacyRL
> Multiple runs for trend analysis.  
> In contrast to the SingleAgentDopamineCartpole notebook, the agent is now trained n times.
---
_Authored by Patrik Rác, Méline Trochon, Valentina Moretti, and Mattia Colbertaldo_

---

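The notebook installs the modified _dopamine_ library and trains a simple *DQNAgent* on Cartpole (environment provided by Gym). It repeats the training for several independent runs and then plots the training returns across all runs.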

## Step 1: Install Dopamine
---

In [None]:
# @title Install Dopamine from source
!rm -r dopamine_restart
!git clone -b patrik https://github.com/Mattia-Colbertaldo/dopamine_restart.git
!pip install -r dopamine_restart/requirements.txt

# Install the dopamine package itself (required as there might be previous unseen instalations)
%cd dopamine_restart
!pip install .
%cd ..

rm: cannot remove 'dopamine_restart': No such file or directory
Cloning into 'dopamine_restart'...
remote: Enumerating objects: 4365, done.[K
remote: Counting objects: 100% (1843/1843), done.[K
remote: Compressing objects: 100% (537/537), done.[K
remote: Total 4365 (delta 1434), reused 1410 (delta 1306), pack-reused 2522[K
Receiving objects: 100% (4365/4365), 21.19 MiB | 19.46 MiB/s, done.
Resolving deltas: 100% (2929/2929), done.
Collecting Keras-Preprocessing>=1.1.2 (from -r dopamine_restart/requirements.txt (line 21))
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Collecting pyglet>=1.5.0 (from -r dopamine_restart/requirements.txt (line 36))
  Downloading pyglet-2.0.10-py3-none-any.whl (858 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m858.3/858.3 kB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboard-p

## Step 2: Set-up and run the experiment
---


In [None]:
# @title Import dependencies and set up base path
# Standard / numeric utilities.
import numpy as np
import os

# run_experiment supplies create_runner (used by the training cell);
# colab_utils supplies read_experiment (used when loading the logs).
from dopamine.discrete_domains import run_experiment
from dopamine.colab import utils as colab_utils
from absl import flags
# gin parses the hyperparameter configuration string defined further down.
import gin.tf

# Prefix of the per-run log directories: the training cell appends the run
# index directly to this string (e.g. '<BASE_PATH>0', '<BASE_PATH>1', ...).
BASE_PATH = '/tmp/colab_dopamine_run'  # @param

  from tensorflow.tsl.python.lib.core import pywrap_ml_dtypes
  if (distutils.version.LooseVersion(tf.__version__) <
  return jax_config.define_bool_state('flax_' + name, default, help)
  Shape = jax.core.Shape
For more information, see https://jax.readthedocs.io/en/latest/jep/9263-typed-keys.html
  PRNGKey = jax.random.KeyArray


In [None]:
# @title Set up the hyperparameters for the run

# Modified from dopamine/agents/dqn/config/dqn_cartpole.gin
# The string below is a gin configuration. It binds the DQNAgent
# hyperparameters (including the reset_* settings), the Adam optimizer
# settings, the Gym environment (CartPole v1), the Runner schedule
# (120 iterations of 500 training steps) and the replay buffer size.
# NOTE(review): reset_period (10000000000) is far larger than the
# 120 * 500 training steps configured here, so presumably no periodic reset
# is triggered with these values -- confirm against the modified DQNAgent.
dqn_config = """
# Hyperparameters for a simple DQN-style Cartpole agent. The hyperparameters
# chosen achieve reasonable performance.
import dopamine.discrete_domains.gym_lib
import dopamine.discrete_domains.run_experiment
import dopamine.agents.dqn.dqn_agent
import dopamine.replay_memory.circular_replay_buffer
import gin.tf.external_configurables

DQNAgent.observation_shape = %gym_lib.CARTPOLE_OBSERVATION_SHAPE
DQNAgent.observation_dtype = %gym_lib.CARTPOLE_OBSERVATION_DTYPE
DQNAgent.stack_size = %gym_lib.CARTPOLE_STACK_SIZE
DQNAgent.network = @gym_lib.CartpoleDQNNetwork
DQNAgent.gamma = 0.6
DQNAgent.update_horizon = 10
DQNAgent.min_replay_history = 500
DQNAgent.update_period = 8
DQNAgent.target_update_period = 10
DQNAgent.epsilon_fn = @dqn_agent.identity_epsilon
DQNAgent.tf_device = '/gpu:*'  # use '/cpu:*' for non-GPU version
DQNAgent.optimizer = @tf.train.AdamOptimizer()
DQNAgent.reset_dense1 = False
DQNAgent.reset_dense2 = True
DQNAgent.reset_last_layer = True
DQNAgent.reset_period= 10000000000
DQNAgent.reset_max = 3

tf.train.AdamOptimizer.learning_rate = 0.001
tf.train.AdamOptimizer.epsilon = 0.0003125

create_gym_environment.environment_name = 'CartPole'
create_gym_environment.version = 'v1'
create_agent.agent_name = 'dqn'
TrainRunner.create_environment_fn = @gym_lib.create_gym_environment
Runner.num_iterations = 120
Runner.training_steps = 500
Runner.evaluation_steps = 100
Runner.max_steps_per_episode = 10000  # Default max episode length.

WrappedReplayBuffer.replay_capacity = 50000
WrappedReplayBuffer.batch_size = 128
"""
# Register the bindings with gin. As the last expression of the cell, the
# return value is echoed as the cell output.
# NOTE(review): skip_unknown=False presumably makes unknown configurables an
# error instead of being ignored -- confirm against the gin documentation.
gin.parse_config(dqn_config, skip_unknown=False)

([],
 ['dopamine.discrete_domains.gym_lib',
  'dopamine.discrete_domains.run_experiment',
  'dopamine.agents.dqn.dqn_agent',
  'dopamine.replay_memory.circular_replay_buffer',
  'gin.tf.external_configurables'])

In [None]:
# @title Train DQN on Cartpole
from absl import logging

# Set absl's verbosity so that info-level log messages are shown.
logging.set_verbosity('info')

n_runs = 10
log_files = []  # log directory of every run started so far, in run order

for i in range(n_runs):
  # Each run gets its own directory: BASE_PATH followed by the run index.
  LOG_PATH = BASE_PATH + str(i)
  log_files += [LOG_PATH]
  dqn_runner = run_experiment.create_runner(
      LOG_PATH, schedule='continuous_train_and_eval')
  print('Will train DQN agent number {}, please be patient, may be a while...'.format(i))
  dqn_runner.run_experiment()
  print('Done training agent number {}!'.format(i))

## Step 3: Load the experiment logs and plot results
---

In [None]:
# @title Load experiment logs

import collections

import pandas as pd

# Label stored in the 'agent' column of every run; the plot groups by it.
AGENT_LABEL = 'DQN+reset 2'

parameter_set = collections.OrderedDict([
    ('agent', [AGENT_LABEL]),
    ('game', ['Cartpole'])
])

if not log_files:
  raise ValueError('log_files is empty; run the training cell first.')

# Read every run with the same code path and tag it with a 1-based run number.
# The frames are collected in a list and concatenated once, instead of growing
# `data` with an outer merge on every iteration.
run_frames = []
for run_number, log_path in enumerate(log_files, start=1):
  run_frame = colab_utils.read_experiment(log_path, verbose=True,
                                          parameter_set=parameter_set,
                                          summary_keys=['train_episode_returns'])
  run_frame['agent'] = AGENT_LABEL
  run_frame['run'] = run_number
  run_frames.append(run_frame)

# `data` holds the same rows and columns as the merge-based version; only the
# row order differs (rows are grouped by run here).
data = pd.concat(run_frames, ignore_index=True)

In [None]:
# @title Plot the results
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt


# Training returns per iteration, one line per agent label, drawn on an
# explicit Axes and styled through that Axes object.
fig, ax = plt.subplots(figsize=(16, 8))
sns.lineplot(data=data, x='iteration', y='train_episode_returns',
             hue='agent', ax=ax)
ax.set_yscale('log')
ax.grid()
ax.set_title('Cartpole')
# plt.savefig("demo.png")
plt.show()