In [1]:
from session import Session
import matplotlib.pyplot as plt
import numpy as np

# Environment ids handed to Session(...) as its first argument.
# Only SOURCE_ENV is used by the cells shown in this part of the notebook.
SOURCE_ENV, TARGET_ENV = 'CustomHopper-source-v0', 'CustomHopper-target-v0'

# Reinforce with baseline agent

## What is the effect of the learning rate on the agent?

In [None]:
# Grid sweep: train one REINFORCE-with-baseline agent per
# (learning rate, baseline) pair, all through a single Session.
output_folder = "outputs/several_lr"
session = Session(SOURCE_ENV, output_folder, 0, 'cpu')

learning_rates = [1e-1, 1e-2, 5e-3, 1e-3, 5e-4, 1e-4]
baselines = [0, 20, 50]
# Learning rate is the outer axis, baseline the inner one.
sweep = [(lr, baseline) for lr in learning_rates for baseline in baselines]

for lr, baseline in sweep:
    # First argument is None in every call of this notebook
    # (presumably "no saved model to load" -- TODO confirm against Session).
    session.load_reinforce_with_baseline(None, lr, baseline)
    session.train_agent(n_episode=5000)

# Stored once, after the whole sweep has finished.
session.store_infos("lr=[1e-1, 1e-2, 5e-3, 1e-3, 5e-4, 1e-4], baselines = [0, 20, 50]")

## What is the role of the baseline?

In [None]:
# Train one REINFORCE-with-baseline agent per constant baseline value.
# The full list is swept twice (presumably to get a repeated run per
# baseline -- TODO confirm the intent).
output_folder = "outputs/several_baselines"
session = Session(SOURCE_ENV, output_folder, 1, 'cpu')

baselines = [0, 10, 20, 50, 100, 200, 500]
n_passes = 2

for sweep_pass in range(n_passes):
    for baseline in baselines:
        session.load_reinforce_with_baseline(None, baseline=baseline)
        session.train_agent(n_episode=10000)

# Derive the stored note from the list itself so it cannot drift from what
# was actually trained (baseline 10 must appear in the record).
session.store_infos(f"baselines = {baselines * n_passes}")

## Can a moving baseline improve the agent?

In [None]:
# Train a single agent while stepping through a predefined schedule of
# baseline values, with a fixed number of episodes per value.
output_folder = "outputs/defined_moving_baseline"
session = Session(SOURCE_ENV, output_folder, 1, 'cpu')

baselines = [0, 10, 20, 50, 100, 200, 500]
n_episodes_per_baseline = 5000

# The agent is loaded with the first value of the schedule.
session.load_reinforce_with_baseline(None, baseline=baselines[0])
session.train_agent_with_defined_moving_baseline(
    n_episodes_per_baseline=n_episodes_per_baseline,
    baselines=baselines,
)

# Build the note from the values actually passed above so the stored record
# always matches the run (the schedule includes baseline 10).
session.store_infos(f"baselines = {baselines}, {n_episodes_per_baseline} ep per baseline")

## Would a dynamic baseline help the agent reach a better reward?

In [None]:
# Train an agent with the "increasing goal" baseline routine of Session.
output_folder = "outputs/increasing_goal_baseline2"
session = Session(SOURCE_ENV, output_folder, 0, 'cpu')

# NOTE(review): the agent is loaded with baseline=100 while the stored note
# below records "initial baseline=0" -- confirm which value is effective.
session.load_reinforce_with_baseline(None, baseline=100)

# Positional arguments, named after the wording of the stored note
# (presumably episodes per step, number of steps, initial baseline --
# TODO confirm against the Session signature).
episodes_per_step = 1500
n_steps = 14
initial_baseline = 0
session.train_agent_with_increasing_goal_baseline(episodes_per_step, n_steps, initial_baseline)

session.store_infos("increasing baseline. 1500 ep/step, 14 steps, initial baseline=0")

# Actor-Critic agent

## Train a basic actor-critic agent

In [2]:
# Resume the most recent actor-critic agent and train it with checkpointing.
output_folder = "outputs/actorcritic_basic"
session = Session(SOURCE_ENV, output_folder, verbose=10)
session.load_last_actor_critic()

# Capture the step number before training starts; it labels the stored note.
step = session.get_step()

# Arguments are positional; per the stored note the first two are the episode
# budget and the early-stopping setting. The meaning of the third (50) is not
# visible here -- TODO confirm against Session.
session.train_agent_with_checkpointing(150_000, 1_000, 50)

info_message = f"Step {step}: Actor critic, 150k episodes, early stopping=1000"
session.store_infos(info_message)

Successful creation of the session, first step is step=1.
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.53429174 3.92699082 2.71433605 5.0893801 ]
Successful loading of the actor-critic agent.
[1;32;40mEpisode: 3000 | Average return: 0.26 | Average episode length: 17.75[0m
End of session step 1, Lasted 146.07 s, Best reward: 7.76
