In [None]:
import torch
# Fix the seed of PyTorch's global random number generator.
# NOTE(review): only torch is seeded in the cells shown here; the gym
# environment is not explicitly seeded — confirm whether that is needed
# for run-to-run reproducibility.
torch.manual_seed(seed=0)

In [None]:
from refl.agents import ReinforceAgent, BaselineReinforceAgent
import pandas as pd
import plotly.express as px
import gym

## REINFORCE Algorithm

In [None]:
# Episode budget handed to agent.learn() and shown in the training-plot title.
N_EPISODES = 1000

# Two independent accumulators filled by the training loop:
#   exp_avgs      <- whatever agent.learn() returns for each gamma
#   final_returns <- one record per item returned by agent.evaluate()
exp_avgs, final_returns = [], []

In [None]:
# Create the CartPole-v1 environment that the REINFORCE agents in the
# following cells are trained and evaluated on.
env = gym.make("CartPole-v1")

In [None]:
# Display the size of the first axis of the observation space; this same
# expression is passed as n_state_dims when the agents are constructed.
env.observation_space.shape[0]

In [None]:
# Train one ReinforceAgent per discount factor and collect its results.
#
# The accumulators are re-created here so that re-running this cell replaces
# the previous results instead of appending a second copy of every record
# (duplicate Gamma/Episode rows would corrupt the line plots further down).
exp_avgs = []
final_returns = []

for gamma in [1.0, 0.99, 0.95]:
    agent = ReinforceAgent(
        n_state_dims=env.observation_space.shape[0],
        n_latent_dims=128,
        n_actions=env.action_space.n,
        gamma=gamma,
    )

    # learn() receives the environment, the episode budget and 500
    # (presumably a per-episode step cap — TODO confirm against refl.agents).
    avgs = agent.learn(env, N_EPISODES, 500)
    exp_avgs.extend(avgs)

    # One record per item yielded by evaluate(env, 10), tagged with the gamma
    # used for training and the item's position in the returned sequence.
    final_returns.extend([
        {'Gamma': gamma, 'Return': ret, 'Episode': ep}
        for ep, ret in enumerate(agent.evaluate(env, 10))
    ])

In [None]:
# Turn the records collected from agent.learn() into a DataFrame for plotting.
df = pd.DataFrame.from_records(exp_avgs)

In [None]:
# Plot the training records: "AvgReturn" against "Episode", one line per
# "Gamma" value. The title records the episode budget that was used.
fig = px.line(
    df,
    x="Episode",
    y="AvgReturn",
    color="Gamma",
    title=f"N_EPISODES={N_EPISODES}",
)
fig.show()

In [None]:
# Tabulate the evaluation records and plot "Return" against the evaluation
# index stored under "Episode", with one line per "Gamma" value.
df = pd.DataFrame.from_records(final_returns)
fig = px.line(
    df,
    x="Episode",
    y="Return",
    color="Gamma",
    title="Evaluation",
)
fig.show()

## Baseline REINFORCE Algorithm

In [None]:
# Start from an empty list so the baseline section's training records are
# kept separate from those collected in the previous section.
exp_avgs = list()

In [None]:
# Create a new CartPole-v1 environment instance for the
# BaselineReinforceAgent runs in the following cells.
env = gym.make("CartPole-v1")

In [None]:
# Fresh accumulator for the evaluation records of the baseline agents.
final_returns = []

# Episode budget for this section (the earlier section used 1000); it is
# passed to agent.learn() and shown in the training-plot title.
N_EPISODES = 500

In [None]:
# Train one BaselineReinforceAgent per discount factor and collect its results.
#
# The accumulators are re-created here so that re-running this cell replaces
# the previous results instead of appending a second copy of every record
# (duplicate Gamma/Episode rows would corrupt the line plots further down).
exp_avgs = []
final_returns = []

for gamma in [1.0, 0.99, 0.95]:
    agent = BaselineReinforceAgent(
        n_state_dims=env.observation_space.shape[0],
        n_latent_dims=128,
        n_actions=env.action_space.n,
        gamma=gamma,
    )

    # learn() receives the environment, the episode budget and 500
    # (presumably a per-episode step cap — TODO confirm against refl.agents).
    avgs = agent.learn(env, N_EPISODES, 500)
    exp_avgs.extend(avgs)

    # One record per item yielded by evaluate(env, 10), tagged with the gamma
    # used for training and the item's position in the returned sequence.
    final_returns.extend([
        {'Gamma': gamma, 'Return': ret, 'Episode': ep}
        for ep, ret in enumerate(agent.evaluate(env, 10))
    ])

In [None]:
# Turn the baseline agents' training records into a DataFrame for plotting.
df = pd.DataFrame.from_records(exp_avgs)

In [None]:
# Plot the baseline training records: "AvgReturn" against "Episode", one line
# per "Gamma" value. The title records the episode budget that was used.
fig = px.line(
    df,
    x="Episode",
    y="AvgReturn",
    color="Gamma",
    title=f"N_EPISODES={N_EPISODES}",
)
fig.show()

In [None]:
# Tabulate the baseline evaluation records and plot "Return" against the
# evaluation index stored under "Episode", with one line per "Gamma" value.
df = pd.DataFrame.from_records(final_returns)
fig = px.line(
    df,
    x="Episode",
    y="Return",
    color="Gamma",
    title="Evaluation",
)
fig.show()