# Reinforcement Learning - Hopper Environment

In [9]:
import matplotlib.pyplot as plt
from IPython.display import display
import numpy as np
import pandas as pd

import module_Reinforce_ActorCritic as rac

## Actor-Critic & Reinforce

### Train method

`train` method parameters:
- `type_alg`: Algorithm type.
    - 0: Reinforce;
    - 1: Actor-Critic with separate classes for the policy and the value function, each with its own optimizer;
    - 2: Actor-Critic with a single class for both policy and value, trained by a single optimizer on the sum of the two losses.

- `hopper`: Environment in which the policy is trained.
    - 'S': Source Custom Hopper (default);
    - 'T': Target Custom Hopper. 
- `n_episodes`: Number of episodes for training. Default: 5e4.
- `trained_model`: Model from which the training starts. If `None` (default) the training is from scratch.
- `baseline`: Value of the baseline for the Reinforce algorithm. Used only if `type_alg=0`. Default: 0.
- `gamma`: Discount factor for algorithms. Default: 0.99.
- `optim_lr`: Starting learning rate for Adam optimizer. Default: 1e-3.
- `layer_size`: Number of neurons per layer in policy and value neural networks. Default: 64.
- `starting_threshold`: None <span style="color:red"> TODO </span>
- `csv_name`: Name of the CSV file where the return and time data are saved.
- `save_every`: The return and the time taken are recorded for one episode every `save_every` episodes. The recorded values are both written to the CSV file and returned. Default: 75.
- `print_every`: Every `print_every` episodes, the number and the return of the current episode are printed. A value of around 1/5 of `n_episodes` is suggested. Default: 1e4.
- `print_name`: Boolean parameter. If True, the name of the model is printed when the training ends. Default: True.
- `plot`: Boolean parameter. If True, the graph of the episodes' returns is plotted when the training ends. Default: True.
- `random_state`: Random seed. Default: 42.
- `device`: Device to use for training. Default: 'cpu'

`train` method returns:  <span style="color:red"> TODO </span>
- `numbers`:

- `returns_array`:
- `times_array`:
- `model_name`:



### Test method

 <span style="color:red"> TODO </span>

In [None]:
# Random seeds: the training cell launches its runs once per seed.
seeds = [
    42, 35, 254, 78, 91,
    53, 22, 341, 117, 86,
]

# Number of training episodes passed to every run as `n_episodes`.
episodes = 50_000

In [11]:
# Disabled single training run (kept commented out): algorithm type 0
# (Reinforce) with random_state=42, save_every=50, print_every=episodes//5,
# plotting enabled, and 'resultsProva.csv' as the CSV file name.
#returns_array, times_array, tot_time, model_name = rac.train(0, n_episodes=episodes, csv_name='resultsProva.csv', save_every=50, print_every=episodes//5, plot=True, random_state=42)

In [12]:
# Baseline values compared for algorithm type 0 (Reinforce).
baselines = (0, 100, 250)

# Train one model per (seed, baseline) pair, in the same order as before:
# for each seed, baseline 0, then 100, then 250.
# The four returned values are rebound on every call, so after the loop they
# refer to the last run only (last seed, baseline 250).
for sd in seeds:
    for baseline in baselines:
        returns_array, times_array, tot_time, model_name = rac.train(
            0,
            n_episodes=episodes,
            baseline=baseline,
            csv_name='results_Piri.csv',
            plot=False,
            random_state=sd,
        )


Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 180.22it/s]


Average of the last 150 returns: 1.8640813160115919

MODEL NAME: Reinforce_100_b0_hS_rs42_250528_23-53-56.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:01<00:00, 92.34it/s]


Average of the last 150 returns: 7.010564837243589

MODEL NAME: Reinforce_100_b100_hS_rs42_250528_23-53-56.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 113.12it/s]


Average of the last 150 returns: 4.714179449324146

MODEL NAME: Reinforce_100_b250_hS_rs42_250528_23-53-57.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:01<00:00, 61.25it/s]


Average of the last 150 returns: 3.817939823103213

MODEL NAME: Reinforce_100_b0_hS_rs35_250528_23-53-58.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:01<00:00, 85.64it/s]


Average of the last 150 returns: 2.4201635314305205

MODEL NAME: Reinforce_100_b100_hS_rs35_250528_23-54-00.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:01<00:00, 58.97it/s]


Average of the last 150 returns: 3.2105903994676557

MODEL NAME: Reinforce_100_b250_hS_rs35_250528_23-54-01.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 156.42it/s]


Average of the last 150 returns: 3.3492882092597247

MODEL NAME: Reinforce_100_b0_hS_rs254_250528_23-54-03.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 143.59it/s]


Average of the last 150 returns: 3.5332318101102054

MODEL NAME: Reinforce_100_b100_hS_rs254_250528_23-54-04.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 160.73it/s]


Average of the last 150 returns: 3.5335691023737326

MODEL NAME: Reinforce_100_b250_hS_rs254_250528_23-54-04.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 231.52it/s]


Average of the last 150 returns: 0.6724106168143796

MODEL NAME: Reinforce_100_b0_hS_rs78_250528_23-54-05.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 223.94it/s]


Average of the last 150 returns: 1.0890826031269083

MODEL NAME: Reinforce_100_b100_hS_rs78_250528_23-54-05.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 188.38it/s]


Average of the last 150 returns: 1.120399322525013

MODEL NAME: Reinforce_100_b250_hS_rs78_250528_23-54-06.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:02<00:00, 37.87it/s]


Average of the last 150 returns: -2.034756382525409

MODEL NAME: Reinforce_100_b0_hS_rs91_250528_23-54-06.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:02<00:00, 48.04it/s]


Average of the last 150 returns: -2.759629231337519

MODEL NAME: Reinforce_100_b100_hS_rs91_250528_23-54-09.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:02<00:00, 44.64it/s]


Average of the last 150 returns: -2.6531609800739493

MODEL NAME: Reinforce_100_b250_hS_rs91_250528_23-54-11.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 142.06it/s]


Average of the last 150 returns: 2.7577914825664016

MODEL NAME: Reinforce_100_b0_hS_rs53_250528_23-54-13.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 132.20it/s]


Average of the last 150 returns: 2.3339828487164516

MODEL NAME: Reinforce_100_b100_hS_rs53_250528_23-54-14.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 106.47it/s]


Average of the last 150 returns: 2.782995025517879

MODEL NAME: Reinforce_100_b250_hS_rs53_250528_23-54-15.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 218.05it/s]


Average of the last 150 returns: -0.3003481835360477

MODEL NAME: Reinforce_100_b0_hS_rs22_250528_23-54-16.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 221.26it/s]


Average of the last 150 returns: -0.538305431072038

MODEL NAME: Reinforce_100_b100_hS_rs22_250528_23-54-16.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 246.98it/s]


Average of the last 150 returns: -0.5381516489246285

MODEL NAME: Reinforce_100_b250_hS_rs22_250528_23-54-17.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 219.89it/s]


Average of the last 150 returns: 1.0314847273140493

MODEL NAME: Reinforce_100_b0_hS_rs341_250528_23-54-17.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 174.80it/s]


Average of the last 150 returns: 2.2195510146295434

MODEL NAME: Reinforce_100_b100_hS_rs341_250528_23-54-18.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 167.97it/s]


Average of the last 150 returns: 2.3126004271922453

MODEL NAME: Reinforce_100_b250_hS_rs341_250528_23-54-18.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 118.72it/s]


Average of the last 150 returns: 4.618602720576106

MODEL NAME: Reinforce_100_b0_hS_rs117_250528_23-54-19.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 116.69it/s]


Average of the last 150 returns: 4.32955570632178

MODEL NAME: Reinforce_100_b100_hS_rs117_250528_23-54-20.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 140.71it/s]


Average of the last 150 returns: 3.931999910316944

MODEL NAME: Reinforce_100_b250_hS_rs117_250528_23-54-21.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 216.24it/s]


Average of the last 150 returns: 2.166481020651654

MODEL NAME: Reinforce_100_b0_hS_rs86_250528_23-54-21.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 173.60it/s]


Average of the last 150 returns: 2.550911302056048

MODEL NAME: Reinforce_100_b100_hS_rs86_250528_23-54-22.mdl
Action space: Box([-1. -1. -1.], [1. 1. 1.], (3,), float32)
State space: Box([-inf -inf -inf -inf -inf -inf -inf -inf -inf -inf -inf], [inf inf inf inf inf inf inf inf inf inf inf], (11,), float64)
Dynamics parameters: [2.47400421 3.92699082 2.71433605 5.0893801 ]


100%|██████████| 100/100 [00:00<00:00, 202.12it/s]

Average of the last 150 returns: 2.1676905146300878

MODEL NAME: Reinforce_100_b250_hS_rs86_250528_23-54-22.mdl





### Plot from csv

In [13]:
# Disabled cell: the code below is wrapped in a string literal, so running the
# cell only evaluates (and echoes) that string; nothing is read or plotted.
# If re-enabled, it reads 'results_Piri.csv' and, for i in 0..14, plots the
# 'returns_AvgLast' entry of row i (first element skipped), dashed for i > 9
# and labelled b=10*i, then saves the figure under ./plots/.
# NOTE(review): read_csv is called without index_col, so the index is
# presumably the default integer one and the string label f'{i}' would not
# match it -- confirm before re-enabling.
# NOTE(review): eval() executes whatever text is stored in the CSV; consider
# ast.literal_eval if the cells only hold list literals -- confirm.
"""
loaded_csv = pd.read_csv('results_Piri.csv')

plt.figure(figsize=(12,10))
plt.title('RETURN')
for i in range(15):
    list_string = loaded_csv.loc[f'{i}','returns_AvgLast']
    list_real = eval(list_string)
    if i>9:
        ls='--'
    else:
        ls='-'
    line_b0 = plt.plot(np.arange(1,len(list_real)), list_real[1:], linestyle=ls, label=f'b={10*i}')
plt.xlabel('Episode')
plt.ylabel('Return')
plt.grid()
plt.legend()
plt.savefig("./plots/"+'DifferentBaselines_30000.png',dpi=300)
"""

'\nloaded_csv = pd.read_csv(\'results_Piri.csv\')\n\nplt.figure(figsize=(12,10))\nplt.title(\'RETURN\')\nfor i in range(15):\n    list_string = loaded_csv.loc[f\'{i}\',\'returns_AvgLast\']\n    list_real = eval(list_string)\n    if i>9:\n        ls=\'--\'\n    else:\n        ls=\'-\'\n    line_b0 = plt.plot(np.arange(1,len(list_real)), list_real[1:], linestyle=ls, label=f\'b={10*i}\')\nplt.xlabel(\'Episode\')\nplt.ylabel(\'Return\')\nplt.grid()\nplt.legend()\nplt.savefig("./plots/"+\'DifferentBaselines_30000.png\',dpi=300)\n'

### Load data from CSV

In [14]:
# Disabled cell (string literal, not executed). If re-enabled, it loads
# 'resultsProva.csv' using its 'model_name' column as the index and displays
# the resulting table.
"""
loaded_csv = pd.read_csv('resultsProva.csv', index_col='model_name')
display(loaded_csv)
"""

"\nloaded_csv = pd.read_csv('resultsProva.csv', index_col='model_name')\ndisplay(loaded_csv)\n"

In [15]:
# Disabled cell (string literal, not executed). If re-enabled, it looks up the
# 'returns' entry of one hard-coded model name in loaded_csv and parses that
# string with eval.
# NOTE(review): relies on loaded_csv, which is only assigned inside another
# disabled cell, and on the hard-coded model name being present in the CSV --
# confirm before re-enabling.
"""
list_string = loaded_csv.loc['Reinforce_1000_b0_hS_rs42_250528_19-52-48','returns']
list_real = eval(list_string)
"""

"\nlist_string = loaded_csv.loc['Reinforce_1000_b0_hS_rs42_250528_19-52-48','returns']\nlist_real = eval(list_string)\n"

### Plot one graph for each different baseline

In [16]:
# Disabled cell (string literal, not executed). If re-enabled, it draws one
# curve per i in 0..14 from returns_list_b[i][1, 1:] against numbers_b[1:]
# (dashed for i > 9, label b=10*i) and saves the figure under ./plots/.
# NOTE(review): numbers_b and returns_list_b are not assigned in any cell
# visible in this review -- likely leftover kernel state; confirm.
"""
plt.figure(figsize=(12,10))
plt.title('RETURN')
for i in range(15):
    if i>9:
        ls='--'
    else:
        ls='-'
    line_b0 = plt.plot(numbers_b[1:], returns_list_b[i][1,1:], linestyle=ls, label=f'b={10*i}')  #Change to 1 or to 2
plt.xlabel('Episode')
plt.ylabel('Return')
plt.grid()
plt.legend()
plt.savefig("./plots/"+'DifferentBaselines_30000.png',dpi=300)
"""

'\nplt.figure(figsize=(12,10))\nplt.title(\'RETURN\')\nfor i in range(15):\n    if i>9:\n        ls=\'--\'\n    else:\n        ls=\'-\'\n    line_b0 = plt.plot(numbers_b[1:], returns_list_b[i][1,1:], linestyle=ls, label=f\'b={10*i}\')  #Change to 1 or to 2\nplt.xlabel(\'Episode\')\nplt.ylabel(\'Return\')\nplt.grid()\nplt.legend()\nplt.savefig("./plots/"+\'DifferentBaselines_30000.png\',dpi=300)\n'

### Plot multiple graphs (different seeds) for two different baselines

In [17]:
# Disabled cell (string literal, not executed). If re-enabled, it plots, for
# every index in range(len(seeds)), returns_list[i][1, 1:] in blue and
# returns_list_b20[i][1, 1:] in green, then saves the figure under ./plots/.
# NOTE(review): plt.plot returns a list of lines, so line_b0 / line_b20 are
# lists; the legend handles probably need line_b0[0] / line_b20[0] -- confirm.
# NOTE(review): numbers, numbers_b20, returns_list and returns_list_b20 are
# not assigned in any cell visible in this review -- confirm their origin.
"""
plt.figure(figsize=(12,10))
plt.title('RETURN')
for i in range(len(seeds)):
    line_b0 = plt.plot(numbers[1:], returns_list[i][1,1:], c='blue', label=f'Average over last 50 episodes')
    line_b20 = plt.plot(numbers_b20[1:], returns_list_b20[i][1,1:], c='green', label=f'Average over last 50 episodes')
plt.xlabel('Episode')
plt.ylabel('Return')
plt.grid()
plt.legend([line_b0, line_b20], ['baseline=0', 'baseline=20'])
plt.savefig("./plots/"+'Prova'+'_Return',dpi=300)
"""

'\nplt.figure(figsize=(12,10))\nplt.title(\'RETURN\')\nfor i in range(len(seeds)):\n    line_b0 = plt.plot(numbers[1:], returns_list[i][1,1:], c=\'blue\', label=f\'Average over last 50 episodes\')\n    line_b20 = plt.plot(numbers_b20[1:], returns_list_b20[i][1,1:], c=\'green\', label=f\'Average over last 50 episodes\')\nplt.xlabel(\'Episode\')\nplt.ylabel(\'Return\')\nplt.grid()\nplt.legend([line_b0, line_b20], [\'baseline=0\', \'baseline=20\'])\nplt.savefig("./plots/"+\'Prova\'+\'_Return\',dpi=300)\n'

### Old

In [18]:
# Disabled cell (string literal, not executed). If re-enabled, it takes
# elements 0, 1 and 2 of every entry of returns_list_b and times_list_b,
# stores them as list-valued Series, combines them with hard-coded
# hyper-parameter columns into a 15-row DataFrame, displays it and writes it
# to 'results_Piri.csv'.
# NOTE(review): the training loop passes the same file name as csv_name to
# rac.train; writing here would presumably replace those results -- confirm.
# NOTE(review): returns_list_b and times_list_b are not assigned in any cell
# visible in this review -- confirm their origin.
"""
s_r0 = pd.Series([list(run[0]) for run in returns_list_b])
s_r1 = pd.Series([list(run[1]) for run in returns_list_b])
s_r2 = pd.Series([list(run[2]) for run in returns_list_b])
s_t0 = pd.Series([list(run[0]) for run in times_list_b])
s_t1 = pd.Series([list(run[1]) for run in times_list_b])
s_t2 = pd.Series([list(run[2]) for run in times_list_b])
results_df = pd.DataFrame({'type_alg':[0]*15, 'hopper':['S']*15, 'n_episodes':[100000]*15, 'trained_model':[False]*15, 'baseline':[i*10 for i in range(15)], 'gamma':[0.99]*15, 'optim_lr':[1e-3]*15, 'layer_size':[64]*15, 'save_every':[75]*15, 'random_state':[42]*15, 'returns':s_r0, 'returns_AvgLast':s_r1, 'returns_AvgBeginning':s_r2, 'times':s_t0, 'times_AvgLast':s_t1, 'times_AvgBeginning':s_t2})
display(results_df)
results_df.to_csv('results_Piri.csv')
"""

"\ns_r0 = pd.Series([list(run[0]) for run in returns_list_b])\ns_r1 = pd.Series([list(run[1]) for run in returns_list_b])\ns_r2 = pd.Series([list(run[2]) for run in returns_list_b])\ns_t0 = pd.Series([list(run[0]) for run in times_list_b])\ns_t1 = pd.Series([list(run[1]) for run in times_list_b])\ns_t2 = pd.Series([list(run[2]) for run in times_list_b])\nresults_df = pd.DataFrame({'type_alg':[0]*15, 'hopper':['S']*15, 'n_episodes':[100000]*15, 'trained_model':[False]*15, 'baseline':[i*10 for i in range(15)], 'gamma':[0.99]*15, 'optim_lr':[1e-3]*15, 'layer_size':[64]*15, 'save_every':[75]*15, 'random_state':[42]*15, 'returns':s_r0, 'returns_AvgLast':s_r1, 'returns_AvgBeginning':s_r2, 'times':s_t0, 'times_AvgLast':s_t1, 'times_AvgBeginning':s_t2})\ndisplay(results_df)\nresults_df.to_csv('results_Piri.csv')\n"