# Entrenamiento de redes neuronales

### Javier Guzmán Muñoz

In [1]:
# Imports necesarios (copiados del notebook Prueba.inicial.ipynb)
import ray
import ray.rllib.agents.ppo as ppo
import json, os, shutil, sys
import gym
import pprint
import time
import shelve
from tensorflow import keras
from ray import tune

Instructions for updating:
non-resource variables are not supported in the long term


In [2]:
def full_train(checkpoint_root, agent, n_iter, restore = False, restore_dir = None):
    """Train ``agent`` for ``n_iter`` iterations, saving a checkpoint after each one.

    Args:
        checkpoint_root: Directory handed to ``agent.save``. Unless ``restore``
            is true, it is deleted first so the run starts from an empty directory.
        agent: Object exposing ``train()``, ``save(path)`` and ``restore(path)``
            (in this notebook, an RLlib ``PPOTrainer``).
        n_iter: Number of ``agent.train()`` calls to perform.
        restore: When true, ``restore_dir`` is loaded into ``agent`` before
            training and ``checkpoint_root`` is left untouched.
        restore_dir: Checkpoint path passed to ``agent.restore``; required when
            ``restore`` is true.

    Returns:
        A tuple ``(results, episode_data, episode_json)``: the raw dicts
        returned by ``agent.train()``, a per-iteration summary dict (0-based
        ``n``, reward min/mean/max, mean episode length, learn time), and the
        same summaries serialized with ``json.dumps``.

    Raises:
        ValueError: If ``restore`` is true but no ``restore_dir`` was given.
    """
    s = "{:3d} reward {:6.2f}/{:6.2f}/{:6.2f} len {:6.2f} learn_time(ms) {:6.2f} saved {}"
    if restore:
        if restore_dir is None:
            # Fail loudly: silently returning None breaks callers that unpack
            # the three-element result with an unrelated TypeError.
            raise ValueError("Error: you must specify a restore path")
        agent.restore(restore_dir)
    else:
        # Fresh run: discard checkpoints left over from a previous run.
        shutil.rmtree(checkpoint_root, ignore_errors=True)
    results = []
    episode_data = []
    episode_json = []

    total_learn_time = 0
    for n in range(n_iter):
        result = agent.train()
        results.append(result)
        learn_time_ms = result['timers']['learn_time_ms']
        episode = {'n': n,
                   'episode_reward_min': result['episode_reward_min'],
                   'episode_reward_mean': result['episode_reward_mean'],
                   'episode_reward_max': result['episode_reward_max'],
                   'episode_len_mean': result['episode_len_mean'],
                   'learn_time_ms': learn_time_ms}
        episode_data.append(episode)
        episode_json.append(json.dumps(episode))
        file_name = agent.save(checkpoint_root)
        # The printed counter is 1-based, while episode['n'] stays 0-based.
        print(s.format(
            n + 1,
            episode['episode_reward_min'],
            episode['episode_reward_mean'],
            episode['episode_reward_max'],
            episode['episode_len_mean'],
            learn_time_ms,
            file_name
        ))
        total_learn_time += learn_time_ms

    print("Total learn time: " + str(total_learn_time))
    if n_iter > 0:
        # Guard the average: with no iterations there is nothing to divide by.
        print("Average learn time per iteration: " + str(total_learn_time/n_iter))
    return results, episode_data, episode_json
    

### Modelo 1
Red neuronal con entradas de (84,84,4) y filtros de convolución de \[16, \[8, 8\], 4\], \[32, \[4, 4\], 2\], \[256, \[11, 11\], 1\]


In [4]:
# Remove results left by earlier runs. shutil does not expand '~', so resolve
# the home directory explicitly; otherwise the call targets a literal
# directory named '~' under the current working directory.
shutil.rmtree(os.path.expanduser('~/ray_results'), ignore_errors=True)
# Restart Ray before building the trainer.
ray.shutdown()
ray.init()
config = ppo.DEFAULT_CONFIG.copy()
# Pass the config explicitly (it was previously built but never used), the same
# way the other model cells construct their trainers.
agent = ppo.PPOTrainer(config, env='Pong-v0')
policy = agent.get_policy()
print(policy.model.model_config)
# Keras summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
policy.model.base_model.summary()

2020-12-09 23:15:27,593	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
[2m[36m(pid=7698)[0m Instructions for updating:
[2m[36m(pid=7698)[0m non-resource variables are not supported in the long term
[2m[36m(pid=7701)[0m Instructions for updating:
[2m[36m(pid=7701)[0m non-resource variables are not supported in the long term


{'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[16, [8, 8], 4], [32, [4, 4], 2], [256, [11, 11], 1]], 'conv_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, '_time_major': False, 'framestack': True, 'dim': 84, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None}
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 84, 84, 4)]  0                                            
__________________________________________________________________________________________________
conv_value_1 (Conv2D)           (None, 21, 21, 16)   

In [5]:
t0 = time.time()
checkpoint_root = '/tmp/ppo/model1'
n_iter = 15
# Keep the training history instead of discarding it, using the same naming
# scheme as the other model cells.
results_model1_part1, episode_data_model1_part1, episode_json_model1_part1 = full_train(checkpoint_root, agent, n_iter)
t1 = time.time() - t0
print("Total time for the " + str(n_iter) + " training iterations: " + str(t1))

Instructions for updating:
Prefer Variable.assign which has equivalent behavior in 2.X.


[2m[36m(pid=7701)[0m Instructions for updating:
[2m[36m(pid=7701)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=7698)[0m Instructions for updating:
[2m[36m(pid=7698)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


  1 reward -21.00/-21.00/-21.00 len 1010.00 learn_time(ms) 461182.78 saved /tmp/ppo/model1/checkpoint_1/checkpoint-1
  2 reward -21.00/-21.00/-21.00 len 1018.80 learn_time(ms) 447707.81 saved /tmp/ppo/model1/checkpoint_2/checkpoint-2
  3 reward -21.00/-21.00/-21.00 len 1019.62 learn_time(ms) 451363.11 saved /tmp/ppo/model1/checkpoint_3/checkpoint-3
  4 reward -21.00/-21.00/-21.00 len 1021.00 learn_time(ms) 449277.21 saved /tmp/ppo/model1/checkpoint_4/checkpoint-4
  5 reward -21.00/-21.00/-21.00 len 1018.53 learn_time(ms) 447298.47 saved /tmp/ppo/model1/checkpoint_5/checkpoint-5
  6 reward -21.00/-21.00/-21.00 len 1017.62 learn_time(ms) 448018.20 saved /tmp/ppo/model1/checkpoint_6/checkpoint-6
  7 reward -21.00/-21.00/-21.00 len 1018.32 learn_time(ms) 447906.41 saved /tmp/ppo/model1/checkpoint_7/checkpoint-7
  8 reward -21.00/-21.00/-21.00 len 1018.54 learn_time(ms) 446119.50 saved /tmp/ppo/model1/checkpoint_8/checkpoint-8
  9 reward -21.00/-21.00/-21.00 len 1020.09 learn_time(ms) 44450

### Modelo 2
Red neuronal con entradas de 168x168 y filtros de convolución que minimizan el número de parámetros entrenables

In [10]:
# Restart Ray before building the trainer.
ray.shutdown()
ray.init()
env = 'Pong-v0'
config = ppo.DEFAULT_CONFIG.copy()
# DEFAULT_CONFIG.copy() is shallow: config['model'] is still the dict owned by
# ppo.DEFAULT_CONFIG, so assigning into it in place would change the defaults
# seen by every trainer created later in this kernel. Rebind it to a new dict.
config['model'] = dict(
    config['model'],
    dim=168,
    conv_filters=[[16, [16, 16], 8], [32, [4, 4], 2], [256, [11, 11], 1]],
)
agent = ppo.PPOTrainer(config, env=env)
policy = agent.get_policy()
print(policy.model.model_config)
# Keras summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
policy.model.base_model.summary()

2020-12-10 13:01:10,720	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
[2m[36m(pid=5110)[0m Instructions for updating:
[2m[36m(pid=5110)[0m non-resource variables are not supported in the long term
[2m[36m(pid=5111)[0m Instructions for updating:
[2m[36m(pid=5111)[0m non-resource variables are not supported in the long term


{'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[16, [16, 16], 8], [32, [4, 4], 2], [256, [11, 11], 1]], 'conv_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, '_time_major': False, 'framestack': True, 'dim': 168, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None}
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 168, 168, 4) 0                                            
__________________________________________________________________________________________________
conv_value_1 (Conv2D)           (None, 21, 21, 16)

In [12]:
# Wall-clock timing of the full model 2 run (training plus checkpoint writes).
t0 = time.time()
checkpoint_root = '/tmp/ppo/model2'
n_iter = 15
(results_model2_part1,
 episode_data_model2_part1,
 episode_json_model2_part1) = full_train(checkpoint_root=checkpoint_root,
                                         agent=agent,
                                         n_iter=n_iter)
t1 = time.time() - t0
print("Total time for the ", n_iter, " training iterations: ", t1, sep="")

[2m[36m(pid=5110)[0m Instructions for updating:
[2m[36m(pid=5110)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=5111)[0m Instructions for updating:
[2m[36m(pid=5111)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


  1 reward -21.00/-21.00/-21.00 len 1007.00 learn_time(ms) 630700.37 saved /tmp/ppo/model2/checkpoint_1/checkpoint-1
  2 reward -21.00/-21.00/-21.00 len 1015.80 learn_time(ms) 628022.82 saved /tmp/ppo/model2/checkpoint_2/checkpoint-2
  3 reward -21.00/-21.00/-21.00 len 1015.90 learn_time(ms) 645493.46 saved /tmp/ppo/model2/checkpoint_3/checkpoint-3
  4 reward -21.00/-21.00/-21.00 len 1016.77 learn_time(ms) 663558.83 saved /tmp/ppo/model2/checkpoint_4/checkpoint-4
  5 reward -21.00/-21.00/-21.00 len 1017.65 learn_time(ms) 676946.29 saved /tmp/ppo/model2/checkpoint_5/checkpoint-5
  6 reward -21.00/-21.00/-21.00 len 1017.67 learn_time(ms) 686975.72 saved /tmp/ppo/model2/checkpoint_6/checkpoint-6
  7 reward -21.00/-21.00/-21.00 len 1018.72 learn_time(ms) 706809.98 saved /tmp/ppo/model2/checkpoint_7/checkpoint-7
  8 reward -21.00/-21.00/-21.00 len 1019.38 learn_time(ms) 722911.74 saved /tmp/ppo/model2/checkpoint_8/checkpoint-8
  9 reward -21.00/-21.00/-21.00 len 1019.58 learn_time(ms) 73441

### Modelo 3
Red neuronal con entradas de 252x252 y filtros de convolución que minimizan el número de parámetros entrenables.

In [3]:
# Restart Ray before building the trainer.
ray.shutdown()
ray.init()
env = 'Pong-v0'
config = ppo.DEFAULT_CONFIG.copy()
# DEFAULT_CONFIG.copy() is shallow: config['model'] is still the dict owned by
# ppo.DEFAULT_CONFIG, so assigning into it in place would change the defaults
# seen by every trainer created later in this kernel. Rebind it to a new dict.
config['model'] = dict(
    config['model'],
    dim=252,
    conv_filters=[[16, [8, 8], 4], [16, [8, 8], 4], [32, [4, 4], 2], [256, [8, 8], 1]],
)
agent = ppo.PPOTrainer(config, env=env)
policy = agent.get_policy()
print(policy.model.model_config)
# Keras summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
policy.model.base_model.summary()

2020-12-10 17:44:58,499	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-10 17:45:03,560	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-10 17:45:03,562	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-10 17:45:03,564	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=418)[0m Instructions for updating:
[2m[36m(pid=418)[0m non-resource variables are not supported in the long term
[2m[36m(pid=421)[0m Instructions for updating:
[2m[36m(pid=421)[0m non-resource variables are not supported in the long term
2020-12-10 17:45:18,759	INFO trainable.py:252 -- Trainable.setup took 15.200 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


{'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[16, [8, 8], 4], [16, [8, 8], 4], [32, [4, 4], 2], [256, [8, 8], 1]], 'conv_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, '_time_major': False, 'framestack': True, 'dim': 252, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None}
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 252, 252, 4) 0                                            
__________________________________________________________________________________________________
conv_value_1 (Conv2D)           (None

In [4]:
# Wall-clock timing of the first model 3 run (training plus checkpoint writes).
t0 = time.time()
checkpoint_root = '/tmp/ppo/model3'
n_iter = 15
(results_model3_part1,
 episode_data_model3_part1,
 episode_json_model3_part1) = full_train(checkpoint_root=checkpoint_root,
                                         agent=agent,
                                         n_iter=n_iter)
t1 = time.time() - t0
print("Total time for the ", n_iter, " training iterations: ", t1, sep="")

Instructions for updating:
Prefer Variable.assign which has equivalent behavior in 2.X.


[2m[36m(pid=418)[0m Instructions for updating:
[2m[36m(pid=418)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=421)[0m Instructions for updating:
[2m[36m(pid=421)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


  1 reward    nan/   nan/   nan len    nan learn_time(ms) 1125514.45 saved /tmp/ppo/model3/checkpoint_1/checkpoint-1
  2 reward -21.00/-20.75/-20.00 len 1061.75 learn_time(ms) 1141226.55 saved /tmp/ppo/model3/checkpoint_2/checkpoint-2
  3 reward -21.00/-20.75/-20.00 len 1072.75 learn_time(ms) 1157211.77 saved /tmp/ppo/model3/checkpoint_3/checkpoint-3
  4 reward -21.00/-20.67/-20.00 len 1088.00 learn_time(ms) 1145916.49 saved /tmp/ppo/model3/checkpoint_4/checkpoint-4
  5 reward -21.00/-20.62/-20.00 len 1090.75 learn_time(ms) 1142309.95 saved /tmp/ppo/model3/checkpoint_5/checkpoint-5
  6 reward -21.00/-20.60/-19.00 len 1097.25 learn_time(ms) 1140266.95 saved /tmp/ppo/model3/checkpoint_6/checkpoint-6
  7 reward -21.00/-20.59/-19.00 len 1096.50 learn_time(ms) 1141496.07 saved /tmp/ppo/model3/checkpoint_7/checkpoint-7


[2m[33m(pid=raylet)[0m F1210 20:39:02.855826   383   383 node_manager.cc:777]  Check failed: node_id != self_node_id_ Exiting because this node manager has mistakenly been marked dead by the monitor: GCS didn't receive heartbeats within timeout 30000 ms. This is likely since the machine or raylet became overloaded.
[2m[33m(pid=raylet)[0m *** Check failure stack trace: ***
[2m[33m(pid=raylet)[0m     @     0x7f699db8ad3d  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f699db8c1ac  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f699db8aa19  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f699db8ac31  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f699db3f2b9  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f699d850bf4  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f699d850dfc  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f699d9415dc  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f699d9418c6  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f699d94b54a  (unknown)

RaySystemError: System error: Broken pipe

In [3]:
# Restart Ray and rebuild the model 3 trainer so a checkpoint can be restored into it.
ray.shutdown()
ray.init()
env = 'Pong-v0'
config = ppo.DEFAULT_CONFIG.copy()
# DEFAULT_CONFIG.copy() is shallow: config['model'] is still the dict owned by
# ppo.DEFAULT_CONFIG, so assigning into it in place would change the defaults
# seen by every trainer created later in this kernel. Rebind it to a new dict.
config['model'] = dict(
    config['model'],
    dim=252,
    conv_filters=[[16, [8, 8], 4], [16, [8, 8], 4], [32, [4, 4], 2], [256, [8, 8], 1]],
)
agent = ppo.PPOTrainer(config, env=env)
policy = agent.get_policy()
print(policy.model.model_config)
# Keras summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
policy.model.base_model.summary()

2020-12-11 01:50:28,458	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-11 01:50:33,563	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-11 01:50:33,564	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-11 01:50:33,565	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=284)[0m Instructions for updating:
[2m[36m(pid=284)[0m non-resource variables are not supported in the long term
[2m[36m(pid=286)[0m Instructions for updating:
[2m[36m(pid=286)[0m non-resource variables are not supported in the long term
2020-12-11 01:50:51,415	INFO trainable.py:252 -- Trainable.setup took 17.853 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


{'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[16, [8, 8], 4], [16, [8, 8], 4], [32, [4, 4], 2], [256, [8, 8], 1]], 'conv_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, '_time_major': False, 'framestack': True, 'dim': 252, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None}
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 252, 252, 4) 0                                            
__________________________________________________________________________________________________
conv_value_1 (Conv2D)           (None

In [None]:
t0 = time.time()
checkpoint_root = '/tmp/ppo/model3'
# NOTE(review): this resumes from checkpoint 7, so 15 more iterations would end
# at iteration 22; the later resume cell uses n_iter = 4 to stop at 15.
# Confirm whether 8 was intended here.
n_iter = 15
# Bind the resumed run to *_part2 names so it does not overwrite the *_part1
# results of the first run (the following resume cell already uses *_part3).
results_model3_part2, episode_data_model3_part2, episode_json_model3_part2 = full_train(
    checkpoint_root, agent, n_iter,
    restore=True,
    restore_dir='/tmp/ppo/model3/checkpoint_7/checkpoint-7')
t1 = time.time() - t0
print("Total time for the " + str(n_iter) + " training iterations: " + str(t1))

2020-12-11 01:50:51,852	INFO trainable.py:481 -- Restored on 10.10.1.128 from checkpoint: /tmp/ppo/model3/checkpoint_7/checkpoint-7
2020-12-11 01:50:51,853	INFO trainable.py:489 -- Current state after restoring: {'_iteration': 7, '_timesteps_total': None, '_time_total': 9055.71677017212, '_episodes_total': 22}


Instructions for updating:
Prefer Variable.assign which has equivalent behavior in 2.X.


[2m[36m(pid=284)[0m Instructions for updating:
[2m[36m(pid=284)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=286)[0m Instructions for updating:
[2m[36m(pid=286)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


  1 reward    nan/   nan/   nan len    nan learn_time(ms) 946995.92 saved /tmp/ppo/model3/checkpoint_8/checkpoint-8
  2 reward -21.00/-20.50/-20.00 len 1185.50 learn_time(ms) 944662.62 saved /tmp/ppo/model3/checkpoint_9/checkpoint-9
  3 reward -21.00/-20.38/-20.00 len 1196.38 learn_time(ms) 942038.60 saved /tmp/ppo/model3/checkpoint_10/checkpoint-10
  4 reward -21.00/-20.40/-20.00 len 1187.50 learn_time(ms) 943313.46 saved /tmp/ppo/model3/checkpoint_11/checkpoint-11


[2m[33m(pid=raylet)[0m F1211 03:06:54.957127   255   255 node_manager.cc:777]  Check failed: node_id != self_node_id_ Exiting because this node manager has mistakenly been marked dead by the monitor: GCS didn't receive heartbeats within timeout 30000 ms. This is likely since the machine or raylet became overloaded.
[2m[36m(pid=282)[0m E1211 03:07:14.912910   282   320 core_worker.cc:708] Raylet failed. Shutting down.
[2m[36m(pid=283)[0m E1211 03:07:14.911471   283   318 core_worker.cc:708] Raylet failed. Shutting down.
[2m[33m(pid=raylet)[0m *** Check failure stack trace: ***
[2m[33m(pid=raylet)[0m     @     0x7f3ca2768d3d  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f3ca276a1ac  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f3ca2768a19  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f3ca2768c31  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f3ca271d2b9  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f3ca242ebf4  (unknown)
[2m[33m(pid=raylet)[0m     @  

In [3]:
# Restart Ray and rebuild the model 3 trainer so a checkpoint can be restored into it.
ray.shutdown()
ray.init()
env = 'Pong-v0'
config = ppo.DEFAULT_CONFIG.copy()
# DEFAULT_CONFIG.copy() is shallow: config['model'] is still the dict owned by
# ppo.DEFAULT_CONFIG, so assigning into it in place would change the defaults
# seen by every trainer created later in this kernel. Rebind it to a new dict.
config['model'] = dict(
    config['model'],
    dim=252,
    conv_filters=[[16, [8, 8], 4], [16, [8, 8], 4], [32, [4, 4], 2], [256, [8, 8], 1]],
)
agent = ppo.PPOTrainer(config, env=env)
policy = agent.get_policy()
print(policy.model.model_config)
# Keras summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
policy.model.base_model.summary()

2020-12-11 09:36:57,075	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-11 09:37:03,421	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-11 09:37:03,430	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-11 09:37:03,434	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=2398)[0m Instructions for updating:
[2m[36m(pid=2398)[0m non-resource variables are not supported in the long term
[2m[36m(pid=2401)[0m Instructions for updating:
[2m[36m(pid=2401)[0m non-resource variables are not supported in the long term
2020-12-11 09:37:17,601	INFO trainable.py:252 -- Trainable.setup took 14.192 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


{'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[16, [8, 8], 4], [16, [8, 8], 4], [32, [4, 4], 2], [256, [8, 8], 1]], 'conv_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, '_time_major': False, 'framestack': True, 'dim': 252, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None}
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 252, 252, 4) 0                                            
__________________________________________________________________________________________________
conv_value_1 (Conv2D)           (None

In [4]:
# Resume model 3 from checkpoint 11 and time the remaining iterations.
t0 = time.time()
checkpoint_root = '/tmp/ppo/model3'
n_iter = 4
(results_model3_part3,
 episode_data_model3_part3,
 episode_json_model3_part3) = full_train(
    checkpoint_root,
    agent,
    n_iter,
    restore=True,
    restore_dir='/tmp/ppo/model3/checkpoint_11/checkpoint-11',
)
t1 = time.time() - t0
print("Total time for the ", n_iter, " training iterations: ", t1, sep="")

2020-12-11 09:37:18,030	INFO trainable.py:481 -- Restored on 10.10.1.128 from checkpoint: /tmp/ppo/model3/checkpoint_11/checkpoint-11
2020-12-11 09:37:18,032	INFO trainable.py:489 -- Current state after restoring: {'_iteration': 11, '_timesteps_total': None, '_time_total': 13432.568308353424, '_episodes_total': 32}


Instructions for updating:
Prefer Variable.assign which has equivalent behavior in 2.X.


[2m[36m(pid=2398)[0m Instructions for updating:
[2m[36m(pid=2398)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=2401)[0m Instructions for updating:
[2m[36m(pid=2401)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


  1 reward    nan/   nan/   nan len    nan learn_time(ms) 1193923.55 saved /tmp/ppo/model3/checkpoint_12/checkpoint-12
  2 reward -21.00/-20.75/-20.00 len 1245.50 learn_time(ms) 1075075.03 saved /tmp/ppo/model3/checkpoint_13/checkpoint-13
  3 reward -21.00/-20.57/-20.00 len 1217.29 learn_time(ms) 1045145.66 saved /tmp/ppo/model3/checkpoint_14/checkpoint-14
  4 reward -21.00/-20.30/-18.00 len 1222.70 learn_time(ms) 1022881.61 saved /tmp/ppo/model3/checkpoint_15/checkpoint-15
Total learn time: 4337025.8440000005
Average learn time per iteration: 1084256.4610000001
Total time for the 4 training iterations: 4796.188503503799


### Modelo 4 
Entradas de 168x168

In [3]:
# Restart Ray before building the trainer.
ray.shutdown()
ray.init()
env = 'Pong-v0'
config = ppo.DEFAULT_CONFIG.copy()
# DEFAULT_CONFIG.copy() is shallow: config['model'] is still the dict owned by
# ppo.DEFAULT_CONFIG, so assigning into it in place would change the defaults
# seen by every trainer created later in this kernel. Rebind it to a new dict.
config['model'] = dict(
    config['model'],
    dim=168,
    conv_filters=[[16, [8, 8], 4], [32, [4, 4], 2], [32, [4, 4], 2], [256, [11, 11], 1]],
)
agent = ppo.PPOTrainer(config, env=env)
policy = agent.get_policy()
print(policy.model.model_config)
# Keras summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
policy.model.base_model.summary()

2020-12-11 13:10:51,641	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-11 13:10:54,580	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-11 13:10:54,582	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-11 13:10:54,584	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=4338)[0m Instructions for updating:
[2m[36m(pid=4338)[0m non-resource variables are not supported in the long term
[2m[36m(pid=4345)[0m Instructions for updating:
[2m[36m(pid=4345)[0m non-resource variables are not supported in the long term
2020-12-11 13:11:05,122	INFO trainable.py:252 -- Trainable.setup took 10.543 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


{'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[16, [8, 8], 4], [32, [4, 4], 2], [32, [4, 4], 2], [256, [11, 11], 1]], 'conv_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, '_time_major': False, 'framestack': True, 'dim': 168, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None}
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 168, 168, 4) 0                                            
__________________________________________________________________________________________________
conv_value_1 (Conv2D)           (No

In [4]:
# Wall-clock timing of the full model 4 run (training plus checkpoint writes).
t0 = time.time()
checkpoint_root = '/tmp/ppo/model4'
n_iter = 15
(results_model4_part1,
 episode_data_model4_part1,
 episode_json_model4_part1) = full_train(checkpoint_root=checkpoint_root,
                                         agent=agent,
                                         n_iter=n_iter)
t1 = time.time() - t0
print("Total time for the ", n_iter, " training iterations: ", t1, sep="")

Instructions for updating:
Prefer Variable.assign which has equivalent behavior in 2.X.


[2m[36m(pid=4338)[0m Instructions for updating:
[2m[36m(pid=4338)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=4345)[0m Instructions for updating:
[2m[36m(pid=4345)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


  1 reward -21.00/-21.00/-21.00 len 1013.00 learn_time(ms) 749497.14 saved /tmp/ppo/model4/checkpoint_1/checkpoint-1
  2 reward -21.00/-21.00/-21.00 len 1016.50 learn_time(ms) 748695.33 saved /tmp/ppo/model4/checkpoint_2/checkpoint-2
  3 reward -21.00/-21.00/-21.00 len 1014.88 learn_time(ms) 748144.91 saved /tmp/ppo/model4/checkpoint_3/checkpoint-3
  4 reward -21.00/-21.00/-21.00 len 1018.85 learn_time(ms) 749697.57 saved /tmp/ppo/model4/checkpoint_4/checkpoint-4
  5 reward -21.00/-21.00/-21.00 len 1016.11 learn_time(ms) 750276.71 saved /tmp/ppo/model4/checkpoint_5/checkpoint-5
  6 reward -21.00/-21.00/-21.00 len 1016.95 learn_time(ms) 747233.02 saved /tmp/ppo/model4/checkpoint_6/checkpoint-6
  7 reward -21.00/-21.00/-21.00 len 1016.19 learn_time(ms) 746964.62 saved /tmp/ppo/model4/checkpoint_7/checkpoint-7
  8 reward -21.00/-21.00/-21.00 len 1016.07 learn_time(ms) 746827.92 saved /tmp/ppo/model4/checkpoint_8/checkpoint-8
  9 reward -21.00/-21.00/-21.00 len 1017.09 learn_time(ms) 74613

[2m[33m(pid=raylet)[0m F1211 17:10:17.514658  4308  4308 node_manager.cc:777]  Check failed: node_id != self_node_id_ Exiting because this node manager has mistakenly been marked dead by the monitor: GCS didn't receive heartbeats within timeout 30000 ms. This is likely since the machine or raylet became overloaded.
[2m[33m(pid=raylet)[0m *** Check failure stack trace: ***
[2m[33m(pid=raylet)[0m     @     0x7f7af5e18d3d  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f7af5e1a1ac  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f7af5e18a19  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f7af5e18c31  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f7af5dcd2b9  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f7af5adebf4  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f7af5adedfc  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f7af5bcf5dc  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f7af5bcf8c6  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f7af5bd954a  (unknown)

### Modelo 5

In [3]:
# Restart Ray before building the trainer.
ray.shutdown()
ray.init()
env = 'Pong-v0'
config = ppo.DEFAULT_CONFIG.copy()
# DEFAULT_CONFIG.copy() is shallow: config['model'] is still the dict owned by
# ppo.DEFAULT_CONFIG, so assigning into it in place would change the defaults
# seen by every trainer created later in this kernel. Rebind it to a new dict.
config['model'] = dict(
    config['model'],
    dim=252,
    conv_filters=[[16, [8, 8], 4], [32, [4, 4], 2], [32, [4, 4], 2], [256, [16, 16], 1]],
)
agent = ppo.PPOTrainer(config, env=env)
policy = agent.get_policy()
print(policy.model.model_config)
# Keras summary() prints the table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
policy.model.base_model.summary()

2020-12-12 23:55:28,239	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-12 23:55:35,003	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-12 23:55:35,005	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-12 23:55:35,005	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=1295)[0m Instructions for updating:
[2m[36m(pid=1295)[0m non-resource variables are not supported in the long term
[2m[36m(pid=1298)[0m Instructions for updating:
[2m[36m(pid=1298)[0m non-resource variables are not supported in the long term
2020-12-12 23:55:48,665	INFO trainable.py:252 -- Trainable.setup took 13.663 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


{'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[16, [8, 8], 4], [32, [4, 4], 2], [32, [4, 4], 2], [256, [16, 16], 1]], 'conv_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, '_time_major': False, 'framestack': True, 'dim': 252, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None}
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 252, 252, 4) 0                                            
__________________________________________________________________________________________________
conv_value_1 (Conv2D)           (No

In [4]:
# Wall-clock timing of the full model 5 run (training plus checkpoint writes).
t0 = time.time()
checkpoint_root = '/tmp/ppo/model5'
n_iter = 15
(results_model5_part1,
 episode_data_model5_part1,
 episode_json_model5_part1) = full_train(checkpoint_root=checkpoint_root,
                                         agent=agent,
                                         n_iter=n_iter)
t1 = time.time() - t0
print("Total time for the ", n_iter, " training iterations: ", t1, sep="")

Instructions for updating:
Prefer Variable.assign which has equivalent behavior in 2.X.


[2m[36m(pid=1298)[0m Instructions for updating:
[2m[36m(pid=1298)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=1295)[0m Instructions for updating:
[2m[36m(pid=1295)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


  1 reward -21.00/-21.00/-21.00 len 1018.50 learn_time(ms) 2016325.93 saved /tmp/ppo/model5/checkpoint_1/checkpoint-1
  2 reward -21.00/-21.00/-21.00 len 1017.00 learn_time(ms) 1855743.47 saved /tmp/ppo/model5/checkpoint_2/checkpoint-2
  3 reward -21.00/-21.00/-21.00 len 1024.38 learn_time(ms) 1782429.29 saved /tmp/ppo/model5/checkpoint_3/checkpoint-3
  4 reward -21.00/-21.00/-21.00 len 1021.25 learn_time(ms) 1742060.77 saved /tmp/ppo/model5/checkpoint_4/checkpoint-4
  5 reward -21.00/-21.00/-21.00 len 1021.75 learn_time(ms) 1717637.55 saved /tmp/ppo/model5/checkpoint_5/checkpoint-5
  6 reward -21.00/-21.00/-21.00 len 1019.85 learn_time(ms) 1702886.05 saved /tmp/ppo/model5/checkpoint_6/checkpoint-6
  7 reward -21.00/-21.00/-21.00 len 1020.46 learn_time(ms) 1699780.68 saved /tmp/ppo/model5/checkpoint_7/checkpoint-7
  8 reward -21.00/-21.00/-21.00 len 1021.11 learn_time(ms) 1688489.74 saved /tmp/ppo/model5/checkpoint_8/checkpoint-8
  9 reward -21.00/-21.00/-21.00 len 1020.22 learn_time(m

[2m[33m(pid=raylet)[0m F1213 05:01:10.538866  1266  1266 node_manager.cc:777]  Check failed: node_id != self_node_id_ Exiting because this node manager has mistakenly been marked dead by the monitor: GCS didn't receive heartbeats within timeout 30000 ms. This is likely since the machine or raylet became overloaded.
[2m[33m(pid=raylet)[0m *** Check failure stack trace: ***
[2m[33m(pid=raylet)[0m     @     0x7ff37a15bd3d  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7ff37a15d1ac  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7ff37a15ba19  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7ff37a15bc31  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7ff37a1102b9  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7ff379e21bf4  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7ff379e21dfc  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7ff379f125dc  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7ff379f128c6  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7ff379f1c54a  (unknown)

RaySystemError: System error: Broken pipe

In [3]:
# Model 5 setup: start a clean Ray session and build a PPO trainer for Pong
# with 252x252 observations and a four-layer convolutional stack.
ray.shutdown()  # no-op if Ray is not running; otherwise drops the previous session
ray.init()
env = 'Pong-v0'
config = ppo.DEFAULT_CONFIG.copy()
# dict.copy() is shallow: config['model'] would still be the very dict owned by
# ppo.DEFAULT_CONFIG, so assigning into it would leak these overrides into every
# trainer created later in the same kernel. Build a fresh 'model' dict instead.
config['model'] = dict(
    config['model'],
    dim=252,
    # Each entry is [out_channels, [kernel_h, kernel_w], stride] (RLlib model config).
    conv_filters=[[16, [8, 8], 4], [32, [4, 4], 2], [32, [4, 4], 2], [256, [16, 16], 1]],
)
agent = ppo.PPOTrainer(config, env=env)
policy = agent.get_policy()
print(policy.model.model_config)
# Keras' summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would append a stray "None" line to the output).
policy.model.base_model.summary()

2020-12-13 18:09:01,239	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-13 18:09:07,632	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-13 18:09:07,634	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-13 18:09:07,636	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=4492)[0m Instructions for updating:
[2m[36m(pid=4492)[0m non-resource variables are not supported in the long term
[2m[36m(pid=4493)[0m Instructions for updating:
[2m[36m(pid=4493)[0m non-resource variables are not supported in the long term
2020-12-13 18:09:21,428	INFO trainable.py:252 -- Trainable.setup took 13.797 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


{'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[16, [8, 8], 4], [32, [4, 4], 2], [32, [4, 4], 2], [256, [16, 16], 1]], 'conv_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, '_time_major': False, 'framestack': True, 'dim': 252, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None}
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 252, 252, 4) 0                                            
__________________________________________________________________________________________________
conv_value_1 (Conv2D)           (No

In [5]:
# Model 5, second training segment: resume from checkpoint 10 and run 5 more
# iterations. With restore=True, full_train restores the agent from restore_dir
# and leaves the existing checkpoints under checkpoint_root in place.
checkpoint_root = '/tmp/ppo/model5'
n_iter = 5
t0 = time.time()
(results_model5_part2,
 episode_data_model5_part2,
 episode_json_model5_part2) = full_train(
    checkpoint_root,
    agent,
    n_iter,
    restore=True,
    restore_dir='/tmp/ppo/model5/checkpoint_10/checkpoint-10',
)
t1 = time.time() - t0  # wall-clock seconds spent inside full_train
print("Total time for the ", n_iter, " training iterations: ", t1, sep="")

2020-12-13 18:14:35,760	INFO trainable.py:481 -- Restored on 10.10.1.128 from checkpoint: /tmp/ppo/model5/checkpoint_10/checkpoint-10
2020-12-13 18:14:35,761	INFO trainable.py:489 -- Current state after restoring: {'_iteration': 10, '_timesteps_total': None, '_time_total': 17988.872725248337, '_episodes_total': 36}


Instructions for updating:
Prefer Variable.assign which has equivalent behavior in 2.X.


[2m[36m(pid=4492)[0m Instructions for updating:
[2m[36m(pid=4492)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=4493)[0m Instructions for updating:
[2m[36m(pid=4493)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


  1 reward -21.00/-21.00/-21.00 len 1020.00 learn_time(ms) 1933939.66 saved /tmp/ppo/model5/checkpoint_11/checkpoint-11
  2 reward -21.00/-21.00/-21.00 len 1016.75 learn_time(ms) 1836163.59 saved /tmp/ppo/model5/checkpoint_12/checkpoint-12
  3 reward -21.00/-21.00/-21.00 len 1020.89 learn_time(ms) 1825555.04 saved /tmp/ppo/model5/checkpoint_13/checkpoint-13
  4 reward -21.00/-21.00/-21.00 len 1019.85 learn_time(ms) 1840576.79 saved /tmp/ppo/model5/checkpoint_14/checkpoint-14
  5 reward -21.00/-21.00/-21.00 len 1019.65 learn_time(ms) 1839794.46 saved /tmp/ppo/model5/checkpoint_15/checkpoint-15
Total learn time: 9276029.545
Average learn time per iteration: 1855205.909
Total time for the 5 training iterations: 9908.155445814133


### Modelo 6

In [3]:
# Model 6 setup: start a clean Ray session and build a PPO trainer for Pong
# with 168x168 observations and a three-layer convolutional stack.
ray.shutdown()  # no-op if Ray is not running; otherwise drops the previous session
ray.init()
env = 'Pong-v0'
config = ppo.DEFAULT_CONFIG.copy()
# dict.copy() is shallow: config['model'] would still be the very dict owned by
# ppo.DEFAULT_CONFIG, so assigning into it would leak these overrides into every
# trainer created later in the same kernel. Build a fresh 'model' dict instead.
config['model'] = dict(
    config['model'],
    dim=168,
    # Each entry is [out_channels, [kernel_h, kernel_w], stride] (RLlib model config).
    conv_filters=[[16, [8, 8], 4], [32, [4, 4], 2], [256, [21, 21], 1]],
)
agent = ppo.PPOTrainer(config, env=env)
policy = agent.get_policy()
print(policy.model.model_config)
# Keras' summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would append a stray "None" line to the output).
policy.model.base_model.summary()

2020-12-13 23:31:48,624	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-13 23:31:55,295	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-13 23:31:55,297	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-13 23:31:55,298	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=6040)[0m Instructions for updating:
[2m[36m(pid=6040)[0m non-resource variables are not supported in the long term
[2m[36m(pid=6042)[0m Instructions for updating:
[2m[36m(pid=6042)[0m non-resource variables are not supported in the long term
2020-12-13 23:32:10,904	INFO trainable.py:252 -- Trainable.setup took 15.610 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


{'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[16, [8, 8], 4], [32, [4, 4], 2], [256, [21, 21], 1]], 'conv_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, '_time_major': False, 'framestack': True, 'dim': 168, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None}
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 168, 168, 4) 0                                            
__________________________________________________________________________________________________
conv_value_1 (Conv2D)           (None, 42, 42, 16)  

In [4]:
# Model 6, first training segment: 15 PPO iterations from scratch.
# Called without restore, full_train deletes checkpoint_root first and then
# saves a checkpoint under it after every iteration.
checkpoint_root = '/tmp/ppo/model6'
n_iter = 15
t0 = time.time()
(results_model6_part1,
 episode_data_model6_part1,
 episode_json_model6_part1) = full_train(checkpoint_root, agent, n_iter)
t1 = time.time() - t0  # wall-clock seconds spent inside full_train
print("Total time for the ", n_iter, " training iterations: ", t1, sep="")

Instructions for updating:
Prefer Variable.assign which has equivalent behavior in 2.X.


[2m[36m(pid=6042)[0m Instructions for updating:
[2m[36m(pid=6042)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=6040)[0m Instructions for updating:
[2m[36m(pid=6040)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


  1 reward -21.00/-21.00/-21.00 len 1014.00 learn_time(ms) 1835688.24 saved /tmp/ppo/model6/checkpoint_1/checkpoint-1
  2 reward -21.00/-21.00/-21.00 len 1015.50 learn_time(ms) 1820429.65 saved /tmp/ppo/model6/checkpoint_2/checkpoint-2
  3 reward -21.00/-21.00/-21.00 len 1019.11 learn_time(ms) 1821961.97 saved /tmp/ppo/model6/checkpoint_3/checkpoint-3


[2m[33m(pid=raylet)[0m F1214 01:11:47.068300  6012  6012 node_manager.cc:777]  Check failed: node_id != self_node_id_ Exiting because this node manager has mistakenly been marked dead by the monitor: GCS didn't receive heartbeats within timeout 30000 ms. This is likely since the machine or raylet became overloaded.
[2m[33m(pid=raylet)[0m *** Check failure stack trace: ***
[2m[33m(pid=raylet)[0m     @     0x7f3b28d10d3d  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f3b28d121ac  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f3b28d10a19  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f3b28d10c31  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f3b28cc52b9  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f3b289d6bf4  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f3b289d6dfc  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f3b28ac75dc  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f3b28ac78c6  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f3b28ad154a  (unknown)

RaySystemError: System error: Broken pipe

In [3]:
# Model 6 setup (re-run): start a clean Ray session and rebuild the same PPO
# trainer (168x168 observations, three conv layers) so training can be resumed.
ray.shutdown()  # no-op if Ray is not running; otherwise drops the previous session
ray.init()
env = 'Pong-v0'
config = ppo.DEFAULT_CONFIG.copy()
# dict.copy() is shallow: config['model'] would still be the very dict owned by
# ppo.DEFAULT_CONFIG, so assigning into it would leak these overrides into every
# trainer created later in the same kernel. Build a fresh 'model' dict instead.
config['model'] = dict(
    config['model'],
    dim=168,
    # Each entry is [out_channels, [kernel_h, kernel_w], stride] (RLlib model config).
    conv_filters=[[16, [8, 8], 4], [32, [4, 4], 2], [256, [21, 21], 1]],
)
agent = ppo.PPOTrainer(config, env=env)
policy = agent.get_policy()
print(policy.model.model_config)
# Keras' summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would append a stray "None" line to the output).
policy.model.base_model.summary()

2020-12-15 18:35:38,174	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-15 18:35:44,017	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-15 18:35:44,020	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-15 18:35:44,024	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=395)[0m Instructions for updating:
[2m[36m(pid=395)[0m non-resource variables are not supported in the long term
[2m[36m(pid=397)[0m Instructions for updating:
[2m[36m(pid=397)[0m non-resource variables are not supported in the long term
2020-12-15 18:36:00,783	INFO trainable.py:252 -- Trainable.setup took 16.767 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


{'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[16, [8, 8], 4], [32, [4, 4], 2], [256, [21, 21], 1]], 'conv_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, '_time_major': False, 'framestack': True, 'dim': 168, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None}
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 168, 168, 4) 0                                            
__________________________________________________________________________________________________
conv_value_1 (Conv2D)           (None, 42, 42, 16)  

In [4]:
# Model 6, second training segment: resume from checkpoint 3 and request 12
# more iterations. With restore=True, full_train restores the agent from
# restore_dir and leaves the existing checkpoints under checkpoint_root in place.
checkpoint_root = '/tmp/ppo/model6'
n_iter = 12
t0 = time.time()
(results_model6_part2,
 episode_data_model6_part2,
 episode_json_model6_part2) = full_train(
    checkpoint_root,
    agent,
    n_iter,
    restore=True,
    restore_dir='/tmp/ppo/model6/checkpoint_3/checkpoint-3',
)
t1 = time.time() - t0  # wall-clock seconds spent inside full_train
print("Total time for the ", n_iter, " training iterations: ", t1, sep="")

2020-12-15 18:36:44,046	INFO trainable.py:481 -- Restored on 10.10.1.128 from checkpoint: /tmp/ppo/model6/checkpoint_3/checkpoint-3
2020-12-15 18:36:44,047	INFO trainable.py:489 -- Current state after restoring: {'_iteration': 3, '_timesteps_total': None, '_time_total': 5539.474450111389, '_episodes_total': 9}


Instructions for updating:
Prefer Variable.assign which has equivalent behavior in 2.X.


[2m[36m(pid=397)[0m Instructions for updating:
[2m[36m(pid=397)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=395)[0m Instructions for updating:
[2m[36m(pid=395)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


  1 reward -21.00/-21.00/-21.00 len 1024.00 learn_time(ms) 1814448.53 saved /tmp/ppo/model6/checkpoint_4/checkpoint-4
  2 reward -21.00/-21.00/-21.00 len 1022.00 learn_time(ms) 1822879.62 saved /tmp/ppo/model6/checkpoint_5/checkpoint-5
  3 reward -21.00/-21.00/-21.00 len 1026.62 learn_time(ms) 1814269.73 saved /tmp/ppo/model6/checkpoint_6/checkpoint-6
  4 reward -21.00/-21.00/-21.00 len 1023.83 learn_time(ms) 1835116.95 saved /tmp/ppo/model6/checkpoint_7/checkpoint-7
  5 reward -21.00/-20.88/-20.00 len 1037.56 learn_time(ms) 1843049.29 saved /tmp/ppo/model6/checkpoint_8/checkpoint-8
  6 reward -21.00/-20.90/-20.00 len 1033.55 learn_time(ms) 1829018.30 saved /tmp/ppo/model6/checkpoint_9/checkpoint-9


[2m[36m(pid=394)[0m E1215 22:04:52.974412   394   430 core_worker.cc:708] Raylet failed. Shutting down.
[2m[36m(pid=393)[0m E1215 22:04:52.974412   393   431 core_worker.cc:708] Raylet failed. Shutting down.
[2m[33m(pid=raylet)[0m F1215 22:04:52.285184   366   366 node_manager.cc:777]  Check failed: node_id != self_node_id_ Exiting because this node manager has mistakenly been marked dead by the monitor: GCS didn't receive heartbeats within timeout 30000 ms. This is likely since the machine or raylet became overloaded.
[2m[33m(pid=raylet)[0m *** Check failure stack trace: ***
[2m[33m(pid=raylet)[0m     @     0x7f2aff653d3d  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f2aff6551ac  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f2aff653a19  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f2aff653c31  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f2aff6082b9  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f2aff319bf4  (unknown)
[2m[33m(pid=raylet)[0m     @  

RaySystemError: System error: Broken pipe

In [4]:
# Model 6 setup (re-run): start a clean Ray session and rebuild the same PPO
# trainer (168x168 observations, three conv layers) so training can be resumed.
ray.shutdown()  # no-op if Ray is not running; otherwise drops the previous session
ray.init()
env = 'Pong-v0'
config = ppo.DEFAULT_CONFIG.copy()
# dict.copy() is shallow: config['model'] would still be the very dict owned by
# ppo.DEFAULT_CONFIG, so assigning into it would leak these overrides into every
# trainer created later in the same kernel. Build a fresh 'model' dict instead.
config['model'] = dict(
    config['model'],
    dim=168,
    # Each entry is [out_channels, [kernel_h, kernel_w], stride] (RLlib model config).
    conv_filters=[[16, [8, 8], 4], [32, [4, 4], 2], [256, [21, 21], 1]],
)
agent = ppo.PPOTrainer(config, env=env)
policy = agent.get_policy()
print(policy.model.model_config)
# Keras' summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would append a stray "None" line to the output).
policy.model.base_model.summary()

2020-12-16 00:42:14,130	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-16 00:42:18,246	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-16 00:42:18,250	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-16 00:42:18,251	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=2417)[0m Instructions for updating:
[2m[36m(pid=2417)[0m non-resource variables are not supported in the long term
[2m[36m(pid=2420)[0m Instructions for updating:
[2m[36m(pid=2420)[0m non-resource variables are not supported in the long term
2020-12-16 00:42:31,073	INFO trainable.py:252 -- Trainable.setup took 12.828 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


{'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[16, [8, 8], 4], [32, [4, 4], 2], [256, [21, 21], 1]], 'conv_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, '_time_major': False, 'framestack': True, 'dim': 168, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None}
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 168, 168, 4) 0                                            
__________________________________________________________________________________________________
conv_value_1 (Conv2D)           (None, 42, 42, 16)  

In [5]:
# Model 6, third training segment: resume from checkpoint 9 and request 6 more
# iterations. With restore=True, full_train restores the agent from restore_dir
# and leaves the existing checkpoints under checkpoint_root in place.
#
# The results are stored under *_part3 names: this cell previously reused the
# *_part2 names of the segment that resumed from checkpoint 3, which would
# overwrite that segment's data and break the part1..part5 numbering.
t0 = time.time()
checkpoint_root = '/tmp/ppo/model6'
n_iter = 6
results_model6_part3, episode_data_model6_part3, episode_json_model6_part3 = full_train(
    checkpoint_root,
    agent,
    n_iter,
    restore=True,
    restore_dir='/tmp/ppo/model6/checkpoint_9/checkpoint-9',
)
t1 = time.time() - t0  # wall-clock seconds for this segment
print("Total time for the " + str(n_iter) + " training iterations: " + str(t1))

2020-12-16 00:42:31,703	INFO trainable.py:481 -- Restored on 10.10.1.128 from checkpoint: /tmp/ppo/model6/checkpoint_9/checkpoint-9
2020-12-16 00:42:31,704	INFO trainable.py:489 -- Current state after restoring: {'_iteration': 9, '_timesteps_total': None, '_time_total': 16678.175520181656, '_episodes_total': 29}


Instructions for updating:
Prefer Variable.assign which has equivalent behavior in 2.X.


[2m[36m(pid=2417)[0m Instructions for updating:
[2m[36m(pid=2417)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=2420)[0m Instructions for updating:
[2m[36m(pid=2420)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


  1 reward    nan/   nan/   nan len    nan learn_time(ms) 2008326.72 saved /tmp/ppo/model6/checkpoint_10/checkpoint-10
  2 reward -21.00/-20.75/-20.00 len 1072.50 learn_time(ms) 2006580.87 saved /tmp/ppo/model6/checkpoint_11/checkpoint-11


[2m[33m(pid=raylet)[0m F1216 02:06:40.650221  2390  2390 node_manager.cc:777]  Check failed: node_id != self_node_id_ Exiting because this node manager has mistakenly been marked dead by the monitor: GCS didn't receive heartbeats within timeout 30000 ms. This is likely since the machine or raylet became overloaded.
[2m[33m(pid=raylet)[0m *** Check failure stack trace: ***
[2m[33m(pid=raylet)[0m     @     0x7f1a684aed3d  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f1a684b01ac  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f1a684aea19  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f1a684aec31  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f1a684632b9  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f1a68174bf4  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f1a68174dfc  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f1a682655dc  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f1a682658c6  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f1a6826f54a  (unknown)

RaySystemError: System error: Broken pipe

In [3]:
# Model 6 setup (re-run): start a clean Ray session and rebuild the same PPO
# trainer (168x168 observations, three conv layers) so training can be resumed.
ray.shutdown()  # no-op if Ray is not running; otherwise drops the previous session
ray.init()
env = 'Pong-v0'
config = ppo.DEFAULT_CONFIG.copy()
# dict.copy() is shallow: config['model'] would still be the very dict owned by
# ppo.DEFAULT_CONFIG, so assigning into it would leak these overrides into every
# trainer created later in the same kernel. Build a fresh 'model' dict instead.
config['model'] = dict(
    config['model'],
    dim=168,
    # Each entry is [out_channels, [kernel_h, kernel_w], stride] (RLlib model config).
    conv_filters=[[16, [8, 8], 4], [32, [4, 4], 2], [256, [21, 21], 1]],
)
agent = ppo.PPOTrainer(config, env=env)
policy = agent.get_policy()
print(policy.model.model_config)
# Keras' summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would append a stray "None" line to the output).
policy.model.base_model.summary()

2020-12-17 09:59:50,748	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-17 09:59:57,878	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-17 09:59:57,879	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-17 09:59:57,880	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=416)[0m Instructions for updating:
[2m[36m(pid=416)[0m non-resource variables are not supported in the long term
[2m[36m(pid=422)[0m Instructions for updating:
[2m[36m(pid=422)[0m non-resource variables are not supported in the long term
2020-12-17 10:00:13,913	INFO trainable.py:252 -- Trainable.setup took 16.037 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


{'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[16, [8, 8], 4], [32, [4, 4], 2], [256, [21, 21], 1]], 'conv_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, '_time_major': False, 'framestack': True, 'dim': 168, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None}
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 168, 168, 4) 0                                            
__________________________________________________________________________________________________
conv_value_1 (Conv2D)           (None, 42, 42, 16)  

In [4]:
# Model 6, fourth training segment: resume from checkpoint 11 and request 4
# more iterations. With restore=True, full_train restores the agent from
# restore_dir and leaves the existing checkpoints under checkpoint_root in place.
checkpoint_root = '/tmp/ppo/model6'
n_iter = 4
t0 = time.time()
(results_model6_part4,
 episode_data_model6_part4,
 episode_json_model6_part4) = full_train(
    checkpoint_root,
    agent,
    n_iter,
    restore=True,
    restore_dir='/tmp/ppo/model6/checkpoint_11/checkpoint-11',
)
t1 = time.time() - t0  # wall-clock seconds spent inside full_train
print("Total time for the ", n_iter, " training iterations: ", t1, sep="")

2020-12-17 10:00:35,613	INFO trainable.py:481 -- Restored on 10.10.1.128 from checkpoint: /tmp/ppo/model6/checkpoint_11/checkpoint-11
2020-12-17 10:00:35,615	INFO trainable.py:489 -- Current state after restoring: {'_iteration': 11, '_timesteps_total': None, '_time_total': 20760.872745513916, '_episodes_total': 33}


Instructions for updating:
Prefer Variable.assign which has equivalent behavior in 2.X.


[2m[36m(pid=422)[0m Instructions for updating:
[2m[36m(pid=422)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=416)[0m Instructions for updating:
[2m[36m(pid=416)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


  1 reward    nan/   nan/   nan len    nan learn_time(ms) 2372554.67 saved /tmp/ppo/model6/checkpoint_12/checkpoint-12
  2 reward -21.00/-20.50/-20.00 len 1116.75 learn_time(ms) 2201403.06 saved /tmp/ppo/model6/checkpoint_13/checkpoint-13
  3 reward -21.00/-20.12/-19.00 len 1172.00 learn_time(ms) 2259977.12 saved /tmp/ppo/model6/checkpoint_14/checkpoint-14


[2m[33m(pid=raylet)[0m F1217 12:20:37.464964   384   384 node_manager.cc:777]  Check failed: node_id != self_node_id_ Exiting because this node manager has mistakenly been marked dead by the monitor: GCS didn't receive heartbeats within timeout 30000 ms. This is likely since the machine or raylet became overloaded.
[2m[33m(pid=raylet)[0m *** Check failure stack trace: ***
[2m[33m(pid=raylet)[0m     @     0x7f66a67cbd3d  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f66a67cd1ac  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f66a67cba19  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f66a67cbc31  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f66a67802b9  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f66a6491bf4  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f66a6491dfc  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f66a65825dc  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f66a65828c6  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f66a658c54a  (unknown)

RaySystemError: System error: Broken pipe

In [3]:
# Model 6 setup (re-run): start a clean Ray session and rebuild the same PPO
# trainer (168x168 observations, three conv layers) so training can be resumed.
ray.shutdown()  # no-op if Ray is not running; otherwise drops the previous session
ray.init()
env = 'Pong-v0'
config = ppo.DEFAULT_CONFIG.copy()
# dict.copy() is shallow: config['model'] would still be the very dict owned by
# ppo.DEFAULT_CONFIG, so assigning into it would leak these overrides into every
# trainer created later in the same kernel. Build a fresh 'model' dict instead.
config['model'] = dict(
    config['model'],
    dim=168,
    # Each entry is [out_channels, [kernel_h, kernel_w], stride] (RLlib model config).
    conv_filters=[[16, [8, 8], 4], [32, [4, 4], 2], [256, [21, 21], 1]],
)
agent = ppo.PPOTrainer(config, env=env)
policy = agent.get_policy()
print(policy.model.model_config)
# Keras' summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would append a stray "None" line to the output).
policy.model.base_model.summary()

2020-12-17 15:07:39,725	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-17 15:07:46,111	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-17 15:07:46,112	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-17 15:07:46,113	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=183)[0m Instructions for updating:
[2m[36m(pid=183)[0m non-resource variables are not supported in the long term
[2m[36m(pid=181)[0m Instructions for updating:
[2m[36m(pid=181)[0m non-resource variables are not supported in the long term
2020-12-17 15:08:01,368	INFO trainable.py:252 -- Trainable.setup took 15.258 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


{'fcnet_hiddens': [256, 256], 'fcnet_activation': 'tanh', 'conv_filters': [[16, [8, 8], 4], [32, [4, 4], 2], [256, [21, 21], 1]], 'conv_activation': 'relu', 'free_log_std': False, 'no_final_linear': False, 'vf_share_layers': False, 'use_lstm': False, 'max_seq_len': 20, 'lstm_cell_size': 256, 'lstm_use_prev_action_reward': False, '_time_major': False, 'framestack': True, 'dim': 168, 'grayscale': False, 'zero_mean': True, 'custom_model': None, 'custom_model_config': {}, 'custom_action_dist': None, 'custom_preprocessor': None}
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
observations (InputLayer)       [(None, 168, 168, 4) 0                                            
__________________________________________________________________________________________________
conv_value_1 (Conv2D)           (None, 42, 42, 16)  

In [4]:
# Model 6, fifth training segment: resume from checkpoint 14 and request 3
# more iterations. With restore=True, full_train restores the agent from
# restore_dir and leaves the existing checkpoints under checkpoint_root in place.
checkpoint_root = '/tmp/ppo/model6'
n_iter = 3
t0 = time.time()
(results_model6_part5,
 episode_data_model6_part5,
 episode_json_model6_part5) = full_train(
    checkpoint_root,
    agent,
    n_iter,
    restore=True,
    restore_dir='/tmp/ppo/model6/checkpoint_14/checkpoint-14',
)
t1 = time.time() - t0  # wall-clock seconds spent inside full_train
print("Total time for the ", n_iter, " training iterations: ", t1, sep="")

2020-12-17 15:08:19,209	INFO trainable.py:481 -- Restored on 10.10.1.128 from checkpoint: /tmp/ppo/model6/checkpoint_14/checkpoint-14
2020-12-17 15:08:19,211	INFO trainable.py:489 -- Current state after restoring: {'_iteration': 14, '_timesteps_total': None, '_time_total': 27650.47457242012, '_episodes_total': 41}


Instructions for updating:
Prefer Variable.assign which has equivalent behavior in 2.X.


[2m[36m(pid=181)[0m Instructions for updating:
[2m[36m(pid=181)[0m Prefer Variable.assign which has equivalent behavior in 2.X.
[2m[36m(pid=183)[0m Instructions for updating:
[2m[36m(pid=183)[0m Prefer Variable.assign which has equivalent behavior in 2.X.


  1 reward    nan/   nan/   nan len    nan learn_time(ms) 1866830.50 saved /tmp/ppo/model6/checkpoint_15/checkpoint-15
  2 reward -21.00/-20.75/-20.00 len 1106.25 learn_time(ms) 1814145.11 saved /tmp/ppo/model6/checkpoint_16/checkpoint-16


[2m[33m(pid=raylet)[0m F1217 16:34:57.683689   152   152 node_manager.cc:777]  Check failed: node_id != self_node_id_ Exiting because this node manager has mistakenly been marked dead by the monitor: GCS didn't receive heartbeats within timeout 30000 ms. This is likely since the machine or raylet became overloaded.
[2m[33m(pid=raylet)[0m *** Check failure stack trace: ***
[2m[33m(pid=raylet)[0m     @     0x7f67d6e5fd3d  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f67d6e611ac  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f67d6e5fa19  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f67d6e5fc31  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f67d6e142b9  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f67d6b25bf4  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f67d6b25dfc  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f67d6c165dc  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f67d6c168c6  (unknown)
[2m[33m(pid=raylet)[0m     @     0x7f67d6c2054a  (unknown)

RaySystemError: System error: Broken pipe

# Rollouts

### Modelo 1

In [6]:
ray.shutdown()
t0=time.time()
!python3 rollout.py /tmp/ppo/model1/checkpoint_15/checkpoint-15 --env='Pong-v0' --run PPO --episodes 10 --out='rollout_outputs/model1.pkl' --save-info --use-shelve
t1 = time.time()-t0
print("Rollout total time: " + str(t1))

Instructions for updating:
non-resource variables are not supported in the long term
2020-12-17 23:45:09,331	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-17 23:45:12,003	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-17 23:45:12,003	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-17 23:45:12,003	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=1652)[0m Instructions for updating:
[2m[36m(pid=1652)[0m non-resource variables are not supported in the long term
[2m[36m(pid=1654)[0m Instructions for updating:
[2m[36m(pid=1654)[0m non-resource variables are not supported in the long term
2020-12-17 23:45:20,708	INFO trainable.py:481 -- Restored on 10.10.1.128 from checkpoint: /tmp/ppo/model1/checkpoint_15/che

In [7]:
ray.shutdown()
t0=time.time()
!python3 rollout.py /tmp/ppo/model2/checkpoint_15/checkpoint-15 --env='Pong-v0' --run PPO --episodes 10 --out='rollout_outputs/model2.pkl' --save-info --use-shelve
t1 = time.time()-t0
print("Rollout total time: " + str(t1))

Instructions for updating:
non-resource variables are not supported in the long term
2020-12-17 23:46:48,808	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-17 23:46:51,425	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-17 23:46:51,426	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-17 23:46:51,426	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=1919)[0m Instructions for updating:
[2m[36m(pid=1919)[0m non-resource variables are not supported in the long term
[2m[36m(pid=1921)[0m Instructions for updating:
[2m[36m(pid=1921)[0m non-resource variables are not supported in the long term
2020-12-17 23:47:00,418	INFO trainable.py:481 -- Restored on 10.10.1.128 from checkpoint: /tmp/ppo/model2/checkpoint_15/che

In [8]:
ray.shutdown()
t0=time.time()
!python3 rollout.py /tmp/ppo/model3/checkpoint_15/checkpoint-15 --env='Pong-v0' --run PPO --episodes 10 --out='rollout_outputs/model3.pkl' --save-info --use-shelve
t1 = time.time()-t0
print("Rollout total time: " + str(t1))

Instructions for updating:
non-resource variables are not supported in the long term
2020-12-17 23:48:16,396	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-17 23:48:18,988	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-17 23:48:18,988	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-17 23:48:18,989	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=2187)[0m Instructions for updating:
[2m[36m(pid=2187)[0m non-resource variables are not supported in the long term
[2m[36m(pid=2182)[0m Instructions for updating:
[2m[36m(pid=2182)[0m non-resource variables are not supported in the long term
2020-12-17 23:48:27,336	INFO trainable.py:481 -- Restored on 10.10.1.128 from checkpoint: /tmp/ppo/model3/checkpoint_15/che

In [9]:
# Evaluate the model4 checkpoint with rollout.py and time the whole run.
# Shut down Ray in this kernel before launching the external script
# (presumably so the subprocess can start its own Ray instance -- TODO confirm).
ray.shutdown()
# Wall-clock start; the measured interval covers the entire shell command below.
t0=time.time()
# IPython shell escape: runs rollout.py on checkpoint 15 of model4 with
# --env Pong-v0, --run PPO and --episodes 10, passing
# rollout_outputs/model4.pkl as --out together with --save-info and --use-shelve.
!python3 rollout.py /tmp/ppo/model4/checkpoint_15/checkpoint-15 --env='Pong-v0' --run PPO --episodes 10 --out='rollout_outputs/model4.pkl' --save-info --use-shelve
# Elapsed seconds for the full command (process start-up included, not just the episodes).
t1 = time.time()-t0
print("Rollout total time: " + str(t1))

Instructions for updating:
non-resource variables are not supported in the long term
2020-12-17 23:50:56,060	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-17 23:50:58,644	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-17 23:50:58,644	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-17 23:50:58,644	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=2469)[0m Instructions for updating:
[2m[36m(pid=2469)[0m non-resource variables are not supported in the long term
[2m[36m(pid=2471)[0m Instructions for updating:
[2m[36m(pid=2471)[0m non-resource variables are not supported in the long term
2020-12-17 23:51:07,323	INFO trainable.py:481 -- Restored on 10.10.1.128 from checkpoint: /tmp/ppo/model4/checkpoint_15/che

In [12]:
# Evaluate the model5 checkpoint with rollout.py and time the whole run.
# Shut down Ray in this kernel before launching the external script
# (presumably so the subprocess can start its own Ray instance -- TODO confirm).
ray.shutdown()
# Wall-clock start; the measured interval covers the entire shell command below.
t0=time.time()
# IPython shell escape: runs rollout.py on checkpoint 15 of model5 with
# --env Pong-v0, --run PPO and --episodes 10, passing
# rollout_outputs/model5.pkl as --out together with --save-info and --use-shelve.
!python3 rollout.py /tmp/ppo/model5/checkpoint_15/checkpoint-15 --env='Pong-v0' --run PPO --episodes 10 --out='rollout_outputs/model5.pkl' --save-info --use-shelve
# Elapsed seconds for the full command (process start-up included, not just the episodes).
t1 = time.time()-t0
print("Rollout total time: " + str(t1))

Instructions for updating:
non-resource variables are not supported in the long term
2020-12-18 00:16:23,435	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-18 00:16:26,404	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-18 00:16:26,405	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-18 00:16:26,405	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=3235)[0m Instructions for updating:
[2m[36m(pid=3235)[0m non-resource variables are not supported in the long term
[2m[36m(pid=3239)[0m Instructions for updating:
[2m[36m(pid=3239)[0m non-resource variables are not supported in the long term
2020-12-18 00:16:36,313	INFO trainable.py:481 -- Restored on 10.10.1.128 from checkpoint: /tmp/ppo/model5/checkpoint_15/che

In [11]:
# Evaluate the model6 checkpoint with rollout.py and time the whole run.
# Shut down Ray in this kernel before launching the external script
# (presumably so the subprocess can start its own Ray instance -- TODO confirm).
ray.shutdown()
# Wall-clock start; the measured interval covers the entire shell command below.
t0=time.time()
# IPython shell escape: runs rollout.py on checkpoint 16 of model6 with
# --env Pong-v0, --run PPO and --episodes 10, passing
# rollout_outputs/model6.pkl as --out together with --save-info and --use-shelve.
# NOTE(review): this cell points at checkpoint_16, whereas the neighbouring
# rollout cells use checkpoint_15 -- confirm the difference is intentional.
!python3 rollout.py /tmp/ppo/model6/checkpoint_16/checkpoint-16 --env='Pong-v0' --run PPO --episodes 10 --out='rollout_outputs/model6.pkl' --save-info --use-shelve
# Elapsed seconds for the full command (process start-up included, not just the episodes).
t1 = time.time()-t0
print("Rollout total time: " + str(t1))

Instructions for updating:
non-resource variables are not supported in the long term
2020-12-17 23:55:15,033	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-17 23:55:17,657	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-17 23:55:17,657	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-17 23:55:17,657	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=2966)[0m Instructions for updating:
[2m[36m(pid=2966)[0m non-resource variables are not supported in the long term
[2m[36m(pid=2974)[0m Instructions for updating:
[2m[36m(pid=2974)[0m non-resource variables are not supported in the long term
2020-12-17 23:55:28,315	INFO trainable.py:481 -- Restored on 10.10.1.128 from checkpoint: /tmp/ppo/model6/checkpoint_16/che

In [15]:
# Re-run the model3 rollout, this time passing --video-dir instead of --out,
# and time the whole run.
# Shut down Ray in this kernel before launching the external script
# (presumably so the subprocess can start its own Ray instance -- TODO confirm).
ray.shutdown()
# Wall-clock start; the measured interval covers the entire shell command below.
t0=time.time()
# IPython shell escape: runs rollout.py on checkpoint 15 of model3 with
# --env Pong-v0, --run PPO and --episodes 10. No --out/--save-info/--use-shelve
# flags are given here; only --video-dir is passed.
# NOTE(review): --video-dir is a hard-coded absolute path under one user's
# /mnt/c/Users/... directory, so this cell is not portable to another machine;
# consider a configurable, relative output directory.
!python3 rollout.py /tmp/ppo/model3/checkpoint_15/checkpoint-15 --env='Pong-v0' --run PPO --episodes 10 --video-dir '/mnt/c/Users/javig/Desktop/DG Mat-Inf/5 Quinto/TFG Informatica/videos_pong_model3'
# Elapsed seconds for the full command (process start-up included, not just the episodes).
t1 = time.time()-t0
print("Rollout total time: " + str(t1))

Instructions for updating:
non-resource variables are not supported in the long term
2020-12-18 00:37:15,000	INFO services.py:1090 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-18 00:37:17,979	INFO trainer.py:592 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
2020-12-18 00:37:17,980	INFO trainer.py:1064 -- `_use_trajectory_view_api` only supported for PyTorch so far! Will run w/o.
2020-12-18 00:37:17,980	INFO trainer.py:617 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=5332)[0m Instructions for updating:
[2m[36m(pid=5332)[0m non-resource variables are not supported in the long term
[2m[36m(pid=5334)[0m Instructions for updating:
[2m[36m(pid=5334)[0m non-resource variables are not supported in the long term
2020-12-18 00:37:26,910	INFO trainable.py:481 -- Restored on 10.10.1.128 from checkpoint: /tmp/ppo/model3/checkpoint_15/che