In [22]:
import torch
import numpy as np
from tqdm import tqdm
import sys
sys.path.append('../../')

from VizDoom.VizDoom_src.utils import z_normalize, inverse_z_normalize
from VizDoom.VizDoom_src.utils import env_vizdoom2
from TMaze_new.TMaze_new_src.utils import set_seed

# Environment / training configuration.
# In this notebook only 'scenario_dir' and 'scenario' are read (to build the
# scenario path inside get_returns_VizDoom).
# NOTE(review): the remaining keys presumably mirror the argument set of the
# training code this notebook was derived from — confirm before pruning.
env_args = dict(
    # --- simulator / scenario selection ---
    simulator='doom',
    scenario='custom_scenario{:003}.cfg',  # formatted with the scene index
    test_scenario='',
    screen_size='320X180',
    screen_height=64,
    screen_width=112,
    num_environments=16,
    limit_actions=True,
    scenario_dir='../../VizDoom/VizDoom_src/env/',
    test_scenario_dir='',
    show_window=False,
    resize=True,
    multimaze=True,
    num_mazes_train=16,
    num_mazes_test=1,  # NOTE(review): original comment mentioned 64 — confirm
    disable_head_bob=False,
    use_shaping=False,
    fixed_scenario=False,
    use_pipes=False,
    num_actions=0,
    # --- model ---
    hidden_size=128,
    reload_model='',
    model_checkpoint='../3dcdrl/saved_models/two_col_p1_checkpoint_0198658048.pth.tar',
    conv1_size=16,
    conv2_size=32,
    conv3_size=16,
    # --- optimisation / schedule ---
    learning_rate=0.0007,
    momentum=0.0,
    gamma=0.99,
    frame_skip=4,
    train_freq=4,
    train_report_freq=100,
    max_iters=5000000,
    eval_freq=1000,
    eval_games=50,
    model_save_rate=1000,
    eps=1e-05,
    alpha=0.99,
    use_gae=False,
    tau=0.95,
    entropy_coef=0.001,
    value_loss_coef=0.5,
    max_grad_norm=0.5,
    num_steps=128,
    num_stack=1,
    num_frames=200000000,
    use_em_loss=False,
    skip_eval=False,
    stoc_evals=False,
    # --- output / logging ---
    model_dir='',
    out_dir='./',
    log_interval=100,
    job_id=12345,
    test_name='test_000',
    use_visdom=False,
    visdom_port=8097,
    visdom_ip='http://10.0.0.1',
)

In [23]:
def get_returns_VizDoom(seed, episode_timeout, frame_skip=2):
    """Roll out one episode with a uniformly random policy and report its return.

    A fresh ``DoomEnvironmentDisappear`` is created for scenario index 0 (the
    path is built from ``env_args['scenario_dir']`` and
    ``env_args['scenario']``), stepped with random actions until the
    environment signals ``done`` or ``episode_timeout`` steps have been taken,
    and is always closed afterwards.

    Args:
        seed: Passed to ``set_seed`` and to the environment constructor.
        episode_timeout: Maximum number of ``env.step`` calls for the episode.
        frame_skip: Forwarded to the environment constructor and used to scale
            the step count in the returned length. Defaults to 2.

    Returns:
        tuple: ``(episode_return, episode_length)`` where ``episode_return`` is
        the sum of the rewards returned by ``env.step`` and ``episode_length``
        is the number of steps taken multiplied by ``frame_skip``. If
        ``episode_timeout`` is 0 or negative no step is taken and ``(0, 0)``
        is returned.
    """
    set_seed(seed)

    scene = 0
    scenario_path = env_args['scenario_dir'] + env_args['scenario'].format(scene)

    env = env_vizdoom2.DoomEnvironmentDisappear(
        scenario=scenario_path,
        show_window=False,
        use_info=True,
        use_shaping=False,  # NOTE(review): original note says shaping reward is always +1/-1 in two_towers — confirm
        frame_skip=frame_skip,
        no_backward_movement=True,
        seed=seed)

    episode_return, steps_taken = 0, 0
    try:
        # The initial observation is not needed: actions do not depend on it.
        env.reset()

        for _ in range(episode_timeout):
            # `high` is exclusive, so actions are drawn from {0, 1, 2, 3, 4}.
            act = np.random.randint(low=0, high=4+1)

            _, reward, done, _ = env.step(act)

            episode_return += reward
            steps_taken += 1

            if done:
                torch.cuda.empty_cache()
                break
    finally:
        # Close the environment even if reset/step raises, so a failed
        # rollout does not leave the environment open.
        env.close()

    # Scale by the same frame_skip that was given to the environment.
    return episode_return, steps_taken * frame_skip

# * ##############################################

In [24]:
# Seed lists for the random-policy evaluation: every entry is passed as `seed`
# to get_returns_VizDoom, one episode per seed. Each list holds 100 seeds.
# NOTE(review): the names suggest the seeds are grouped by a colour the
# environment produces for that seed (red vs. green) — confirm how the split
# was obtained; it is not derivable from this notebook.
reds = [2, 3, 6, 8, 9, 10, 11, 14, 15, 16, 17, 18, 20, 21, 25, 26, 27, 28, 29, 31, 38, 40, 41, 42, 45,
        46, 49, 50, 51, 52, 53, 54, 55, 58, 59, 60, 61, 63, 64, 67, 68, 70, 72, 73, 74, 77, 80, 82, 84, 
        86, 88, 89, 90, 91, 92, 97, 98, 99, 100, 101, 103, 106, 108, 109, 113, 115, 116, 117, 120, 
        123, 124, 125, 126, 127, 128, 129, 133, 134, 136, 139, 140, 142, 144, 145, 147, 148, 151, 152, 
        153, 154, 156, 157, 158, 159, 161, 164, 165, 170, 171, 173]

# Second seed group; disjoint from `reds` and extends to larger seed values.
greens = [0, 1, 4, 5, 7, 12, 13, 19, 22, 23, 24, 30, 32, 33, 34, 35, 36, 37, 39, 43, 44, 47, 48, 56, 57,
          62, 65, 66, 69, 71, 75, 76, 78, 79, 81, 83, 85, 87, 93, 94, 95, 96, 102, 104, 105, 107, 110, 111, 
          112, 114, 118, 119, 121, 122, 130, 131, 132, 135, 137, 138, 141, 143, 146, 149, 150, 155, 160, 162, 
          163, 166, 167, 168, 169, 172, 175, 176, 177, 182, 183, 187, 190, 192, 193, 195, 199, 204, 206, 208, 
          209, 210, 212, 215, 216, 218, 219, 220, 221, 223, 224, 225]

In [25]:
# Maximum number of environment steps per episode.
episode_timeout = 4200
# Not consumed by the rollouts below (each rollout is seeded from its own list
# entry); kept so the name stays defined for any other cell that reads it.
SEED = 1


def _rollout_over_seeds(seeds, episode_timeout):
    """Run one random-policy episode per seed and collect the results.

    Args:
        seeds: Iterable of seeds, each passed as ``seed`` to
            ``get_returns_VizDoom``.
        episode_timeout: Forwarded unchanged to ``get_returns_VizDoom``.

    Returns:
        tuple: ``(returns, lengths)`` — two lists in the same order as
        ``seeds`` holding the first and second element of each
        ``get_returns_VizDoom`` result.
    """
    returns, lengths = [], []
    for seed in tqdm(seeds):
        episode_return, episode_length = get_returns_VizDoom(seed=seed, episode_timeout=episode_timeout)
        returns.append(episode_return)
        lengths.append(episode_length)
    return returns, lengths


reds_returns, reds_lengths = _rollout_over_seeds(reds, episode_timeout)
greens_returns, greens_lengths = _rollout_over_seeds(greens, episode_timeout)

100%|██████████| 100/100 [00:58<00:00,  1.72it/s]
100%|██████████| 100/100 [00:58<00:00,  1.70it/s]


In [29]:
# Report the mean return and mean length for the pooled episodes and for each
# seed group, separated by a 100-character rule.
summary_groups = (
    ("Total", reds_returns + greens_returns, reds_lengths + greens_lengths),
    ("Red", reds_returns, reds_lengths),
    ("Green", greens_returns, greens_lengths),
)

for position, (label, group_returns, group_lengths) in enumerate(summary_groups):
    if position > 0:
        # Blank line, rule, blank line between consecutive groups.
        print()
        print("-" * 100)
        print()
    print(label + " returns:", np.mean(group_returns))
    print(label + " lengths:", np.mean(group_lengths))

Total returns: 4.817399999999981
Total lengths: 404.75
----------------------------------------------------------------------------------------------------
Red returns: 4.657599999999982
Red lengths: 395.78
----------------------------------------------------------------------------------------------------
Green returns: 4.977199999999977
Green lengths: 413.72
