In [3]:
!pip install datasets
!pip install trl

Collecting trl
  Using cached trl-0.9.4-py3-none-any.whl (226 kB)
Installing collected packages: trl
Successfully installed trl-0.9.4


In [5]:
!pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 5.1 MB/s eta 0:00:01
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.2.0


In [6]:
!pip install peft

Collecting peft
  Downloading peft-0.11.1-py3-none-any.whl (251 kB)
[K     |████████████████████████████████| 251 kB 5.1 MB/s eta 0:00:01
[?25hCollecting accelerate>=0.21.0
  Using cached accelerate-0.31.0-py3-none-any.whl (309 kB)
Collecting torch>=1.13.0
  Using cached torch-2.3.1-cp38-cp38-manylinux1_x86_64.whl (779.1 MB)
Collecting nvidia-cufft-cu12==11.0.2.54; platform_system == "Linux" and platform_machine == "x86_64"
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting triton==2.3.1; platform_system == "Linux" and platform_machine == "x86_64" and python_version < "3.12"
  Using cached triton-2.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (168.0 MB)
Collecting nvidia-nccl-cu12==2.20.5; platform_system == "Linux" and platform_machine == "x86_64"
  Using cached nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl (176.2 MB)
Installing collected packages: nvidia-cufft-cu12, triton, nvidia-nccl-cu12, torch, accelerate,

In [7]:
!pip install tensorboardX

Collecting tensorboardX
  Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 2.6 MB/s ta 0:00:011
Collecting protobuf>=3.20
  Downloading protobuf-5.27.2-cp38-abi3-manylinux2014_x86_64.whl (309 kB)
[K     |████████████████████████████████| 309 kB 40.8 MB/s eta 0:00:01
Installing collected packages: protobuf, tensorboardX
Successfully installed protobuf-5.27.2 tensorboardX-2.6.2.2


In [1]:
from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, AutoModelForSeq2SeqLMWithValueHead
from datasets import load_dataset
from transformers import AutoTokenizer
import pandas as pd

### Configuration

In [2]:
ppo_config = PPOConfig(
    model_name="SFT_Model_T5-Small",
    learning_rate=1.41e-5,
    # log_with="wandb",
)

In [3]:
sent_kwargs = {"return_all_scores": True, "function_to_apply": "none", "batch_size": 1}

### Load Dataset

In [4]:
ppo_dataset = load_dataset("gen_sft_dataset.py", trust_remote_code=True) # gen_ppo_dataset
ppo_dataset

DatasetDict({
    train: Dataset({
        features: ['weibo', 'resp'],
        num_rows: 25140
    })
    validation: Dataset({
        features: ['weibo', 'resp'],
        num_rows: 8670
    })
})

In [5]:
ppo_dataset["train"]["resp"][1]

'不用呼吸机光速下班'

### Load Pre-trained SFT Model

In [7]:
# model = AutoModelForCausalLMWithValueHead.from_pretrained("./SFT_Model_T5-Small")
# ref_model = AutoModelForCausalLMWithValueHead.from_pretrained("./SFT_Model_T5-Small")
# tokenizer = AutoTokenizer.from_pretrained("./SFT_Model_T5-Small")

model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained("./SFT_Model_T5-Small/checkpoint-15720")
ref_model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained("./SFT_Model_T5-Small/checkpoint-15720")
tokenizer = AutoTokenizer.from_pretrained("./SFT_Model_T5-Small/checkpoint-15720")

tokenizer.pad_token = tokenizer.eos_token

# from transformers import AutoModelForSeq2SeqLM, PreTrainedModel

# base_model = PreTrainedModel.from_pretrained("./SFT_Model_ChatGLM/checkpoint-4000", trust_remote_code = True)
# # base_model = PreTrainedModelWrapper.from_pretrained(base_model)
# model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(base_model)
# ref_model = AutoModelForSeq2SeqLMWithValueHead.from_pretrained(base_model)
# tokenizer = AutoTokenizer.from_pretrained("./SFT_Model_ChatGLM/checkpoint-4000")

# tokenizer.pad_token = tokenizer.eos_token

In [8]:
def tokenize(sample):
    # sample["input_ids"] = tokenizer.encode(sample["weibo"])
    sample = tokenizer(sample["weibo"])
    sample["query"] = tokenizer.decode(sample["input_ids"])
    return sample

ppo_train_dataset = ppo_dataset["train"]
ppo_train_dataset = ppo_train_dataset.map(tokenize) # Bug Fixed (Not In-place)
ppo_train_dataset.set_format(type="torch") # Important! Turn into tensors 

In [9]:
ppo_train_dataset

Dataset({
    features: ['weibo', 'resp', 'input_ids', 'attention_mask', 'query'],
    num_rows: 25140
})

In [10]:
print(ppo_train_dataset[0]['input_ids'])

tensor([ 1713,   518,  9697,     2,  4663,     3,     2,   536, 13572,     2,
        22773,     2,  2596,  3486,     6,  2596,  4278,     6,  2596,  6832,
            1])


### Initialize PPOTrainer 

In [11]:
# from transformers import DataCollatorForSeq2Seq

# data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

def collator(data):
    return dict((key, [d[key] for d in data]) for key in data[0])

In [12]:
ppo_trainer = PPOTrainer(ppo_config, model, ref_model, tokenizer, dataset=ppo_train_dataset, data_collator=collator)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


### Load Tuned Reward Model Trained in RM Phase

In [13]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# rm_model = AutoModelForSequenceClassification.from_pretrained("./RM_model/checkpoint-1560")
# rm_tokenizer = AutoTokenizer.from_pretrained("./RM_model/checkpoint-1560")

rm_model = AutoModelForSequenceClassification.from_pretrained("./RM_model7/checkpoint-11700")
rm_tokenizer = AutoTokenizer.from_pretrained("./RM_model7/checkpoint-11700")
rm_tokenizer.pad_token = tokenizer.eos_token
# GPT
rm_tokenizer.padding_side = "right"  # This ensures consistent padding
rm_model.config.pad_token_id = rm_tokenizer.pad_token_id

sentiment_pipe = pipeline("sentiment-analysis", model=rm_model, tokenizer=rm_tokenizer, padding=True, truncation=True, max_length=32)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


### Training Loop for Model Optimization

In [15]:
from tqdm import tqdm

In [16]:
ppo_trainer.dataloader

<accelerate.data_loader.DataLoaderShard at 0x7fa1b2447f10>

In [17]:
print(ppo_train_dataset[:5])

{'weibo': ['#WTT冠军赛布达佩斯站#\xa0男单1/4决赛林高远3-0宇田幸矢11-3，11-4，11-7', '#WTT冠军赛布达佩斯站#\xa0男单1/4决赛林高远3-0宇田幸矢11-3，11-4，11-7', '#WTT冠军赛布达佩斯站#\xa0男单1/4决赛林高远3-0宇田幸矢11-3，11-4，11-7', '#WTT冠军赛布达佩斯站#\xa0男单1/4决赛林高远3-0宇田幸矢11-3，11-4，11-7', '#WTT冠军赛布达佩斯站#\xa0男单1/4决赛林高远3-0宇田幸矢11-3，11-4，11-7'], 'resp': ['别把我帅死林高远一直这么坚定下去吧！！！！别有太大压力，战胜自己就够了！！！！我永远相信小林将军', '不用呼吸机光速下班', '“人家肯定拼你的”，“对，所以说这个是被拼的时候，自己要扛住压力这个事。”今天发球落点速度节奏变化都好棒，让大家欣赏一下速度流明天的半决赛将迎战本站第一个右手，放平心态，你谁都不怕！', '“坚定都溢出来了”“怎么可以防的如此自如啊”“非常自信/冷静啊”…没有不可治愈的伤痛，没有不可结束的沉沦。所有失去的，会以另一种方式归来。林高远，好样的，这场打的真漂亮，继续加油，愈战愈勇！', '“全方位的碾压让对方看不到一丝希望！”林高远牛逼！是意气风发的小林将军林高远继续加油！！！！'], 'input_ids': tensor([[ 1713,   518,  9697,     2,  4663,     3,     2,   536, 13572,     2,
         22773,     2,  2596,  3486,     6,  2596,  4278,     6,  2596,  6832,
             1],
        [ 1713,   518,  9697,     2,  4663,     3,     2,   536, 13572,     2,
         22773,     2,  2596,  3486,     6,  2596,  4278,     6,  2596,  6832,
             1],
        [ 1713,   518,  9697,     2,  4663,   

In [18]:
import torch

In [19]:
# from trl.trainer import PPOTrainer

# class DebugPPOTrainer(PPOTrainer):
#     def compute_rewards(self, scores, logprobs, ref_logprobs, masks):
#         print(f"Mask: {masks}")  # Print the mask tensor
#         return super().compute_rewards(scores, logprobs, ref_logprobs, masks)

In [20]:
# Generation Settings
generation_kwargs = {
    "min_length": -1,
    "top_k": 0.0,
    "top_p": 0.7,
    "temperature": 0.95, # This one is set implicitly in SFTTrainer as well.
    "do_sample": True,
    "pad_token_id": tokenizer.eos_token_id,
}


for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)): # Need the specific type of data_collator
    print(f"\nStart Epoch {epoch}")
    # print(batch)
    query_tensors = batch["input_ids"]
    attention_masks = batch["attention_mask"]
    # attention_masks = [query_tensors != tokenizer.pad_token_id]
    # print(attention_masks)

    #### Get response from gpt2
    response_tensors = []
    for i, query in enumerate(query_tensors):
        # gen_len = output_length_sampler()
        generation_kwargs["max_new_tokens"] = 32
        # print(attention_masks[i])
        response = ppo_trainer.generate(query, **generation_kwargs, attention_mask=attention_masks[i].unsqueeze(0))
        response_tensors.append(response.squeeze()[-32:])
    batch["response"] = [tokenizer.decode(r.squeeze()) for r in response_tensors]

    #### Compute sentiment score
    texts = [q + r for q, r in zip(batch["query"], batch["response"])]
    pipe_outputs = sentiment_pipe(texts, **sent_kwargs)
    rewards = [torch.tensor(output[1]["score"]) for output in pipe_outputs]
    print(f"Rewards: {rewards}")

    #### Run PPO step
    stats = ppo_trainer.step(query_tensors, response_tensors, rewards)
    ppo_trainer.log_stats(stats, batch, rewards)

0it [00:00, ?it/s]


Start Epoch 0




Rewards: [tensor(-0.3165), tensor(-0.0251), tensor(-1.0098), tensor(-0.1059), tensor(-0.9854), tensor(-0.6910), tensor(-0.6012), tensor(-0.4122), tensor(-0.8500), tensor(-0.5225), tensor(-0.8987), tensor(-1.0042), tensor(-0.6118), tensor(-0.3549), tensor(-0.2822), tensor(-0.7444), tensor(-1.4790), tensor(-1.2325), tensor(-0.6816), tensor(-0.1488), tensor(0.3995), tensor(-1.4229), tensor(-0.2256), tensor(0.1444), tensor(-0.3305), tensor(-0.3921), tensor(-0.4582), tensor(-0.5637), tensor(-0.9589), tensor(-0.1358), tensor(-1.1236), tensor(-0.9890), tensor(-0.4730), tensor(0.6373), tensor(-1.1058), tensor(-0.9395), tensor(-0.0384), tensor(0.1405), tensor(-0.6538), tensor(-1.0177), tensor(-0.3271), tensor(-0.9890), tensor(-0.6431), tensor(-0.3343), tensor(-0.2904), tensor(-1.2058), tensor(0.0029), tensor(-1.1483), tensor(-0.2259), tensor(0.2283), tensor(-0.9854), tensor(0.0175), tensor(-0.5428), tensor(0.1127), tensor(-1.1798), tensor(-0.0141), tensor(-1.2325), tensor(-0.8022), tensor(-1.03

1it [00:46, 46.10s/it]


Start Epoch 1
Rewards: [tensor(-0.6130), tensor(-0.8490), tensor(-1.0044), tensor(-0.5277), tensor(-0.5165), tensor(-0.9915), tensor(-1.0030), tensor(-0.2005), tensor(-1.2325), tensor(-0.1882), tensor(-0.7852), tensor(0.2021), tensor(-1.3268), tensor(0.0581), tensor(-0.6325), tensor(-0.2126), tensor(-1.0612), tensor(0.0543), tensor(-0.6451), tensor(-0.0117), tensor(-0.0355), tensor(-0.0835), tensor(-0.2126), tensor(-0.5469), tensor(-0.2291), tensor(-0.4185), tensor(0.5631), tensor(-1.2325), tensor(-0.1249), tensor(-0.5305), tensor(0.0432), tensor(-0.2311), tensor(0.1127), tensor(-1.3145), tensor(-0.0920), tensor(-0.2855), tensor(-1.2644), tensor(-0.2368), tensor(-1.1339), tensor(-0.8307), tensor(-0.0927), tensor(-1.4021), tensor(-1.0185), tensor(-0.1013), tensor(-0.0243), tensor(-0.6792), tensor(-0.2346), tensor(-0.1778), tensor(-0.9089), tensor(-0.7761), tensor(-0.7942), tensor(-0.6385), tensor(-0.0499), tensor(0.3271), tensor(0.3457), tensor(-0.3332), tensor(-1.1825), tensor(-0.3306

2it [01:36, 48.80s/it]


Start Epoch 2
Rewards: [tensor(-0.6429), tensor(-1.1493), tensor(-0.0258), tensor(-0.9547), tensor(-0.3917), tensor(-1.0955), tensor(-0.4016), tensor(-0.5985), tensor(-1.2470), tensor(-1.2146), tensor(-0.3320), tensor(-0.9650), tensor(-0.3971), tensor(1.2242), tensor(-0.2541), tensor(-0.9666), tensor(-0.6593), tensor(-0.7716), tensor(-0.1936), tensor(-1.2402), tensor(-0.0418), tensor(-0.2149), tensor(0.0523), tensor(-0.9583), tensor(-0.1534), tensor(-0.5244), tensor(-0.6603), tensor(-0.0943), tensor(-1.1666), tensor(-0.8113), tensor(-1.1600), tensor(-1.2787), tensor(-0.8386), tensor(-0.7414), tensor(-0.9111), tensor(-0.3070), tensor(-0.3192), tensor(-0.7908), tensor(-0.6233), tensor(-1.1388), tensor(0.5019), tensor(-0.7153), tensor(-0.0371), tensor(-1.3349), tensor(-0.9186), tensor(-1.2070), tensor(-1.0390), tensor(-0.3652), tensor(-0.9059), tensor(-0.4891), tensor(0.2074), tensor(-0.3578), tensor(0.3912), tensor(-0.3576), tensor(-0.9291), tensor(-1.3120), tensor(-0.0243), tensor(-1.0

3it [02:23, 47.77s/it]


Start Epoch 3
Rewards: [tensor(-1.3268), tensor(-0.4140), tensor(-0.6737), tensor(0.0132), tensor(-1.2315), tensor(-0.1784), tensor(-0.7028), tensor(-1.2325), tensor(-0.1229), tensor(-0.4285), tensor(-1.3306), tensor(-1.1106), tensor(-0.5547), tensor(0.0661), tensor(-0.3660), tensor(-0.0574), tensor(-0.1460), tensor(-0.8296), tensor(-0.5118), tensor(-0.9382), tensor(-0.0231), tensor(-0.0750), tensor(0.2409), tensor(-1.2325), tensor(-0.8676), tensor(-0.4138), tensor(-0.1657), tensor(-0.9791), tensor(-0.9791), tensor(-0.0821), tensor(-0.6172), tensor(-0.4076), tensor(-0.5556), tensor(-1.2142), tensor(-0.3438), tensor(0.1387), tensor(-0.6489), tensor(-1.2146), tensor(-0.6464), tensor(-0.1507), tensor(-1.1829), tensor(-0.1483), tensor(-0.2931), tensor(-0.2021), tensor(-1.0124), tensor(-0.3943), tensor(-0.5428), tensor(-0.7562), tensor(0.4885), tensor(0.2303), tensor(-0.8715), tensor(-1.1183), tensor(-0.7953), tensor(-0.7431), tensor(0.7138), tensor(-0.8122), tensor(-0.4083), tensor(-1.131

4it [03:09, 47.05s/it]


Start Epoch 4
Rewards: [tensor(-0.8921), tensor(-0.8725), tensor(-0.6058), tensor(-0.8502), tensor(-0.3605), tensor(-1.1931), tensor(-0.5953), tensor(0.2520), tensor(0.2603), tensor(-0.8278), tensor(-0.6240), tensor(-0.8820), tensor(0.0425), tensor(-0.4188), tensor(-0.3332), tensor(-0.7956), tensor(-1.2053), tensor(-0.5652), tensor(-0.9890), tensor(-0.8766), tensor(-0.9628), tensor(-1.1515), tensor(-1.0984), tensor(-0.2776), tensor(-0.4101), tensor(-0.1633), tensor(0.1405), tensor(0.4166), tensor(-0.1601), tensor(-1.2087), tensor(-0.0901), tensor(-0.5445), tensor(-1.1417), tensor(-1.2303), tensor(-0.6429), tensor(-0.6081), tensor(-0.5368), tensor(0.3074), tensor(-0.4830), tensor(0.2282), tensor(-1.4462), tensor(-0.4511), tensor(-0.5549), tensor(-0.3943), tensor(-0.7716), tensor(0.1763), tensor(-0.2902), tensor(0.0540), tensor(-1.2142), tensor(0.2522), tensor(-0.7862), tensor(0.2474), tensor(-0.5713), tensor(-0.6385), tensor(-1.2146), tensor(-0.9821), tensor(-0.1827), tensor(-0.6809), 

5it [04:00, 48.71s/it]


Start Epoch 5
Rewards: [tensor(-0.1715), tensor(-0.9818), tensor(-1.0884), tensor(-1.3970), tensor(-0.5839), tensor(-0.3380), tensor(-0.7862), tensor(-0.7629), tensor(-0.9154), tensor(-0.1932), tensor(-0.7344), tensor(-0.3075), tensor(-0.4155), tensor(-0.8296), tensor(-0.4476), tensor(-0.8917), tensor(-1.0536), tensor(-0.5726), tensor(0.0407), tensor(0.1909), tensor(-0.8356), tensor(-0.6464), tensor(-0.5428), tensor(-0.6048), tensor(-0.3654), tensor(-1.0164), tensor(-0.2425), tensor(-1.0340), tensor(-0.8386), tensor(-1.2866), tensor(-0.2205), tensor(0.1077), tensor(-0.1507), tensor(-0.1990), tensor(-1.1915), tensor(-0.2929), tensor(-1.1741), tensor(-0.2686), tensor(-0.0499), tensor(-1.2367), tensor(0.3102), tensor(-1.2259), tensor(-1.1741), tensor(-0.6952), tensor(-1.2203), tensor(0.0861), tensor(-0.9759), tensor(-0.5138), tensor(-0.8201), tensor(-0.8718), tensor(-0.6601), tensor(-1.3268), tensor(-0.5272), tensor(-0.1311), tensor(-0.5413), tensor(-0.0220), tensor(-0.8220), tensor(-1.0

6it [04:50, 48.86s/it]


Start Epoch 6
Rewards: [tensor(-0.3603), tensor(-1.1949), tensor(-0.2911), tensor(-0.8601), tensor(-0.3141), tensor(-0.0220), tensor(-0.3018), tensor(-1.0990), tensor(-0.1599), tensor(-0.4511), tensor(-1.2644), tensor(-1.0029), tensor(-1.1950), tensor(-1.2612), tensor(-0.6587), tensor(-1.3929), tensor(0.0362), tensor(-0.7052), tensor(-0.9716), tensor(-1.0939), tensor(-0.2368), tensor(-1.2784), tensor(-0.1109), tensor(-1.0721), tensor(0.1293), tensor(-0.1937), tensor(-0.5839), tensor(-0.4579), tensor(-1.1355), tensor(-0.6747), tensor(-0.5584), tensor(-0.6366), tensor(-0.8412), tensor(0.1674), tensor(-0.1663), tensor(-0.6426), tensor(-0.0247), tensor(-1.2142), tensor(-0.2566), tensor(-0.7445), tensor(0.0268), tensor(-1.1864), tensor(-1.1945), tensor(-1.0567), tensor(-0.3289), tensor(0.3045), tensor(-1.2365), tensor(-0.2990), tensor(-0.5823), tensor(-0.5877), tensor(-0.2390), tensor(-1.2196), tensor(-1.3268), tensor(-0.7854), tensor(-0.4629), tensor(-0.9863), tensor(-0.2291), tensor(-0.2

7it [05:33, 47.14s/it]


Start Epoch 7
Rewards: [tensor(-0.8831), tensor(-0.0815), tensor(-0.0499), tensor(-0.6409), tensor(-0.6317), tensor(-1.1917), tensor(-1.2601), tensor(-1.0276), tensor(-0.3558), tensor(-1.0674), tensor(-0.2777), tensor(-0.3438), tensor(-0.5428), tensor(-0.3708), tensor(-0.6863), tensor(-0.8296), tensor(-1.2144), tensor(-0.6863), tensor(-0.0889), tensor(-0.8141), tensor(-0.2901), tensor(-0.9643), tensor(-0.8992), tensor(-0.8339), tensor(-0.2615), tensor(-1.0016), tensor(-0.2221), tensor(0.2803), tensor(-0.5841), tensor(-0.5411), tensor(-0.4541), tensor(-1.2325), tensor(-0.1836), tensor(-0.8535), tensor(-0.9077), tensor(-1.0823), tensor(-0.5917), tensor(-0.4596), tensor(-1.2046), tensor(-0.7660), tensor(-1.1950), tensor(-0.3364), tensor(-1.1598), tensor(-1.1171), tensor(-1.2787), tensor(-1.2784), tensor(-1.0565), tensor(-0.5411), tensor(-1.2705), tensor(-0.4621), tensor(-0.7426), tensor(-0.9283), tensor(-0.9604), tensor(-1.1950), tensor(-1.1829), tensor(-0.4069), tensor(-0.5615), tensor(

8it [06:18, 46.41s/it]


Start Epoch 8
Rewards: [tensor(-0.5368), tensor(-0.1827), tensor(-0.8579), tensor(-0.4915), tensor(0.0877), tensor(-0.1847), tensor(-0.4720), tensor(-0.7295), tensor(-0.8894), tensor(-0.8309), tensor(-0.3936), tensor(-0.0943), tensor(-1.1278), tensor(-0.2858), tensor(0.2409), tensor(-0.8525), tensor(-0.1244), tensor(-1.2055), tensor(-1.1414), tensor(-0.3450), tensor(-0.9301), tensor(-0.5266), tensor(-0.9383), tensor(0.0581), tensor(0.0260), tensor(-0.3135), tensor(-0.3660), tensor(-0.9854), tensor(-0.6587), tensor(-0.6617), tensor(-0.9771), tensor(-0.0406), tensor(-0.7640), tensor(-1.3268), tensor(0.5031), tensor(-0.7279), tensor(-0.8093), tensor(-1.2142), tensor(0.0581), tensor(-0.6385), tensor(-0.6593), tensor(-0.3997), tensor(-1.0739), tensor(-0.1601), tensor(-1.0244), tensor(-0.3076), tensor(-1.0099), tensor(-1.1237), tensor(-0.1657), tensor(-0.7819), tensor(-1.0128), tensor(-0.4854), tensor(-0.0220), tensor(0.2496), tensor(-0.7009), tensor(-0.4501), tensor(-1.1769), tensor(-0.434

9it [07:11, 48.48s/it]


Start Epoch 9
Rewards: [tensor(-0.6617), tensor(-0.5221), tensor(-0.5462), tensor(-0.0429), tensor(-0.9152), tensor(-1.1551), tensor(-0.2071), tensor(-0.2686), tensor(-1.0975), tensor(-0.8975), tensor(-0.2251), tensor(-0.7111), tensor(-0.5221), tensor(-0.9628), tensor(-0.8753), tensor(0.1293), tensor(-0.6256), tensor(-0.6163), tensor(-1.0911), tensor(-0.2383), tensor(-1.2325), tensor(-1.0520), tensor(-0.4729), tensor(-0.9007), tensor(-1.2055), tensor(-0.3379), tensor(-1.0395), tensor(-0.8319), tensor(-0.3014), tensor(-0.5938), tensor(-0.2155), tensor(-1.2601), tensor(-0.1133), tensor(0.1070), tensor(-0.2646), tensor(-0.4376), tensor(-0.9651), tensor(-0.7426), tensor(-1.2165), tensor(-0.2167), tensor(0.2662), tensor(-0.8525), tensor(-0.5710), tensor(-1.1694), tensor(-0.3181), tensor(-1.2846), tensor(-0.2800), tensor(-0.5111), tensor(-0.9293), tensor(-0.4541), tensor(-0.9257), tensor(-0.2356), tensor(-0.8376), tensor(-0.4830), tensor(-0.2798), tensor(-0.5413), tensor(-0.5560), tensor(-0

10it [07:57, 47.78s/it]


Start Epoch 10
Rewards: [tensor(-0.5305), tensor(-0.0751), tensor(-1.0099), tensor(-1.1632), tensor(-0.0973), tensor(-0.2291), tensor(-0.5533), tensor(0.0432), tensor(-0.1177), tensor(-0.3818), tensor(-0.5886), tensor(-0.9154), tensor(-0.7840), tensor(0.1001), tensor(-0.5126), tensor(-0.8588), tensor(-0.2566), tensor(-0.8818), tensor(0.0857), tensor(-0.1530), tensor(-0.2082), tensor(-0.3018), tensor(-0.2794), tensor(-0.1285), tensor(-0.0824), tensor(-0.7430), tensor(-0.2884), tensor(-1.3709), tensor(-0.4294), tensor(-0.2409), tensor(-0.6728), tensor(-1.0984), tensor(-1.3571), tensor(-0.5241), tensor(-0.5241), tensor(-0.8312), tensor(0.0242), tensor(-0.6872), tensor(-0.7111), tensor(-0.1208), tensor(-0.3232), tensor(-0.7414), tensor(-1.2398), tensor(-0.5161), tensor(0.1902), tensor(-0.5044), tensor(-0.4579), tensor(-1.0564), tensor(0.2021), tensor(-1.3268), tensor(-0.9026), tensor(-0.8705), tensor(-0.0632), tensor(-0.7928), tensor(-1.1047), tensor(-0.5737), tensor(-1.2912), tensor(-1.2

11it [08:44, 47.43s/it]


Start Epoch 11
Rewards: [tensor(-0.5524), tensor(-0.2149), tensor(-0.3345), tensor(0.0540), tensor(0.3971), tensor(-1.1352), tensor(-0.8890), tensor(-0.5266), tensor(-1.1882), tensor(-0.7862), tensor(-0.8356), tensor(-0.9771), tensor(0.0123), tensor(-0.9730), tensor(-1.0164), tensor(-0.1960), tensor(-1.2164), tensor(-0.2368), tensor(0.3045), tensor(-0.7414), tensor(-1.1389), tensor(-0.6325), tensor(-0.9890), tensor(-0.7501), tensor(-0.5514), tensor(-0.1059), tensor(-0.5050), tensor(-0.1456), tensor(-0.2886), tensor(-0.8363), tensor(-0.4596), tensor(0.0364), tensor(-0.4527), tensor(-1.0401), tensor(-0.0889), tensor(0.3604), tensor(-0.2883), tensor(-0.6385), tensor(-0.4730), tensor(-1.2784), tensor(-0.9360), tensor(-0.3837), tensor(-1.1769), tensor(-0.7282), tensor(-0.1893), tensor(0.0362), tensor(-0.2786), tensor(-0.7885), tensor(-0.0853), tensor(-0.9344), tensor(0.3402), tensor(-0.5562), tensor(-0.5744), tensor(-0.7723), tensor(-0.3070), tensor(0.0485), tensor(0.1708), tensor(-0.9550)

12it [09:31, 47.30s/it]


Start Epoch 12
Rewards: [tensor(-0.9390), tensor(-0.7669), tensor(0.1995), tensor(-0.1936), tensor(-0.4541), tensor(-1.0958), tensor(-0.9186), tensor(-0.6659), tensor(-0.9895), tensor(-0.2800), tensor(-1.0730), tensor(0.2187), tensor(-0.0854), tensor(-1.2504), tensor(-0.2860), tensor(-0.2537), tensor(-0.2279), tensor(0.2662), tensor(-0.9761), tensor(-1.3968), tensor(0.3636), tensor(-0.4274), tensor(-0.6520), tensor(-0.9333), tensor(-1.2849), tensor(-0.4244), tensor(-0.7912), tensor(-0.1625), tensor(-0.1626), tensor(-0.0730), tensor(-1.1722), tensor(-0.1657), tensor(-0.4914), tensor(-1.0557), tensor(-0.8609), tensor(0.0221), tensor(-1.1309), tensor(-0.1530), tensor(-0.7362), tensor(-1.2325), tensor(-0.3549), tensor(-0.6160), tensor(-0.4954), tensor(-0.3723), tensor(-1.4031), tensor(-0.7897), tensor(-0.3201), tensor(-0.2285), tensor(-0.5867), tensor(-1.4515), tensor(-1.1143), tensor(0.0971), tensor(0.0174), tensor(-0.6385), tensor(-0.7047), tensor(-0.1299), tensor(0.2520), tensor(-0.829

13it [10:24, 49.07s/it]


Start Epoch 13
Rewards: [tensor(-0.7198), tensor(-1.2142), tensor(-0.8022), tensor(-1.2228), tensor(-1.0200), tensor(-0.5276), tensor(-0.9798), tensor(-0.0016), tensor(-0.6314), tensor(-0.9851), tensor(-0.6131), tensor(-0.2219), tensor(0.0280), tensor(-0.2353), tensor(-0.0985), tensor(-0.6878), tensor(-0.3350), tensor(-0.1775), tensor(-0.5855), tensor(-0.5109), tensor(-0.5240), tensor(0.3635), tensor(-0.5420), tensor(-0.2743), tensor(-1.2146), tensor(-0.3867), tensor(-1.0975), tensor(-0.2406), tensor(0.0432), tensor(-0.5980), tensor(-1.2325), tensor(-0.6015), tensor(0.3292), tensor(-0.3376), tensor(-0.1646), tensor(-0.3892), tensor(-0.9966), tensor(-1.2152), tensor(-0.6910), tensor(0.2409), tensor(-0.5244), tensor(-1.0295), tensor(-1.0043), tensor(-0.8750), tensor(-0.2028), tensor(0.0475), tensor(-0.5490), tensor(-0.8966), tensor(-1.0164), tensor(-0.3875), tensor(-0.0864), tensor(-1.0642), tensor(-1.2325), tensor(0.5933), tensor(-1.2053), tensor(-1.1171), tensor(-1.0651), tensor(-1.24

14it [11:06, 46.92s/it]


Start Epoch 14
Rewards: [tensor(-0.6593), tensor(-1.1820), tensor(-1.0501), tensor(-1.1883), tensor(-1.0501), tensor(-0.7444), tensor(-1.2325), tensor(-1.0294), tensor(-1.1708), tensor(-1.1515), tensor(-1.0128), tensor(-0.4796), tensor(-0.5311), tensor(-0.8367), tensor(-0.6385), tensor(-1.2233), tensor(-0.3350), tensor(-0.5125), tensor(-1.2196), tensor(-0.3345), tensor(-0.4160), tensor(-0.1738), tensor(-1.2325), tensor(-0.4274), tensor(-1.1106), tensor(-0.7018), tensor(-0.9820), tensor(0.0189), tensor(-0.1316), tensor(0.0610), tensor(-0.1081), tensor(0.1493), tensor(-0.0776), tensor(-0.6690), tensor(-1.2846), tensor(-0.6236), tensor(-1.3953), tensor(-0.8711), tensor(-0.0832), tensor(-0.8736), tensor(-0.5360), tensor(-0.0231), tensor(-0.3765), tensor(-0.2901), tensor(-0.6050), tensor(-1.0784), tensor(-1.2912), tensor(-0.7018), tensor(-1.2233), tensor(-1.2562), tensor(-0.5926), tensor(0.0115), tensor(-0.3654), tensor(0.0332), tensor(-1.2784), tensor(0.4175), tensor(-0.5547), tensor(-0.6

15it [11:51, 46.28s/it]


Start Epoch 15
Rewards: [tensor(-1.3268), tensor(-1.1993), tensor(-0.0117), tensor(-0.5246), tensor(-0.0855), tensor(-1.0984), tensor(-1.1544), tensor(-0.0428), tensor(-1.2457), tensor(-1.2647), tensor(-0.2499), tensor(-0.4461), tensor(-0.6645), tensor(-1.0256), tensor(-0.1329), tensor(0.0260), tensor(0.1503), tensor(-1.0666), tensor(-0.0903), tensor(-0.7809), tensor(-0.9791), tensor(-0.3305), tensor(-0.2954), tensor(-1.0326), tensor(-0.7414), tensor(-1.2142), tensor(-0.8313), tensor(-0.0952), tensor(-0.8386), tensor(0.0462), tensor(-0.9774), tensor(-0.6364), tensor(-0.6388), tensor(-0.5097), tensor(-0.1751), tensor(-0.9895), tensor(-0.8929), tensor(-1.2160), tensor(-1.1247), tensor(-0.7806), tensor(-0.0824), tensor(-0.2911), tensor(-0.9258), tensor(-0.3666), tensor(-0.1177), tensor(-0.7629), tensor(-0.9382), tensor(-0.7953), tensor(-0.2053), tensor(-0.8126), tensor(-0.9648), tensor(-1.0164), tensor(-0.0068), tensor(-0.6693), tensor(-1.1493), tensor(-0.5244), tensor(-0.5420), tensor(-

16it [12:50, 50.21s/it]


Start Epoch 16
Rewards: [tensor(-0.8848), tensor(-0.8412), tensor(0.0793), tensor(-0.8363), tensor(-0.3635), tensor(-1.2065), tensor(-0.6062), tensor(-0.2826), tensor(-1.0265), tensor(-1.1762), tensor(-0.2284), tensor(-0.1285), tensor(-0.6792), tensor(0.0091), tensor(0.2985), tensor(-1.1544), tensor(-1.1292), tensor(-0.0632), tensor(0.4182), tensor(-0.2622), tensor(-0.7928), tensor(-0.6645), tensor(-1.1868), tensor(-0.6923), tensor(-1.1569), tensor(-0.1867), tensor(-0.5503), tensor(-1.0368), tensor(-0.9257), tensor(-0.8543), tensor(-1.1944), tensor(-0.1208), tensor(0.3406), tensor(-0.1479), tensor(-0.8151), tensor(-1.2795), tensor(-1.1675), tensor(-0.4450), tensor(-0.8278), tensor(-0.1483), tensor(-1.0126), tensor(-0.4310), tensor(-0.7399), tensor(-0.2888), tensor(-0.5167), tensor(-0.3332), tensor(-0.6243), tensor(-0.8742), tensor(-0.1821), tensor(-1.1059), tensor(-0.9599), tensor(-1.0314), tensor(-0.8014), tensor(-1.1915), tensor(-0.0212), tensor(-0.9437), tensor(0.0610), tensor(-0.2

17it [13:36, 48.82s/it]


Start Epoch 17
Rewards: [tensor(-0.2855), tensor(-0.0336), tensor(-1.1950), tensor(-0.8540), tensor(-0.9685), tensor(-1.2866), tensor(-0.0045), tensor(-0.9651), tensor(-0.8741), tensor(-0.1204), tensor(-0.8676), tensor(-1.0043), tensor(-0.4438), tensor(-0.3156), tensor(0.0115), tensor(-0.5139), tensor(-0.5674), tensor(0.1363), tensor(-0.0815), tensor(-0.2686), tensor(-0.2018), tensor(-0.9437), tensor(-0.2717), tensor(-0.5827), tensor(-0.9359), tensor(-0.4264), tensor(0.1766), tensor(-0.5726), tensor(0.1909), tensor(-0.1702), tensor(-0.9809), tensor(-1.1689), tensor(-0.5111), tensor(-0.6258), tensor(0.3988), tensor(-0.6449), tensor(-0.9114), tensor(-0.8476), tensor(-0.9311), tensor(0.0189), tensor(-1.1855), tensor(0.2098), tensor(-0.1013), tensor(-1.0175), tensor(-0.4752), tensor(-0.2673), tensor(-0.4101), tensor(-0.9718), tensor(-1.2264), tensor(-0.4300), tensor(-0.8142), tensor(-0.7024), tensor(-0.8715), tensor(-0.3521), tensor(-0.8008), tensor(-0.9890), tensor(-1.0734), tensor(0.036

18it [14:24, 48.61s/it]


Start Epoch 18
Rewards: [tensor(-0.9995), tensor(-1.0665), tensor(-0.3506), tensor(-0.6706), tensor(-0.1921), tensor(-1.0390), tensor(-1.3412), tensor(-0.6155), tensor(-0.4219), tensor(-0.0041), tensor(-0.8366), tensor(-0.1891), tensor(-0.8534), tensor(-0.9771), tensor(-0.9111), tensor(-0.3344), tensor(-1.0464), tensor(-0.4138), tensor(-0.9360), tensor(-1.0124), tensor(-0.2114), tensor(-0.7417), tensor(0.0364), tensor(-0.6790), tensor(-1.0665), tensor(-0.6179), tensor(-0.9820), tensor(-1.2325), tensor(-0.8407), tensor(-0.1646), tensor(-0.6346), tensor(-0.9904), tensor(-1.3351), tensor(-0.0293), tensor(-1.1680), tensor(-1.1171), tensor(-1.1247), tensor(-0.7869), tensor(-0.8197), tensor(-0.8003), tensor(-0.7057), tensor(-1.1447), tensor(-0.3358), tensor(0.2140), tensor(0.1984), tensor(-0.0639), tensor(0.1052), tensor(-0.8820), tensor(-0.2100), tensor(-0.7448), tensor(-0.3837), tensor(-0.7141), tensor(0.0099), tensor(-0.4235), tensor(-0.9152), tensor(-0.8742), tensor(-0.0920), tensor(-0.

19it [15:18, 50.21s/it]


Start Epoch 19
Rewards: [tensor(-1.4515), tensor(-0.8386), tensor(-0.7431), tensor(-0.4285), tensor(-0.1868), tensor(-0.5657), tensor(-0.3452), tensor(-0.6417), tensor(-0.1130), tensor(0.0051), tensor(-0.1827), tensor(-1.0281), tensor(-0.5462), tensor(-1.3325), tensor(-0.9060), tensor(-0.5023), tensor(-0.7448), tensor(-0.1229), tensor(0.2752), tensor(-0.7771), tensor(-0.0905), tensor(-0.0979), tensor(0.0174), tensor(-0.9863), tensor(-1.1580), tensor(-0.7152), tensor(0.4741), tensor(-1.2581), tensor(-0.0597), tensor(-0.7223), tensor(0.2283), tensor(-1.1586), tensor(-0.1530), tensor(-0.4592), tensor(-0.3298), tensor(-0.5953), tensor(-0.7100), tensor(-1.0564), tensor(-0.6790), tensor(-0.6407), tensor(-0.9993), tensor(-0.7761), tensor(-1.2991), tensor(0.1444), tensor(-0.7256), tensor(-0.4208), tensor(-0.1302), tensor(0.2871), tensor(-0.2267), tensor(-0.3438), tensor(-1.2520), tensor(-0.3921), tensor(-0.5074), tensor(-0.1249), tensor(-0.9962), tensor(-1.0164), tensor(-0.9961), tensor(-0.22

20it [16:08, 50.11s/it]


Start Epoch 20
Rewards: [tensor(-1.4410), tensor(-0.2534), tensor(-1.2152), tensor(-0.8711), tensor(-1.1493), tensor(-0.1638), tensor(-0.3408), tensor(-0.8711), tensor(-0.2311), tensor(-0.0950), tensor(-0.8386), tensor(-0.5926), tensor(-0.9632), tensor(-0.3354), tensor(-0.5049), tensor(-0.4628), tensor(-0.5305), tensor(-0.8302), tensor(-1.2142), tensor(-0.2990), tensor(-0.2902), tensor(-0.2256), tensor(0.1057), tensor(0.1729), tensor(0.0381), tensor(-1.2606), tensor(-0.3867), tensor(-0.6610), tensor(-0.2852), tensor(-1.0177), tensor(-1.0097), tensor(-0.9648), tensor(-1.2325), tensor(-1.1609), tensor(-0.5214), tensor(-0.2625), tensor(0.2309), tensor(-0.1964), tensor(0.0118), tensor(-0.8018), tensor(-0.2776), tensor(-0.0429), tensor(-1.0541), tensor(0.2662), tensor(-0.1982), tensor(-0.5827), tensor(-0.5666), tensor(-0.0319), tensor(-0.1032), tensor(-0.5380), tensor(-0.2368), tensor(-0.2862), tensor(-0.6172), tensor(-1.4689), tensor(-0.1130), tensor(-1.1155), tensor(-1.0378), tensor(-1.1

21it [16:56, 49.66s/it]


Start Epoch 21
Rewards: [tensor(-0.3545), tensor(-1.1864), tensor(-0.4092), tensor(-1.1059), tensor(-1.3019), tensor(-1.2912), tensor(-1.0731), tensor(-0.3426), tensor(-0.5840), tensor(-0.9691), tensor(-0.6009), tensor(-0.1711), tensor(-1.1372), tensor(-1.0256), tensor(-0.4830), tensor(-0.3286), tensor(-0.8307), tensor(-0.9273), tensor(-1.1134), tensor(-0.4667), tensor(-0.4461), tensor(-1.1427), tensor(-0.6929), tensor(0.1346), tensor(0.0793), tensor(-0.6431), tensor(-0.1456), tensor(-0.1684), tensor(-0.3428), tensor(-0.2149), tensor(-0.4066), tensor(-0.2862), tensor(-0.0685), tensor(-0.2936), tensor(-0.7869), tensor(-0.3184), tensor(-0.6638), tensor(-0.0188), tensor(-0.3921), tensor(-0.6351), tensor(-0.5839), tensor(0.1346), tensor(-1.0731), tensor(-0.8684), tensor(-0.5069), tensor(-0.9946), tensor(-0.2167), tensor(-0.2419), tensor(-0.3001), tensor(-0.0992), tensor(-0.3181), tensor(-0.0908), tensor(-0.4507), tensor(-0.2543), tensor(-0.3301), tensor(-0.0965), tensor(-0.0480), tensor(-

22it [17:43, 48.77s/it]


Start Epoch 22
Rewards: [tensor(-1.2866), tensor(-0.2721), tensor(-1.0855), tensor(-0.7210), tensor(-0.6716), tensor(-0.9091), tensor(-0.0802), tensor(-0.7028), tensor(-0.8390), tensor(-1.0296), tensor(-0.4782), tensor(-0.3660), tensor(-0.2826), tensor(-0.8386), tensor(-0.9217), tensor(-1.2866), tensor(-0.4621), tensor(-0.4050), tensor(-1.3790), tensor(-0.9479), tensor(-0.9949), tensor(-0.7669), tensor(0.4182), tensor(-0.1822), tensor(-0.2475), tensor(-0.8810), tensor(-0.3251), tensor(-0.0499), tensor(-0.9007), tensor(-1.0553), tensor(-0.9249), tensor(-0.5538), tensor(-0.3660), tensor(-0.1982), tensor(-0.5549), tensor(-0.4066), tensor(-1.0907), tensor(-0.2614), tensor(-1.1218), tensor(0.0587), tensor(-0.8344), tensor(-0.9031), tensor(0.1766), tensor(-0.8201), tensor(-0.0992), tensor(-0.4790), tensor(-1.0855), tensor(-0.5737), tensor(-1.2795), tensor(0.0532), tensor(-0.6172), tensor(-0.9218), tensor(-0.4962), tensor(-0.5413), tensor(0.0672), tensor(-0.2877), tensor(-0.9993), tensor(-0.

23it [18:32, 48.83s/it]


Start Epoch 23
Rewards: [tensor(-1.0201), tensor(-0.4505), tensor(-1.0175), tensor(-0.4830), tensor(-0.6379), tensor(-0.4100), tensor(-0.8359), tensor(-1.2470), tensor(-1.1585), tensor(-1.2678), tensor(-0.5471), tensor(-0.4645), tensor(-0.6772), tensor(-0.7295), tensor(-1.1456), tensor(-0.5244), tensor(-1.4410), tensor(-0.6062), tensor(-1.2367), tensor(-0.3723), tensor(-0.6821), tensor(-0.5118), tensor(-1.3268), tensor(-0.4201), tensor(-0.7402), tensor(-0.4810), tensor(-0.5459), tensor(-0.7912), tensor(-0.6624), tensor(-0.2862), tensor(-1.1515), tensor(-0.1859), tensor(-0.6620), tensor(-0.2622), tensor(0.0895), tensor(-0.5266), tensor(-1.1450), tensor(0.1077), tensor(-0.3704), tensor(-0.6706), tensor(-1.0128), tensor(-1.0729), tensor(-1.1855), tensor(-0.2534), tensor(-0.3408), tensor(-0.5412), tensor(0.1291), tensor(-1.0462), tensor(-0.8289), tensor(-0.2291), tensor(0.0691), tensor(-0.2842), tensor(-0.5857), tensor(-0.4805), tensor(-1.2457), tensor(-0.4750), tensor(-0.1847), tensor(-0

24it [19:16, 47.51s/it]


Start Epoch 24
Rewards: [tensor(-0.8729), tensor(0.1763), tensor(-0.8819), tensor(-0.7272), tensor(-0.5971), tensor(-0.4150), tensor(-0.5565), tensor(0.2677), tensor(-0.9007), tensor(-0.6706), tensor(-1.0177), tensor(-1.6261), tensor(-0.1669), tensor(-0.4602), tensor(-0.6838), tensor(-1.0221), tensor(-0.5219), tensor(-0.0013), tensor(-0.0855), tensor(-0.3892), tensor(0.0428), tensor(-0.5557), tensor(-0.9890), tensor(-0.8890), tensor(-0.2224), tensor(-0.7590), tensor(-1.2860), tensor(-0.4210), tensor(-0.3391), tensor(-1.0379), tensor(-0.6808), tensor(-0.5103), tensor(-0.8185), tensor(-0.5938), tensor(-1.5759), tensor(-1.0969), tensor(-0.4579), tensor(-1.0181), tensor(-0.6888), tensor(-0.3233), tensor(-0.7928), tensor(-0.7911), tensor(-0.8601), tensor(0.1029), tensor(-0.2356), tensor(0.1995), tensor(-0.3771), tensor(-1.0615), tensor(-1.0091), tensor(-0.3312), tensor(-0.8756), tensor(-1.0969), tensor(-0.8185), tensor(-1.0029), tensor(-0.9628), tensor(-0.0911), tensor(-0.0861), tensor(-1.

25it [20:03, 47.15s/it]


Start Epoch 25
Rewards: [tensor(-0.9847), tensor(0.1368), tensor(-0.9251), tensor(-0.6809), tensor(-0.1597), tensor(-0.1663), tensor(0.1052), tensor(0.0432), tensor(-0.4805), tensor(-1.2325), tensor(-0.5985), tensor(0.5693), tensor(-0.1936), tensor(-0.4613), tensor(-1.3120), tensor(-0.9948), tensor(-0.7669), tensor(-0.5503), tensor(-0.6045), tensor(-0.0645), tensor(-1.1316), tensor(-0.9890), tensor(-0.0979), tensor(-1.1414), tensor(-0.2242), tensor(-0.9424), tensor(-0.8587), tensor(0.3286), tensor(-1.2164), tensor(-0.6821), tensor(0.2187), tensor(-0.1009), tensor(-0.1375), tensor(-0.0855), tensor(-0.9547), tensor(-0.3153), tensor(-1.1885), tensor(-0.0117), tensor(-0.5722), tensor(0.1729), tensor(0.1730), tensor(-1.2233), tensor(-0.2783), tensor(-0.4155), tensor(-0.6585), tensor(0.2520), tensor(-1.0164), tensor(-0.4436), tensor(-0.1397), tensor(-0.3264), tensor(-0.5754), tensor(-0.8550), tensor(-0.2291), tensor(-0.6759), tensor(-0.8506), tensor(-0.8397), tensor(-0.0039), tensor(-0.7961

26it [20:52, 47.79s/it]


Start Epoch 26
Rewards: [tensor(-0.5890), tensor(-1.1219), tensor(-0.8289), tensor(-0.7018), tensor(-0.4669), tensor(-0.9131), tensor(-0.0131), tensor(-0.2971), tensor(-1.0378), tensor(-0.1251), tensor(-1.2152), tensor(-0.2918), tensor(-0.5562), tensor(0.0828), tensor(-0.4798), tensor(-0.6733), tensor(-0.9091), tensor(-0.5327), tensor(-0.5611), tensor(-1.1515), tensor(-1.1493), tensor(-0.1189), tensor(-0.3389), tensor(-1.1653), tensor(0.2409), tensor(-0.6543), tensor(-0.1108), tensor(0.2214), tensor(-1.1484), tensor(-0.9125), tensor(-1.3310), tensor(-0.3818), tensor(-0.9186), tensor(-0.2439), tensor(-0.2028), tensor(-0.6666), tensor(0.0947), tensor(-1.2142), tensor(-0.2431), tensor(-1.2623), tensor(-0.5557), tensor(-0.0496), tensor(-0.4360), tensor(-0.8376), tensor(-0.2794), tensor(-0.1242), tensor(-1.0501), tensor(-1.1706), tensor(-0.4810), tensor(-0.6872), tensor(-0.2906), tensor(-0.8579), tensor(-0.4645), tensor(-0.0045), tensor(-0.4461), tensor(-0.9217), tensor(-0.4273), tensor(-0

27it [21:47, 50.04s/it]


Start Epoch 27
Rewards: [tensor(-0.3408), tensor(-0.3955), tensor(-0.5139), tensor(-0.4188), tensor(-0.7058), tensor(-0.1530), tensor(-0.4386), tensor(-0.3457), tensor(-0.4122), tensor(0.0175), tensor(-0.9030), tensor(-1.1714), tensor(-1.2607), tensor(-1.0925), tensor(-1.1693), tensor(-0.5744), tensor(0.1909), tensor(-0.9273), tensor(0.3074), tensor(-0.0450), tensor(-1.3968), tensor(0.5631), tensor(-0.2786), tensor(-0.5027), tensor(-1.1274), tensor(-1.1580), tensor(-0.3364), tensor(-0.3324), tensor(-1.2866), tensor(-1.1724), tensor(-0.4973), tensor(-0.1363), tensor(-1.1367), tensor(-1.2289), tensor(-1.0734), tensor(-0.2520), tensor(-0.5551), tensor(0.2353), tensor(-0.4418), tensor(0.0861), tensor(-0.2567), tensor(0.0012), tensor(0.1029), tensor(-0.7629), tensor(-0.1624), tensor(-0.5320), tensor(0.2520), tensor(-0.3251), tensor(-0.2683), tensor(0.0905), tensor(-1.1071), tensor(0.2214), tensor(-0.8312), tensor(-0.7956), tensor(-1.1210), tensor(-0.8213), tensor(-0.8992), tensor(-0.4512),

28it [22:33, 48.79s/it]


Start Epoch 28
Rewards: [tensor(-1.0460), tensor(-0.8073), tensor(-0.9879), tensor(-0.1197), tensor(-0.9716), tensor(-0.9682), tensor(0.2871), tensor(0.0175), tensor(0.3475), tensor(-1.2142), tensor(-0.2439), tensor(-0.6014), tensor(0.0051), tensor(-0.9578), tensor(-1.1701), tensor(-0.6658), tensor(-0.4810), tensor(-0.8344), tensor(-0.5490), tensor(-1.2367), tensor(0.2985), tensor(-0.3138), tensor(-0.9951), tensor(-0.5428), tensor(-0.8008), tensor(-0.5020), tensor(-0.7399), tensor(-0.1293), tensor(-0.0751), tensor(-0.4520), tensor(-0.4447), tensor(-0.9347), tensor(-0.4460), tensor(0.1363), tensor(-0.4056), tensor(-0.8818), tensor(-0.9651), tensor(-0.5272), tensor(-0.3639), tensor(-0.5102), tensor(0.3292), tensor(0.0132), tensor(-0.1597), tensor(-0.0910), tensor(-0.8502), tensor(-0.9915), tensor(-0.1990), tensor(-0.7979), tensor(-0.0882), tensor(-1.1437), tensor(-1.2129), tensor(0.4151), tensor(-0.3470), tensor(-0.6510), tensor(-0.9651), tensor(-0.6201), tensor(-1.1651), tensor(-0.1961

29it [23:19, 47.82s/it]


Start Epoch 29
Rewards: [tensor(-0.0760), tensor(-0.0861), tensor(-0.8289), tensor(-0.6295), tensor(-0.5909), tensor(-1.0730), tensor(-1.2917), tensor(-0.9901), tensor(-0.2539), tensor(-1.0043), tensor(0.1752), tensor(-0.0068), tensor(-1.1473), tensor(0.5693), tensor(0.1382), tensor(-0.4445), tensor(-0.8725), tensor(-0.4436), tensor(-0.7772), tensor(-0.5877), tensor(-0.6997), tensor(-0.8293), tensor(-0.0492), tensor(-0.9628), tensor(-1.1750), tensor(-0.3192), tensor(-0.6790), tensor(-0.7269), tensor(-0.6504), tensor(-0.8664), tensor(-0.9961), tensor(-0.5669), tensor(-0.3286), tensor(-0.0697), tensor(-0.8688), tensor(-0.4905), tensor(0.1553), tensor(-0.0355), tensor(-0.8715), tensor(-1.0276), tensor(-0.9077), tensor(-0.5413), tensor(-0.6878), tensor(-1.2384), tensor(-0.6624), tensor(-0.0595), tensor(-1.0734), tensor(-0.7594), tensor(-1.1944), tensor(-0.8534), tensor(-0.6997), tensor(-1.2457), tensor(-0.7912), tensor(-0.3332), tensor(-0.9863), tensor(-0.2346), tensor(-1.2175), tensor(-0

30it [24:05, 47.45s/it]


Start Epoch 30
Rewards: [tensor(-0.8359), tensor(-0.7362), tensor(0.3823), tensor(-0.6324), tensor(-0.0730), tensor(-0.0853), tensor(-0.8429), tensor(-0.8278), tensor(-0.0735), tensor(0.1986), tensor(-0.9589), tensor(-0.4304), tensor(-0.3880), tensor(-0.1657), tensor(-0.0574), tensor(-1.1703), tensor(0.1293), tensor(0.0732), tensor(-0.7347), tensor(-0.5382), tensor(-0.7669), tensor(-0.2036), tensor(-0.7418), tensor(-0.0132), tensor(-0.7684), tensor(-0.5354), tensor(-0.3343), tensor(-0.5308), tensor(-0.1822), tensor(-0.0835), tensor(-0.3006), tensor(-0.0526), tensor(-0.2515), tensor(-0.5158), tensor(0.0588), tensor(-0.3805), tensor(-0.2788), tensor(-1.2352), tensor(-0.5192), tensor(-0.8987), tensor(-1.3499), tensor(-0.6774), tensor(-0.9770), tensor(-1.2693), tensor(-1.0756), tensor(-1.1341), tensor(-1.1868), tensor(-0.6816), tensor(-1.0378), tensor(0.0221), tensor(-0.9154), tensor(-0.7360), tensor(-1.0984), tensor(-0.6385), tensor(-0.4436), tensor(-0.5469), tensor(-0.2323), tensor(-1.1

31it [24:56, 48.38s/it]


Start Epoch 31
Rewards: [tensor(-1.0340), tensor(-1.2040), tensor(-0.8847), tensor(-0.1177), tensor(-0.3438), tensor(-0.3258), tensor(-1.0452), tensor(-0.0967), tensor(-1.2043), tensor(-0.0943), tensor(-1.2956), tensor(-1.1931), tensor(-0.9349), tensor(-0.0920), tensor(-1.0185), tensor(-1.0612), tensor(-1.3268), tensor(0.2871), tensor(-0.0920), tensor(-0.6585), tensor(-0.2515), tensor(-1.0016), tensor(-0.5585), tensor(-0.4596), tensor(-0.2686), tensor(-0.0212), tensor(-0.0638), tensor(-1.2123), tensor(-0.5469), tensor(-0.1864), tensor(-0.9205), tensor(0.0362), tensor(-0.9699), tensor(-0.8593), tensor(-0.6853), tensor(-0.2826), tensor(-0.4650), tensor(-0.0961), tensor(-0.8093), tensor(-0.1381), tensor(-0.5983), tensor(-0.3558), tensor(-0.3545), tensor(-0.1822), tensor(-0.8238), tensor(-1.1520), tensor(0.3074), tensor(-0.8987), tensor(-0.6863), tensor(-0.0824), tensor(-0.2440), tensor(-0.6737), tensor(-0.4891), tensor(-0.8601), tensor(-0.0117), tensor(-0.3326), tensor(-1.3268), tensor(-

32it [25:42, 47.69s/it]


Start Epoch 32
Rewards: [tensor(-0.3576), tensor(-1.3268), tensor(-0.4056), tensor(-0.2483), tensor(-0.7334), tensor(-1.3098), tensor(-1.0099), tensor(0.0533), tensor(-0.5471), tensor(-0.2114), tensor(-1.0150), tensor(-0.7618), tensor(-0.9555), tensor(-0.5348), tensor(-0.9162), tensor(-1.1330), tensor(-0.0039), tensor(-0.3682), tensor(-1.0776), tensor(-1.0378), tensor(-1.2070), tensor(-1.5563), tensor(-0.6438), tensor(-0.4460), tensor(-0.8122), tensor(-1.0460), tensor(-0.3955), tensor(-0.5545), tensor(-0.5117), tensor(-0.2159), tensor(-0.8022), tensor(-0.0355), tensor(-0.7223), tensor(-0.2902), tensor(0.3635), tensor(-0.3023), tensor(-0.2284), tensor(-0.4219), tensor(-0.6896), tensor(-0.4268), tensor(-1.0454), tensor(-1.2265), tensor(-0.5926), tensor(-0.1625), tensor(-0.2740), tensor(0.1587), tensor(-0.3075), tensor(-0.7729), tensor(-0.3631), tensor(-0.0595), tensor(-1.1337), tensor(-0.4790), tensor(0.3271), tensor(-0.9366), tensor(0.0268), tensor(-0.2787), tensor(-0.1161), tensor(-0.

33it [26:32, 48.32s/it]


Start Epoch 33
Rewards: [tensor(-0.4621), tensor(-0.7922), tensor(-0.3937), tensor(-1.2618), tensor(-0.6664), tensor(-0.0288), tensor(-1.1944), tensor(-0.1625), tensor(-0.6290), tensor(-0.4418), tensor(-0.1420), tensor(-0.7967), tensor(-0.7418), tensor(-1.3268), tensor(-0.4600), tensor(-1.2582), tensor(-1.2562), tensor(-1.0221), tensor(-0.3063), tensor(-0.7873), tensor(-0.1711), tensor(-0.3605), tensor(-1.4515), tensor(-0.4100), tensor(-0.1624), tensor(-0.2951), tensor(-0.0632), tensor(-0.6243), tensor(-0.8819), tensor(-0.5553), tensor(-0.5028), tensor(-0.7028), tensor(-0.6965), tensor(-0.0632), tensor(0.1752), tensor(-0.2309), tensor(-1.2127), tensor(-1.0016), tensor(-0.1161), tensor(-0.0371), tensor(-0.5726), tensor(-0.7761), tensor(-0.9150), tensor(0.4182), tensor(-0.5797), tensor(-1.0190), tensor(-0.2740), tensor(-0.0882), tensor(0.4182), tensor(-1.1321), tensor(-0.2959), tensor(-0.8289), tensor(-0.8578), tensor(-1.3203), tensor(-0.2962), tensor(-0.4602), tensor(-0.2353), tensor(-

34it [27:20, 48.22s/it]


Start Epoch 34
Rewards: [tensor(-0.6716), tensor(-0.0265), tensor(0.1014), tensor(-0.1049), tensor(-0.5380), tensor(-0.8109), tensor(-0.5277), tensor(-0.3050), tensor(-0.9191), tensor(-0.5305), tensor(-0.9854), tensor(-0.4100), tensor(-1.0395), tensor(-0.3892), tensor(-0.6385), tensor(-0.9125), tensor(-0.8588), tensor(-0.1717), tensor(0.0123), tensor(0.3971), tensor(-0.8847), tensor(-0.9152), tensor(-0.6919), tensor(-0.2018), tensor(-1.2142), tensor(0.0904), tensor(-0.0231), tensor(-0.1626), tensor(-0.9628), tensor(-1.0885), tensor(-0.6262), tensor(0.1747), tensor(-0.9390), tensor(0.4175), tensor(-0.2425), tensor(-1.1745), tensor(-0.1669), tensor(-1.1825), tensor(-0.7119), tensor(-0.9208), tensor(-0.1050), tensor(-0.1671), tensor(-0.0619), tensor(-1.1864), tensor(-1.3308), tensor(-0.5973), tensor(-0.8185), tensor(-0.1937), tensor(0.0430), tensor(-0.4155), tensor(-0.9020), tensor(-0.0616), tensor(-1.2846), tensor(-0.2499), tensor(0.1405), tensor(-1.1737), tensor(-0.5211), tensor(-0.986

35it [28:11, 49.04s/it]


Start Epoch 35
Rewards: [tensor(-1.1106), tensor(0.3292), tensor(0.0989), tensor(-0.3624), tensor(-0.5170), tensor(-0.9186), tensor(-0.4364), tensor(0.0171), tensor(-0.3391), tensor(-1.0519), tensor(-0.9863), tensor(-1.1484), tensor(-0.0597), tensor(-0.4274), tensor(-0.7521), tensor(-0.1563), tensor(0.2703), tensor(-0.1633), tensor(-0.8003), tensor(-1.0276), tensor(0.1491), tensor(-0.7771), tensor(-0.1775), tensor(-0.1347), tensor(-1.1450), tensor(-1.0010), tensor(0.1318), tensor(-0.3624), tensor(-0.4476), tensor(-0.2387), tensor(-0.4877), tensor(-1.0274), tensor(0.0174), tensor(-0.9154), tensor(-0.5244), tensor(0.3286), tensor(-0.0247), tensor(-0.4150), tensor(-0.6532), tensor(-1.1945), tensor(-0.2056), tensor(-0.1244), tensor(-0.6225), tensor(-0.3766), tensor(-0.2204), tensor(-0.4083), tensor(-1.2640), tensor(-1.2165), tensor(-1.2457), tensor(-0.7628), tensor(-0.0068), tensor(-0.5276), tensor(-0.0045), tensor(-1.2142), tensor(-1.1580), tensor(-0.5744), tensor(-0.6295), tensor(-0.509

36it [29:02, 49.64s/it]


Start Epoch 36
Rewards: [tensor(-0.1262), tensor(0.1346), tensor(-0.1081), tensor(-1.1292), tensor(-0.5320), tensor(-1.2912), tensor(-0.0121), tensor(0.1674), tensor(-0.8989), tensor(-0.6444), tensor(-0.0418), tensor(-0.7417), tensor(-0.8742), tensor(-0.4547), tensor(-0.0985), tensor(-0.8213), tensor(-0.9791), tensor(-1.0665), tensor(-0.0317), tensor(-0.1511), tensor(-0.8293), tensor(-0.5792), tensor(-0.8553), tensor(-0.4149), tensor(-0.1690), tensor(-0.2855), tensor(0.3448), tensor(-0.5264), tensor(-0.9895), tensor(-0.1519), tensor(-1.1609), tensor(-0.7697), tensor(0.1793), tensor(-0.6240), tensor(-0.9895), tensor(-0.8139), tensor(-0.9651), tensor(-0.8980), tensor(-1.0419), tensor(-0.6295), tensor(-0.7444), tensor(-0.1330), tensor(-0.3332), tensor(-0.7444), tensor(-0.5437), tensor(-0.3070), tensor(-1.1341), tensor(-0.4509), tensor(-0.3457), tensor(-0.8245), tensor(-0.4653), tensor(0.0239), tensor(-0.2952), tensor(-0.0265), tensor(0.0861), tensor(-1.0106), tensor(-0.0499), tensor(0.39

37it [29:45, 47.60s/it]


Start Epoch 37
Rewards: [tensor(-0.7098), tensor(0.3344), tensor(-0.7102), tensor(-0.1485), tensor(-0.8959), tensor(-0.9020), tensor(-0.4304), tensor(-0.1208), tensor(-0.2911), tensor(-0.1882), tensor(-0.1485), tensor(0.2303), tensor(-0.5027), tensor(-1.1694), tensor(-0.6520), tensor(0.2164), tensor(-0.5737), tensor(-0.5512), tensor(-0.2931), tensor(-0.7234), tensor(-1.1964), tensor(-1.1820), tensor(0.1382), tensor(-0.6620), tensor(-0.5669), tensor(-0.7500), tensor(-0.8499), tensor(-0.9344), tensor(-0.4520), tensor(-1.0205), tensor(-0.5812), tensor(-0.7956), tensor(-1.2846), tensor(-0.8278), tensor(-1.0397), tensor(-0.6012), tensor(-0.5626), tensor(-0.3312), tensor(-1.1945), tensor(-0.4812), tensor(0.1984), tensor(-0.3666), tensor(-0.1262), tensor(-1.0483), tensor(0.4485), tensor(-1.0113), tensor(0.0123), tensor(-0.8746), tensor(-0.9390), tensor(-1.2011), tensor(-0.0041), tensor(-0.2014), tensor(-0.3391), tensor(0.1010), tensor(-0.2284), tensor(0.1747), tensor(-0.5097), tensor(-0.6199

38it [30:27, 46.09s/it]


Start Epoch 38
Rewards: [tensor(-0.2018), tensor(-0.3448), tensor(-1.1673), tensor(-0.3293), tensor(0.2086), tensor(-0.9835), tensor(-1.3397), tensor(-0.4796), tensor(-0.0645), tensor(-0.8348), tensor(-0.5877), tensor(-0.2860), tensor(-0.6872), tensor(-0.3345), tensor(-0.1921), tensor(-0.0972), tensor(-0.2149), tensor(-0.8872), tensor(-0.7480), tensor(-0.5049), tensor(-1.1155), tensor(-0.9759), tensor(-1.1769), tensor(-0.4066), tensor(-0.1722), tensor(-0.6772), tensor(-0.6659), tensor(-0.8045), tensor(-0.5259), tensor(-1.2784), tensor(-0.9804), tensor(0.0587), tensor(-0.4445), tensor(-0.3309), tensor(0.4166), tensor(-0.3266), tensor(-0.9578), tensor(-1.2278), tensor(-0.2394), tensor(-1.0367), tensor(-0.6385), tensor(-0.0937), tensor(-0.4600), tensor(-0.2842), tensor(-0.8688), tensor(-1.2824), tensor(0.0531), tensor(-0.4972), tensor(-1.3268), tensor(-1.0351), tensor(-1.2146), tensor(-0.2483), tensor(-1.2047), tensor(-1.0939), tensor(-1.0669), tensor(-0.6872), tensor(-0.1283), tensor(-0

39it [31:14, 46.30s/it]


Start Epoch 39
Rewards: [tensor(-1.0225), tensor(-0.0429), tensor(-0.3286), tensor(-0.0979), tensor(-0.1251), tensor(-0.9226), tensor(-0.3408), tensor(-0.3251), tensor(-1.1043), tensor(-0.3521), tensor(-0.5142), tensor(-0.0121), tensor(0.0490), tensor(-0.5877), tensor(-1.1864), tensor(-0.6245), tensor(-0.5621), tensor(-0.4356), tensor(-0.3545), tensor(-0.5469), tensor(-1.4790), tensor(-0.1847), tensor(-0.6245), tensor(-0.2176), tensor(-0.4978), tensor(-1.0100), tensor(-0.9655), tensor(-0.5167), tensor(-0.2149), tensor(-0.4520), tensor(-0.9989), tensor(-0.9648), tensor(0.1642), tensor(-0.7777), tensor(-0.8818), tensor(-0.8278), tensor(-0.6121), tensor(-0.7664), tensor(-0.7922), tensor(-0.6385), tensor(-0.4534), tensor(-0.5797), tensor(-0.3880), tensor(-0.9424), tensor(-0.9651), tensor(-1.0784), tensor(-0.5812), tensor(0.0296), tensor(-0.5165), tensor(-0.4673), tensor(-0.2929), tensor(-0.5240), tensor(0.0381), tensor(-0.9092), tensor(-0.7243), tensor(-0.5049), tensor(-0.0278), tensor(-0

40it [32:04, 47.44s/it]


Start Epoch 40
Rewards: [tensor(-0.6601), tensor(-0.1791), tensor(-1.0124), tensor(-0.4208), tensor(-0.3681), tensor(0.1894), tensor(-0.1722), tensor(0.0678), tensor(-0.3232), tensor(-1.1569), tensor(-1.3268), tensor(0.0426), tensor(-0.7448), tensor(-0.3681), tensor(-0.1784), tensor(-1.0622), tensor(-0.5471), tensor(-0.2520), tensor(0.3216), tensor(-0.8253), tensor(-0.0934), tensor(-0.4790), tensor(0.1799), tensor(-0.3344), tensor(-0.3805), tensor(0.0134), tensor(0.3074), tensor(-0.5909), tensor(-0.1262), tensor(-1.2325), tensor(-0.8766), tensor(-1.2385), tensor(-0.9428), tensor(-0.0543), tensor(-0.2943), tensor(-0.3704), tensor(0.0860), tensor(-1.4601), tensor(-0.9583), tensor(-0.3350), tensor(-0.4310), tensor(0.0277), tensor(-0.7325), tensor(-0.1375), tensor(-1.0228), tensor(-0.8026), tensor(0.2677), tensor(-0.7750), tensor(-0.5028), tensor(-0.3995), tensor(-0.0979), tensor(-0.7482), tensor(-0.7640), tensor(-1.1621), tensor(-0.2918), tensor(-0.0910), tensor(-0.8540), tensor(-0.8769)

41it [32:51, 47.32s/it]


Start Epoch 41
Rewards: [tensor(-0.3166), tensor(-0.9700), tensor(-0.1441), tensor(-0.1507), tensor(-0.7956), tensor(0.3823), tensor(0.0029), tensor(-0.8601), tensor(-0.7697), tensor(-0.7569), tensor(-0.8369), tensor(-0.5726), tensor(0.0242), tensor(-0.0319), tensor(-1.1741), tensor(-1.2392), tensor(0.0381), tensor(-0.4273), tensor(-1.0294), tensor(-1.0984), tensor(-1.0899), tensor(-0.1304), tensor(-0.0331), tensor(-0.5360), tensor(-0.2615), tensor(0.2061), tensor(-0.6855), tensor(-0.9412), tensor(-1.1355), tensor(-0.2005), tensor(-1.0016), tensor(-0.9910), tensor(-0.7569), tensor(-0.7419), tensor(-0.6280), tensor(-1.1594), tensor(-0.2419), tensor(-0.9890), tensor(-1.0924), tensor(-1.0030), tensor(-0.1208), tensor(-0.5244), tensor(-0.4629), tensor(-0.2364), tensor(-0.4736), tensor(0.1445), tensor(-0.9275), tensor(-0.4534), tensor(-0.1013), tensor(-0.7697), tensor(-0.3407), tensor(-0.8481), tensor(-0.5533), tensor(-0.8476), tensor(-0.4433), tensor(-0.3408), tensor(-0.5167), tensor(-1.2

42it [33:40, 47.95s/it]


Start Epoch 42
Rewards: [tensor(-0.5512), tensor(-0.6012), tensor(-0.4100), tensor(-0.6792), tensor(-1.1931), tensor(-0.7223), tensor(-0.1129), tensor(-0.1706), tensor(-0.1712), tensor(-0.9059), tensor(-0.7750), tensor(-0.3955), tensor(-0.7385), tensor(-0.1375), tensor(-0.7956), tensor(-0.0231), tensor(-0.5571), tensor(-1.0855), tensor(-0.5380), tensor(-0.1369), tensor(-1.1788), tensor(-0.5990), tensor(-0.8018), tensor(-0.7476), tensor(-0.4445), tensor(0.0066), tensor(-0.8869), tensor(-0.3892), tensor(-0.0805), tensor(-0.3603), tensor(-0.6325), tensor(-0.0927), tensor(-1.1473), tensor(-0.2155), tensor(-1.2912), tensor(-0.7402), tensor(-0.8367), tensor(-0.6510), tensor(-0.1251), tensor(-0.9306), tensor(0.0661), tensor(-0.1663), tensor(-0.7047), tensor(-0.8710), tensor(-0.9216), tensor(-0.3209), tensor(-1.3268), tensor(-1.0185), tensor(0.2061), tensor(-0.4635), tensor(-1.1310), tensor(-0.1775), tensor(-0.5482), tensor(-1.1068), tensor(-0.8070), tensor(0.1382), tensor(-1.2067), tensor(-1

43it [34:25, 47.03s/it]


Start Epoch 43
Rewards: [tensor(-0.2267), tensor(-1.0541), tensor(-0.5015), tensor(0.0532), tensor(-1.2276), tensor(-0.1108), tensor(-0.6849), tensor(-0.2356), tensor(-0.1511), tensor(-0.1050), tensor(-0.5685), tensor(-0.4443), tensor(-0.0648), tensor(0.1405), tensor(-1.1229), tensor(-0.2211), tensor(-0.7660), tensor(-0.8014), tensor(-1.1931), tensor(-0.3605), tensor(-0.8756), tensor(-0.3391), tensor(-0.5556), tensor(-0.4667), tensor(-0.0992), tensor(-1.1484), tensor(-0.4086), tensor(-0.1791), tensor(-0.6530), tensor(-0.8661), tensor(-0.9962), tensor(-0.5028), tensor(-0.0889), tensor(-0.9612), tensor(-0.4490), tensor(0.5804), tensor(-0.5029), tensor(-1.3499), tensor(-0.4324), tensor(-0.4436), tensor(-1.2382), tensor(-0.2356), tensor(-0.6172), tensor(-0.9282), tensor(-0.8296), tensor(-0.5309), tensor(-1.2784), tensor(-0.2962), tensor(-0.3875), tensor(-0.6488), tensor(-0.6314), tensor(-0.6346), tensor(-0.5857), tensor(-0.1488), tensor(-1.4777), tensor(-0.3286), tensor(0.5804), tensor(-1

44it [35:11, 46.50s/it]


Start Epoch 44
Rewards: [tensor(-0.9651), tensor(-0.3766), tensor(-1.1023), tensor(0.0967), tensor(-0.9879), tensor(-0.9293), tensor(-1.0730), tensor(-0.2901), tensor(-0.3477), tensor(-1.3968), tensor(-0.9236), tensor(-0.1283), tensor(-0.5240), tensor(-0.5855), tensor(-0.4793), tensor(-1.0564), tensor(-0.3652), tensor(-0.2673), tensor(-1.1670), tensor(-0.3768), tensor(-0.7953), tensor(-0.3682), tensor(-0.7629), tensor(-1.1755), tensor(-0.9461), tensor(-0.5448), tensor(-1.1725), tensor(-0.1690), tensor(-0.5857), tensor(-0.5839), tensor(-0.1530), tensor(0.0242), tensor(-0.3470), tensor(-1.2991), tensor(-0.3086), tensor(-1.2144), tensor(-0.0122), tensor(-0.4507), tensor(-0.5938), tensor(-0.5694), tensor(-0.3681), tensor(-0.8831), tensor(-0.7360), tensor(-0.5214), tensor(-0.8277), tensor(-1.2850), tensor(-0.9154), tensor(-0.3470), tensor(-0.0882), tensor(0.0362), tensor(-0.8122), tensor(-1.4515), tensor(-1.2203), tensor(-0.8753), tensor(-0.6346), tensor(-1.2164), tensor(-0.5666), tensor(-

45it [35:56, 46.13s/it]


Start Epoch 45
Rewards: [tensor(-0.8314), tensor(-0.0908), tensor(-0.3427), tensor(-1.2846), tensor(-0.7385), tensor(-0.9802), tensor(-0.2437), tensor(-0.6726), tensor(-0.8201), tensor(-0.3228), tensor(-1.1689), tensor(-0.4798), tensor(-0.6538), tensor(-0.7143), tensor(-0.7402), tensor(-0.3579), tensor(-0.2611), tensor(-0.0384), tensor(-0.6878), tensor(0.1729), tensor(-0.3156), tensor(-0.3768), tensor(-1.1920), tensor(-0.5909), tensor(-0.9890), tensor(-0.9922), tensor(-0.0042), tensor(-0.5288), tensor(0.1894), tensor(-1.2043), tensor(-0.5240), tensor(-0.1532), tensor(-0.8730), tensor(-0.4832), tensor(-0.9419), tensor(-0.9521), tensor(-0.4141), tensor(-0.9875), tensor(-0.3668), tensor(-1.3968), tensor(-0.4244), tensor(-0.4684), tensor(-0.3603), tensor(0.3365), tensor(-0.2236), tensor(0.0251), tensor(-0.6047), tensor(-1.2365), tensor(-0.1177), tensor(-0.3964), tensor(-0.2132), tensor(-0.1295), tensor(-0.3732), tensor(-0.6809), tensor(0.1318), tensor(-0.4356), tensor(0.1289), tensor(-1.1

46it [36:42, 46.15s/it]


Start Epoch 46
Rewards: [tensor(-0.3576), tensor(-1.2504), tensor(0.1438), tensor(-1.2067), tensor(-0.2517), tensor(-1.1788), tensor(0.0533), tensor(-0.3350), tensor(-0.4138), tensor(-1.1864), tensor(-1.0501), tensor(-0.8596), tensor(-0.0645), tensor(-0.7187), tensor(-0.8269), tensor(-0.1483), tensor(0.1491), tensor(-0.5549), tensor(-0.1754), tensor(-0.1867), tensor(0.1363), tensor(-1.0010), tensor(-0.2843), tensor(-0.8008), tensor(-1.2917), tensor(-0.8824), tensor(-0.3739), tensor(-1.2824), tensor(-0.4249), tensor(-0.3558), tensor(-0.2797), tensor(-1.2436), tensor(0.5693), tensor(-0.7979), tensor(-0.9035), tensor(0.0381), tensor(-0.9293), tensor(0.1029), tensor(-0.5209), tensor(-0.5978), tensor(-0.5771), tensor(-0.7452), tensor(-0.5082), tensor(-1.1706), tensor(-0.0428), tensor(-0.4596), tensor(-1.0164), tensor(-0.8338), tensor(-0.1881), tensor(-1.3268), tensor(-0.9648), tensor(-0.1519), tensor(-0.2221), tensor(-0.7942), tensor(-0.9995), tensor(-0.3111), tensor(-1.0593), tensor(0.030

47it [37:35, 48.22s/it]


Start Epoch 47
Rewards: [tensor(-1.0033), tensor(-0.0701), tensor(0.0286), tensor(-0.9401), tensor(-0.9026), tensor(-0.3668), tensor(0.0732), tensor(-0.5161), tensor(-0.1882), tensor(-1.1931), tensor(-0.4447), tensor(-0.4914), tensor(-0.9419), tensor(-0.6385), tensor(-1.1543), tensor(-1.0395), tensor(-0.8141), tensor(-0.4273), tensor(-0.5792), tensor(-1.2142), tensor(-0.3158), tensor(-1.0454), tensor(-0.2071), tensor(-0.9606), tensor(-0.1715), tensor(0.1052), tensor(-0.7054), tensor(-0.1375), tensor(0.3271), tensor(-0.2852), tensor(-0.5469), tensor(-1.0564), tensor(-1.2067), tensor(-0.0621), tensor(0.0115), tensor(-0.4356), tensor(-0.1859), tensor(-0.4544), tensor(-1.1704), tensor(-0.3877), tensor(-0.4667), tensor(-0.3059), tensor(-0.9915), tensor(-0.7235), tensor(-1.2228), tensor(-0.2021), tensor(-0.8367), tensor(-0.2428), tensor(-0.9993), tensor(-0.2541), tensor(-0.6849), tensor(-0.9583), tensor(-0.7746), tensor(-1.4444), tensor(-0.4541), tensor(-1.2912), tensor(-1.5759), tensor(-1.

48it [38:21, 47.45s/it]


Start Epoch 48
Rewards: [tensor(-0.6728), tensor(-1.1012), tensor(-0.2455), tensor(0.0320), tensor(-0.0992), tensor(-0.3408), tensor(-1.2164), tensor(-0.1483), tensor(0.0158), tensor(-0.5311), tensor(0.0971), tensor(0.2040), tensor(-0.4752), tensor(-1.2795), tensor(-0.8499), tensor(0.2474), tensor(0.0895), tensor(-0.6319), tensor(-1.1749), tensor(-0.2786), tensor(-0.3615), tensor(-0.4122), tensor(-0.9631), tensor(-0.1571), tensor(-1.0150), tensor(-0.2309), tensor(-1.2347), tensor(-0.0730), tensor(-0.9216), tensor(0.2962), tensor(0.1014), tensor(-1.0612), tensor(-0.3650), tensor(-0.6583), tensor(0.0100), tensor(-0.9269), tensor(0.0649), tensor(0.3292), tensor(-0.2477), tensor(-0.2237), tensor(-0.4356), tensor(-0.8258), tensor(-1.3306), tensor(-0.6172), tensor(-0.8550), tensor(0.0678), tensor(-1.3571), tensor(-1.0663), tensor(-1.0006), tensor(0.1257), tensor(-1.1788), tensor(0.3457), tensor(-0.9650), tensor(-0.6952), tensor(-0.2991), tensor(0.3406), tensor(-0.8769), tensor(-1.2233), ten

49it [39:03, 45.97s/it]


Start Epoch 49
Rewards: [tensor(0.2630), tensor(-0.8369), tensor(-0.3477), tensor(-0.0314), tensor(-0.1671), tensor(-1.3842), tensor(-1.1883), tensor(-0.1133), tensor(-0.1498), tensor(-0.5985), tensor(-0.9890), tensor(-1.2043), tensor(-1.0855), tensor(-0.1982), tensor(0.0286), tensor(-1.0368), tensor(-1.1931), tensor(-0.5382), tensor(-0.8725), tensor(0.2603), tensor(-0.4150), tensor(-0.2712), tensor(-1.1864), tensor(-0.0122), tensor(-0.8664), tensor(-0.2979), tensor(-1.1543), tensor(-0.0265), tensor(-0.6939), tensor(-0.4268), tensor(0.1986), tensor(-0.0950), tensor(-0.9257), tensor(-0.1369), tensor(-0.8540), tensor(0.1909), tensor(-0.7638), tensor(-0.8756), tensor(-1.0575), tensor(-0.0278), tensor(-0.9395), tensor(-0.9962), tensor(-1.1950), tensor(-0.3137), tensor(-1.0126), tensor(-1.0015), tensor(-1.0918), tensor(-0.2774), tensor(-0.4294), tensor(-1.0244), tensor(-1.0340), tensor(0.0269), tensor(-0.1397), tensor(-1.2093), tensor(-0.8454), tensor(-0.1420), tensor(-0.5094), tensor(-0.9

50it [39:49, 45.84s/it]


Start Epoch 50
Rewards: [tensor(-0.6280), tensor(-1.2325), tensor(-0.7052), tensor(-1.0721), tensor(0.0828), tensor(-0.3576), tensor(-1.2325), tensor(0.3457), tensor(-0.9556), tensor(-0.5448), tensor(-0.3075), tensor(-1.0734), tensor(-0.9206), tensor(-0.1775), tensor(-0.9407), tensor(-0.6648), tensor(-0.8141), tensor(-0.6034), tensor(-0.6889), tensor(-1.0296), tensor(-0.8784), tensor(-1.1931), tensor(0.0260), tensor(-0.0355), tensor(-0.6345), tensor(-0.1847), tensor(-0.9298), tensor(-1.2592), tensor(-0.6530), tensor(-0.3958), tensor(-1.0340), tensor(-0.9154), tensor(-0.4922), tensor(-1.0556), tensor(-0.3050), tensor(-0.2562), tensor(-0.3326), tensor(-0.7894), tensor(-0.6155), tensor(-0.3135), tensor(-1.2038), tensor(-0.7206), tensor(-0.6130), tensor(-0.7482), tensor(-0.6685), tensor(-0.1544), tensor(-0.1369), tensor(-0.9771), tensor(-0.1736), tensor(-0.2394), tensor(-0.3605), tensor(-0.7662), tensor(-0.9700), tensor(-1.1776), tensor(-0.6013), tensor(-0.9968), tensor(-0.7426), tensor(-

51it [40:38, 46.84s/it]


Start Epoch 51
Rewards: [tensor(-0.4100), tensor(-1.1788), tensor(-0.8289), tensor(-1.0969), tensor(-0.3376), tensor(-0.4534), tensor(-1.2323), tensor(-0.5165), tensor(-0.2822), tensor(0.2761), tensor(-0.3165), tensor(0.0426), tensor(-0.3937), tensor(-0.1130), tensor(-0.5029), tensor(-0.2971), tensor(0.2871), tensor(-0.4185), tensor(-0.1749), tensor(-1.0009), tensor(-1.1493), tensor(-0.1544), tensor(-0.2219), tensor(-0.1702), tensor(-0.2740), tensor(-1.1389), tensor(-1.1673), tensor(-1.1783), tensor(0.0364), tensor(-0.0910), tensor(-0.8274), tensor(-1.0644), tensor(-0.2368), tensor(-0.3354), tensor(-1.0113), tensor(-0.9293), tensor(-0.2028), tensor(-0.8113), tensor(-0.5749), tensor(-1.1722), tensor(-0.7733), tensor(-0.5722), tensor(-0.9111), tensor(-0.5652), tensor(-0.0854), tensor(-0.3271), tensor(-1.2325), tensor(0.0623), tensor(-1.0804), tensor(-0.6345), tensor(-0.4590), tensor(-0.2852), tensor(-0.9251), tensor(-0.9154), tensor(-0.8302), tensor(-0.4905), tensor(-1.1708), tensor(-0.

52it [41:24, 46.51s/it]


Start Epoch 52
Rewards: [tensor(0.1909), tensor(-1.2618), tensor(-0.9349), tensor(-0.0730), tensor(-0.1059), tensor(-1.2053), tensor(-0.1381), tensor(-0.9515), tensor(-0.8043), tensor(-0.5857), tensor(-1.1965), tensor(-0.6034), tensor(-0.1778), tensor(-0.8725), tensor(-1.0500), tensor(-0.6155), tensor(-1.0756), tensor(-0.3247), tensor(-0.3732), tensor(-0.8317), tensor(0.1157), tensor(-0.6685), tensor(-0.3448), tensor(-0.3766), tensor(-0.3418), tensor(-1.1911), tensor(-0.1741), tensor(-0.4443), tensor(-0.2991), tensor(-0.1311), tensor(-0.5599), tensor(-1.1192), tensor(-0.4490), tensor(-1.2142), tensor(-0.7733), tensor(-0.0972), tensor(-0.3233), tensor(-1.1407), tensor(-0.8043), tensor(-0.4892), tensor(-0.8794), tensor(-0.6379), tensor(-0.9589), tensor(-0.8987), tensor(-0.1156), tensor(-0.9312), tensor(-0.1251), tensor(-1.1885), tensor(-1.0256), tensor(-1.0669), tensor(-0.2431), tensor(-0.5382), tensor(-1.0638), tensor(-0.3094), tensor(-0.3709), tensor(-0.8992), tensor(0.3344), tensor(-

53it [42:08, 45.88s/it]


Start Epoch 53
Rewards: [tensor(-0.5839), tensor(-0.8338), tensor(-0.3289), tensor(-0.1964), tensor(-0.7118), tensor(0.0768), tensor(-0.4729), tensor(-0.8844), tensor(-0.3739), tensor(-0.7052), tensor(-0.3766), tensor(-0.4793), tensor(-0.5978), tensor(0.3344), tensor(-0.3816), tensor(-1.1753), tensor(0.1127), tensor(-0.1382), tensor(-0.6451), tensor(-1.1949), tensor(-1.0899), tensor(-0.8150), tensor(-0.5320), tensor(-0.0384), tensor(-0.2204), tensor(-0.6772), tensor(-0.9555), tensor(-1.1624), tensor(-1.0984), tensor(-0.7444), tensor(-0.7514), tensor(-0.4438), tensor(-0.1775), tensor(-0.3320), tensor(-0.9269), tensor(-0.2251), tensor(-1.2196), tensor(-0.0735), tensor(-0.2560), tensor(-0.8506), tensor(-0.8491), tensor(-0.8718), tensor(-1.0201), tensor(-0.7098), tensor(-0.7445), tensor(-1.4031), tensor(-0.6896), tensor(-0.0499), tensor(-0.3231), tensor(-0.4664), tensor(-0.4891), tensor(-0.5539), tensor(-1.0866), tensor(-1.1769), tensor(-0.4140), tensor(-0.4310), tensor(-0.5192), tensor(-

54it [42:55, 46.26s/it]


Start Epoch 54
Rewards: [tensor(-0.2740), tensor(-0.5158), tensor(-0.5749), tensor(-0.7206), tensor(-0.3833), tensor(-0.5153), tensor(-0.3070), tensor(-0.1121), tensor(-0.6635), tensor(-0.7628), tensor(-0.3654), tensor(-0.5621), tensor(-0.9648), tensor(-0.1532), tensor(-0.9349), tensor(-0.7569), tensor(-1.2140), tensor(-1.1115), tensor(-1.2618), tensor(-0.9730), tensor(-0.7862), tensor(0.0523), tensor(-0.1848), tensor(0.0038), tensor(-0.1460), tensor(-0.3354), tensor(-0.5751), tensor(-0.8393), tensor(-0.5637), tensor(0.0091), tensor(-0.6587), tensor(-0.6324), tensor(-0.7660), tensor(-1.0872), tensor(-0.4052), tensor(-0.8644), tensor(-0.6045), tensor(-0.8742), tensor(-0.8376), tensor(-0.9791), tensor(-0.6255), tensor(-0.9117), tensor(-0.9879), tensor(0.0074), tensor(-0.4310), tensor(-0.8929), tensor(-1.0986), tensor(-0.4742), tensor(-0.4527), tensor(-0.6042), tensor(-0.1519), tensor(-0.5141), tensor(-1.2824), tensor(-0.6690), tensor(-1.2616), tensor(-0.4490), tensor(-0.9910), tensor(0.

55it [43:38, 45.32s/it]


Start Epoch 55
Rewards: [tensor(0.1291), tensor(-0.1466), tensor(-0.7174), tensor(-0.7231), tensor(-0.8008), tensor(-0.8778), tensor(-1.1417), tensor(-0.1702), tensor(-1.3227), tensor(0.3216), tensor(-0.6532), tensor(-1.5038), tensor(-1.2252), tensor(-0.5276), tensor(-0.2256), tensor(-0.6379), tensor(-0.4972), tensor(-0.3827), tensor(-0.1922), tensor(-1.4396), tensor(-0.3732), tensor(-0.5685), tensor(0.0362), tensor(-0.3407), tensor(-1.1944), tensor(-0.6385), tensor(-0.9946), tensor(-0.2308), tensor(-0.1751), tensor(-0.6295), tensor(-0.9895), tensor(-0.5557), tensor(-0.9248), tensor(-1.1769), tensor(-0.5279), tensor(-0.8525), tensor(-0.5953), tensor(-0.5674), tensor(-0.6919), tensor(-1.0177), tensor(-0.6001), tensor(-1.0014), tensor(-0.1150), tensor(-0.8797), tensor(-1.3367), tensor(-0.2935), tensor(-1.0368), tensor(0.0540), tensor(-0.0531), tensor(-0.1775), tensor(-0.4907), tensor(0.1587), tensor(-0.7444), tensor(-0.9589), tensor(-0.4233), tensor(-0.9114), tensor(-0.8142), tensor(-1.

56it [44:32, 47.82s/it]


Start Epoch 56
Rewards: [tensor(-1.2328), tensor(-0.5142), tensor(-0.5664), tensor(-0.6863), tensor(-0.2659), tensor(0.2063), tensor(-0.4067), tensor(-0.8989), tensor(0.1438), tensor(-1.0204), tensor(-0.3012), tensor(0.0471), tensor(-0.3141), tensor(-0.8929), tensor(-0.2774), tensor(-0.2343), tensor(-0.8005), tensor(-0.1717), tensor(0.0877), tensor(-0.2873), tensor(-0.4038), tensor(0.0902), tensor(-1.4689), tensor(-0.7956), tensor(-1.5563), tensor(-0.6385), tensor(0.3636), tensor(0.1405), tensor(-0.8213), tensor(-0.9424), tensor(-0.3141), tensor(-0.4832), tensor(-0.2142), tensor(-0.1938), tensor(-0.1939), tensor(-0.1601), tensor(0.3823), tensor(-0.8818), tensor(-0.0898), tensor(-0.8498), tensor(-0.6585), tensor(-1.0460), tensor(-0.8338), tensor(-0.2082), tensor(-0.6385), tensor(-0.4534), tensor(-0.5657), tensor(-1.1882), tensor(-0.0288), tensor(-0.5471), tensor(-0.0479), tensor(-1.0721), tensor(-0.5710), tensor(-0.4208), tensor(-0.9730), tensor(-0.1471), tensor(-0.6658), tensor(-0.227

57it [45:15, 46.24s/it]


Start Epoch 57
Rewards: [tensor(-0.6910), tensor(-1.0114), tensor(-0.0832), tensor(-0.2194), tensor(-0.1844), tensor(0.0118), tensor(-0.1461), tensor(-0.3390), tensor(-0.8509), tensor(-0.4476), tensor(-1.1864), tensor(-0.8569), tensor(-0.6240), tensor(0.3823), tensor(-1.0397), tensor(-1.2705), tensor(-0.7897), tensor(-0.6507), tensor(-1.1883), tensor(-0.2100), tensor(0.0051), tensor(-0.7897), tensor(-0.1109), tensor(-0.9191), tensor(-0.5584), tensor(-0.7854), tensor(-0.6385), tensor(-0.4450), tensor(-0.0042), tensor(-0.1563), tensor(-0.6284), tensor(-0.4083), tensor(-0.6458), tensor(-1.1864), tensor(-0.4835), tensor(0.0131), tensor(-0.7235), tensor(-0.1204), tensor(-0.2721), tensor(-0.4324), tensor(0.3635), tensor(-0.0979), tensor(-1.0758), tensor(-0.6585), tensor(-1.1998), tensor(-0.6081), tensor(-0.7444), tensor(-0.3943), tensor(-0.3732), tensor(-0.1283), tensor(-0.0428), tensor(-0.6821), tensor(-0.5139), tensor(-0.2990), tensor(-0.6317), tensor(-0.2712), tensor(-0.2221), tensor(-0.

58it [45:56, 44.82s/it]


Start Epoch 58
Rewards: [tensor(-0.4447), tensor(-0.3143), tensor(-0.0901), tensor(-0.3321), tensor(-0.9820), tensor(0.3045), tensor(-0.0965), tensor(-0.9742), tensor(-0.2685), tensor(-0.8742), tensor(-0.4069), tensor(-0.4076), tensor(-0.9288), tensor(-1.2365), tensor(-0.1646), tensor(-0.5015), tensor(-0.6373), tensor(-0.6245), tensor(-0.3732), tensor(-0.3943), tensor(-0.3723), tensor(-1.2325), tensor(-0.3477), tensor(-0.2338), tensor(0.2755), tensor(-1.0215), tensor(-0.9382), tensor(-0.7638), tensor(-0.0901), tensor(-1.0382), tensor(-1.1337), tensor(-0.2798), tensor(-0.5272), tensor(-0.9922), tensor(-0.2301), tensor(-0.9320), tensor(-0.9666), tensor(-0.0142), tensor(-0.6808), tensor(-1.4462), tensor(-0.3264), tensor(-0.4891), tensor(-0.2906), tensor(-0.6563), tensor(-0.3576), tensor(-1.0221), tensor(-0.1436), tensor(-0.0861), tensor(0.3009), tensor(-0.9890), tensor(0.2409), tensor(-0.4602), tensor(-1.0029), tensor(-0.9628), tensor(-0.6449), tensor(-0.0854), tensor(-0.9522), tensor(-0

59it [46:45, 46.06s/it]


Start Epoch 59
Rewards: [tensor(-0.4277), tensor(-0.0751), tensor(-0.5246), tensor(-0.1864), tensor(-0.0058), tensor(-0.2802), tensor(-1.2351), tensor(-1.1292), tensor(-1.2784), tensor(-0.6706), tensor(-1.1898), tensor(-1.1543), tensor(-1.5759), tensor(-0.0911), tensor(-0.3001), tensor(-1.1195), tensor(-0.4527), tensor(-0.6792), tensor(-0.0794), tensor(0.1346), tensor(-1.3112), tensor(-1.0823), tensor(-0.0750), tensor(-1.2142), tensor(-0.0619), tensor(-0.9386), tensor(-0.8003), tensor(-1.0009), tensor(-1.0884), tensor(-1.2917), tensor(-0.6693), tensor(-0.4793), tensor(-0.5246), tensor(-0.2862), tensor(-0.1249), tensor(-1.0203), tensor(-0.2267), tensor(-1.1106), tensor(-0.0905), tensor(-0.1882), tensor(-0.2991), tensor(-1.1543), tensor(0.1030), tensor(-0.8141), tensor(-0.2777), tensor(0.1825), tensor(-0.6816), tensor(-1.0164), tensor(0.0066), tensor(-0.6620), tensor(-0.5225), tensor(-0.8579), tensor(-0.8553), tensor(-0.6385), tensor(-1.2912), tensor(-0.2368), tensor(-0.0638), tensor(-0

60it [47:32, 46.29s/it]


Start Epoch 60
Rewards: [tensor(-0.8989), tensor(0.5631), tensor(-0.4715), tensor(-0.5553), tensor(-0.1130), tensor(-0.2176), tensor(-0.1189), tensor(-1.2892), tensor(-1.0564), tensor(-0.6544), tensor(-1.2504), tensor(-0.6438), tensor(-1.1725), tensor(0.0475), tensor(-0.8105), tensor(-1.0590), tensor(-0.9533), tensor(-0.5044), tensor(-0.2797), tensor(0.4741), tensor(-0.9881), tensor(-1.0128), tensor(-0.4386), tensor(-0.1244), tensor(-1.2784), tensor(0.0036), tensor(-1.1681), tensor(-0.4016), tensor(-0.0402), tensor(-0.8412), tensor(-0.8705), tensor(-0.0243), tensor(-1.1372), tensor(-0.7543), tensor(-0.1262), tensor(-1.0501), tensor(-1.2396), tensor(-1.1355), tensor(-0.6620), tensor(-0.0952), tensor(-1.2325), tensor(-1.2362), tensor(-1.0339), tensor(-1.0221), tensor(-0.7005), tensor(-0.8711), tensor(-0.8579), tensor(-0.7640), tensor(-0.5341), tensor(-1.0081), tensor(-0.3630), tensor(-1.0126), tensor(-1.1945), tensor(-0.4092), tensor(-0.6970), tensor(-0.5823), tensor(-0.4443), tensor(-0

61it [48:23, 47.86s/it]


Start Epoch 61
Rewards: [tensor(-0.4397), tensor(-0.0864), tensor(-0.4750), tensor(-0.9644), tensor(-0.8429), tensor(-0.6385), tensor(-0.3711), tensor(0.0099), tensor(-0.4810), tensor(-0.2499), tensor(0.2278), tensor(0.1066), tensor(-0.3631), tensor(-0.8989), tensor(-0.8093), tensor(-0.3070), tensor(-0.4669), tensor(-0.1996), tensor(-0.3324), tensor(0.2496), tensor(-0.5221), tensor(-0.6009), tensor(0.1405), tensor(-1.2017), tensor(-0.7417), tensor(-0.7885), tensor(0.1444), tensor(-1.0519), tensor(-0.7678), tensor(-0.5023), tensor(-0.6530), tensor(-1.0339), tensor(-0.6864), tensor(-1.0339), tensor(-0.4629), tensor(-0.9293), tensor(-0.7723), tensor(-1.0739), tensor(-0.5737), tensor(-0.6366), tensor(-1.1651), tensor(-0.6716), tensor(-1.1586), tensor(-0.2018), tensor(-0.2539), tensor(-1.1221), tensor(-0.9890), tensor(-0.6319), tensor(-0.2480), tensor(-0.3875), tensor(-0.5092), tensor(-1.2367), tensor(0.0295), tensor(-0.6726), tensor(-0.0821), tensor(-0.6540), tensor(-0.5855), tensor(-0.66

62it [49:12, 47.99s/it]


Start Epoch 62
Rewards: [tensor(-0.2599), tensor(-1.2846), tensor(-0.9341), tensor(-0.4076), tensor(0.0857), tensor(-1.1737), tensor(-0.0068), tensor(-0.2560), tensor(-0.8696), tensor(-0.3209), tensor(-0.4846), tensor(-0.2021), tensor(0.2985), tensor(-0.3964), tensor(-0.8296), tensor(-0.0937), tensor(0.0425), tensor(0.3365), tensor(-0.4038), tensor(-0.8238), tensor(-0.3936), tensor(-0.0243), tensor(-0.2862), tensor(-0.6919), tensor(-0.4208), tensor(0.1749), tensor(-0.8599), tensor(-0.8338), tensor(-1.2892), tensor(-0.1086), tensor(-0.9247), tensor(-0.4922), tensor(0.1747), tensor(-0.3301), tensor(0.1503), tensor(-0.5382), tensor(-0.5264), tensor(0.0132), tensor(-0.9251), tensor(-0.5558), tensor(-1.2192), tensor(-0.0854), tensor(-0.7771), tensor(-1.1106), tensor(-0.1733), tensor(-1.0390), tensor(-0.9932), tensor(-0.7482), tensor(0.0323), tensor(-0.4268), tensor(-1.1621), tensor(-0.9771), tensor(-0.8939), tensor(-0.7594), tensor(-0.5722), tensor(-0.4673), tensor(-0.7956), tensor(-0.9089

63it [49:58, 47.54s/it]


Start Epoch 63
Rewards: [tensor(0.3823), tensor(-0.3135), tensor(-1.2325), tensor(-1.1544), tensor(-0.0832), tensor(-0.5343), tensor(-0.3477), tensor(-0.3364), tensor(-0.8890), tensor(-1.0006), tensor(-0.0824), tensor(-1.0924), tensor(-0.1242), tensor(-0.6728), tensor(-1.0006), tensor(-0.6141), tensor(-0.3379), tensor(-0.6853), tensor(-0.9730), tensor(-0.0429), tensor(-1.2892), tensor(-0.7198), tensor(-0.2100), tensor(-0.3292), tensor(-0.4527), tensor(-0.2884), tensor(-0.2406), tensor(-0.2913), tensor(-1.0831), tensor(-0.9854), tensor(-0.2221), tensor(-1.2899), tensor(-0.8848), tensor(-0.7934), tensor(-0.8296), tensor(0.2501), tensor(-1.3980), tensor(-0.6726), tensor(-0.5219), tensor(-0.8377), tensor(-0.1315), tensor(-0.2329), tensor(-0.7452), tensor(-1.2562), tensor(-1.4021), tensor(-0.9627), tensor(-0.2888), tensor(0.5181), tensor(-0.4546), tensor(-0.7027), tensor(0.2761), tensor(-0.6583), tensor(-0.5368), tensor(-1.2917), tensor(-0.8408), tensor(-1.2912), tensor(-0.3512), tensor(-0

64it [50:46, 47.62s/it]


Start Epoch 64
Rewards: [tensor(0.1943), tensor(0.0528), tensor(-0.0242), tensor(-0.8313), tensor(-1.0974), tensor(-0.8317), tensor(-1.1543), tensor(-0.6280), tensor(-0.1530), tensor(-0.6910), tensor(0.0737), tensor(0.1961), tensor(-0.6821), tensor(-0.5415), tensor(-0.9863), tensor(-0.7098), tensor(-0.5599), tensor(-0.6015), tensor(-1.1493), tensor(-0.0499), tensor(-1.1201), tensor(-0.3001), tensor(-0.5074), tensor(0.1708), tensor(-1.2866), tensor(-0.5751), tensor(-0.6624), tensor(-1.0181), tensor(-0.6385), tensor(-0.7521), tensor(-0.3174), tensor(-1.0034), tensor(-0.0835), tensor(-0.3680), tensor(-0.4379), tensor(0.1574), tensor(-1.0541), tensor(0.3102), tensor(-0.9628), tensor(-1.1450), tensor(0.0523), tensor(-0.9506), tensor(0.0320), tensor(-0.5412), tensor(-0.8753), tensor(-0.6693), tensor(-0.9685), tensor(-1.4410), tensor(-0.7677), tensor(-1.2075), tensor(0.3216), tensor(0.1793), tensor(-0.1365), tensor(-1.2912), tensor(-1.1798), tensor(-0.5812), tensor(-0.5425), tensor(-0.5122),

65it [51:40, 49.47s/it]


Start Epoch 65
Rewards: [tensor(-0.7543), tensor(-0.3307), tensor(-0.4445), tensor(-0.0985), tensor(-0.9592), tensor(-0.7267), tensor(-1.0097), tensor(-0.9412), tensor(-0.5192), tensor(-0.6808), tensor(-0.2455), tensor(-0.7801), tensor(-0.5621), tensor(-0.1169), tensor(-0.8987), tensor(-0.4628), tensor(-1.0855), tensor(-1.0565), tensor(-0.3321), tensor(0.0280), tensor(0.0587), tensor(-0.3301), tensor(-0.3135), tensor(-0.8939), tensor(0.0165), tensor(-0.0467), tensor(-0.0289), tensor(-0.1741), tensor(-0.0013), tensor(-0.6431), tensor(-0.5027), tensor(-0.0925), tensor(-0.3766), tensor(-0.4443), tensor(-0.7309), tensor(-0.2541), tensor(-0.9751), tensor(-0.3193), tensor(-0.3153), tensor(-1.0339), tensor(-0.5459), tensor(-0.3964), tensor(-0.8139), tensor(-0.4140), tensor(0.1752), tensor(-0.5096), tensor(-0.9312), tensor(-0.6872), tensor(-0.1269), tensor(-0.8872), tensor(-0.8705), tensor(-0.3149), tensor(-0.3305), tensor(-0.6199), tensor(-0.4544), tensor(-0.2906), tensor(-0.6062), tensor(-0

66it [52:24, 47.81s/it]


Start Epoch 66
Rewards: [tensor(-1.0390), tensor(0.0531), tensor(-0.4972), tensor(-0.4812), tensor(0.4593), tensor(-0.5190), tensor(0.0221), tensor(-0.5857), tensor(-0.6751), tensor(-0.0889), tensor(-0.2242), tensor(-0.9699), tensor(-1.2325), tensor(-0.4274), tensor(-0.1882), tensor(-0.7559), tensor(-0.6118), tensor(-0.5629), tensor(-0.5855), tensor(-0.9785), tensor(-0.2391), tensor(0.2309), tensor(-0.1498), tensor(-0.5331), tensor(-0.8307), tensor(-0.6438), tensor(-0.2997), tensor(0.1222), tensor(-0.7629), tensor(-0.3039), tensor(-0.2883), tensor(-0.6587), tensor(-0.0805), tensor(0.1030), tensor(-0.5413), tensor(-1.3545), tensor(-0.4233), tensor(-0.2308), tensor(-1.3145), tensor(-0.2339), tensor(-0.6020), tensor(-1.2933), tensor(-1.2866), tensor(-0.0633), tensor(-0.8142), tensor(-0.6419), tensor(-0.7256), tensor(-0.6601), tensor(-0.8885), tensor(-0.1249), tensor(-1.1450), tensor(-1.0412), tensor(-1.0161), tensor(-0.3135), tensor(-0.4673), tensor(0.0857), tensor(-0.2952), tensor(-1.02

67it [53:14, 48.59s/it]


Start Epoch 67
Rewards: [tensor(0.1943), tensor(-0.9718), tensor(0.1361), tensor(-0.2539), tensor(-0.7482), tensor(-1.0419), tensor(-0.6066), tensor(-0.6258), tensor(-1.1796), tensor(-0.5096), tensor(-0.3111), tensor(0.2677), tensor(-0.3660), tensor(-1.0734), tensor(-0.6116), tensor(-1.5759), tensor(-0.2717), tensor(-0.4940), tensor(-1.0887), tensor(-0.2483), tensor(-0.5068), tensor(-0.6407), tensor(-0.1674), tensor(-0.9521), tensor(-0.2541), tensor(-0.8376), tensor(-0.8634), tensor(-0.7061), tensor(-0.1169), tensor(-1.1769), tensor(-0.6001), tensor(-0.1229), tensor(-0.7387), tensor(-0.9764), tensor(-0.8885), tensor(-1.2703), tensor(-1.3268), tensor(-0.6939), tensor(-0.5023), tensor(0.2086), tensor(-0.5462), tensor(-0.1626), tensor(-0.3615), tensor(-0.7360), tensor(-0.7781), tensor(-0.0920), tensor(-1.0201), tensor(-0.6641), tensor(-0.4546), tensor(-1.2457), tensor(-0.9892), tensor(-1.2146), tensor(-0.2954), tensor(-0.3320), tensor(-0.3343), tensor(-0.1367), tensor(-0.2997), tensor(-0

68it [53:58, 47.25s/it]


Start Epoch 68
Rewards: [tensor(-0.9217), tensor(-0.0966), tensor(-0.9966), tensor(-0.2353), tensor(-0.7431), tensor(-1.0045), tensor(-0.9951), tensor(-0.9718), tensor(-0.9878), tensor(-0.3111), tensor(-0.5445), tensor(-0.5138), tensor(0.2144), tensor(-0.1751), tensor(-0.6047), tensor(0.2955), tensor(-0.9842), tensor(-0.2774), tensor(-0.5096), tensor(-0.8226), tensor(-0.1922), tensor(-0.5142), tensor(-1.1012), tensor(-0.7996), tensor(0.0227), tensor(-0.7345), tensor(-0.7750), tensor(-0.3480), tensor(-0.5190), tensor(-0.1519), tensor(-0.9162), tensor(-1.2142), tensor(-0.4436), tensor(-1.2470), tensor(-0.9117), tensor(-0.2053), tensor(-0.3958), tensor(-1.0401), tensor(-0.9089), tensor(-0.6179), tensor(-0.2777), tensor(-0.4264), tensor(-0.6510), tensor(-0.7198), tensor(-0.2783), tensor(-0.6510), tensor(0.3475), tensor(0.1127), tensor(0.0415), tensor(-0.3955), tensor(-0.9644), tensor(-0.5341), tensor(-1.2762), tensor(-0.4303), tensor(-0.8710), tensor(-0.4511), tensor(-0.4635), tensor(-1.0

69it [54:45, 47.10s/it]


Start Epoch 69
Rewards: [tensor(-0.9906), tensor(-0.4673), tensor(0.4885), tensor(-0.0645), tensor(-0.4635), tensor(-0.5331), tensor(-1.3076), tensor(-0.3350), tensor(-0.2272), tensor(-0.8869), tensor(-0.5244), tensor(-0.0258), tensor(-0.3480), tensor(-0.2462), tensor(-0.7152), tensor(-1.0351), tensor(-0.8606), tensor(-1.1195), tensor(-0.1601), tensor(-0.1847), tensor(-0.5142), tensor(-0.9651), tensor(-0.1013), tensor(-0.2188), tensor(-1.2028), tensor(-1.1943), tensor(0.0206), tensor(-0.4654), tensor(-0.1059), tensor(-0.8811), tensor(-0.1441), tensor(0.0227), tensor(-0.5657), tensor(-0.0936), tensor(-0.3875), tensor(-1.1769), tensor(-0.9651), tensor(0.0902), tensor(-0.8992), tensor(0.1029), tensor(-0.6658), tensor(-0.5834), tensor(-1.2233), tensor(-0.7387), tensor(-0.6385), tensor(-0.0393), tensor(-0.3426), tensor(-1.2233), tensor(-0.9382), tensor(-0.2800), tensor(-0.7571), tensor(-0.0979), tensor(-0.3937), tensor(-0.5674), tensor(-0.8122), tensor(0.0415), tensor(-0.2952), tensor(-1.2

70it [55:31, 46.67s/it]


Start Epoch 70
Rewards: [tensor(-0.1775), tensor(-0.3880), tensor(-0.4461), tensor(-0.0961), tensor(-0.8625), tensor(0.0691), tensor(-1.0436), tensor(-1.3268), tensor(-0.8481), tensor(-1.2040), tensor(-1.2105), tensor(-0.1715), tensor(-0.1784), tensor(-1.3392), tensor(-0.8014), tensor(-0.6245), tensor(-0.7480), tensor(-0.0041), tensor(0.0407), tensor(-0.4228), tensor(-0.2174), tensor(-1.2065), tensor(-0.4600), tensor(-0.8588), tensor(-0.6878), tensor(0.1527), tensor(-1.0567), tensor(-0.2382), tensor(-1.0564), tensor(0.1010), tensor(0.0425), tensor(-0.7360), tensor(-0.4795), tensor(-0.7961), tensor(-1.0612), tensor(0.2309), tensor(-0.6122), tensor(-0.1347), tensor(-0.1285), tensor(-0.2135), tensor(-0.1109), tensor(-0.3875), tensor(-0.8615), tensor(-0.7419), tensor(-0.5165), tensor(-0.6153), tensor(-0.6768), tensor(-0.5244), tensor(0.3365), tensor(-0.3807), tensor(-0.9550), tensor(-0.1329), tensor(-0.1817), tensor(-0.2353), tensor(-1.3790), tensor(-0.5577), tensor(-0.2743), tensor(-0.54

71it [56:16, 46.31s/it]


Start Epoch 71
Rewards: [tensor(-0.6172), tensor(-0.5685), tensor(-0.3576), tensor(-0.6889), tensor(-0.9293), tensor(-0.5138), tensor(-0.2338), tensor(0.1291), tensor(-1.1917), tensor(0.1315), tensor(-0.2614), tensor(-0.1382), tensor(-1.0776), tensor(0.0296), tensor(-0.0428), tensor(-0.9007), tensor(-0.6960), tensor(-0.6593), tensor(-0.5556), tensor(-0.3896), tensor(-0.1996), tensor(-1.0200), tensor(-0.8022), tensor(-0.1847), tensor(-0.8710), tensor(-1.2367), tensor(-0.9651), tensor(-0.9052), tensor(-0.6256), tensor(-0.2309), tensor(-0.0289), tensor(0.0879), tensor(-0.3068), tensor(-0.3143), tensor(-0.7898), tensor(-0.1363), tensor(-1.0294), tensor(-0.6821), tensor(-0.7350), tensor(-0.8043), tensor(-0.5804), tensor(-0.5097), tensor(-0.2802), tensor(-0.8481), tensor(-0.0122), tensor(-0.7165), tensor(0.1984), tensor(-0.6879), tensor(-0.1382), tensor(0.5933), tensor(-1.4123), tensor(-0.7549), tensor(-1.1820), tensor(-0.5428), tensor(-0.0751), tensor(-0.6583), tensor(-0.7339), tensor(0.02

72it [57:04, 46.76s/it]


Start Epoch 72
Rewards: [tensor(-0.2895), tensor(-0.4832), tensor(-0.8553), tensor(-1.0857), tensor(-0.5341), tensor(-0.4141), tensor(-0.4418), tensor(-0.8150), tensor(-1.2040), tensor(-0.9241), tensor(-0.8794), tensor(-0.9617), tensor(-0.7961), tensor(-0.1293), tensor(-0.2673), tensor(-0.9651), tensor(-0.5259), tensor(-0.9651), tensor(0.0902), tensor(0.0296), tensor(-1.0180), tensor(-1.0114), tensor(-0.7223), tensor(-1.0029), tensor(-1.3268), tensor(-1.3412), tensor(-0.2935), tensor(0.0100), tensor(-1.2524), tensor(-1.1726), tensor(-0.5341), tensor(-1.2843), tensor(-0.4579), tensor(-0.9323), tensor(-1.2142), tensor(-0.8334), tensor(-0.8223), tensor(-0.3805), tensor(-0.2902), tensor(-0.1441), tensor(-1.1689), tensor(-0.5368), tensor(0.2098), tensor(-1.2417), tensor(-1.2043), tensor(-0.2114), tensor(-0.6706), tensor(-0.7771), tensor(-0.5015), tensor(-0.1881), tensor(-0.4730), tensor(-0.9186), tensor(0.0732), tensor(-0.6855), tensor(-0.3241), tensor(-1.2140), tensor(-0.0132), tensor(-1.

73it [57:49, 46.18s/it]


Start Epoch 73
Rewards: [tensor(-0.2537), tensor(-1.1292), tensor(-0.9651), tensor(-0.6489), tensor(-0.9010), tensor(-0.5415), tensor(-1.2714), tensor(-0.5713), tensor(-0.2545), tensor(0.0074), tensor(-0.5545), tensor(-1.1802), tensor(-0.3143), tensor(-1.1749), tensor(-1.0177), tensor(-0.9117), tensor(0.3216), tensor(-0.4219), tensor(-0.3711), tensor(-1.0784), tensor(-0.7750), tensor(-0.2302), tensor(-0.6690), tensor(-0.8481), tensor(-0.2886), tensor(-0.6243), tensor(-0.6153), tensor(-0.3665), tensor(0.3508), tensor(-0.9031), tensor(-1.3902), tensor(0.2164), tensor(0.3508), tensor(-0.4673), tensor(-1.0924), tensor(-0.8022), tensor(0.0895), tensor(-0.9533), tensor(0.0287), tensor(-1.1543), tensor(0.3074), tensor(-0.9010), tensor(-1.2846), tensor(-0.9951), tensor(-0.6160), tensor(-0.1080), tensor(0.0649), tensor(-0.8359), tensor(-0.5264), tensor(-0.4832), tensor(-1.1931), tensor(-0.9293), tensor(0.1382), tensor(-0.7316), tensor(-1.3145), tensor(-0.7005), tensor(-0.7225), tensor(-0.4457)

74it [58:38, 47.17s/it]


Start Epoch 74
Rewards: [tensor(-0.0218), tensor(-0.5533), tensor(-0.9685), tensor(-0.1722), tensor(0.0828), tensor(-0.2480), tensor(-0.8664), tensor(-0.1754), tensor(-0.5827), tensor(0.0661), tensor(-0.5666), tensor(-0.4376), tensor(-0.3892), tensor(-0.6066), tensor(0.0280), tensor(-0.9578), tensor(-0.5050), tensor(-0.1530), tensor(-0.0045), tensor(-1.2061), tensor(-0.9890), tensor(-0.4720), tensor(0.1708), tensor(-0.4101), tensor(0.0485), tensor(-0.8819), tensor(-0.2428), tensor(-0.5068), tensor(0.0100), tensor(-1.0731), tensor(-0.9166), tensor(-0.3428), tensor(-1.1931), tensor(-0.5538), tensor(-0.0659), tensor(-1.1694), tensor(0.2496), tensor(-0.4664), tensor(-0.1626), tensor(0.1708), tensor(-0.3286), tensor(-0.2224), tensor(-0.6014), tensor(-1.0734), tensor(-0.7005), tensor(-0.0231), tensor(-0.1161), tensor(-0.7628), tensor(-0.0685), tensor(-0.7698), tensor(-1.0274), tensor(-0.1358), tensor(-0.3350), tensor(-0.3018), tensor(-0.8223), tensor(0.0610), tensor(-0.1315), tensor(-0.3622

75it [59:27, 47.69s/it]


Start Epoch 75
Rewards: [tensor(-0.0992), tensor(-1.0520), tensor(-0.9668), tensor(-0.6772), tensor(-0.9186), tensor(-0.2858), tensor(-1.0401), tensor(-0.9251), tensor(-0.6295), tensor(-0.5077), tensor(-0.9631), tensor(-0.8992), tensor(-0.7716), tensor(-0.9117), tensor(0.1642), tensor(-0.2132), tensor(-0.7897), tensor(-1.0832), tensor(-0.8959), tensor(-0.5695), tensor(-0.5514), tensor(-0.1711), tensor(-0.2475), tensor(-0.3877), tensor(0.5181), tensor(-0.7771), tensor(-0.4914), tensor(-0.4534), tensor(-0.8917), tensor(-0.8407), tensor(-1.0030), tensor(-0.4100), tensor(-0.8509), tensor(-0.9111), tensor(-0.7723), tensor(-0.3018), tensor(-0.4379), tensor(-0.1657), tensor(-0.1347), tensor(-0.7934), tensor(-0.2705), tensor(-1.0209), tensor(-1.0093), tensor(-0.6432), tensor(-0.2132), tensor(-0.1571), tensor(-0.5973), tensor(-0.5614), tensor(-1.0893), tensor(-0.6504), tensor(-0.0166), tensor(-0.5092), tensor(-0.4196), tensor(0.2180), tensor(-0.5214), tensor(-0.6131), tensor(-1.2203), tensor(-

76it [1:00:15, 47.75s/it]


Start Epoch 76
Rewards: [tensor(-0.6768), tensor(-0.2431), tensor(-0.3665), tensor(-0.0258), tensor(-0.4800), tensor(-1.2394), tensor(-1.2492), tensor(-0.3805), tensor(0.0532), tensor(-0.9797), tensor(-0.2877), tensor(-1.3292), tensor(-0.9952), tensor(-0.5626), tensor(-0.4944), tensor(-0.5309), tensor(-0.9651), tensor(-0.7399), tensor(-1.0664), tensor(-0.3312), tensor(-0.7956), tensor(0.1587), tensor(-0.3454), tensor(-0.9631), tensor(-0.3867), tensor(0.1730), tensor(-0.8257), tensor(-0.9577), tensor(-0.3660), tensor(-1.1012), tensor(-0.1614), tensor(-0.7898), tensor(-0.7897), tensor(-0.9414), tensor(-0.1329), tensor(-0.7504), tensor(-1.3190), tensor(-0.0317), tensor(-0.2419), tensor(-0.2683), tensor(-0.3984), tensor(-0.0531), tensor(-0.4715), tensor(-1.2524), tensor(-0.0336), tensor(-0.8950), tensor(0.2353), tensor(-0.9154), tensor(-1.3268), tensor(-0.4789), tensor(-1.0015), tensor(-0.4669), tensor(-0.5487), tensor(-0.6658), tensor(-0.0121), tensor(-0.4810), tensor(-0.7836), tensor(-0

77it [1:01:00, 46.79s/it]


Start Epoch 77
Rewards: [tensor(-0.5211), tensor(-0.8753), tensor(-0.7102), tensor(-1.0321), tensor(-0.3192), tensor(-1.0265), tensor(-0.0406), tensor(-0.3668), tensor(0.0074), tensor(-1.2228), tensor(-0.2877), tensor(-0.4509), tensor(0.1318), tensor(-0.7119), tensor(-1.0256), tensor(-0.9657), tensor(-0.1369), tensor(-0.1049), tensor(0.0531), tensor(-0.6666), tensor(-0.3390), tensor(0.0953), tensor(0.1070), tensor(-0.9241), tensor(-0.5428), tensor(-0.8093), tensor(-1.0729), tensor(-0.9653), tensor(0.0295), tensor(-1.2784), tensor(-0.8019), tensor(-0.2425), tensor(-0.9071), tensor(-0.2740), tensor(-0.1057), tensor(-0.1404), tensor(-0.8313), tensor(-0.4892), tensor(0.0206), tensor(-1.1106), tensor(-0.8126), tensor(-0.3232), tensor(-0.7777), tensor(-0.7234), tensor(0.1157), tensor(-1.2293), tensor(-0.4914), tensor(-0.0659), tensor(-0.5539), tensor(-0.0598), tensor(-1.2192), tensor(-0.5482), tensor(-0.4007), tensor(-0.3768), tensor(-0.6385), tensor(0.3102), tensor(-1.1574), tensor(-0.2462

78it [1:01:48, 47.19s/it]


Start Epoch 78
Rewards: [tensor(0.0654), tensor(-1.2003), tensor(-1.2325), tensor(0.0175), tensor(-0.9269), tensor(-0.6345), tensor(-0.3154), tensor(-0.5666), tensor(-0.2346), tensor(-0.0450), tensor(-0.1822), tensor(-0.5840), tensor(-0.5425), tensor(-0.6659), tensor(-1.1686), tensor(-0.4223), tensor(-0.3639), tensor(0.0654), tensor(-0.4100), tensor(-1.2061), tensor(-0.9282), tensor(0.4175), tensor(-0.1404), tensor(-0.9651), tensor(-0.3630), tensor(0.1010), tensor(-0.3670), tensor(-0.4249), tensor(-1.0419), tensor(-0.7152), tensor(-0.7628), tensor(0.0532), tensor(0.2506), tensor(-0.7713), tensor(0.1493), tensor(-0.6385), tensor(0.3402), tensor(-0.6174), tensor(-0.8302), tensor(-0.4273), tensor(-0.8619), tensor(-0.7414), tensor(-0.0854), tensor(-1.2165), tensor(-0.7605), tensor(-0.1751), tensor(-0.0645), tensor(0.0588), tensor(-1.0452), tensor(-0.4729), tensor(-0.7859), tensor(-1.2892), tensor(-0.4972), tensor(-0.6009), tensor(-0.0910), tensor(-1.0256), tensor(-0.1936), tensor(-0.8596)

79it [1:02:35, 47.33s/it]


Start Epoch 79
Rewards: [tensor(-0.4541), tensor(-0.9275), tensor(-0.3312), tensor(-0.2183), tensor(-0.5190), tensor(-0.3006), tensor(-0.2788), tensor(-0.8605), tensor(-1.4462), tensor(-0.4795), tensor(-1.0460), tensor(-1.3268), tensor(-0.7005), tensor(-0.4101), tensor(-0.9337), tensor(-0.8688), tensor(-0.5827), tensor(-0.1086), tensor(-1.1337), tensor(-0.9241), tensor(-1.1726), tensor(-0.0597), tensor(-0.8035), tensor(-0.8885), tensor(-0.6559), tensor(0.1405), tensor(-1.1775), tensor(-0.7836), tensor(0.1127), tensor(-1.2644), tensor(-0.2221), tensor(-0.6601), tensor(-1.2846), tensor(-0.6653), tensor(-0.2826), tensor(-1.2043), tensor(-0.1302), tensor(-0.1775), tensor(-0.0229), tensor(-0.7419), tensor(-0.1299), tensor(0.0332), tensor(-0.7161), tensor(0.1001), tensor(-0.5126), tensor(-0.3660), tensor(-0.3487), tensor(-0.8481), tensor(-0.8317), tensor(-0.3208), tensor(-1.2064), tensor(-0.8429), tensor(-0.2534), tensor(-0.3892), tensor(-0.9347), tensor(-0.1318), tensor(-0.0425), tensor(-0

80it [1:03:24, 47.85s/it]


Start Epoch 80
Rewards: [tensor(-0.5141), tensor(-0.4877), tensor(-0.2893), tensor(-0.8386), tensor(0.0269), tensor(-0.4100), tensor(-0.3622), tensor(-0.3293), tensor(-0.1347), tensor(-0.9895), tensor(-1.1388), tensor(0.1066), tensor(-0.4527), tensor(0.1029), tensor(-0.0936), tensor(-0.7970), tensor(-1.2824), tensor(-1.0642), tensor(-0.3429), tensor(0.5631), tensor(-0.5695), tensor(-0.1372), tensor(-0.3166), tensor(-1.0016), tensor(-1.0256), tensor(-0.6031), tensor(-1.4444), tensor(-0.6970), tensor(-0.2990), tensor(-0.5207), tensor(-0.3075), tensor(-1.1737), tensor(-1.0035), tensor(-0.2147), tensor(-0.9010), tensor(-0.1519), tensor(-1.0419), tensor(-0.4662), tensor(0.6414), tensor(-0.2764), tensor(-0.3259), tensor(-1.3282), tensor(0.5693), tensor(-0.5503), tensor(-0.3156), tensor(-1.2582), tensor(0.4175), tensor(-0.5973), tensor(-0.3193), tensor(-0.7052), tensor(-0.1913), tensor(-0.8339), tensor(-0.8710), tensor(-0.3944), tensor(-1.3429), tensor(-1.0045), tensor(-0.6438), tensor(-0.98

81it [1:04:12, 47.75s/it]


Start Epoch 81
Rewards: [tensor(-0.9217), tensor(-0.9901), tensor(-1.0811), tensor(-0.6474), tensor(-0.7021), tensor(0.2603), tensor(-0.1329), tensor(-0.7697), tensor(-0.1614), tensor(-0.9628), tensor(-1.3412), tensor(-0.5909), tensor(-1.0855), tensor(-0.9312), tensor(0.0100), tensor(-0.4100), tensor(-0.3350), tensor(0.0765), tensor(-0.7976), tensor(-0.6366), tensor(-0.8711), tensor(-0.2279), tensor(-0.1238), tensor(-0.1718), tensor(-0.2802), tensor(-0.1751), tensor(-0.6644), tensor(-0.1347), tensor(-0.4294), tensor(-0.7279), tensor(-0.5412), tensor(-0.3913), tensor(-0.8725), tensor(-0.1922), tensor(0.0532), tensor(0.1984), tensor(-0.3001), tensor(-0.8307), tensor(-0.7214), tensor(-1.0916), tensor(-0.4673), tensor(0.5181), tensor(-0.8992), tensor(-1.3608), tensor(-0.1285), tensor(-1.0113), tensor(-0.2440), tensor(0.0768), tensor(-0.3135), tensor(-0.1382), tensor(0.2962), tensor(-0.9700), tensor(-1.1585), tensor(-0.9152), tensor(-0.4066), tensor(-0.9578), tensor(-0.6617), tensor(0.1693

82it [1:04:58, 47.28s/it]


Start Epoch 82
Rewards: [tensor(-0.2149), tensor(-0.0889), tensor(-0.0134), tensor(-1.0244), tensor(-0.6048), tensor(-0.9165), tensor(-0.1397), tensor(-0.3228), tensor(-0.7226), tensor(-0.2259), tensor(-0.0355), tensor(-0.8043), tensor(0.1346), tensor(-0.9651), tensor(-0.6910), tensor(-0.5666), tensor(-0.2862), tensor(-0.7854), tensor(0.0074), tensor(-0.0042), tensor(-0.7862), tensor(-0.9026), tensor(-0.2409), tensor(-0.3921), tensor(-0.1372), tensor(-0.1626), tensor(-0.5248), tensor(-1.1868), tensor(-1.1911), tensor(-0.5354), tensor(-0.5744), tensor(-0.2028), tensor(-1.1686), tensor(-0.5857), tensor(-1.0852), tensor(-0.8150), tensor(-0.6070), tensor(0.2317), tensor(-0.2740), tensor(-0.8045), tensor(-0.1420), tensor(-0.9026), tensor(-0.5103), tensor(-0.4635), tensor(-0.2302), tensor(0.0012), tensor(-0.7452), tensor(-0.9235), tensor(-1.2244), tensor(-1.2866), tensor(-0.1130), tensor(-0.5277), tensor(0.1493), tensor(-0.7942), tensor(-0.2018), tensor(-0.1262), tensor(-1.2310), tensor(-1.

83it [1:05:44, 46.81s/it]


Start Epoch 83
Rewards: [tensor(-0.3374), tensor(-0.8975), tensor(-0.4729), tensor(-0.3549), tensor(-0.7781), tensor(-0.8019), tensor(-0.2798), tensor(-0.8688), tensor(0.0123), tensor(-0.5066), tensor(-0.0973), tensor(-1.2325), tensor(-0.3875), tensor(-0.5122), tensor(-0.5138), tensor(-0.2475), tensor(-0.5695), tensor(-0.5849), tensor(-0.0597), tensor(-0.7854), tensor(-1.1706), tensor(-1.0156), tensor(-1.2980), tensor(-0.3877), tensor(-0.6544), tensor(-1.2478), tensor(-0.3549), tensor(-0.5637), tensor(-0.9191), tensor(-0.1057), tensor(-0.2611), tensor(-0.2240), tensor(-0.3454), tensor(-0.2015), tensor(0.0051), tensor(-1.1656), tensor(-0.8226), tensor(-0.7143), tensor(0.1799), tensor(-1.0980), tensor(-0.7922), tensor(0.2021), tensor(-1.1915), tensor(-0.1690), tensor(-0.5209), tensor(-0.6520), tensor(-0.4835), tensor(-0.8742), tensor(0.4541), tensor(-0.9236), tensor(-0.2480), tensor(-0.4752), tensor(-0.9091), tensor(-0.1844), tensor(-0.7198), tensor(0.2506), tensor(-0.1624), tensor(0.01

84it [1:06:26, 45.51s/it]


Start Epoch 84
Rewards: [tensor(-0.7806), tensor(-0.3450), tensor(-0.5354), tensor(-1.2293), tensor(-1.0029), tensor(-0.2422), tensor(-0.7698), tensor(0.1961), tensor(-0.2218), tensor(-0.2036), tensor(-0.4769), tensor(-1.0010), tensor(-0.2005), tensor(-0.3732), tensor(-0.9217), tensor(-0.2188), tensor(-0.8818), tensor(-0.0058), tensor(-0.4235), tensor(-0.5556), tensor(-0.5080), tensor(0.0640), tensor(-0.5877), tensor(-0.4185), tensor(-0.1921), tensor(-0.6952), tensor(-0.7073), tensor(-0.3708), tensor(-0.6426), tensor(-1.1174), tensor(-0.7223), tensor(-0.5673), tensor(-0.9555), tensor(0.0268), tensor(-0.0242), tensor(-0.8148), tensor(-0.2858), tensor(-0.5553), tensor(-0.3458), tensor(-0.6373), tensor(-0.3661), tensor(-0.6240), tensor(-1.2395), tensor(-0.4273), tensor(-0.8534), tensor(0.0260), tensor(-1.4341), tensor(-0.4864), tensor(-0.5420), tensor(-0.2439), tensor(-1.2168), tensor(-0.8630), tensor(-0.9198), tensor(-0.2858), tensor(-0.9922), tensor(-0.7684), tensor(-0.2625), tensor(-0

85it [1:07:15, 46.42s/it]


Start Epoch 85
Rewards: [tensor(-1.2795), tensor(0.0947), tensor(-0.8500), tensor(0.4151), tensor(-0.8549), tensor(-1.2142), tensor(-0.5926), tensor(-0.7662), tensor(-0.7476), tensor(-0.6544), tensor(-0.9842), tensor(-0.1161), tensor(-0.6429), tensor(-0.2936), tensor(-0.8003), tensor(-0.1420), tensor(-0.1375), tensor(-0.7937), tensor(-0.8776), tensor(-1.1533), tensor(-1.1339), tensor(-0.8606), tensor(-0.0973), tensor(-0.3408), tensor(-0.9349), tensor(-0.8989), tensor(-0.1867), tensor(-0.7348), tensor(-0.2517), tensor(-0.0550), tensor(-0.9424), tensor(-0.1544), tensor(-0.7625), tensor(-0.1657), tensor(-0.1859), tensor(-0.2142), tensor(-0.7685), tensor(0.0485), tensor(0.1363), tensor(0.1034), tensor(-0.6641), tensor(-0.7897), tensor(-0.7662), tensor(-0.0480), tensor(-1.0665), tensor(-0.7138), tensor(-0.0965), tensor(0.0532), tensor(-0.6603), tensor(-0.4324), tensor(-0.2236), tensor(-0.5744), tensor(-1.0536), tensor(-1.1388), tensor(-0.0638), tensor(-1.2043), tensor(-1.2053), tensor(-1.1

86it [1:08:02, 46.71s/it]


Start Epoch 86
Rewards: [tensor(-0.4138), tensor(-1.2486), tensor(-0.2021), tensor(-1.3268), tensor(-1.1229), tensor(-1.0113), tensor(0.0158), tensor(-0.6081), tensor(-0.8676), tensor(-1.0132), tensor(-1.0483), tensor(-1.1012), tensor(-1.2705), tensor(-1.2228), tensor(-1.0494), tensor(-1.1745), tensor(-0.7755), tensor(-0.3247), tensor(-0.5652), tensor(-0.5445), tensor(-0.7676), tensor(-0.6641), tensor(-1.2140), tensor(-0.5103), tensor(-1.1021), tensor(-0.1933), tensor(-1.0006), tensor(-1.0361), tensor(-1.2061), tensor(-0.5341), tensor(-0.7571), tensor(-0.6585), tensor(-0.1311), tensor(-1.0351), tensor(0.0131), tensor(-0.5209), tensor(-0.4360), tensor(-0.9071), tensor(-0.7052), tensor(-0.3477), tensor(-1.2846), tensor(-0.9676), tensor(0.0099), tensor(-1.1119), tensor(-0.8525), tensor(-1.2460), tensor(0.0732), tensor(-0.6388), tensor(-1.0276), tensor(0.0540), tensor(-1.0541), tensor(-0.1784), tensor(-0.5097), tensor(-0.0371), tensor(-1.0035), tensor(-0.2302), tensor(-0.8201), tensor(-0.

87it [1:08:50, 47.14s/it]


Start Epoch 87
Rewards: [tensor(-1.3268), tensor(-0.9293), tensor(-0.5577), tensor(-0.7385), tensor(-0.8151), tensor(-1.1915), tensor(-1.2712), tensor(-1.0164), tensor(-0.5020), tensor(-1.0190), tensor(-0.1932), tensor(-0.4669), tensor(-0.5538), tensor(-0.0429), tensor(-0.1081), tensor(-0.5126), tensor(-0.5425), tensor(-0.3955), tensor(-0.8014), tensor(-0.1741), tensor(-1.2912), tensor(-1.3145), tensor(-0.2883), tensor(-0.5092), tensor(-0.1316), tensor(-0.9323), tensor(-1.2860), tensor(-0.0751), tensor(-0.0943), tensor(-0.5926), tensor(-0.7772), tensor(-0.9890), tensor(-0.4715), tensor(-0.7898), tensor(-0.8302), tensor(-0.9854), tensor(-0.1778), tensor(0.1986), tensor(-1.2075), tensor(0.1445), tensor(-0.3109), tensor(-0.5978), tensor(-0.0973), tensor(-0.6432), tensor(0.1346), tensor(-0.5360), tensor(-1.0665), tensor(-1.1944), tensor(-0.5666), tensor(-0.1456), tensor(-0.6617), tensor(-0.5331), tensor(-0.2224), tensor(-0.8543), tensor(-0.2901), tensor(-0.6240), tensor(-0.7339), tensor(-

88it [1:09:41, 48.20s/it]


Start Epoch 88
Rewards: [tensor(-0.8676), tensor(0.0158), tensor(-0.8004), tensor(0.1503), tensor(-1.2003), tensor(-1.0981), tensor(0.0019), tensor(-1.1868), tensor(-0.1775), tensor(-0.4762), tensor(-0.6544), tensor(0.0051), tensor(-1.0015), tensor(-0.5971), tensor(-0.0016), tensor(0.3009), tensor(-0.8753), tensor(0.0155), tensor(0.1986), tensor(-0.6518), tensor(0.2278), tensor(-0.4122), tensor(-1.0164), tensor(-1.0541), tensor(-0.4962), tensor(-0.8235), tensor(-0.9764), tensor(-0.6600), tensor(0.2474), tensor(-0.3364), tensor(-0.6853), tensor(-0.4888), tensor(0.1405), tensor(-0.9617), tensor(-0.1791), tensor(-0.9718), tensor(-1.2093), tensor(-0.6585), tensor(-0.6176), tensor(-0.7028), tensor(-0.8711), tensor(-0.6042), tensor(-0.5154), tensor(-0.2305), tensor(-0.5038), tensor(-0.1195), tensor(-1.0368), tensor(-0.4447), tensor(-0.2788), tensor(-0.8549), tensor(-0.1913), tensor(-0.4005), tensor(-0.2698), tensor(-0.2906), tensor(-1.2562), tensor(-1.2091), tensor(-1.1331), tensor(-0.5465)

89it [1:10:34, 49.61s/it]


Start Epoch 89
Rewards: [tensor(-0.6130), tensor(-1.1543), tensor(-1.1012), tensor(-1.2228), tensor(-0.9205), tensor(-1.1681), tensor(-0.3660), tensor(-1.1670), tensor(-0.4948), tensor(-0.9847), tensor(-0.2859), tensor(-0.9802), tensor(-0.9949), tensor(-0.4907), tensor(-0.4511), tensor(-1.2606), tensor(-1.1410), tensor(-0.8630), tensor(-0.6290), tensor(-0.1864), tensor(-1.1943), tensor(-0.9454), tensor(-0.8245), tensor(-0.2218), tensor(-0.2954), tensor(-0.5877), tensor(-0.0131), tensor(-0.7684), tensor(-0.2777), tensor(-0.2751), tensor(-1.0221), tensor(-0.0645), tensor(-0.7956), tensor(-0.5028), tensor(-0.7590), tensor(0.0330), tensor(-0.6280), tensor(-0.8014), tensor(-1.1827), tensor(0.1387), tensor(-1.0762), tensor(-0.1956), tensor(-0.7772), tensor(-0.2440), tensor(-0.3324), tensor(-1.0164), tensor(-0.1302), tensor(-0.0013), tensor(0.2569), tensor(-0.6438), tensor(-0.5867), tensor(-1.3286), tensor(-0.0122), tensor(-0.4579), tensor(-0.7360), tensor(-0.9940), tensor(-0.9651), tensor(-

90it [1:11:23, 49.57s/it]


Start Epoch 90
Rewards: [tensor(-0.2957), tensor(-0.9827), tensor(-0.8688), tensor(-0.6048), tensor(-0.1244), tensor(0.0540), tensor(-0.7609), tensor(-0.7898), tensor(0.1368), tensor(-0.9651), tensor(-0.6317), tensor(-0.4341), tensor(0.1820), tensor(0.0029), tensor(-0.2149), tensor(-0.8534), tensor(-1.2562), tensor(-0.1775), tensor(-1.2339), tensor(-0.2661), tensor(-1.2164), tensor(-0.1913), tensor(-0.9344), tensor(-0.8992), tensor(-0.5103), tensor(-1.0651), tensor(-0.7482), tensor(0.0286), tensor(-0.7781), tensor(-0.1821), tensor(0.1799), tensor(-0.8885), tensor(-1.4410), tensor(-0.2962), tensor(-0.0950), tensor(-0.4356), tensor(-0.0296), tensor(-0.9556), tensor(0.3912), tensor(0.0661), tensor(-0.2906), tensor(-0.3012), tensor(-0.6593), tensor(-0.7100), tensor(-0.7417), tensor(0.0155), tensor(0.0426), tensor(-1.0619), tensor(-0.6172), tensor(-0.3867), tensor(-0.8696), tensor(-1.2551), tensor(-0.1745), tensor(-0.3012), tensor(-1.0651), tensor(-0.9060), tensor(-0.2883), tensor(0.2278),

91it [1:12:15, 50.13s/it]


Start Epoch 91
Rewards: [tensor(-0.9700), tensor(-1.0339), tensor(-0.6116), tensor(-0.6023), tensor(-0.5469), tensor(-0.4050), tensor(-0.3345), tensor(-1.0899), tensor(-0.7638), tensor(-0.8988), tensor(-1.0022), tensor(-0.9506), tensor(-0.3803), tensor(0.0678), tensor(-1.2606), tensor(-0.2843), tensor(-1.2419), tensor(-0.7282), tensor(0.2040), tensor(-0.0595), tensor(-0.8987), tensor(0.0691), tensor(-1.1493), tensor(-1.4515), tensor(-0.0275), tensor(-0.0864), tensor(-1.0132), tensor(-0.6031), tensor(-1.1673), tensor(-1.0338), tensor(-1.1321), tensor(-0.7974), tensor(-0.9631), tensor(-0.6743), tensor(-0.8312), tensor(-0.5068), tensor(-1.1621), tensor(-1.0500), tensor(-0.6824), tensor(-0.8992), tensor(-1.0156), tensor(-1.0190), tensor(-0.1741), tensor(-0.1369), tensor(-0.4664), tensor(-0.2835), tensor(-0.0242), tensor(-0.0141), tensor(-1.0045), tensor(-0.4016), tensor(-1.2038), tensor(0.1894), tensor(-0.4793), tensor(-0.2425), tensor(-0.0218), tensor(-1.1106), tensor(-0.9483), tensor(-0

92it [1:13:02, 49.20s/it]


Start Epoch 92
Rewards: [tensor(-0.4602), tensor(-0.9345), tensor(-0.7518), tensor(-1.5563), tensor(-0.5161), tensor(-0.9895), tensor(-0.4351), tensor(-0.6366), tensor(0.0877), tensor(-0.4300), tensor(-1.1908), tensor(0.0036), tensor(-0.7225), tensor(-0.5560), tensor(-0.7385), tensor(-1.1407), tensor(-0.9861), tensor(-1.1502), tensor(-0.6929), tensor(-0.3660), tensor(-0.2149), tensor(-0.8696), tensor(-1.2989), tensor(-0.0949), tensor(-0.0275), tensor(-0.6752), tensor(-0.9767), tensor(-0.8992), tensor(-0.3143), tensor(-0.2959), tensor(-1.2437), tensor(-1.2061), tensor(-0.2659), tensor(-0.2539), tensor(0.0295), tensor(-0.2382), tensor(-1.3286), tensor(-0.9975), tensor(-0.2018), tensor(-1.0782), tensor(-0.7480), tensor(-0.4101), tensor(-1.0177), tensor(-1.2192), tensor(-0.8003), tensor(-1.2152), tensor(-0.1625), tensor(-1.3509), tensor(0.0242), tensor(-0.9995), tensor(-0.9283), tensor(0.2662), tensor(-0.6009), tensor(-0.3138), tensor(-0.6772), tensor(-0.9517), tensor(-0.9879), tensor(-0.

93it [1:13:48, 48.23s/it]


Start Epoch 93
Rewards: [tensor(-1.1106), tensor(-0.4145), tensor(-0.6451), tensor(-0.6385), tensor(-0.5241), tensor(-1.2794), tensor(-1.2146), tensor(-1.2087), tensor(-1.0340), tensor(-0.8550), tensor(-0.0619), tensor(-0.5080), tensor(-0.8454), tensor(-0.2100), tensor(-0.3768), tensor(-0.2194), tensor(-0.5279), tensor(-1.0893), tensor(-0.5080), tensor(-0.0645), tensor(-0.0418), tensor(-0.8578), tensor(-0.4351), tensor(-0.1299), tensor(-0.1244), tensor(0.1961), tensor(-0.9771), tensor(-0.9827), tensor(-1.1352), tensor(-0.8201), tensor(-0.5248), tensor(-0.8071), tensor(-0.5428), tensor(-0.9282), tensor(-0.3630), tensor(-0.2036), tensor(-0.5049), tensor(-0.3603), tensor(-0.8644), tensor(-0.8407), tensor(-1.0132), tensor(-0.9208), tensor(-1.0557), tensor(-0.3258), tensor(0.1029), tensor(-0.0042), tensor(-0.1129), tensor(-0.9162), tensor(-0.8301), tensor(-0.2754), tensor(-0.3324), tensor(-0.6543), tensor(-0.6530), tensor(-0.5507), tensor(0.0828), tensor(-0.4627), tensor(-0.2346), tensor(-

94it [1:14:41, 49.68s/it]


Start Epoch 94
Rewards: [tensor(-1.2022), tensor(-0.9154), tensor(-0.5877), tensor(-0.8141), tensor(0.1043), tensor(-0.5240), tensor(-0.9730), tensor(-0.1169), tensor(-0.9257), tensor(-0.9320), tensor(-0.4273), tensor(-0.5272), tensor(-0.3666), tensor(-0.6987), tensor(-0.6792), tensor(-1.3310), tensor(0.1693), tensor(-0.8274), tensor(0.1083), tensor(-1.1417), tensor(-0.1511), tensor(-0.3014), tensor(0.0036), tensor(-0.2036), tensor(-1.2091), tensor(-0.4625), tensor(-0.1011), tensor(-0.4120), tensor(-0.3732), tensor(0.0296), tensor(-0.6759), tensor(-0.4790), tensor(-0.3350), tensor(0.0399), tensor(-0.7620), tensor(-0.9992), tensor(-0.3187), tensor(-0.2797), tensor(-1.0612), tensor(0.1444), tensor(-0.4881), tensor(0.1803), tensor(-0.5244), tensor(-0.4386), tensor(-1.1337), tensor(0.3151), tensor(-0.1057), tensor(0.2317), tensor(-0.1214), tensor(-0.5327), tensor(-0.4076), tensor(-0.1420), tensor(-0.1590), tensor(-1.2056), tensor(-0.5305), tensor(0.1995), tensor(-0.9890), tensor(-0.7629),

95it [1:15:30, 49.47s/it]


Start Epoch 95
Rewards: [tensor(-0.3379), tensor(-0.8243), tensor(-0.7745), tensor(-0.2382), tensor(-1.1642), tensor(0.3635), tensor(-0.8213), tensor(-0.8784), tensor(-0.3652), tensor(0.1222), tensor(-0.3454), tensor(-0.2893), tensor(-0.9059), tensor(0.0654), tensor(-0.8729), tensor(0.1030), tensor(-0.5792), tensor(-0.0406), tensor(-1.2055), tensor(-0.6240), tensor(-0.1032), tensor(-0.2754), tensor(-1.1023), tensor(-0.6444), tensor(-0.3875), tensor(-1.2912), tensor(0.3780), tensor(-0.5125), tensor(-0.0543), tensor(-0.5701), tensor(-0.7698), tensor(-0.8054), tensor(-0.5839), tensor(-0.8872), tensor(-0.2474), tensor(-0.7027), tensor(-0.7798), tensor(0.3971), tensor(-0.8742), tensor(-0.5482), tensor(-0.1244), tensor(0.1631), tensor(-1.2276), tensor(-0.2877), tensor(-1.2562), tensor(-0.9556), tensor(-0.2167), tensor(-0.7360), tensor(-0.6666), tensor(-0.2440), tensor(-1.0016), tensor(-1.2325), tensor(-0.9091), tensor(-0.3068), tensor(-0.9799), tensor(-0.3680), tensor(-0.3324), tensor(-1.30

96it [1:16:22, 50.36s/it]


Start Epoch 96
Rewards: [tensor(-0.6645), tensor(-1.2181), tensor(-0.1382), tensor(-1.1814), tensor(-0.4547), tensor(-0.3007), tensor(-0.6538), tensor(-1.4086), tensor(-0.2174), tensor(0.0227), tensor(-0.9521), tensor(-0.3668), tensor(0.2074), tensor(-0.5068), tensor(-0.4669), tensor(-0.8019), tensor(-0.2986), tensor(-1.1537), tensor(-0.4228), tensor(-0.8715), tensor(-0.3650), tensor(-1.0150), tensor(-0.0936), tensor(-0.1121), tensor(-0.5291), tensor(-0.6373), tensor(-0.6617), tensor(-0.0648), tensor(-0.9599), tensor(-0.1715), tensor(-1.1949), tensor(-0.6853), tensor(-0.7419), tensor(-1.0969), tensor(-0.5027), tensor(-0.6385), tensor(-0.1032), tensor(-0.8491), tensor(-0.1844), tensor(-0.5343), tensor(-0.0889), tensor(0.0989), tensor(-1.1352), tensor(0.4885), tensor(-0.2774), tensor(0.0702), tensor(-0.1597), tensor(-0.4360), tensor(-1.0739), tensor(-0.2646), tensor(-0.4898), tensor(-0.1669), tensor(-1.1278), tensor(-0.3153), tensor(-0.5512), tensor(-0.2462), tensor(-0.0275), tensor(-0.

97it [1:17:15, 50.92s/it]


Start Epoch 97
Rewards: [tensor(-0.7771), tensor(-0.2913), tensor(-1.0060), tensor(-0.1295), tensor(0.0623), tensor(-0.8476), tensor(-0.4244), tensor(-0.8885), tensor(-0.4274), tensor(-1.1309), tensor(-1.1119), tensor(-0.6325), tensor(-1.0541), tensor(-0.1044), tensor(-0.8820), tensor(-0.8253), tensor(-1.2396), tensor(-0.8490), tensor(-0.1293), tensor(-0.7362), tensor(0.0528), tensor(-0.2859), tensor(-0.3833), tensor(-0.1747), tensor(-0.2560), tensor(-0.1599), tensor(0.2187), tensor(-0.3208), tensor(-0.1404), tensor(-0.6821), tensor(0.0691), tensor(-1.2038), tensor(-0.7360), tensor(-0.7009), tensor(-0.7628), tensor(-0.3264), tensor(-1.0782), tensor(-0.9010), tensor(-0.7052), tensor(-0.5420), tensor(-0.1671), tensor(-0.7098), tensor(0.2309), tensor(-0.9293), tensor(-0.6009), tensor(0.0947), tensor(-0.8980), tensor(-1.1802), tensor(0.0462), tensor(0.0765), tensor(-0.6752), tensor(-0.8369), tensor(-0.1722), tensor(0.0175), tensor(-0.7569), tensor(-0.4005), tensor(-1.0579), tensor(-0.0901

98it [1:18:03, 50.03s/it]


Start Epoch 98
Rewards: [tensor(0.1346), tensor(-0.3343), tensor(-0.5538), tensor(-0.9648), tensor(-0.5857), tensor(-0.0496), tensor(-0.1663), tensor(0.0416), tensor(-0.5849), tensor(-0.6728), tensor(0.2040), tensor(0.0793), tensor(-0.8869), tensor(-0.5480), tensor(-0.7772), tensor(-0.2500), tensor(-1.2043), tensor(0.2283), tensor(-1.0790), tensor(-0.0406), tensor(-0.6923), tensor(0.1749), tensor(-0.2477), tensor(-0.3681), tensor(-0.3666), tensor(-1.0274), tensor(-0.9249), tensor(-0.9861), tensor(-0.6070), tensor(-0.3012), tensor(-0.6879), tensor(-0.9317), tensor(-0.3521), tensor(-0.3332), tensor(-0.4196), tensor(-0.5125), tensor(-0.4922), tensor(-0.8929), tensor(-0.7187), tensor(0.2309), tensor(-0.7898), tensor(-1.2912), tensor(-1.3268), tensor(-0.6737), tensor(-0.6062), tensor(0.1052), tensor(0.1763), tensor(-0.5117), tensor(-0.4782), tensor(-1.1352), tensor(-1.3499), tensor(-0.9428), tensor(-0.5985), tensor(-0.0824), tensor(-0.5629), tensor(-0.3009), tensor(-0.4460), tensor(-0.9428

99it [1:18:54, 50.52s/it]


Start Epoch 99
Rewards: [tensor(-0.2979), tensor(-0.4596), tensor(-0.3006), tensor(0.1382), tensor(-0.5005), tensor(-0.6258), tensor(-0.3344), tensor(-0.8126), tensor(-0.9716), tensor(-0.7677), tensor(-0.5425), tensor(-1.0573), tensor(0.1445), tensor(0.1405), tensor(-0.5710), tensor(-0.3818), tensor(-1.1106), tensor(0.1445), tensor(-0.9414), tensor(-1.2352), tensor(-0.5857), tensor(-0.1671), tensor(-0.8376), tensor(-0.2686), tensor(-0.5241), tensor(-0.1751), tensor(-0.2541), tensor(-1.0032), tensor(-0.9802), tensor(-0.3743), tensor(-0.9323), tensor(-0.4628), tensor(-1.2325), tensor(-0.4313), tensor(-1.2263), tensor(0.0654), tensor(-0.6062), tensor(-0.5010), tensor(-0.5749), tensor(-0.8045), tensor(-0.1715), tensor(0.1066), tensor(-0.1921), tensor(-0.0603), tensor(-0.8540), tensor(-1.3145), tensor(-0.1671), tensor(-1.0969), tensor(-1.1615), tensor(-0.2218), tensor(0.3995), tensor(-0.6062), tensor(-0.1706), tensor(0.3292), tensor(-1.3062), tensor(-0.6726), tensor(-0.8694), tensor(-0.638

100it [1:19:44, 50.29s/it]


Start Epoch 100
Rewards: [tensor(-1.0339), tensor(-0.2462), tensor(-0.4138), tensor(-1.2022), tensor(0.0640), tensor(-0.1833), tensor(-0.5751), tensor(-0.4228), tensor(-0.3428), tensor(-0.5804), tensor(-0.0251), tensor(-0.0895), tensor(-1.3499), tensor(-1.1340), tensor(0.1052), tensor(-0.0218), tensor(-1.0080), tensor(-0.9555), tensor(-1.3902), tensor(-0.7454), tensor(-0.4155), tensor(-0.3818), tensor(-0.1833), tensor(-0.3545), tensor(-1.2607), tensor(-0.0242), tensor(-1.0244), tensor(-1.0660), tensor(-0.2053), tensor(-1.1915), tensor(-0.3209), tensor(-0.9759), tensor(-0.0384), tensor(-0.9125), tensor(-0.5754), tensor(-0.3723), tensor(-0.2224), tensor(-0.8226), tensor(-0.8741), tensor(-0.3102), tensor(-0.3050), tensor(0.0051), tensor(-0.9347), tensor(-0.5214), tensor(-1.0119), tensor(-0.1893), tensor(-0.0815), tensor(-0.9827), tensor(0.2803), tensor(-0.2835), tensor(0.0100), tensor(-1.1335), tensor(-0.6821), tensor(0.0268), tensor(-1.3317), tensor(-0.5248), tensor(-0.0979), tensor(-1.

101it [1:20:31, 49.33s/it]


Start Epoch 101
Rewards: [tensor(-0.1178), tensor(-1.1585), tensor(-0.5980), tensor(-0.3582), tensor(0.0689), tensor(0.3216), tensor(-0.9628), tensor(-0.4062), tensor(-0.9628), tensor(-1.3635), tensor(0.2187), tensor(-1.2912), tensor(-0.8344), tensor(-0.0679), tensor(-0.8376), tensor(0.0174), tensor(-1.0615), tensor(-0.5154), tensor(-1.2325), tensor(-0.5240), tensor(-0.8549), tensor(-1.0100), tensor(-0.4892), tensor(-0.7677), tensor(-0.1882), tensor(0.1445), tensor(-0.9556), tensor(-0.2537), tensor(-0.9879), tensor(-1.0478), tensor(-1.1331), tensor(-0.7677), tensor(-0.7511), tensor(-0.6366), tensor(-0.0805), tensor(-0.7684), tensor(-0.8992), tensor(-0.9102), tensor(-1.1829), tensor(-0.5226), tensor(-0.3211), tensor(-0.2211), tensor(-0.5165), tensor(-0.5666), tensor(-0.9547), tensor(-0.4635), tensor(-1.0979), tensor(0.1909), tensor(-0.2368), tensor(-0.2382), tensor(-0.3343), tensor(-0.7169), tensor(-0.7629), tensor(-0.5840), tensor(-1.2325), tensor(-0.8201), tensor(-0.5669), tensor(0.0

102it [1:21:19, 48.86s/it]


Start Epoch 102
Rewards: [tensor(-0.7417), tensor(-0.8601), tensor(-0.0242), tensor(-0.6617), tensor(-0.0964), tensor(0.1708), tensor(-0.3949), tensor(-0.2188), tensor(-0.9428), tensor(-0.5077), tensor(-0.1365), tensor(-0.3408), tensor(-0.9186), tensor(0.0490), tensor(-1.1500), tensor(-0.3208), tensor(-1.2562), tensor(-0.2132), tensor(-0.9982), tensor(-0.7021), tensor(-0.2560), tensor(-0.3111), tensor(-0.5849), tensor(-0.2053), tensor(-0.9791), tensor(-1.2203), tensor(-0.8238), tensor(-0.5514), tensor(-1.5563), tensor(-0.1702), tensor(-0.5533), tensor(0.0280), tensor(-0.2475), tensor(-0.2149), tensor(-1.0312), tensor(-0.8372), tensor(-0.7459), tensor(-0.8197), tensor(-0.9612), tensor(-1.0145), tensor(-0.6504), tensor(-0.6609), tensor(-0.0042), tensor(-0.7911), tensor(-0.2705), tensor(-0.6918), tensor(-0.1485), tensor(-0.3309), tensor(-0.5279), tensor(-0.2323), tensor(-0.3201), tensor(-0.4628), tensor(-1.1360), tensor(-1.1517), tensor(-0.7898), tensor(-0.0480), tensor(-0.1817), tensor(

103it [1:22:06, 48.34s/it]


Start Epoch 103
Rewards: [tensor(-0.9821), tensor(-0.5480), tensor(-0.3913), tensor(-0.4705), tensor(-1.0666), tensor(-0.5341), tensor(-0.5327), tensor(-0.0247), tensor(-0.2858), tensor(-0.5465), tensor(-0.8750), tensor(-0.1177), tensor(-0.6849), tensor(-0.5615), tensor(-1.2003), tensor(-0.0467), tensor(-1.0244), tensor(-0.4264), tensor(-1.3392), tensor(-0.4313), tensor(-0.0920), tensor(-0.5044), tensor(-1.1708), tensor(0.0362), tensor(-0.1433), tensor(-0.2611), tensor(0.5631), tensor(-0.7256), tensor(-0.2931), tensor(-0.3039), tensor(-0.7521), tensor(-1.1864), tensor(-0.2428), tensor(-0.1938), tensor(0.1387), tensor(-1.1741), tensor(-1.0514), tensor(-1.0638), tensor(-1.2325), tensor(-0.8386), tensor(-0.2364), tensor(-0.9699), tensor(0.1708), tensor(-1.3203), tensor(-0.3407), tensor(-0.4066), tensor(-0.2661), tensor(-1.0276), tensor(-0.3216), tensor(-0.8008), tensor(-0.5697), tensor(-0.2205), tensor(0.0323), tensor(-0.2301), tensor(-0.7466), tensor(-0.7873), tensor(-1.2846), tensor(-0

104it [1:22:55, 48.54s/it]


Start Epoch 104
Rewards: [tensor(0.1894), tensor(0.2282), tensor(-0.8987), tensor(-1.1864), tensor(-1.0955), tensor(-0.1722), tensor(-0.6201), tensor(-0.8818), tensor(-0.9771), tensor(-1.0096), tensor(-1.2142), tensor(0.0485), tensor(-0.5291), tensor(-0.0068), tensor(-0.5953), tensor(-0.8356), tensor(-0.6824), tensor(-0.0336), tensor(-1.0395), tensor(-0.7684), tensor(-1.0955), tensor(-0.9890), tensor(-0.7459), tensor(0.5933), tensor(-0.2122), tensor(-0.0853), tensor(-1.2043), tensor(-0.5872), tensor(-0.3984), tensor(-1.2181), tensor(-0.2221), tensor(-0.8223), tensor(-0.6319), tensor(-1.0093), tensor(-0.7769), tensor(-0.7961), tensor(-0.0967), tensor(-0.9125), tensor(0.0732), tensor(-0.4509), tensor(-0.4662), tensor(-0.8142), tensor(-0.6664), tensor(-0.6126), tensor(-0.7214), tensor(0.0610), tensor(-0.3321), tensor(-1.0033), tensor(-0.4812), tensor(-0.9302), tensor(-0.0936), tensor(0.2962), tensor(-0.4397), tensor(-0.4720), tensor(-0.0920), tensor(0.4485), tensor(-0.8057), tensor(-0.70

105it [1:23:43, 48.48s/it]


Start Epoch 105
Rewards: [tensor(-0.4277), tensor(-0.1626), tensor(-0.5666), tensor(-0.8369), tensor(-1.2917), tensor(-0.3060), tensor(-0.3241), tensor(-0.3545), tensor(-0.8301), tensor(-0.7250), tensor(-0.8105), tensor(-1.0987), tensor(-0.2046), tensor(0.1730), tensor(-0.9333), tensor(-0.0855), tensor(-0.1881), tensor(-0.2308), tensor(-0.1749), tensor(-0.2126), tensor(-0.7309), tensor(-1.0564), tensor(-0.8412), tensor(-0.5751), tensor(-0.7058), tensor(-0.5125), tensor(-0.1711), tensor(-0.8022), tensor(0.1493), tensor(-0.3665), tensor(-0.7325), tensor(-0.6821), tensor(-0.4397), tensor(0.4603), tensor(-1.3376), tensor(-0.0042), tensor(-0.6656), tensor(-0.2776), tensor(-1.1218), tensor(-1.0520), tensor(-0.5015), tensor(-1.0256), tensor(-0.5041), tensor(-1.1357), tensor(0.0416), tensor(-0.4196), tensor(0.2317), tensor(-1.1931), tensor(-0.4016), tensor(-0.1638), tensor(0.3635), tensor(0.1799), tensor(-0.3389), tensor(-0.1753), tensor(-0.8742), tensor(-0.2798), tensor(-0.0973), tensor(-0.2

106it [1:24:30, 48.01s/it]


Start Epoch 106
Rewards: [tensor(-0.8820), tensor(0.0606), tensor(-0.2673), tensor(-0.6240), tensor(-0.6790), tensor(-0.3879), tensor(-0.6058), tensor(-1.0113), tensor(-1.0390), tensor(-0.4796), tensor(-1.1931), tensor(-1.2445), tensor(-0.1285), tensor(-0.8185), tensor(-0.9148), tensor(-0.7912), tensor(-0.2439), tensor(-0.2368), tensor(-0.0231), tensor(-0.2798), tensor(-1.1316), tensor(-1.0010), tensor(-0.1244), tensor(0.3009), tensor(-0.0910), tensor(-0.2165), tensor(-0.3576), tensor(-0.8491), tensor(-0.6638), tensor(-0.4433), tensor(-1.3790), tensor(-0.4461), tensor(-0.1626), tensor(-0.5664), tensor(-1.0669), tensor(-0.9815), tensor(-0.0905), tensor(-0.4915), tensor(-0.5377), tensor(-0.4638), tensor(-0.9020), tensor(-1.1580), tensor(-0.5118), tensor(-0.2285), tensor(-0.2368), tensor(-0.6641), tensor(-1.3497), tensor(-0.6225), tensor(-1.1726), tensor(0.5933), tensor(-0.0117), tensor(-0.8950), tensor(-0.9382), tensor(-0.3549), tensor(-1.3268), tensor(-0.2284), tensor(0.0332), tensor(-

107it [1:25:24, 49.83s/it]


Start Epoch 107
Rewards: [tensor(-0.9063), tensor(-0.6325), tensor(-1.3499), tensor(-0.9344), tensor(-0.2242), tensor(-0.9685), tensor(-1.0557), tensor(0.0277), tensor(-0.3006), tensor(-1.1092), tensor(-0.6603), tensor(-1.1236), tensor(-0.2756), tensor(-0.1295), tensor(-0.5872), tensor(-1.1171), tensor(-0.1990), tensor(-0.1177), tensor(-0.7215), tensor(0.2979), tensor(-0.5023), tensor(-0.0867), tensor(-0.1049), tensor(-1.0811), tensor(-0.0039), tensor(0.1438), tensor(-0.9835), tensor(-0.6645), tensor(-0.1133), tensor(-0.4277), tensor(-0.2308), tensor(-0.3875), tensor(-1.1651), tensor(-0.1488), tensor(-0.8035), tensor(-0.6734), tensor(-0.9895), tensor(-1.1917), tensor(-1.2562), tensor(-1.3268), tensor(-0.6426), tensor(-0.3921), tensor(-0.2167), tensor(-0.7448), tensor(-0.6432), tensor(-0.3240), tensor(-0.4653), tensor(-0.8718), tensor(-0.5354), tensor(-0.7100), tensor(-0.6737), tensor(0.1382), tensor(-0.9187), tensor(-0.4881), tensor(-0.7590), tensor(-0.7225), tensor(-0.6759), tensor(0

108it [1:26:08, 48.09s/it]


Start Epoch 108
Rewards: [tensor(-1.0674), tensor(-0.3450), tensor(0.1553), tensor(-0.7174), tensor(-0.0288), tensor(-0.6282), tensor(0.2501), tensor(-0.1302), tensor(-0.6816), tensor(-0.4743), tensor(-0.3545), tensor(-0.3293), tensor(0.1444), tensor(-0.0609), tensor(-0.2887), tensor(0.0543), tensor(-0.7215), tensor(-0.1718), tensor(0.1382), tensor(-0.5266), tensor(-0.5867), tensor(-0.9412), tensor(-0.0247), tensor(-0.5469), tensor(-0.4460), tensor(-0.3111), tensor(-0.8237), tensor(-0.7562), tensor(-0.6952), tensor(-0.3579), tensor(-0.1485), tensor(-0.8344), tensor(-0.9301), tensor(-1.0177), tensor(0.1364), tensor(-0.0278), tensor(-0.3301), tensor(-0.2267), tensor(-0.1690), tensor(-0.2929), tensor(0.0242), tensor(-0.0908), tensor(-0.2029), tensor(-0.5143), tensor(-1.0030), tensor(-0.3286), tensor(-0.5797), tensor(-0.2783), tensor(0.1364), tensor(-0.9855), tensor(0.4064), tensor(-0.9550), tensor(-0.2883), tensor(-0.7713), tensor(-0.4512), tensor(-0.9922), tensor(-0.9093), tensor(0.0971

109it [1:26:56, 48.05s/it]


Start Epoch 109
Rewards: [tensor(-0.9254), tensor(-0.6603), tensor(-0.5141), tensor(-0.3913), tensor(-0.5553), tensor(-1.2956), tensor(-0.5279), tensor(-0.7073), tensor(-1.4031), tensor(0.0543), tensor(-1.0412), tensor(0.0964), tensor(-0.1754), tensor(-0.5637), tensor(-0.0870), tensor(-0.3198), tensor(-0.6855), tensor(-1.0907), tensor(0.0989), tensor(-0.4100), tensor(-0.2911), tensor(-1.1352), tensor(-0.2018), tensor(-1.0390), tensor(-0.0428), tensor(-0.1982), tensor(-0.9506), tensor(-0.3666), tensor(-1.0338), tensor(-0.8293), tensor(-0.9412), tensor(0.4151), tensor(-1.2142), tensor(-0.2109), tensor(-0.0231), tensor(-1.0156), tensor(-0.5840), tensor(-0.4782), tensor(0.0426), tensor(-0.0317), tensor(0.0528), tensor(-0.5276), tensor(-0.3149), tensor(-0.1882), tensor(-1.2099), tensor(-1.2860), tensor(-1.1944), tensor(-0.1358), tensor(-0.3964), tensor(-0.1302), tensor(-0.8382), tensor(-0.8338), tensor(-0.4351), tensor(-0.4511), tensor(-0.0785), tensor(-0.1644), tensor(-0.4601), tensor(-0.

110it [1:27:45, 48.36s/it]


Start Epoch 110
Rewards: [tensor(0.0698), tensor(-1.1053), tensor(-1.2601), tensor(-0.9217), tensor(-1.0114), tensor(-1.0338), tensor(-0.1741), tensor(-0.8776), tensor(-0.7235), tensor(-1.3576), tensor(-0.1365), tensor(-0.2906), tensor(0.0036), tensor(-0.7640), tensor(-0.5585), tensor(-0.2929), tensor(-0.9986), tensor(-0.6045), tensor(-0.8008), tensor(-0.1922), tensor(-0.8784), tensor(-1.2146), tensor(-0.6385), tensor(-0.5338), tensor(-0.2272), tensor(-1.1247), tensor(0.2040), tensor(-0.0229), tensor(-0.6351), tensor(-0.4840), tensor(-0.4501), tensor(-0.9589), tensor(-0.9968), tensor(0.3009), tensor(-0.7426), tensor(-0.5971), tensor(-0.8283), tensor(-0.8008), tensor(-0.9089), tensor(-0.3477), tensor(-0.9282), tensor(-0.8491), tensor(-0.4915), tensor(-0.9869), tensor(-1.1562), tensor(-0.4395), tensor(-0.8253), tensor(0.2505), tensor(-0.8664), tensor(-0.9627), tensor(-0.8831), tensor(-0.5459), tensor(-0.0760), tensor(-0.3312), tensor(-0.3615), tensor(-0.3618), tensor(0.3457), tensor(-0.

111it [1:28:32, 47.82s/it]


Start Epoch 111
Rewards: [tensor(-0.3325), tensor(-0.7325), tensor(-0.1913), tensor(-0.9651), tensor(-0.0355), tensor(-0.4450), tensor(-0.3231), tensor(-0.6174), tensor(-0.4418), tensor(0.0428), tensor(-1.0979), tensor(-0.4050), tensor(-0.0336), tensor(-0.1156), tensor(-0.0901), tensor(-0.5917), tensor(-0.7569), tensor(-0.9479), tensor(-0.3818), tensor(-0.7488), tensor(-0.4742), tensor(-0.3198), tensor(-0.7809), tensor(-0.5266), tensor(-0.4832), tensor(0.2501), tensor(0.0737), tensor(0.3971), tensor(0.0132), tensor(-0.4389), tensor(-0.3603), tensor(0.1029), tensor(-0.8987), tensor(-0.4123), tensor(-0.7302), tensor(-1.3499), tensor(-0.5538), tensor(-1.3929), tensor(-0.7111), tensor(-0.5599), tensor(-1.2303), tensor(-0.4426), tensor(-0.7745), tensor(-0.8525), tensor(-0.6544), tensor(-1.0756), tensor(-0.1461), tensor(-0.3917), tensor(-0.6685), tensor(-0.7488), tensor(-0.1161), tensor(-0.7897), tensor(-0.1961), tensor(-0.4650), tensor(-0.1806), tensor(-0.9342), tensor(-0.0418), tensor(-0.

112it [1:29:19, 47.49s/it]


Start Epoch 112
Rewards: [tensor(-0.2901), tensor(-0.5170), tensor(-0.7052), tensor(-0.9954), tensor(-0.9759), tensor(-0.8490), tensor(-0.7313), tensor(-0.4846), tensor(-0.4581), tensor(-0.9643), tensor(-0.7620), tensor(0.0523), tensor(-1.0776), tensor(-0.7032), tensor(-0.6510), tensor(0.2404), tensor(-1.5563), tensor(-0.5749), tensor(-0.6317), tensor(-0.3428), tensor(-0.3633), tensor(0.0036), tensor(-0.7897), tensor(-0.5319), tensor(-0.6385), tensor(-0.0142), tensor(-0.7111), tensor(-0.4891), tensor(-0.3259), tensor(-0.3615), tensor(-0.4891), tensor(-0.6518), tensor(-1.2303), tensor(-1.1092), tensor(-1.0010), tensor(-1.1543), tensor(-0.1646), tensor(-0.2359), tensor(-0.3805), tensor(-0.4653), tensor(-1.1965), tensor(-0.3522), tensor(0.0066), tensor(-0.6540), tensor(-1.0132), tensor(0.2317), tensor(0.1574), tensor(-0.7009), tensor(-0.0275), tensor(-0.3958), tensor(-0.3390), tensor(-0.1626), tensor(-0.2499), tensor(-0.2918), tensor(-0.6417), tensor(-1.2303), tensor(-1.1377), tensor(-0.

113it [1:30:04, 47.00s/it]


Start Epoch 113
Rewards: [tensor(-0.2943), tensor(-1.1569), tensor(-0.1204), tensor(-0.2522), tensor(-0.9282), tensor(0.0260), tensor(-0.6449), tensor(-0.7459), tensor(-0.3987), tensor(-0.1032), tensor(-0.5553), tensor(-0.8810), tensor(-0.2698), tensor(-1.2478), tensor(-0.4155), tensor(-0.7360), tensor(-0.1195), tensor(0.0364), tensor(-0.7419), tensor(-0.7862), tensor(0.0879), tensor(0.0897), tensor(-0.3233), tensor(-1.1882), tensor(-1.2058), tensor(-0.4635), tensor(-1.0281), tensor(-0.3875), tensor(-0.9986), tensor(0.2871), tensor(-0.2546), tensor(-1.1543), tensor(-0.9890), tensor(-0.7781), tensor(-0.6290), tensor(-0.3233), tensor(-0.7898), tensor(-0.1938), tensor(-0.6603), tensor(0.3009), tensor(-0.1293), tensor(-1.3268), tensor(-0.8093), tensor(-0.1057), tensor(-1.0043), tensor(-0.9951), tensor(-0.8847), tensor(-0.8612), tensor(-1.2244), tensor(-0.4604), tensor(0.5131), tensor(-0.5074), tensor(-0.3837), tensor(-0.6042), tensor(-0.6538), tensor(-0.2357), tensor(-0.7548), tensor(-0.7

114it [1:30:57, 48.58s/it]


Start Epoch 114
Rewards: [tensor(-0.9269), tensor(-1.2562), tensor(-1.1965), tensor(-1.0035), tensor(0.0843), tensor(-0.4050), tensor(-0.8811), tensor(-0.1347), tensor(0.1052), tensor(-0.3050), tensor(-0.5657), tensor(-0.1834), tensor(-0.3969), tensor(-0.0132), tensor(0.0895), tensor(-0.7279), tensor(-0.8019), tensor(-0.6540), tensor(-1.1544), tensor(-0.7073), tensor(-0.5097), tensor(0.1766), tensor(0.0118), tensor(-0.8109), tensor(0.0415), tensor(0.3823), tensor(-0.0512), tensor(-0.7942), tensor(-0.1881), tensor(-0.7841), tensor(-0.0039), tensor(0.0588), tensor(0.0416), tensor(-0.0666), tensor(-0.6520), tensor(-0.1626), tensor(-0.2911), tensor(-0.4376), tensor(0.3448), tensor(-0.1663), tensor(-0.5248), tensor(-0.6706), tensor(-0.9993), tensor(-0.2704), tensor(-0.9890), tensor(-1.2165), tensor(-0.8921), tensor(-0.9651), tensor(-1.2065), tensor(-0.5759), tensor(-0.8275), tensor(-0.7934), tensor(-1.0855), tensor(-0.1864), tensor(-0.4244), tensor(-1.0762), tensor(0.2506), tensor(0.0296),

115it [1:31:44, 48.07s/it]


Start Epoch 115
Rewards: [tensor(-0.5797), tensor(-1.3227), tensor(-0.3896), tensor(-1.2562), tensor(-0.4360), tensor(-1.3888), tensor(-0.9638), tensor(-0.6716), tensor(-0.4450), tensor(0.3501), tensor(-1.2846), tensor(-0.4141), tensor(-0.9854), tensor(0.0296), tensor(-0.7111), tensor(-0.4062), tensor(-1.0638), tensor(0.1793), tensor(-0.5382), tensor(-1.1864), tensor(-0.1606), tensor(-0.0697), tensor(-1.0361), tensor(-0.7974), tensor(-0.5490), tensor(-0.6779), tensor(-0.4842), tensor(-0.4901), tensor(-0.0903), tensor(-0.4341), tensor(0.3402), tensor(-0.4509), tensor(-0.2906), tensor(-0.8820), tensor(0.0217), tensor(-1.1551), tensor(-0.1638), tensor(-0.4138), tensor(-1.2043), tensor(-1.0530), tensor(-0.5010), tensor(-0.7073), tensor(-0.6716), tensor(-0.0041), tensor(-1.3268), tensor(-0.4835), tensor(-0.2520), tensor(-0.8893), tensor(0.1825), tensor(-0.8151), tensor(-0.1420), tensor(0.0066), tensor(-0.2615), tensor(-1.2347), tensor(-0.9821), tensor(-0.4527), tensor(-1.0519), tensor(-0.1

116it [1:32:31, 47.94s/it]


Start Epoch 116
Rewards: [tensor(-0.2990), tensor(-0.7831), tensor(-0.4379), tensor(-0.0384), tensor(-0.4547), tensor(-0.7153), tensor(-0.3875), tensor(-0.9424), tensor(-0.1177), tensor(-1.1864), tensor(-0.8407), tensor(-0.1833), tensor(0.0585), tensor(0.3146), tensor(-1.0784), tensor(-0.5571), tensor(-0.3312), tensor(-0.9827), tensor(-0.8980), tensor(-1.1372), tensor(-0.3506), tensor(0.1289), tensor(-0.2835), tensor(-0.3149), tensor(-0.1833), tensor(-0.5804), tensor(0.0171), tensor(-0.5279), tensor(-1.0151), tensor(-0.5043), tensor(-1.1864), tensor(-0.7662), tensor(-0.1242), tensor(-0.2236), tensor(0.1127), tensor(-0.1690), tensor(-0.4386), tensor(-0.5911), tensor(0.5631), tensor(-0.4705), tensor(-0.5584), tensor(-0.4244), tensor(-1.1689), tensor(-0.9232), tensor(-0.2005), tensor(-1.2582), tensor(-1.1428), tensor(-0.6324), tensor(-0.6245), tensor(-0.1921), tensor(-0.1937), tensor(-0.1893), tensor(-0.3012), tensor(-0.8766), tensor(-0.5077), tensor(-0.1109), tensor(-0.2135), tensor(0.3

117it [1:33:23, 49.01s/it]


Start Epoch 117
Rewards: [tensor(-0.6530), tensor(-0.3271), tensor(0.0221), tensor(-1.1864), tensor(-0.7027), tensor(-0.4796), tensor(-0.9275), tensor(0.1034), tensor(-1.0043), tensor(-1.0100), tensor(0.0091), tensor(-0.6790), tensor(-1.0979), tensor(-1.2105), tensor(-0.3708), tensor(-0.8514), tensor(-0.8022), tensor(-1.1653), tensor(0.0905), tensor(-1.0544), tensor(-0.1156), tensor(-0.8139), tensor(-0.2122), tensor(-0.4729), tensor(-0.3321), tensor(-0.0526), tensor(-1.0958), tensor(-0.2135), tensor(-0.4625), tensor(-0.2440), tensor(-0.7057), tensor(-0.8711), tensor(-0.4512), tensor(-0.8644), tensor(-1.2233), tensor(-1.0734), tensor(-0.7761), tensor(-0.5551), tensor(-0.5081), tensor(-1.1544), tensor(-0.8429), tensor(-0.7445), tensor(-0.8578), tensor(-0.3949), tensor(-1.2142), tensor(-0.5221), tensor(-0.2515), tensor(-0.9604), tensor(-0.9759), tensor(-0.2690), tensor(-0.0910), tensor(0.0123), tensor(-0.7662), tensor(-0.4705), tensor(-0.1718), tensor(-0.2440), tensor(-0.6695), tensor(-0

118it [1:34:18, 50.74s/it]


Start Epoch 118
Rewards: [tensor(-0.0574), tensor(-1.2601), tensor(-0.4669), tensor(0.3344), tensor(-0.7054), tensor(-0.1488), tensor(-0.5471), tensor(0.2264), tensor(-0.4062), tensor(-0.9594), tensor(-1.1450), tensor(-0.9890), tensor(-0.8756), tensor(-0.3958), tensor(-0.4715), tensor(-0.5995), tensor(-0.4268), tensor(0.0277), tensor(-0.4501), tensor(-0.5341), tensor(-1.0784), tensor(-0.9962), tensor(-1.4444), tensor(-0.2174), tensor(0.0102), tensor(-1.1053), tensor(-0.9533), tensor(-0.9604), tensor(-0.3181), tensor(-0.8314), tensor(-0.4782), tensor(-0.3006), tensor(-0.0331), tensor(-0.1311), tensor(-1.1999), tensor(0.1574), tensor(-0.8525), tensor(-0.8402), tensor(-0.1466), tensor(-0.4376), tensor(-0.5855), tensor(-0.5094), tensor(-1.0209), tensor(-1.0378), tensor(-1.2437), tensor(-1.0119), tensor(-0.2151), tensor(-1.0126), tensor(0.2144), tensor(-1.3268), tensor(-0.7047), tensor(-0.4228), tensor(-0.7729), tensor(-1.0731), tensor(-0.6656), tensor(-0.8003), tensor(0.5181), tensor(-0.9

119it [1:35:05, 49.71s/it]


Start Epoch 119
Rewards: [tensor(-1.0958), tensor(-0.7840), tensor(-0.0406), tensor(-0.7027), tensor(-0.3344), tensor(-0.1057), tensor(0.0268), tensor(-1.2644), tensor(-0.7009), tensor(-0.4450), tensor(-0.6158), tensor(-0.6737), tensor(-0.9951), tensor(-0.9771), tensor(0.1289), tensor(-1.1500), tensor(0.3286), tensor(-0.5382), tensor(-0.0513), tensor(-0.7176), tensor(0.1766), tensor(-0.0288), tensor(-0.6728), tensor(-0.1717), tensor(-0.3668), tensor(-0.7897), tensor(-0.8018), tensor(-0.8338), tensor(-1.1195), tensor(-0.1787), tensor(0.0543), tensor(-0.5020), tensor(-0.4581), tensor(-0.3408), tensor(-0.9176), tensor(-0.3306), tensor(-0.9093), tensor(0.2278), tensor(-0.5911), tensor(0.0132), tensor(-0.0384), tensor(-0.7677), tensor(-1.0034), tensor(-1.0489), tensor(-0.5190), tensor(-0.0428), tensor(-1.1058), tensor(-0.2176), tensor(-0.7169), tensor(-0.9604), tensor(-0.8148), tensor(-0.9861), tensor(-0.3605), tensor(-0.4233), tensor(-0.9345), tensor(-0.5849), tensor(-0.9992), tensor(-0.9

120it [1:35:52, 48.85s/it]


Start Epoch 120
Rewards: [tensor(-0.7543), tensor(0.0768), tensor(-0.4629), tensor(-0.2251), tensor(-0.6610), tensor(-0.7979), tensor(-0.3633), tensor(0.2140), tensor(-1.0200), tensor(-0.0889), tensor(-1.1864), tensor(-1.1725), tensor(-0.7716), tensor(0.2641), tensor(-1.3499), tensor(-1.1944), tensor(-0.0045), tensor(-0.8257), tensor(-0.8476), tensor(-0.8245), tensor(-0.1961), tensor(-0.5425), tensor(-0.4972), tensor(-0.5744), tensor(-0.7836), tensor(-0.1778), tensor(-1.0204), tensor(-0.1717), tensor(-1.2056), tensor(-0.2028), tensor(-0.0972), tensor(-1.0996), tensor(-0.7459), tensor(-0.7234), tensor(-0.2155), tensor(-0.0289), tensor(-1.1372), tensor(0.1413), tensor(-1.4601), tensor(-0.4141), tensor(-0.0943), tensor(-0.2578), tensor(-1.3268), tensor(-1.1574), tensor(-0.6779), tensor(-0.5480), tensor(-0.5118), tensor(-0.7223), tensor(-0.2835), tensor(-0.7562), tensor(-0.1169), tensor(-0.9949), tensor(-0.4235), tensor(-0.7279), tensor(-1.2396), tensor(-0.7885), tensor(-0.0979), tensor(-

121it [1:36:40, 48.75s/it]


Start Epoch 121
Rewards: [tensor(-0.3680), tensor(-0.4673), tensor(-1.0045), tensor(-0.4541), tensor(-0.1711), tensor(-0.8711), tensor(-0.4083), tensor(-0.7569), tensor(0.0330), tensor(-0.5834), tensor(-0.9379), tensor(-0.7713), tensor(-1.0016), tensor(-0.2020), tensor(-0.4635), tensor(-1.2696), tensor(0.0843), tensor(0.0158), tensor(-1.1944), tensor(0.0277), tensor(0.3508), tensor(-0.9651), tensor(-0.3958), tensor(-0.1456), tensor(-0.7459), tensor(-1.2392), tensor(-0.4332), tensor(-0.9031), tensor(-0.7684), tensor(-0.9843), tensor(-0.7347), tensor(-0.9312), tensor(-0.7620), tensor(-1.2504), tensor(-0.2743), tensor(-0.4246), tensor(-0.4905), tensor(-0.8008), tensor(-0.1488), tensor(-0.9940), tensor(-1.1917), tensor(-1.0866), tensor(-1.4000), tensor(0.2021), tensor(-0.9995), tensor(-0.8776), tensor(-0.7761), tensor(-0.4796), tensor(-0.4901), tensor(-0.4840), tensor(-1.0029), tensor(-0.2194), tensor(-1.0955), tensor(-0.9007), tensor(-0.8917), tensor(-0.9333), tensor(-1.0177), tensor(-1.

122it [1:37:34, 50.29s/it]


Start Epoch 122
Rewards: [tensor(0.1425), tensor(-0.7309), tensor(-0.7629), tensor(-0.7344), tensor(-1.1864), tensor(-0.0832), tensor(-0.3943), tensor(-0.5080), tensor(-0.5685), tensor(-0.0467), tensor(-0.2797), tensor(-0.7460), tensor(-1.0030), tensor(-1.1067), tensor(-0.7480), tensor(-0.5277), tensor(-0.7863), tensor(0.0158), tensor(0.0768), tensor(0.0175), tensor(-0.3328), tensor(-0.5066), tensor(-0.3921), tensor(-0.9031), tensor(-0.9651), tensor(-1.1388), tensor(-1.0010), tensor(-0.1511), tensor(-0.6620), tensor(-0.7571), tensor(-0.1626), tensor(-1.0339), tensor(-0.3877), tensor(0.0623), tensor(-1.1694), tensor(-1.1058), tensor(-0.8109), tensor(-1.1229), tensor(-0.5320), tensor(-0.7111), tensor(-0.6772), tensor(-0.6351), tensor(-0.4864), tensor(-0.9483), tensor(-0.9205), tensor(-1.3310), tensor(-0.7226), tensor(-0.2698), tensor(-0.4436), tensor(-0.8201), tensor(-1.1710), tensor(-1.2325), tensor(-0.8431), tensor(-0.4356), tensor(-0.2198), tensor(-0.9251), tensor(-0.1882), tensor(-0

123it [1:38:23, 49.74s/it]


Start Epoch 123
Rewards: [tensor(-0.3867), tensor(-0.6792), tensor(-0.1990), tensor(-0.2560), tensor(-0.5710), tensor(-0.4476), tensor(-0.2462), tensor(-0.2204), tensor(-0.3247), tensor(-0.5143), tensor(-0.1881), tensor(-0.0480), tensor(-0.2323), tensor(-1.0739), tensor(-1.2058), tensor(-1.3305), tensor(-0.3198), tensor(-1.0281), tensor(-1.0256), tensor(-1.0161), tensor(-0.3639), tensor(0.0672), tensor(-1.2325), tensor(-0.7862), tensor(-0.1867), tensor(-0.1958), tensor(-0.1715), tensor(-0.0918), tensor(0.0242), tensor(-0.2284), tensor(-0.0882), tensor(-1.1885), tensor(0.2086), tensor(-0.1112), tensor(-0.8847), tensor(0.3045), tensor(-0.1466), tensor(-0.3156), tensor(-0.2740), tensor(-0.7460), tensor(-0.4511), tensor(-0.9414), tensor(-0.0861), tensor(-0.4881), tensor(-0.3680), tensor(-0.4501), tensor(-0.3408), tensor(-0.6282), tensor(-0.9791), tensor(-1.1099), tensor(-1.0043), tensor(-0.2698), tensor(-0.1156), tensor(-0.3251), tensor(-0.6457), tensor(-0.0314), tensor(-0.3867), tensor(0

124it [1:39:14, 50.11s/it]


Start Epoch 124
Rewards: [tensor(-0.0016), tensor(-0.0013), tensor(0.3636), tensor(-0.2018), tensor(-0.8407), tensor(-0.0776), tensor(-0.1866), tensor(-1.1242), tensor(-0.8778), tensor(-0.6997), tensor(-0.7452), tensor(-0.8344), tensor(-0.2188), tensor(-0.7873), tensor(-1.0221), tensor(0.1425), tensor(-0.1009), tensor(-0.5103), tensor(-0.1563), tensor(-1.0256), tensor(-0.0621), tensor(-0.2419), tensor(-0.6544), tensor(-0.5102), tensor(-0.7348), tensor(0.1222), tensor(-0.9560), tensor(0.0737), tensor(-1.0536), tensor(-1.1694), tensor(-0.3816), tensor(-0.2259), tensor(-1.0483), tensor(-0.8989), tensor(-1.0126), tensor(-0.9483), tensor(-0.9162), tensor(-1.0823), tensor(-0.7385), tensor(-0.7953), tensor(-0.1738), tensor(-0.8402), tensor(0.0543), tensor(-0.2646), tensor(-0.5839), tensor(-0.8625), tensor(-0.3877), tensor(-0.5023), tensor(-0.7896), tensor(-0.2659), tensor(-0.6693), tensor(-1.1543), tensor(-0.3917), tensor(-0.7590), tensor(-0.6012), tensor(-0.8929), tensor(-0.3085), tensor(-0

125it [1:40:00, 48.99s/it]


Start Epoch 125
Rewards: [tensor(-0.6258), tensor(-0.1283), tensor(-0.5165), tensor(-0.8955), tensor(-0.4380), tensor(-0.7629), tensor(0.1419), tensor(0.1368), tensor(-1.0060), tensor(-0.0016), tensor(-0.0278), tensor(-0.8718), tensor(-0.4233), tensor(-0.5694), tensor(-0.7684), tensor(-1.4331), tensor(-0.6081), tensor(0.5693), tensor(0.1887), tensor(-0.1519), tensor(-0.8605), tensor(-0.1657), tensor(-0.5539), tensor(-0.8578), tensor(-0.8018), tensor(-0.4832), tensor(-0.2425), tensor(-0.2906), tensor(-0.2256), tensor(-1.0126), tensor(0.1254), tensor(-0.1330), tensor(-0.0258), tensor(-0.3803), tensor(-0.2309), tensor(-0.5585), tensor(-0.0212), tensor(-0.5867), tensor(-1.0783), tensor(-0.6659), tensor(-0.8278), tensor(-0.1809), tensor(-1.0866), tensor(-0.1329), tensor(-0.6351), tensor(0.0861), tensor(-0.5545), tensor(-0.5412), tensor(-1.0622), tensor(-0.2539), tensor(-0.8105), tensor(0.3009), tensor(-1.1058), tensor(-1.1825), tensor(-1.2362), tensor(-0.5637), tensor(0.0971), tensor(-0.76

126it [1:40:49, 49.11s/it]


Start Epoch 126
Rewards: [tensor(-0.2539), tensor(-0.0952), tensor(-1.0276), tensor(-0.3309), tensor(-0.8113), tensor(0.0415), tensor(-0.0384), tensor(-1.3268), tensor(-0.1784), tensor(0.0879), tensor(-1.0924), tensor(-0.5615), tensor(-0.4235), tensor(-0.8223), tensor(-1.0488), tensor(-0.2480), tensor(-0.3271), tensor(-0.2455), tensor(-0.3892), tensor(-0.0247), tensor(-0.7226), tensor(-0.7029), tensor(-0.9114), tensor(0.1674), tensor(-1.0281), tensor(-0.1299), tensor(0.1527), tensor(0.0588), tensor(-0.6034), tensor(0.2098), tensor(-0.7562), tensor(-0.1882), tensor(-0.4715), tensor(-0.6364), tensor(-0.5774), tensor(-0.9117), tensor(-0.2323), tensor(-0.2425), tensor(0.1763), tensor(-1.3993), tensor(-0.1597), tensor(-0.5382), tensor(-0.1626), tensor(-0.8223), tensor(-0.4103), tensor(-0.6737), tensor(-0.1485), tensor(-0.9791), tensor(-1.3351), tensor(-0.3639), tensor(-0.1329), tensor(-0.5382), tensor(-0.0258), tensor(-0.8753), tensor(-0.0972), tensor(-1.0057), tensor(-0.6385), tensor(-0.9

127it [1:41:42, 50.11s/it]


Start Epoch 127
Rewards: [tensor(-1.0663), tensor(-0.3009), tensor(-0.4601), tensor(-0.1137), tensor(-1.1106), tensor(-0.5142), tensor(0.1444), tensor(-0.4805), tensor(0.1587), tensor(-0.0132), tensor(-0.9059), tensor(-0.7629), tensor(-0.6617), tensor(-1.1450), tensor(-0.6751), tensor(-0.4662), tensor(-0.9910), tensor(-1.0029), tensor(-0.2359), tensor(-0.4769), tensor(-0.9651), tensor(-0.3987), tensor(-0.9651), tensor(-0.2428), tensor(0.2703), tensor(-1.3190), tensor(-1.2618), tensor(-1.1708), tensor(-0.6987), tensor(-1.0294), tensor(-0.8142), tensor(-0.3018), tensor(-0.7869), tensor(-0.4275), tensor(-0.3480), tensor(-0.1787), tensor(-0.9643), tensor(-0.3109), tensor(-1.0831), tensor(-0.0526), tensor(-0.7873), tensor(-0.3605), tensor(-0.2176), tensor(-0.4360), tensor(0.2641), tensor(-0.4717), tensor(-0.0041), tensor(-0.7898), tensor(-1.1812), tensor(-0.3166), tensor(-0.5685), tensor(-0.6790), tensor(-0.4743), tensor(0.0416), tensor(0.3604), tensor(-0.9764), tensor(-0.4505), tensor(0.2

128it [1:42:27, 48.64s/it]


Start Epoch 128
Rewards: [tensor(-0.1157), tensor(-0.8939), tensor(-0.7996), tensor(-0.9945), tensor(-0.5694), tensor(-0.4145), tensor(-0.5343), tensor(-0.9349), tensor(-1.0739), tensor(-0.3579), tensor(-0.7028), tensor(-0.9962), tensor(-0.4635), tensor(-0.9347), tensor(-0.1637), tensor(-0.1867), tensor(-0.7269), tensor(0.1752), tensor(0.3508), tensor(-0.9791), tensor(-0.3833), tensor(-1.2950), tensor(0.0417), tensor(-0.9454), tensor(-0.5487), tensor(-0.4436), tensor(-1.1965), tensor(-1.2043), tensor(-0.4892), tensor(-0.7836), tensor(-1.1543), tensor(-0.4901), tensor(-0.0952), tensor(-1.3709), tensor(0.3508), tensor(-0.2205), tensor(-1.1113), tensor(-0.1933), tensor(-0.6385), tensor(-0.8367), tensor(-0.9968), tensor(-0.8794), tensor(-0.3258), tensor(-0.1897), tensor(-0.8372), tensor(-1.0643), tensor(-0.1913), tensor(-0.2712), tensor(-0.5577), tensor(0.1747), tensor(-0.6853), tensor(-1.0098), tensor(-0.5170), tensor(-0.2267), tensor(-1.2606), tensor(-0.0992), tensor(-0.1960), tensor(-0

129it [1:43:20, 49.90s/it]


Start Epoch 129
Rewards: [tensor(0.2317), tensor(-1.0379), tensor(-0.9827), tensor(-0.4877), tensor(-0.1186), tensor(-0.6179), tensor(-0.5474), tensor(-1.1277), tensor(-0.8499), tensor(-0.4731), tensor(-0.0231), tensor(-0.7385), tensor(-0.9863), tensor(-0.5594), tensor(-0.8235), tensor(-0.1150), tensor(-0.7385), tensor(-1.2313), tensor(-0.3615), tensor(-0.7684), tensor(-0.6585), tensor(-0.2149), tensor(-0.8386), tensor(-0.0903), tensor(-0.9312), tensor(-0.3307), tensor(-1.1917), tensor(-0.1606), tensor(0.1902), tensor(-0.5722), tensor(-0.3085), tensor(0.0861), tensor(-0.4805), tensor(0.2752), tensor(-1.1908), tensor(-0.4730), tensor(-0.1833), tensor(-0.2488), tensor(-0.5545), tensor(-1.0539), tensor(-1.0244), tensor(-0.3875), tensor(-0.5841), tensor(-0.3350), tensor(-0.5382), tensor(-0.2236), tensor(0.3457), tensor(-0.9454), tensor(0.2755), tensor(-0.1690), tensor(-0.8402), tensor(-0.2862), tensor(-0.6385), tensor(-1.1233), tensor(-1.1106), tensor(-0.8185), tensor(-0.0950), tensor(-0.

130it [1:44:05, 48.52s/it]


Start Epoch 130
Rewards: [tensor(-0.5029), tensor(-1.0201), tensor(-0.2957), tensor(-0.0730), tensor(-1.2541), tensor(-0.4069), tensor(-0.5637), tensor(-1.0177), tensor(-0.3075), tensor(-0.6153), tensor(-0.1964), tensor(-1.3790), tensor(-0.0319), tensor(-0.3009), tensor(-0.6202), tensor(-1.0294), tensor(0.0485), tensor(-1.2181), tensor(0.0581), tensor(0.2280), tensor(0.0012), tensor(-0.9854), tensor(-1.0934), tensor(0.0860), tensor(-1.2043), tensor(-0.6733), tensor(-0.3301), tensor(0.0280), tensor(-1.1216), tensor(0.3271), tensor(0.0100), tensor(-0.3805), tensor(-0.7772), tensor(-0.6449), tensor(-0.1821), tensor(-0.8588), tensor(-0.3379), tensor(-0.0905), tensor(-0.2345), tensor(-0.7309), tensor(-0.8476), tensor(-0.5248), tensor(-0.1827), tensor(-0.5823), tensor(0.2144), tensor(-0.3267), tensor(-0.8253), tensor(-0.6896), tensor(0.1493), tensor(-0.8654), tensor(-0.2515), tensor(-0.8169), tensor(-0.7279), tensor(-0.7452), tensor(-0.6429), tensor(-0.0016), tensor(0.2264), tensor(-0.3647)

131it [1:44:49, 47.10s/it]


Start Epoch 131
Rewards: [tensor(-0.7444), tensor(-0.1844), tensor(-0.0068), tensor(-0.3208), tensor(-1.0460), tensor(-0.7100), tensor(-0.1214), tensor(-0.9059), tensor(-0.0901), tensor(-0.6109), tensor(-0.2845), tensor(-1.2313), tensor(-0.0303), tensor(-0.4511), tensor(-0.8742), tensor(-0.4730), tensor(-0.7501), tensor(-0.0319), tensor(-1.1106), tensor(-0.4901), tensor(-0.1208), tensor(-0.8257), tensor(-0.8408), tensor(-0.6656), tensor(-0.7279), tensor(0.2630), tensor(-0.1420), tensor(0.0640), tensor(-0.5959), tensor(-1.2090), tensor(-0.9071), tensor(-1.0826), tensor(-0.9550), tensor(-1.4790), tensor(-0.6507), tensor(0.0012), tensor(-0.5170), tensor(0.4741), tensor(-1.1834), tensor(0.4151), tensor(-0.7024), tensor(-0.3389), tensor(-1.0779), tensor(-1.1931), tensor(-0.0188), tensor(0.0029), tensor(-0.5327), tensor(-0.3944), tensor(-0.2659), tensor(-0.1751), tensor(-1.4462), tensor(-0.2272), tensor(-0.8885), tensor(-0.6878), tensor(-0.5995), tensor(-0.4795), tensor(-0.6385), tensor(0.1

132it [1:45:41, 48.47s/it]


Start Epoch 132
Rewards: [tensor(0.1405), tensor(0.0277), tensor(-0.8093), tensor(-1.0401), tensor(-0.9437), tensor(-0.4590), tensor(-0.2705), tensor(-1.2396), tensor(-0.5857), tensor(-0.2109), tensor(-0.5545), tensor(-0.7187), tensor(0.1642), tensor(-0.1833), tensor(-0.4364), tensor(-0.4601), tensor(-1.2846), tensor(-0.3390), tensor(-0.2788), tensor(-0.4650), tensor(-0.0854), tensor(-0.6500), tensor(-0.2787), tensor(-0.0943), tensor(-0.3955), tensor(-0.4921), tensor(-0.8110), tensor(-0.6077), tensor(-0.7713), tensor(-0.0864), tensor(-0.5560), tensor(-0.9890), tensor(-1.2003), tensor(-1.3953), tensor(-0.7387), tensor(-0.2204), tensor(-0.9306), tensor(-0.2109), tensor(-1.1155), tensor(-0.7885), tensor(-0.7334), tensor(-0.7869), tensor(-0.2872), tensor(-0.4332), tensor(-0.8578), tensor(-0.8606), tensor(-0.7226), tensor(-0.8578), tensor(-1.2023), tensor(-1.0265), tensor(-0.8452), tensor(-0.6690), tensor(0.3516), tensor(-0.5207), tensor(0.3102), tensor(-0.3954), tensor(-1.0732), tensor(-0

133it [1:46:31, 49.04s/it]


Start Epoch 133
Rewards: [tensor(-0.4103), tensor(-0.0815), tensor(0.0897), tensor(0.0549), tensor(-0.8344), tensor(-0.4208), tensor(-0.3654), tensor(0.2522), tensor(-0.9275), tensor(-0.3654), tensor(-0.5562), tensor(0.1127), tensor(-0.9711), tensor(-0.4796), tensor(-0.8452), tensor(-0.0835), tensor(-0.7215), tensor(-0.2827), tensor(-0.5069), tensor(-0.4086), tensor(-1.2325), tensor(-0.2786), tensor(-0.6047), tensor(-1.1965), tensor(-0.7733), tensor(-1.0790), tensor(0.0843), tensor(-0.8989), tensor(-0.9910), tensor(-0.5849), tensor(-1.1405), tensor(-0.2997), tensor(-0.1299), tensor(-0.4664), tensor(-0.7590), tensor(-0.6660), tensor(-0.2745), tensor(-1.3895), tensor(-0.4076), tensor(-0.9025), tensor(-0.2132), tensor(-1.1965), tensor(-1.2912), tensor(-0.7956), tensor(-0.5524), tensor(-0.9761), tensor(-1.4462), tensor(-0.2218), tensor(-1.0029), tensor(-0.2911), tensor(-0.8018), tensor(0.2123), tensor(-0.1330), tensor(-0.2875), tensor(-0.9651), tensor(0.2496), tensor(-0.3006), tensor(-1.0

134it [1:47:21, 49.45s/it]


Start Epoch 134
Rewards: [tensor(-0.4963), tensor(-0.3987), tensor(-1.2345), tensor(-1.0619), tensor(0.2979), tensor(-0.4789), tensor(-0.7005), tensor(-0.6373), tensor(-0.7956), tensor(-1.1493), tensor(-0.5781), tensor(-0.7733), tensor(-0.0832), tensor(-0.6245), tensor(-0.6385), tensor(0.0280), tensor(-0.1130), tensor(-0.6918), tensor(-0.3917), tensor(-0.5320), tensor(-0.6256), tensor(-1.0804), tensor(0.0964), tensor(-1.4123), tensor(-0.6587), tensor(-1.0095), tensor(-1.1931), tensor(-0.6714), tensor(-0.4662), tensor(-0.8535), tensor(-1.1113), tensor(-0.5533), tensor(-0.1879), tensor(-0.7187), tensor(-1.0412), tensor(-1.0536), tensor(-0.5082), tensor(-0.0911), tensor(-0.3943), tensor(-0.4743), tensor(-0.8696), tensor(-0.5938), tensor(-0.3765), tensor(-0.2357), tensor(-0.1712), tensor(-1.1331), tensor(-0.9854), tensor(-0.4830), tensor(-1.1931), tensor(0.0432), tensor(-0.7111), tensor(0.3995), tensor(-0.6385), tensor(-0.5827), tensor(-0.9059), tensor(-0.0667), tensor(-1.2146), tensor(-1

135it [1:48:09, 48.85s/it]


Start Epoch 135
Rewards: [tensor(-0.0864), tensor(-0.6385), tensor(-0.1637), tensor(-0.4310), tensor(-0.6158), tensor(-1.2869), tensor(-0.2717), tensor(-0.2673), tensor(0.1902), tensor(-1.0000), tensor(-0.6379), tensor(-1.2129), tensor(-0.6889), tensor(-0.7801), tensor(-0.9111), tensor(-1.0148), tensor(0.1291), tensor(-0.4669), tensor(-0.6600), tensor(-1.0347), tensor(-0.6431), tensor(-0.9414), tensor(-0.7360), tensor(-1.1574), tensor(-0.6385), tensor(-0.1121), tensor(-0.1404), tensor(-1.0514), tensor(-0.5207), tensor(-1.2470), tensor(0.1693), tensor(-0.7628), tensor(-0.4669), tensor(-0.6201), tensor(-0.4050), tensor(-1.1483), tensor(-0.8369), tensor(-0.8960), tensor(-0.4228), tensor(-0.5428), tensor(-0.2425), tensor(-0.5669), tensor(0.3912), tensor(-1.0100), tensor(0.0100), tensor(-0.6685), tensor(-1.2061), tensor(0.0793), tensor(-1.0721), tensor(-0.4789), tensor(-0.7772), tensor(-0.4103), tensor(-1.2109), tensor(-1.0721), tensor(-1.0190), tensor(-0.2661), tensor(-0.8664), tensor(-0.

136it [1:49:00, 49.63s/it]


Start Epoch 136
Rewards: [tensor(-0.4830), tensor(-0.5066), tensor(-0.8397), tensor(-0.7801), tensor(-0.9236), tensor(-0.2935), tensor(-0.5305), tensor(-0.1238), tensor(-0.8043), tensor(-0.3622), tensor(-0.1244), tensor(-1.1229), tensor(-0.2100), tensor(-0.3380), tensor(-0.3193), tensor(-0.3307), tensor(-0.6058), tensor(-0.0750), tensor(-0.7862), tensor(-0.2188), tensor(-0.7898), tensor(-0.9606), tensor(-0.6532), tensor(-1.1124), tensor(-0.8499), tensor(-0.6957), tensor(0.3365), tensor(-0.1822), tensor(-0.7418), tensor(-0.2135), tensor(-0.9342), tensor(-0.2431), tensor(-0.5141), tensor(-0.2929), tensor(-0.6444), tensor(-0.7452), tensor(-0.4892), tensor(-0.4364), tensor(-0.7745), tensor(-1.1864), tensor(-0.0847), tensor(-0.8093), tensor(-0.3477), tensor(-0.1893), tensor(-0.2126), tensor(-0.7223), tensor(-0.4542), tensor(-1.1119), tensor(-0.3325), tensor(-0.9672), tensor(-0.2826), tensor(-1.1769), tensor(-0.5507), tensor(-0.1864), tensor(-0.2221), tensor(-0.2126), tensor(-0.5538), tenso

137it [1:49:58, 52.05s/it]


Start Epoch 137
Rewards: [tensor(0.0323), tensor(-0.9644), tensor(-0.1032), tensor(0.0143), tensor(-0.6624), tensor(0.1222), tensor(-0.4948), tensor(-0.7685), tensor(-0.5096), tensor(0.2630), tensor(-0.2439), tensor(0.1493), tensor(-0.2686), tensor(-0.1387), tensor(0.1887), tensor(-0.6635), tensor(-0.5797), tensor(-0.8169), tensor(-0.4007), tensor(-0.9949), tensor(-0.7415), tensor(-0.0937), tensor(-0.7956), tensor(-1.2061), tensor(-1.3292), tensor(0.1693), tensor(-0.9710), tensor(-1.1493), tensor(-0.7662), tensor(-0.9030), tensor(-0.0278), tensor(-0.4832), tensor(0.0269), tensor(-0.9890), tensor(-0.4750), tensor(-1.3900), tensor(-0.8596), tensor(-0.8201), tensor(-0.7488), tensor(-0.0355), tensor(-1.3227), tensor(-0.2566), tensor(-1.0164), tensor(-0.1985), tensor(-1.0615), tensor(-0.9317), tensor(-0.1479), tensor(-1.1069), tensor(-0.9854), tensor(0.0251), tensor(-0.2906), tensor(-0.3578), tensor(-0.9854), tensor(-0.9759), tensor(-1.0644), tensor(0.1803), tensor(-0.9666), tensor(0.2506)

138it [1:50:46, 50.74s/it]


Start Epoch 138
Rewards: [tensor(0.1491), tensor(-0.0976), tensor(0.0239), tensor(0.2061), tensor(-0.6997), tensor(-0.3325), tensor(0.1001), tensor(-0.6754), tensor(-0.0327), tensor(-0.8625), tensor(-0.5749), tensor(-0.5487), tensor(-0.1214), tensor(-0.5471), tensor(-0.0499), tensor(-0.1624), tensor(0.2282), tensor(-0.3408), tensor(-1.1410), tensor(-0.4907), tensor(-0.5118), tensor(0.5131), tensor(-0.0212), tensor(0.1909), tensor(-0.7685), tensor(-1.0043), tensor(-1.1931), tensor(-0.4730), tensor(-0.4625), tensor(-0.2886), tensor(-1.3392), tensor(-0.2978), tensor(-1.0732), tensor(-0.7309), tensor(0.3604), tensor(-0.6256), tensor(-1.2703), tensor(-0.6258), tensor(-0.7052), tensor(-0.5685), tensor(-0.7414), tensor(-0.1189), tensor(-0.6617), tensor(0.1293), tensor(-0.8715), tensor(-1.0009), tensor(-0.8734), tensor(-0.9424), tensor(0.0221), tensor(-0.0895), tensor(-0.1606), tensor(-0.6163), tensor(-0.8141), tensor(-0.7698), tensor(-0.5413), tensor(0.0462), tensor(-1.0323), tensor(-0.6240)

139it [1:51:31, 49.20s/it]


Start Epoch 139
Rewards: [tensor(-0.5797), tensor(-1.1924), tensor(-0.6779), tensor(-0.8283), tensor(-0.1108), tensor(-0.0467), tensor(-1.1562), tensor(-0.3376), tensor(-0.4635), tensor(-0.3624), tensor(-0.3025), tensor(-0.1251), tensor(-0.0142), tensor(-0.5291), tensor(-0.0355), tensor(-0.8021), tensor(0.2353), tensor(-0.4351), tensor(0.4182), tensor(0.1533), tensor(-0.0610), tensor(-0.8141), tensor(-0.8773), tensor(-1.1079), tensor(-0.2480), tensor(-0.9387), tensor(-0.3624), tensor(-0.1365), tensor(-0.2455), tensor(-0.6728), tensor(-0.5382), tensor(-0.5547), tensor(-0.5857), tensor(-0.8950), tensor(-0.5827), tensor(0.0531), tensor(-0.1532), tensor(-0.3251), tensor(-0.5666), tensor(-0.0220), tensor(-0.3289), tensor(-0.1269), tensor(-1.1651), tensor(-0.2431), tensor(-0.8005), tensor(-0.8729), tensor(-0.0288), tensor(-0.0479), tensor(-0.6970), tensor(-0.1624), tensor(-0.8705), tensor(-0.3391), tensor(-1.1352), tensor(-0.2659), tensor(-0.4881), tensor(-1.2367), tensor(-1.0986), tensor(-

140it [1:52:15, 47.56s/it]


Start Epoch 140
Rewards: [tensor(-0.0889), tensor(-0.8630), tensor(-1.0893), tensor(-0.6504), tensor(-0.8213), tensor(-0.4534), tensor(-0.7684), tensor(-0.6896), tensor(-1.2912), tensor(-0.5792), tensor(0.6414), tensor(-1.3433), tensor(-0.5277), tensor(-0.6330), tensor(-0.7822), tensor(-0.2936), tensor(-0.7444), tensor(-0.8885), tensor(-0.6345), tensor(-0.0296), tensor(-0.1893), tensor(-0.5953), tensor(-1.0400), tensor(-0.0331), tensor(-0.1229), tensor(-1.1864), tensor(-0.2475), tensor(-0.8386), tensor(-0.6426), tensor(-0.6685), tensor(0.0533), tensor(-0.5279), tensor(0.3151), tensor(0.0091), tensor(-0.1960), tensor(-0.3668), tensor(-0.5377), tensor(-0.2751), tensor(-0.1283), tensor(-0.3143), tensor(-1.1931), tensor(-0.5158), tensor(-0.6009), tensor(-0.6772), tensor(-0.3517), tensor(-0.7897), tensor(-0.8540), tensor(-0.4450), tensor(-0.6816), tensor(0.0221), tensor(-0.1485), tensor(-0.1633), tensor(-0.6153), tensor(-0.5459), tensor(-1.1829), tensor(-0.8274), tensor(-0.3389), tensor(0.

141it [1:53:07, 48.94s/it]


Start Epoch 141
Rewards: [tensor(-0.8773), tensor(-0.8550), tensor(-0.0633), tensor(-0.1365), tensor(-0.7444), tensor(-1.0852), tensor(-1.1915), tensor(-0.7898), tensor(-0.7953), tensor(-1.1229), tensor(-0.5978), tensor(-0.9188), tensor(-0.5971), tensor(0.0158), tensor(-1.0951), tensor(-0.6659), tensor(-1.2123), tensor(-0.8223), tensor(-0.1711), tensor(-0.6734), tensor(-0.3622), tensor(0.0066), tensor(-0.9454), tensor(-0.5248), tensor(-0.9390), tensor(-1.2582), tensor(-0.8407), tensor(-0.5503), tensor(-0.3618), tensor(0.1127), tensor(-0.0212), tensor(-0.2869), tensor(-0.2301), tensor(-0.5549), tensor(-0.8540), tensor(-0.3326), tensor(-0.6603), tensor(-0.8499), tensor(-1.0751), tensor(1.2242), tensor(-0.7476), tensor(-0.6451), tensor(-1.1171), tensor(-1.2846), tensor(-0.5154), tensor(-0.2453), tensor(-1.2795), tensor(0.0407), tensor(-0.1633), tensor(-0.6385), tensor(-0.6449), tensor(-0.2188), tensor(-0.3708), tensor(-0.2356), tensor(-0.9533), tensor(-1.1673), tensor(0.1693), tensor(-1.

142it [1:53:59, 49.79s/it]


Start Epoch 142
Rewards: [tensor(-1.0177), tensor(-0.1532), tensor(-0.3558), tensor(-0.9427), tensor(-0.1852), tensor(-0.2285), tensor(0.1894), tensor(-0.0247), tensor(-1.2057), tensor(-0.3517), tensor(0.1405), tensor(-0.7387), tensor(-0.3558), tensor(0.0585), tensor(-0.9010), tensor(0.2309), tensor(-0.9150), tensor(-0.1044), tensor(-0.7360), tensor(-0.8987), tensor(-1.2385), tensor(0.0432), tensor(0.5177), tensor(-0.8359), tensor(-0.3343), tensor(-1.1450), tensor(-0.5930), tensor(-0.1722), tensor(-0.2931), tensor(0.0435), tensor(-0.7231), tensor(-1.0177), tensor(-0.0117), tensor(-0.2860), tensor(-1.0341), tensor(-0.3506), tensor(-0.1717), tensor(-0.4196), tensor(-0.6407), tensor(-0.5462), tensor(-0.1285), tensor(-1.1106), tensor(-0.8818), tensor(-0.2221), tensor(-1.1551), tensor(-1.0412), tensor(0.2086), tensor(-0.5834), tensor(-0.5503), tensor(-0.3661), tensor(-0.8490), tensor(-0.7459), tensor(-0.6645), tensor(-1.3268), tensor(-0.4273), tensor(-0.2368), tensor(-0.4743), tensor(-0.75

143it [1:54:48, 49.67s/it]


Start Epoch 143
Rewards: [tensor(-0.3240), tensor(-0.4356), tensor(0.1077), tensor(-0.4864), tensor(-0.6385), tensor(-0.1751), tensor(-0.8344), tensor(-0.1833), tensor(-0.0479), tensor(-0.8019), tensor(-0.5664), tensor(-0.5207), tensor(0.1001), tensor(0.3406), tensor(-0.6843), tensor(-0.1392), tensor(-1.0601), tensor(-0.7854), tensor(-0.4233), tensor(-1.0256), tensor(-1.0015), tensor(-1.2606), tensor(-0.9249), tensor(-0.3379), tensor(-0.0633), tensor(-1.0150), tensor(-0.9154), tensor(-0.1387), tensor(-0.2862), tensor(-0.4224), tensor(-0.0772), tensor(-0.8499), tensor(-0.1161), tensor(-1.1156), tensor(-1.0884), tensor(-0.6769), tensor(0.0649), tensor(-0.5507), tensor(-0.2754), tensor(-0.2852), tensor(-1.0619), tensor(-0.7061), tensor(0.3988), tensor(-0.6256), tensor(0.0134), tensor(-0.3407), tensor(-1.0758), tensor(-0.3578), tensor(-1.1585), tensor(-0.2991), tensor(0.3402), tensor(-0.2869), tensor(-0.6130), tensor(0.0581), tensor(-0.3391), tensor(-0.8630), tensor(-0.5771), tensor(-0.64

144it [1:55:37, 49.53s/it]


Start Epoch 144
Rewards: [tensor(-0.2935), tensor(-0.2712), tensor(-0.1868), tensor(-1.1749), tensor(-0.3193), tensor(-0.4066), tensor(-0.9628), tensor(0.4885), tensor(-0.0854), tensor(-0.5226), tensor(-0.0384), tensor(0.3344), tensor(-0.9251), tensor(-0.5917), tensor(-0.7961), tensor(-1.0958), tensor(0.3406), tensor(0.2021), tensor(0.2282), tensor(-0.8688), tensor(-1.1410), tensor(-0.5382), tensor(-0.1129), tensor(-1.1309), tensor(-0.1109), tensor(-0.9393), tensor(-0.3654), tensor(-0.2173), tensor(-1.1113), tensor(0.2283), tensor(-0.4273), tensor(-1.1820), tensor(-0.2323), tensor(-0.4379), tensor(-1.2040), tensor(-0.5909), tensor(-1.2394), tensor(-1.0730), tensor(-0.9821), tensor(-0.6970), tensor(-0.8412), tensor(-1.2470), tensor(-0.0131), tensor(-0.9997), tensor(-0.1032), tensor(-0.9804), tensor(-0.8725), tensor(0.0296), tensor(-0.8185), tensor(-0.0735), tensor(-0.1121), tensor(-0.6009), tensor(-0.6918), tensor(-0.0041), tensor(-0.6809), tensor(0.1127), tensor(-0.4332), tensor(-0.12

145it [1:56:22, 48.10s/it]


Start Epoch 145
Rewards: [tensor(0.0585), tensor(0.0091), tensor(-0.8669), tensor(-0.5971), tensor(-1.2040), tensor(-0.9424), tensor(0.3292), tensor(-0.3247), tensor(-1.1427), tensor(-0.1663), tensor(-0.5744), tensor(-0.4669), tensor(-0.6385), tensor(-0.5749), tensor(-1.2038), tensor(0.4593), tensor(-0.5462), tensor(-1.3953), tensor(-0.7226), tensor(-0.2906), tensor(-0.6174), tensor(-0.4476), tensor(-0.8587), tensor(-0.9895), tensor(-0.8142), tensor(-1.0518), tensor(-0.0598), tensor(-0.2787), tensor(-0.8339), tensor(-0.5118), tensor(-0.7138), tensor(-0.4601), tensor(-0.1812), tensor(-0.5857), tensor(0.1070), tensor(-0.5867), tensor(-0.5611), tensor(-0.2368), tensor(-0.8756), tensor(-0.8045), tensor(-0.0013), tensor(-0.2979), tensor(-0.9986), tensor(-0.1214), tensor(-1.1474), tensor(0.0158), tensor(-1.1576), tensor(-0.7705), tensor(-1.0958), tensor(-0.6693), tensor(0.0585), tensor(-0.0467), tensor(-0.7873), tensor(-0.7005), tensor(-1.1043), tensor(-0.9031), tensor(-0.2165), tensor(-0.7

146it [1:57:10, 47.91s/it]


Start Epoch 146
Rewards: [tensor(-0.0979), tensor(-1.1863), tensor(-1.0177), tensor(-0.5348), tensor(-0.5209), tensor(-0.5097), tensor(-0.3944), tensor(-1.2824), tensor(-1.0378), tensor(-0.4830), tensor(0.0251), tensor(-0.3674), tensor(-0.8289), tensor(-0.2267), tensor(0.0227), tensor(-0.5279), tensor(-0.5020), tensor(-0.5308), tensor(-0.7339), tensor(-0.4796), tensor(-0.0314), tensor(-0.7662), tensor(0.0654), tensor(-0.4244), tensor(-0.2877), tensor(-0.3879), tensor(-1.0784), tensor(-0.6050), tensor(-1.2703), tensor(-0.7854), tensor(-1.1642), tensor(-0.2477), tensor(-0.8669), tensor(-0.9249), tensor(-1.0784), tensor(-0.3193), tensor(-0.8043), tensor(-0.2906), tensor(-0.7930), tensor(-0.7841), tensor(-0.5487), tensor(-0.4356), tensor(0.0277), tensor(-0.8553), tensor(-0.3818), tensor(-0.8312), tensor(-1.3145), tensor(-1.1944), tensor(-0.8293), tensor(-0.5626), tensor(-0.1812), tensor(-0.5080), tensor(-0.9533), tensor(-0.3771), tensor(0.3635), tensor(-0.1044), tensor(-0.1960), tensor(-0

147it [1:58:00, 48.76s/it]


Start Epoch 147
Rewards: [tensor(-0.3615), tensor(-1.0000), tensor(-0.6743), tensor(-0.6543), tensor(-0.6449), tensor(0.0462), tensor(-0.3605), tensor(-0.7942), tensor(-1.2181), tensor(-0.2862), tensor(0.3475), tensor(-0.7660), tensor(-0.7119), tensor(-0.2056), tensor(0.1413), tensor(-1.0744), tensor(-0.7801), tensor(-0.2806), tensor(-0.2109), tensor(-0.8226), tensor(-0.6256), tensor(-1.1171), tensor(-0.5685), tensor(0.0567), tensor(0.2180), tensor(-1.1965), tensor(-0.3320), tensor(-0.7476), tensor(-1.2056), tensor(-0.8615), tensor(-0.2142), tensor(-0.9949), tensor(-0.9631), tensor(-0.8811), tensor(-0.2188), tensor(0.2955), tensor(0.1642), tensor(-1.1834), tensor(-0.1503), tensor(-0.0750), tensor(-1.2892), tensor(-0.5209), tensor(-1.0643), tensor(-0.1503), tensor(-0.5225), tensor(-0.3305), tensor(-0.2455), tensor(-0.0358), tensor(-0.4669), tensor(-0.1106), tensor(-0.5469), tensor(-0.2884), tensor(-0.1626), tensor(-1.7994), tensor(-0.6752), tensor(-1.4229), tensor(-0.8010), tensor(-0.9

148it [1:58:47, 48.15s/it]


Start Epoch 148
Rewards: [tensor(-0.6587), tensor(-0.3332), tensor(-0.1485), tensor(-0.6635), tensor(-0.4921), tensor(-0.7100), tensor(-0.3247), tensor(-0.9855), tensor(-0.2855), tensor(-0.0295), tensor(-0.9599), tensor(-0.1299), tensor(-0.5812), tensor(-1.0756), tensor(0.2264), tensor(-0.7138), tensor(-0.6444), tensor(-0.5953), tensor(-1.2065), tensor(-0.7996), tensor(-0.8491), tensor(-1.2129), tensor(-1.0006), tensor(-0.6245), tensor(-0.4602), tensor(0.1364), tensor(-0.9730), tensor(-0.3477), tensor(-0.7667), tensor(-0.6464), tensor(-1.2022), tensor(-0.0776), tensor(-0.4742), tensor(0.3604), tensor(-0.3181), tensor(-0.8253), tensor(-0.3050), tensor(-0.8369), tensor(-0.2483), tensor(-0.9820), tensor(-0.8644), tensor(0.0897), tensor(-0.5272), tensor(-0.1833), tensor(-0.8818), tensor(-0.8500), tensor(-1.5563), tensor(-0.7419), tensor(0.3344), tensor(-0.2520), tensor(-0.3135), tensor(-1.0045), tensor(-0.9890), tensor(-1.0958), tensor(-0.9275), tensor(-0.8142), tensor(-0.4810), tensor(-0

149it [1:59:38, 48.85s/it]


Start Epoch 149
Rewards: [tensor(-0.5480), tensor(-0.4122), tensor(-0.6929), tensor(-1.2099), tensor(-1.2515), tensor(-0.2053), tensor(0.5804), tensor(-0.3241), tensor(-0.7174), tensor(-0.4150), tensor(-0.3344), tensor(-0.4621), tensor(-1.0567), tensor(-0.2659), tensor(-0.1754), tensor(-0.4447), tensor(-1.3227), tensor(0.0765), tensor(-0.4892), tensor(-0.6896), tensor(-0.2883), tensor(-0.5539), tensor(-1.0804), tensor(0.0897), tensor(0.1799), tensor(-0.0218), tensor(-0.8109), tensor(-1.1944), tensor(-0.7684), tensor(-0.9186), tensor(0.3151), tensor(-0.1571), tensor(0.3516), tensor(-0.5611), tensor(-0.4253), tensor(-0.4750), tensor(-0.7862), tensor(-0.6014), tensor(-0.8344), tensor(-0.5633), tensor(-0.0597), tensor(-0.9218), tensor(-1.0378), tensor(-0.4050), tensor(-1.1493), tensor(-1.0015), tensor(-0.6330), tensor(-0.4736), tensor(0.0416), tensor(-0.0910), tensor(-0.3665), tensor(0.1776), tensor(0.0381), tensor(-1.0102), tensor(-0.2931), tensor(-1.3900), tensor(0.0989), tensor(-0.6759

150it [2:00:27, 48.96s/it]


Start Epoch 150
Rewards: [tensor(-0.4705), tensor(-0.0772), tensor(-0.6201), tensor(-0.2646), tensor(0.1030), tensor(-0.3094), tensor(-0.9643), tensor(-1.4777), tensor(-0.1238), tensor(0.1030), tensor(-0.2221), tensor(-0.8966), tensor(-0.4905), tensor(-0.3012), tensor(-0.8711), tensor(-0.1032), tensor(-0.3001), tensor(-1.4329), tensor(-0.0701), tensor(-0.7609), tensor(-1.0383), tensor(-1.1337), tensor(-0.8412), tensor(-0.6364), tensor(0.1444), tensor(-0.4628), tensor(-0.1674), tensor(-0.5823), tensor(-1.1473), tensor(-0.6620), tensor(-0.0815), tensor(-1.1483), tensor(-0.8994), tensor(-0.1601), tensor(-0.2176), tensor(-1.2504), tensor(-1.1544), tensor(-0.4627), tensor(-0.0911), tensor(-0.8302), tensor(-0.4389), tensor(-0.1080), tensor(-0.3803), tensor(-0.9869), tensor(-0.5244), tensor(-0.5068), tensor(-0.9152), tensor(-0.3428), tensor(-0.3307), tensor(-0.2902), tensor(-0.0384), tensor(-1.0256), tensor(-1.0006), tensor(-0.1435), tensor(0.0485), tensor(-0.1742), tensor(-0.6130), tensor(-

151it [2:01:14, 48.55s/it]


Start Epoch 151
Rewards: [tensor(-0.0428), tensor(-0.2794), tensor(-1.1820), tensor(-0.0251), tensor(0.0415), tensor(-1.1864), tensor(-0.7755), tensor(0.2098), tensor(-0.5380), tensor(-1.2028), tensor(-0.4512), tensor(-0.4546), tensor(-0.6685), tensor(-0.3408), tensor(-0.6317), tensor(-0.5710), tensor(0.3475), tensor(-1.1447), tensor(-0.7705), tensor(-1.3268), tensor(-0.7402), tensor(-0.5812), tensor(-0.8122), tensor(-1.1047), tensor(0.2474), tensor(-0.3880), tensor(-1.2001), tensor(-0.2176), tensor(-0.4149), tensor(-1.2067), tensor(-0.2562), tensor(0.0879), tensor(-0.8258), tensor(-0.4457), tensor(-1.2846), tensor(-1.0299), tensor(-0.5050), tensor(-0.2014), tensor(-0.2259), tensor(-0.7119), tensor(-0.0058), tensor(0.4182), tensor(-1.0739), tensor(-0.5377), tensor(-0.4208), tensor(-0.0659), tensor(0.2474), tensor(-0.1381), tensor(-0.9424), tensor(-0.4635), tensor(-0.0512), tensor(-0.8697), tensor(-0.7737), tensor(-0.8257), tensor(-0.8368), tensor(-1.2367), tensor(-0.1692), tensor(-0.2

152it [2:02:00, 47.62s/it]


Start Epoch 152
Rewards: [tensor(-1.0847), tensor(-0.4274), tensor(-0.0202), tensor(-0.6379), tensor(-0.5633), tensor(-1.0295), tensor(-0.2029), tensor(-0.5027), tensor(-0.8253), tensor(-0.3665), tensor(-0.6543), tensor(-1.1484), tensor(0.2144), tensor(-0.2020), tensor(-1.1864), tensor(-0.5834), tensor(-0.6635), tensor(-0.4264), tensor(0.0364), tensor(-0.5556), tensor(-0.7801), tensor(-1.2105), tensor(-0.2014), tensor(-0.5666), tensor(-0.8742), tensor(-0.5343), tensor(-0.8903), tensor(-0.1177), tensor(-0.7027), tensor(-0.1626), tensor(0.2353), tensor(-0.9664), tensor(-1.0642), tensor(-1.0987), tensor(-0.2906), tensor(-0.9842), tensor(-1.2038), tensor(-1.3286), tensor(-0.4805), tensor(-0.3958), tensor(-1.2017), tensor(0.3151), tensor(-0.2888), tensor(-0.3293), tensor(-1.2912), tensor(-1.2347), tensor(0.0523), tensor(-0.5080), tensor(-0.6290), tensor(-0.5857), tensor(-1.0201), tensor(-0.6330), tensor(-0.4629), tensor(-0.2100), tensor(-1.0200), tensor(-0.2114), tensor(0.3995), tensor(-0.

153it [2:02:51, 48.61s/it]


Start Epoch 153
Rewards: [tensor(-0.6624), tensor(-0.3001), tensor(-0.2056), tensor(-0.5633), tensor(-0.1921), tensor(-0.5615), tensor(-1.0128), tensor(-0.7501), tensor(-1.0185), tensor(-1.1053), tensor(-0.8452), tensor(-0.1657), tensor(-0.4101), tensor(-0.4356), tensor(-0.2918), tensor(0.0330), tensor(-0.5695), tensor(-1.1340), tensor(0.1766), tensor(-0.3332), tensor(-0.0212), tensor(-0.9087), tensor(-0.8452), tensor(-0.8197), tensor(-0.8534), tensor(-0.7898), tensor(-0.3009), tensor(0.0672), tensor(-0.3241), tensor(-1.7994), tensor(-0.4233), tensor(-0.5240), tensor(-0.1936), tensor(-1.0032), tensor(-0.1249), tensor(-0.6731), tensor(-0.3837), tensor(-1.1944), tensor(-0.5685), tensor(-0.9651), tensor(-0.4185), tensor(-0.9644), tensor(-0.3666), tensor(-0.2901), tensor(-0.9820), tensor(-0.7912), tensor(-0.1601), tensor(-1.1829), tensor(-0.6047), tensor(-0.1311), tensor(-0.2219), tensor(-0.4600), tensor(-0.2986), tensor(-0.0331), tensor(-0.5995), tensor(-0.7425), tensor(-0.4076), tensor(

154it [2:03:41, 48.97s/it]


Start Epoch 154
Rewards: [tensor(-0.5695), tensor(0.0239), tensor(-0.7417), tensor(-0.6585), tensor(-1.1580), tensor(-0.9344), tensor(-0.6070), tensor(-0.9863), tensor(-0.5412), tensor(-1.1834), tensor(-0.6880), tensor(-0.7102), tensor(-0.8238), tensor(-0.2142), tensor(-1.0615), tensor(-0.5154), tensor(-0.8921), tensor(-1.0016), tensor(-0.1626), tensor(-0.4921), tensor(-0.3605), tensor(-0.5666), tensor(-0.9404), tensor(-0.4782), tensor(-0.1601), tensor(-0.1749), tensor(-0.4507), tensor(0.4485), tensor(-0.5538), tensor(-0.1189), tensor(-0.6346), tensor(-1.0622), tensor(-0.6656), tensor(0.0585), tensor(-1.0100), tensor(-0.2888), tensor(-0.8004), tensor(-0.7596), tensor(-1.4410), tensor(-0.1961), tensor(-0.7313), tensor(-0.1238), tensor(-0.9604), tensor(-0.8019), tensor(-0.4341), tensor(-0.1156), tensor(-0.2406), tensor(-0.5857), tensor(-0.7235), tensor(0.0698), tensor(-0.5094), tensor(-0.4050), tensor(-0.0661), tensor(-0.6617), tensor(-0.0972), tensor(0.2278), tensor(-0.5466), tensor(-1

155it [2:04:25, 47.70s/it]


Start Epoch 155
Rewards: [tensor(-0.7313), tensor(-0.8794), tensor(-0.5877), tensor(-0.8955), tensor(-0.1787), tensor(-0.9010), tensor(0.0332), tensor(-1.0831), tensor(0.0432), tensor(-0.8769), tensor(-1.1352), tensor(-0.2997), tensor(-0.7809), tensor(-0.5118), tensor(-0.7996), tensor(-0.6001), tensor(-0.9726), tensor(-0.0479), tensor(-0.1599), tensor(-0.5459), tensor(0.2061), tensor(-0.5834), tensor(-0.9273), tensor(0.0953), tensor(-0.4196), tensor(-0.5219), tensor(-0.2167), tensor(-1.1069), tensor(-0.6385), tensor(-0.9628), tensor(-0.4752), tensor(-0.9461), tensor(-0.4570), tensor(-0.6918), tensor(-0.3012), tensor(-0.8696), tensor(-0.1606), tensor(-0.5266), tensor(-0.3193), tensor(-0.4235), tensor(-0.1936), tensor(-1.0514), tensor(-0.8810), tensor(-0.7569), tensor(-0.2173), tensor(-0.8987), tensor(-0.4600), tensor(-0.6127), tensor(-0.0882), tensor(-0.8309), tensor(-0.1859), tensor(-1.1427), tensor(-0.3827), tensor(-0.8500), tensor(-0.7024), tensor(-0.8831), tensor(-0.5538), tensor(-

156it [2:05:15, 48.15s/it]


Start Epoch 156
Rewards: [tensor(-0.5049), tensor(-0.5614), tensor(-0.0531), tensor(0.0610), tensor(-0.8514), tensor(-1.1931), tensor(-0.2562), tensor(-0.8289), tensor(-0.6543), tensor(-0.2931), tensor(-0.1363), tensor(-0.9139), tensor(-0.9951), tensor(-0.7187), tensor(-0.2659), tensor(-0.0805), tensor(-0.0496), tensor(-1.1586), tensor(-0.6014), tensor(-1.2292), tensor(-0.3251), tensor(-1.0744), tensor(-0.5118), tensor(-0.8619), tensor(-0.8710), tensor(-0.6587), tensor(-0.7669), tensor(-0.2884), tensor(-0.4664), tensor(-0.9091), tensor(-0.2560), tensor(-0.6544), tensor(-0.9879), tensor(-1.0965), tensor(0.1730), tensor(-0.1219), tensor(-0.8630), tensor(-0.3603), tensor(-0.2056), tensor(-0.5710), tensor(-0.8847), tensor(-0.3070), tensor(-0.5557), tensor(-0.2018), tensor(-0.4805), tensor(-0.9380), tensor(-0.5877), tensor(-1.2276), tensor(0.0623), tensor(-0.8022), tensor(-1.0390), tensor(-0.6617), tensor(-1.0669), tensor(-0.3345), tensor(-0.6366), tensor(0.6373), tensor(-1.2846), tensor(-

157it [2:06:00, 47.38s/it]


Start Epoch 157
Rewards: [tensor(-0.6345), tensor(-1.1134), tensor(-0.8197), tensor(-0.3827), tensor(-0.0543), tensor(0.1691), tensor(0.1763), tensor(-0.1189), tensor(-1.2367), tensor(-0.9317), tensor(-0.8309), tensor(-0.5117), tensor(-0.8010), tensor(-0.8741), tensor(-0.2622), tensor(-0.8151), tensor(0.2283), tensor(-0.5841), tensor(-0.4621), tensor(0.0296), tensor(-1.3895), tensor(-0.7976), tensor(-0.8476), tensor(-0.3376), tensor(-0.7419), tensor(-0.0041), tensor(-0.2661), tensor(-1.0244), tensor(-0.0013), tensor(-0.0985), tensor(-0.9154), tensor(-0.6690), tensor(-0.4673), tensor(-0.6432), tensor(-0.5309), tensor(0.0828), tensor(-0.4233), tensor(0.3286), tensor(-1.2187), tensor(-0.2673), tensor(-0.2219), tensor(-1.1943), tensor(-0.6952), tensor(-0.6695), tensor(-0.2368), tensor(-0.9815), tensor(-0.1178), tensor(-0.6919), tensor(0.0330), tensor(0.4885), tensor(-0.3216), tensor(-0.3428), tensor(-1.0043), tensor(-0.5103), tensor(-0.7480), tensor(-0.4052), tensor(-0.4948), tensor(0.028

158it [2:06:40, 45.13s/it]


Start Epoch 158
Rewards: [tensor(-0.9026), tensor(-0.3456), tensor(0.0268), tensor(-0.2204), tensor(-0.0903), tensor(-0.7334), tensor(-1.0145), tensor(-0.8535), tensor(-0.9767), tensor(-0.7723), tensor(-0.8625), tensor(-0.4907), tensor(-0.2566), tensor(-0.6208), tensor(-0.3693), tensor(-0.3995), tensor(-0.9685), tensor(-1.2398), tensor(-0.8975), tensor(-0.3816), tensor(-0.4300), tensor(-0.2751), tensor(-0.6282), tensor(-0.6364), tensor(-0.4155), tensor(-0.1315), tensor(-1.1543), tensor(-0.5849), tensor(-0.6262), tensor(-0.0847), tensor(-0.9604), tensor(-0.6243), tensor(-1.0538), tensor(-0.6009), tensor(-0.1690), tensor(-0.7102), tensor(-0.2219), tensor(0.2755), tensor(0.0428), tensor(-0.0314), tensor(-0.8150), tensor(-0.5849), tensor(-0.6520), tensor(-0.5272), tensor(-0.4886), tensor(-0.0046), tensor(-0.9890), tensor(-1.1389), tensor(-1.2064), tensor(-0.5082), tensor(-0.5010), tensor(-0.5192), tensor(-1.1272), tensor(-0.9979), tensor(-0.7897), tensor(0.2214), tensor(-0.0927), tensor(-

159it [2:07:23, 44.35s/it]


Start Epoch 159
Rewards: [tensor(-0.7298), tensor(-0.3681), tensor(-0.2786), tensor(-0.9398), tensor(-0.8407), tensor(0.1887), tensor(-0.2812), tensor(0.2962), tensor(-0.0895), tensor(-0.6666), tensor(-0.4313), tensor(-0.9479), tensor(-0.3805), tensor(0.2803), tensor(-0.4304), tensor(-0.1626), tensor(0.0155), tensor(-0.7723), tensor(-0.9557), tensor(-0.8776), tensor(-0.9873), tensor(1.2242), tensor(-0.0141), tensor(-1.2055), tensor(-0.9648), tensor(-0.8109), tensor(-0.9547), tensor(-0.1081), tensor(-0.2615), tensor(0.0432), tensor(0.0053), tensor(-0.9283), tensor(-0.1626), tensor(0.1693), tensor(-0.2353), tensor(0.0861), tensor(-0.5029), tensor(-1.2123), tensor(0.0877), tensor(-0.4729), tensor(-0.6653), tensor(-0.4635), tensor(-0.7649), tensor(-0.3477), tensor(-0.5266), tensor(-1.0478), tensor(-0.4601), tensor(-0.8148), tensor(-1.0520), tensor(-1.0669), tensor(-1.1749), tensor(-0.5652), tensor(-0.6896), tensor(-0.7745), tensor(-0.4790), tensor(-0.8390), tensor(0.3971), tensor(-0.6653)

160it [2:08:05, 43.83s/it]


Start Epoch 160
Rewards: [tensor(-0.1571), tensor(-1.1192), tensor(-1.0378), tensor(-0.7873), tensor(-0.5839), tensor(-0.4948), tensor(0.0189), tensor(0.0654), tensor(-1.0030), tensor(-0.8535), tensor(-0.3816), tensor(0.0828), tensor(0.1902), tensor(-0.0371), tensor(-1.0040), tensor(-0.1372), tensor(-0.6163), tensor(-0.9962), tensor(0.2086), tensor(-1.2347), tensor(-0.8588), tensor(-1.0984), tensor(-0.3576), tensor(-1.3145), tensor(-1.1621), tensor(-0.4461), tensor(-0.9154), tensor(-0.3666), tensor(-0.0336), tensor(-0.3576), tensor(-0.3937), tensor(-0.2686), tensor(-0.3639), tensor(-0.1601), tensor(-0.0882), tensor(-0.2427), tensor(-0.6585), tensor(-0.1706), tensor(-0.8890), tensor(-0.7347), tensor(-0.3937), tensor(-1.2592), tensor(-0.2855), tensor(-1.3268), tensor(-0.0701), tensor(-0.8490), tensor(-0.9312), tensor(-0.1161), tensor(-0.2462), tensor(-0.2165), tensor(0.3402), tensor(-0.4016), tensor(-0.6685), tensor(-0.1817), tensor(-1.1944), tensor(-1.1543), tensor(-1.0777), tensor(-0.

161it [2:08:57, 46.27s/it]


Start Epoch 161
Rewards: [tensor(-0.3880), tensor(0.2180), tensor(-0.6751), tensor(-0.6666), tensor(-0.4715), tensor(-0.8278), tensor(0.0051), tensor(-1.3964), tensor(-0.6754), tensor(-0.7448), tensor(-0.3545), tensor(-1.4086), tensor(-1.1234), tensor(0.3604), tensor(-0.6880), tensor(-1.0831), tensor(-0.0633), tensor(-0.9312), tensor(0.2677), tensor(-0.2055), tensor(-0.9236), tensor(-0.6158), tensor(-0.1189), tensor(-1.1950), tensor(-1.1372), tensor(-0.6752), tensor(-0.7590), tensor(-0.4793), tensor(-1.1868), tensor(-0.6609), tensor(-0.6319), tensor(-0.1302), tensor(0.0301), tensor(-0.5311), tensor(-0.1657), tensor(-0.4436), tensor(-0.7869), tensor(-0.2436), tensor(-0.5082), tensor(-0.3917), tensor(-0.9208), tensor(-0.2475), tensor(-0.7859), tensor(-0.2279), tensor(0.0905), tensor(-0.2698), tensor(-0.0496), tensor(-1.0312), tensor(-0.9209), tensor(-0.2284), tensor(-0.3426), tensor(-0.3833), tensor(-0.4830), tensor(-0.6013), tensor(-0.9617), tensor(-0.2368), tensor(-0.6330), tensor(-0.

162it [2:09:41, 45.62s/it]


Start Epoch 162
Rewards: [tensor(0.1368), tensor(-0.4830), tensor(-0.2954), tensor(-0.3579), tensor(-0.4501), tensor(-0.0805), tensor(-0.1834), tensor(-0.5850), tensor(-1.2011), tensor(-0.0701), tensor(-0.1251), tensor(0.1387), tensor(-0.5553), tensor(0.4166), tensor(-0.8746), tensor(-0.2291), tensor(-0.9859), tensor(-0.5857), tensor(-0.0479), tensor(-0.3517), tensor(-1.0756), tensor(-0.0138), tensor(0.0897), tensor(-0.2887), tensor(-0.5082), tensor(-0.3006), tensor(0.2703), tensor(-1.3968), tensor(0.0416), tensor(-1.1352), tensor(-0.5565), tensor(-0.8296), tensor(-0.5360), tensor(-0.6426), tensor(-0.3693), tensor(0.1674), tensor(-0.8147), tensor(-1.0164), tensor(-0.5125), tensor(0.2203), tensor(-1.2244), tensor(-0.8499), tensor(-1.2087), tensor(-0.3379), tensor(-0.3624), tensor(0.2752), tensor(-0.2285), tensor(-0.1646), tensor(-0.8619), tensor(-0.1347), tensor(-0.1220), tensor(-0.5102), tensor(-0.5420), tensor(-1.2367), tensor(-1.0419), tensor(-0.9863), tensor(-0.0750), tensor(-0.027

163it [2:10:26, 45.50s/it]


Start Epoch 163
Rewards: [tensor(-0.8226), tensor(-0.4743), tensor(-0.1637), tensor(-0.3708), tensor(-0.8296), tensor(-0.1156), tensor(-1.2067), tensor(-0.4546), tensor(-0.6426), tensor(-0.6809), tensor(-0.5551), tensor(-0.9077), tensor(-0.4673), tensor(-0.3682), tensor(-0.1684), tensor(-0.1879), tensor(0.0118), tensor(-0.9847), tensor(-1.0184), tensor(-0.8794), tensor(-1.3268), tensor(0.1631), tensor(-0.2284), tensor(0.1222), tensor(0.0268), tensor(-0.6379), tensor(-0.1706), tensor(-0.2562), tensor(-0.9986), tensor(-0.5794), tensor(-0.3704), tensor(-0.0041), tensor(0.2180), tensor(-1.0790), tensor(-0.4490), tensor(-0.8185), tensor(-0.4196), tensor(-0.9663), tensor(-0.9205), tensor(-0.6644), tensor(-0.8831), tensor(-0.2364), tensor(-0.4790), tensor(-0.4590), tensor(-1.2161), tensor(-0.5694), tensor(-0.9030), tensor(-0.1784), tensor(-0.0058), tensor(-0.1044), tensor(-0.8436), tensor(-0.4310), tensor(-0.5556), tensor(-0.1503), tensor(-0.0166), tensor(-0.3454), tensor(-0.5578), tensor(-0

164it [2:11:09, 44.46s/it]


Start Epoch 164
Rewards: [tensor(-0.4310), tensor(-0.9360), tensor(-0.3603), tensor(0.5181), tensor(-0.3267), tensor(-0.7206), tensor(-0.5718), tensor(-0.8885), tensor(-0.0574), tensor(0.3271), tensor(0.0251), tensor(-0.5657), tensor(-0.7620), tensor(0.0581), tensor(-0.0314), tensor(0.1363), tensor(0.1531), tensor(-0.8073), tensor(0.1747), tensor(-0.6208), tensor(-0.3964), tensor(0.1553), tensor(-0.3470), tensor(-0.2149), tensor(-0.3264), tensor(0.1368), tensor(-0.3018), tensor(-0.7859), tensor(-0.5425), tensor(-0.2100), tensor(-0.0058), tensor(-0.6319), tensor(-0.6593), tensor(-0.5797), tensor(-1.0148), tensor(-0.1009), tensor(0.0296), tensor(-0.7521), tensor(-0.3578), tensor(-0.4832), tensor(-1.1355), tensor(-0.6160), tensor(-0.2935), tensor(-0.6429), tensor(-0.1249), tensor(-0.0621), tensor(-0.8625), tensor(-0.1507), tensor(-0.9158), tensor(-0.9154), tensor(-1.1551), tensor(-0.6759), tensor(-0.0833), tensor(-0.6706), tensor(-1.1931), tensor(-1.4601), tensor(-0.9617), tensor(-1.2040

165it [2:11:51, 43.87s/it]


Start Epoch 165
Rewards: [tensor(-0.6685), tensor(-0.7047), tensor(-0.7912), tensor(-0.3457), tensor(-0.0646), tensor(-0.0679), tensor(-0.5348), tensor(-0.3267), tensor(0.2061), tensor(-1.1047), tensor(-0.1049), tensor(-0.4662), tensor(-1.0256), tensor(0.0475), tensor(-0.6772), tensor(-1.2028), tensor(-0.7176), tensor(-0.5863), tensor(-0.5241), tensor(-1.1286), tensor(-0.1262), tensor(-0.7667), tensor(-1.1863), tensor(-0.1761), tensor(-0.7529), tensor(-0.4389), tensor(-0.3216), tensor(-0.1606), tensor(-0.2036), tensor(-0.6245), tensor(-0.2842), tensor(-0.6012), tensor(-0.2242), tensor(-0.3379), tensor(-0.1736), tensor(-0.1784), tensor(-0.7745), tensor(-0.4277), tensor(-0.5092), tensor(-0.5069), tensor(-0.5657), tensor(-0.5407), tensor(-0.5857), tensor(-0.2798), tensor(-0.4007), tensor(-0.8776), tensor(-0.6262), tensor(-1.0299), tensor(-0.2356), tensor(-0.2978), tensor(-1.1450), tensor(-0.6878), tensor(0.1368), tensor(-0.4795), tensor(-0.9506), tensor(0.2755), tensor(-0.2567), tensor(0

166it [2:12:39, 45.03s/it]


Start Epoch 166
Rewards: [tensor(-0.7705), tensor(-1.1207), tensor(-0.0499), tensor(-0.1532), tensor(-0.2364), tensor(-0.8339), tensor(-0.1381), tensor(-0.2858), tensor(-0.6116), tensor(-0.7309), tensor(0.1445), tensor(-0.1778), tensor(-0.1503), tensor(-0.4433), tensor(-0.6821), tensor(-1.0351), tensor(-1.1574), tensor(-0.2224), tensor(-0.5360), tensor(-0.8654), tensor(-0.2614), tensor(-1.3955), tensor(0.0698), tensor(0.0242), tensor(-0.2800), tensor(-0.5599), tensor(-0.1293), tensor(-0.9770), tensor(-0.8741), tensor(-0.5751), tensor(-0.8696), tensor(-0.8386), tensor(-0.4915), tensor(-0.7733), tensor(-1.1058), tensor(-0.6262), tensor(-0.0112), tensor(-0.5010), tensor(-0.2957), tensor(-0.4601), tensor(-0.5319), tensor(-0.2114), tensor(-1.2562), tensor(-0.5938), tensor(-0.0815), tensor(-1.0035), tensor(-0.3902), tensor(-0.0429), tensor(-0.0685), tensor(-0.8494), tensor(-0.8632), tensor(-0.8864), tensor(-0.6934), tensor(-0.4972), tensor(-0.5872), tensor(-0.4376), tensor(-0.8490), tensor(

167it [2:13:23, 44.81s/it]


Start Epoch 167
Rewards: [tensor(-0.1775), tensor(-0.9577), tensor(-0.4854), tensor(-1.0029), tensor(0.0989), tensor(-1.4689), tensor(-0.2886), tensor(-0.9911), tensor(-0.3896), tensor(-0.0218), tensor(-0.5081), tensor(-0.2370), tensor(-0.9248), tensor(-0.7737), tensor(-1.1321), tensor(-0.1392), tensor(0.1052), tensor(-0.2954), tensor(-0.5539), tensor(-0.1456), tensor(-0.8367), tensor(-0.4433), tensor(-0.1706), tensor(-1.2980), tensor(-0.6563), tensor(0.2522), tensor(-0.6997), tensor(-0.5081), tensor(-0.3063), tensor(-0.1381), tensor(-0.1049), tensor(-0.6438), tensor(-0.3896), tensor(-1.0200), tensor(-0.3198), tensor(-0.1606), tensor(-0.3289), tensor(-0.5557), tensor(-1.2582), tensor(-1.3268), tensor(-0.0142), tensor(-0.9815), tensor(-0.8312), tensor(-0.1013), tensor(-0.6665), tensor(-0.6385), tensor(-0.2053), tensor(-0.4720), tensor(-0.1806), tensor(-1.0401), tensor(-0.8253), tensor(0.3971), tensor(-0.5027), tensor(-0.6751), tensor(-0.6923), tensor(0.0165), tensor(0.0640), tensor(-0.

168it [2:14:11, 45.67s/it]


Start Epoch 168
Rewards: [tensor(-0.2835), tensor(0.4593), tensor(-0.5827), tensor(0.1574), tensor(-0.8302), tensor(-0.6808), tensor(0.3146), tensor(-1.0185), tensor(-0.2787), tensor(-1.2165), tensor(-0.0943), tensor(-0.6012), tensor(-0.4800), tensor(0.0702), tensor(-0.1461), tensor(-0.9251), tensor(-0.3615), tensor(-0.0247), tensor(-0.9089), tensor(-0.2835), tensor(0.0320), tensor(-0.0648), tensor(-0.7947), tensor(-0.9824), tensor(-0.4715), tensor(-0.4418), tensor(-0.0730), tensor(-0.4835), tensor(-0.9317), tensor(-1.0442), tensor(-0.4501), tensor(-0.5737), tensor(-0.8386), tensor(-1.2457), tensor(-0.7316), tensor(-0.1868), tensor(-0.1626), tensor(0.1368), tensor(-0.3009), tensor(-0.0238), tensor(-0.9890), tensor(-0.0808), tensor(0.0857), tensor(-0.4005), tensor(-1.2670), tensor(-0.9365), tensor(-0.6838), tensor(-0.1519), tensor(-1.0296), tensor(-1.0281), tensor(-0.8274), tensor(0.0239), tensor(-1.1388), tensor(0.1127), tensor(-0.4662), tensor(-0.3583), tensor(0.1527), tensor(-0.2053

169it [2:14:59, 46.37s/it]


Start Epoch 169
Rewards: [tensor(-0.1369), tensor(-0.8481), tensor(-0.8553), tensor(-1.4515), tensor(-0.7761), tensor(-0.3364), tensor(-0.4730), tensor(-1.1689), tensor(-0.2015), tensor(-0.5657), tensor(-0.4066), tensor(-1.3563), tensor(-0.2219), tensor(0.1010), tensor(-0.3650), tensor(-0.1358), tensor(-1.0106), tensor(-0.8122), tensor(-0.5126), tensor(-0.1960), tensor(-0.8612), tensor(-1.0981), tensor(-0.4501), tensor(-1.2037), tensor(-0.0936), tensor(-0.7504), tensor(-0.6923), tensor(-0.9617), tensor(-0.7873), tensor(-0.6314), tensor(-1.1335), tensor(-0.7243), tensor(-0.4667), tensor(-0.3936), tensor(-0.3216), tensor(-0.4962), tensor(-0.0603), tensor(-0.5221), tensor(-0.9382), tensor(-0.2971), tensor(-1.0104), tensor(-1.0984), tensor(-0.8386), tensor(-0.2625), tensor(-0.3050), tensor(-1.1377), tensor(-0.4830), tensor(-0.1671), tensor(-0.3343), tensor(-0.4273), tensor(-0.6600), tensor(-0.0479), tensor(-0.4790), tensor(-0.5614), tensor(-0.9191), tensor(-0.0467), tensor(-0.2291), tenso

170it [2:15:46, 46.78s/it]


Start Epoch 170
Rewards: [tensor(-0.7697), tensor(-0.8366), tensor(-0.3958), tensor(-1.0148), tensor(-0.1130), tensor(-0.3059), tensor(-0.6919), tensor(-0.5117), tensor(-1.0832), tensor(-0.1626), tensor(-1.0281), tensor(-0.7521), tensor(-0.7021), tensor(0.5131), tensor(-0.2774), tensor(-0.3739), tensor(-0.4581), tensor(-0.9461), tensor(-0.1922), tensor(-0.2971), tensor(-0.4438), tensor(-0.3241), tensor(-0.4684), tensor(-0.2259), tensor(-0.2109), tensor(-0.0408), tensor(-0.8929), tensor(-0.5277), tensor(-0.5320), tensor(-0.2517), tensor(-0.5264), tensor(-1.2068), tensor(-0.3201), tensor(-0.7678), tensor(-0.4228), tensor(-0.5066), tensor(-0.4443), tensor(-1.1388), tensor(-0.3408), tensor(-0.6752), tensor(-0.4891), tensor(-0.5585), tensor(-0.2721), tensor(-0.6385), tensor(-0.5320), tensor(-0.8339), tensor(-0.6045), tensor(-0.6528), tensor(-0.9293), tensor(-0.8043), tensor(-1.0132), tensor(-0.9285), tensor(-0.9052), tensor(0.2278), tensor(-0.6236), tensor(0.1587), tensor(-1.2534), tensor(

171it [2:16:34, 47.05s/it]


Start Epoch 171
Rewards: [tensor(-0.4627), tensor(-0.6759), tensor(-0.2721), tensor(-0.2135), tensor(-0.8514), tensor(0.1902), tensor(-0.0621), tensor(-0.7460), tensor(-0.7862), tensor(-0.3805), tensor(-0.9347), tensor(-0.5190), tensor(-0.7640), tensor(-0.5411), tensor(-0.2406), tensor(-0.4438), tensor(-0.5754), tensor(-0.5713), tensor(-0.4100), tensor(-1.0674), tensor(0.0490), tensor(-0.0645), tensor(-0.7005), tensor(-0.8664), tensor(-0.3944), tensor(-1.1237), tensor(-0.6153), tensor(0.0768), tensor(-1.0439), tensor(-0.0228), tensor(-0.2887), tensor(-0.5514), tensor(-0.6385), tensor(-0.8307), tensor(-0.5637), tensor(-1.1012), tensor(-0.8654), tensor(-1.1110), tensor(-0.5415), tensor(-0.1326), tensor(-0.2952), tensor(-0.5621), tensor(-0.9556), tensor(-0.1081), tensor(-0.8174), tensor(0.2761), tensor(-0.4891), tensor(-0.6603), tensor(-0.6280), tensor(-0.8005), tensor(-1.1670), tensor(-0.2794), tensor(-0.4219), tensor(0.0472), tensor(0.2703), tensor(-0.2036), tensor(0.0947), tensor(-1.1

172it [2:17:29, 49.35s/it]


Start Epoch 172
Rewards: [tensor(-1.1950), tensor(-0.9269), tensor(-0.6593), tensor(-0.4501), tensor(-0.5246), tensor(-0.9517), tensor(-0.7873), tensor(-0.9759), tensor(-0.3896), tensor(-0.1938), tensor(-0.5428), tensor(0.1693), tensor(-1.0177), tensor(-0.6153), tensor(-0.5551), tensor(-0.0772), tensor(-0.5877), tensor(-0.9718), tensor(0.1887), tensor(-0.7500), tensor(-0.9347), tensor(-0.4083), tensor(-0.7100), tensor(0.2630), tensor(-0.0450), tensor(-0.9414), tensor(-0.9187), tensor(-0.5122), tensor(-0.9152), tensor(0.2303), tensor(0.1318), tensor(-0.9589), tensor(0.1825), tensor(-0.7298), tensor(0.2283), tensor(-0.0966), tensor(-0.4921), tensor(-1.2409), tensor(-0.8535), tensor(-0.5308), tensor(-0.3380), tensor(-0.2740), tensor(-0.0853), tensor(-1.0607), tensor(-0.2625), tensor(-0.5154), tensor(-0.3477), tensor(-0.3289), tensor(-0.7912), tensor(-0.5565), tensor(-1.0567), tensor(-0.3704), tensor(-1.2980), tensor(0.0768), tensor(0.5804), tensor(-0.3165), tensor(-0.8718), tensor(-1.107

173it [2:18:16, 48.73s/it]


Start Epoch 173
Rewards: [tensor(-1.0781), tensor(-0.5514), tensor(-0.0908), tensor(-1.2436), tensor(-0.8874), tensor(-0.5578), tensor(-0.8711), tensor(-0.7562), tensor(-0.8278), tensor(-0.0220), tensor(-1.1286), tensor(-0.1397), tensor(0.0029), tensor(-0.6034), tensor(-0.5490), tensor(0.4741), tensor(-0.7206), tensor(-0.3192), tensor(-0.7028), tensor(-0.6116), tensor(-1.1183), tensor(-0.1655), tensor(-0.6651), tensor(-0.9155), tensor(-0.1392), tensor(-1.1960), tensor(-1.0754), tensor(-0.3286), tensor(-0.7417), tensor(-0.6007), tensor(-0.2100), tensor(0.0132), tensor(-0.5331), tensor(-0.0131), tensor(-0.5685), tensor(-0.0898), tensor(-0.2176), tensor(-1.3249), tensor(-0.5074), tensor(0.6414), tensor(-0.1108), tensor(-0.9716), tensor(-0.1488), tensor(-0.5781), tensor(0.2501), tensor(-0.6751), tensor(-0.9651), tensor(-0.6225), tensor(-0.9873), tensor(-0.3681), tensor(-0.1753), tensor(-0.8811), tensor(0.1887), tensor(-0.2877), tensor(-0.9583), tensor(-0.8676), tensor(-0.4579), tensor(-1.

174it [2:19:03, 48.30s/it]


Start Epoch 174
Rewards: [tensor(0.2061), tensor(-0.9827), tensor(-0.9685), tensor(-1.0644), tensor(-0.9131), tensor(-0.3353), tensor(-1.2140), tensor(-0.1150), tensor(-0.9060), tensor(-0.4356), tensor(-0.4715), tensor(-1.2160), tensor(-0.5715), tensor(-0.4596), tensor(-0.1189), tensor(-0.0867), tensor(-1.0042), tensor(-0.4915), tensor(-0.7664), tensor(-1.3900), tensor(-0.7979), tensor(-0.5995), tensor(-0.9030), tensor(-0.5841), tensor(-0.1204), tensor(-0.9974), tensor(-0.2151), tensor(-0.2028), tensor(-0.0474), tensor(-1.0887), tensor(-0.1293), tensor(-0.7470), tensor(-0.2520), tensor(-1.1546), tensor(-0.6879), tensor(-1.2644), tensor(0.3636), tensor(-0.1690), tensor(-0.6324), tensor(-0.4120), tensor(-1.0501), tensor(-0.0117), tensor(-1.1882), tensor(-0.7198), tensor(-0.6929), tensor(-1.1113), tensor(-0.4720), tensor(0.2309), tensor(-0.7459), tensor(-0.9089), tensor(-0.3618), tensor(-1.0781), tensor(-1.0831), tensor(-0.5023), tensor(-0.1626), tensor(-0.2414), tensor(-1.4086), tensor(

175it [2:19:50, 47.91s/it]


Start Epoch 175
Rewards: [tensor(-0.1242), tensor(-0.6458), tensor(-0.4476), tensor(-0.1299), tensor(-0.5309), tensor(-0.8654), tensor(-0.0808), tensor(0.1596), tensor(-0.7174), tensor(-1.0100), tensor(0.0019), tensor(-0.5154), tensor(-0.5015), tensor(0.1902), tensor(-1.1745), tensor(-0.0661), tensor(-0.2622), tensor(-0.1219), tensor(0.0425), tensor(-0.9187), tensor(-0.3457), tensor(-0.2056), tensor(0.2641), tensor(-0.5102), tensor(0.0425), tensor(0.0490), tensor(-0.9599), tensor(-0.5874), tensor(-1.1106), tensor(-0.0141), tensor(-0.7470), tensor(-0.2018), tensor(-0.5096), tensor(-0.1189), tensor(-0.3576), tensor(-0.4235), tensor(-0.8293), tensor(-0.7970), tensor(-1.2152), tensor(-0.6772), tensor(-0.5117), tensor(0.4603), tensor(-0.3682), tensor(-1.0954), tensor(-1.2367), tensor(-0.9479), tensor(-0.2906), tensor(-1.1703), tensor(-0.5657), tensor(-0.1657), tensor(-0.7360), tensor(-0.8778), tensor(-1.0204), tensor(-0.9751), tensor(-0.1827), tensor(-0.8975), tensor(-1.1451), tensor(-1.20

176it [2:20:40, 48.37s/it]


Start Epoch 176
Rewards: [tensor(-1.1533), tensor(-0.8382), tensor(-1.1237), tensor(0.0132), tensor(-0.2855), tensor(-0.7080), tensor(0.0051), tensor(-1.1105), tensor(-0.6532), tensor(-0.7027), tensor(-1.2470), tensor(-0.5320), tensor(0.1318), tensor(-0.9282), tensor(-0.6736), tensor(-1.1515), tensor(-0.1365), tensor(-0.4103), tensor(-0.1742), tensor(-0.1175), tensor(-1.1543), tensor(-0.6648), tensor(-1.2912), tensor(0.1425), tensor(-0.2173), tensor(-1.1829), tensor(-0.2211), tensor(0.4885), tensor(-0.7705), tensor(-1.3842), tensor(-0.9217), tensor(-0.3837), tensor(-0.4313), tensor(-0.2826), tensor(0.6373), tensor(-0.5219), tensor(-0.3149), tensor(-0.9886), tensor(-0.4948), tensor(-0.0016), tensor(-0.3818), tensor(-0.6779), tensor(-1.0390), tensor(-0.1738), tensor(0.4064), tensor(-0.1299), tensor(-0.4507), tensor(0.1943), tensor(-1.0126), tensor(-0.2578), tensor(-0.8105), tensor(-0.1299), tensor(-0.5117), tensor(0.3475), tensor(-0.0973), tensor(-0.3818), tensor(-1.1229), tensor(-0.131

177it [2:21:28, 48.22s/it]


Start Epoch 177
Rewards: [tensor(-0.5621), tensor(-0.4731), tensor(-0.8921), tensor(-0.0961), tensor(-0.4669), tensor(-0.4150), tensor(0.0877), tensor(-0.7873), tensor(0.1364), tensor(-1.1317), tensor(-0.1013), tensor(-0.6952), tensor(-1.2824), tensor(-0.8174), tensor(-0.1156), tensor(-0.6330), tensor(-1.0756), tensor(-0.4853), tensor(-1.3900), tensor(-0.8213), tensor(-0.5669), tensor(0.2979), tensor(-0.5277), tensor(-0.7928), tensor(-1.0209), tensor(-0.4881), tensor(-0.7349), tensor(-0.8339), tensor(-1.1124), tensor(-1.0544), tensor(-0.8122), tensor(0.0549), tensor(-0.3325), tensor(-0.9273), tensor(-0.2578), tensor(0.0675), tensor(-0.8437), tensor(-0.2567), tensor(0.2164), tensor(-0.3668), tensor(0.1289), tensor(-0.2774), tensor(-1.2023), tensor(0.1052), tensor(-0.5066), tensor(-0.8319), tensor(-0.1189), tensor(-0.6062), tensor(-0.3457), tensor(-0.9063), tensor(-0.1868), tensor(0.0533), tensor(-0.7047), tensor(0.1533), tensor(-0.3376), tensor(-0.7571), tensor(-0.0543), tensor(-1.2347

178it [2:22:23, 50.18s/it]


Start Epoch 178
Rewards: [tensor(-0.4891), tensor(-0.2284), tensor(-1.1450), tensor(-1.0460), tensor(-0.2346), tensor(-0.3576), tensor(-1.1474), tensor(-0.7769), tensor(-1.3227), tensor(-0.1626), tensor(-0.0854), tensor(-0.7873), tensor(-1.2457), tensor(-0.7172), tensor(-1.1542), tensor(-1.2527), tensor(-0.7559), tensor(0.1491), tensor(-0.2284), tensor(-0.6045), tensor(-0.0750), tensor(-0.7027), tensor(-0.1133), tensor(-0.5674), tensor(-0.2483), tensor(-0.4264), tensor(-0.1229), tensor(-0.7187), tensor(-0.5925), tensor(-0.1190), tensor(-1.1673), tensor(-0.5695), tensor(0.0239), tensor(-0.9827), tensor(0.2061), tensor(0.0280), tensor(-0.4351), tensor(-0.9771), tensor(-0.0833), tensor(-0.5938), tensor(-0.0598), tensor(0.0323), tensor(-0.2990), tensor(-0.6280), tensor(-0.7854), tensor(-0.3622), tensor(-0.2845), tensor(-0.3023), tensor(-0.5331), tensor(0.0286), tensor(0.0206), tensor(-0.6385), tensor(-0.7898), tensor(-0.7266), tensor(-1.3968), tensor(-0.3407), tensor(-0.6716), tensor(-0.5

179it [2:23:10, 49.32s/it]


Start Epoch 179
Rewards: [tensor(-0.0750), tensor(-0.6346), tensor(-1.2470), tensor(-0.4050), tensor(-0.2954), tensor(-0.6919), tensor(-0.3521), tensor(-0.2575), tensor(-0.7024), tensor(-1.1825), tensor(-0.0847), tensor(0.2803), tensor(-0.1751), tensor(-1.1377), tensor(-0.5857), tensor(0.0587), tensor(-1.0536), tensor(-0.9672), tensor(-0.1032), tensor(-0.3457), tensor(-0.4596), tensor(0.0268), tensor(-0.1204), tensor(-0.6385), tensor(-0.2256), tensor(-0.6812), tensor(-0.7831), tensor(-1.1931), tensor(0.0155), tensor(-1.1195), tensor(-1.0240), tensor(-1.0296), tensor(-0.7769), tensor(-0.1347), tensor(-0.7638), tensor(0.0091), tensor(-0.2368), tensor(-0.3289), tensor(-0.0621), tensor(0.3636), tensor(-0.3739), tensor(-0.1938), tensor(-0.3232), tensor(-0.5445), tensor(-0.9643), tensor(-0.9414), tensor(-0.1601), tensor(-0.7342), tensor(-1.4689), tensor(-1.1580), tensor(-0.9345), tensor(-0.7885), tensor(-1.0811), tensor(-0.4005), tensor(-0.5701), tensor(-1.0855), tensor(-0.7118), tensor(-0.

180it [2:23:54, 47.76s/it]


Start Epoch 180
Rewards: [tensor(-0.7187), tensor(0.0737), tensor(-1.1463), tensor(-0.4294), tensor(-0.3633), tensor(-0.9283), tensor(-0.2434), tensor(-0.6653), tensor(-0.3984), tensor(-0.6538), tensor(-0.3615), tensor(-0.3631), tensor(-0.1787), tensor(0.1043), tensor(-0.9713), tensor(-0.3615), tensor(-0.1137), tensor(-1.0939), tensor(-0.3326), tensor(-0.2219), tensor(-0.4100), tensor(-0.3109), tensor(-1.5759), tensor(-0.2578), tensor(0.2280), tensor(0.0531), tensor(-0.9751), tensor(-0.5812), tensor(-0.6877), tensor(-0.3066), tensor(-0.0992), tensor(-0.8334), tensor(-0.3896), tensor(-0.9071), tensor(-0.6379), tensor(0.2086), tensor(-0.9350), tensor(-0.3867), tensor(-0.2499), tensor(-0.5080), tensor(-0.7430), tensor(-1.1377), tensor(-0.6578), tensor(-0.4123), tensor(-0.5834), tensor(-0.7922), tensor(-0.4501), tensor(-0.4762), tensor(-0.9533), tensor(0.1943), tensor(-0.4796), tensor(-0.2954), tensor(-1.2138), tensor(-1.0276), tensor(-0.6379), tensor(-0.8110), tensor(-0.3306), tensor(-0.

181it [2:24:40, 47.37s/it]


Start Epoch 181
Rewards: [tensor(0.1438), tensor(-0.5657), tensor(-0.0041), tensor(0.1070), tensor(-0.2242), tensor(-1.4086), tensor(-0.3987), tensor(-0.2884), tensor(-1.2762), tensor(-1.1105), tensor(-1.1171), tensor(-0.9269), tensor(-0.5701), tensor(-0.9117), tensor(-0.7444), tensor(-1.1694), tensor(-0.2842), tensor(-1.2395), tensor(-0.4972), tensor(-0.1103), tensor(-0.6319), tensor(-0.8499), tensor(-1.0148), tensor(-1.2367), tensor(-1.3968), tensor(0.4166), tensor(-0.5553), tensor(-0.7750), tensor(-0.9169), tensor(-0.8454), tensor(-0.6058), tensor(-0.2978), tensor(-0.8296), tensor(-1.2028), tensor(-0.8769), tensor(-0.6518), tensor(-0.8005), tensor(-0.3426), tensor(-0.5797), tensor(-1.2730), tensor(-0.0243), tensor(-0.3711), tensor(-0.4891), tensor(-0.5190), tensor(-1.0762), tensor(-1.2184), tensor(-0.4140), tensor(-0.2777), tensor(-0.3152), tensor(-0.7215), tensor(-0.6172), tensor(-0.6685), tensor(-0.9847), tensor(-0.6919), tensor(-0.8293), tensor(-0.3320), tensor(-0.0645), tensor(

182it [2:25:30, 47.94s/it]


Start Epoch 182
Rewards: [tensor(-0.0142), tensor(-0.4793), tensor(-0.0532), tensor(-0.3181), tensor(-0.9191), tensor(0.2203), tensor(-0.6282), tensor(-0.4511), tensor(-0.8955), tensor(-0.8126), tensor(-1.0419), tensor(-0.0595), tensor(-1.5759), tensor(-0.1108), tensor(-0.5309), tensor(-0.1460), tensor(-0.5480), tensor(-0.5771), tensor(-0.6385), tensor(-1.1638), tensor(-0.2884), tensor(-0.5562), tensor(-1.0708), tensor(-0.5599), tensor(-0.2935), tensor(-0.5096), tensor(0.4175), tensor(-0.0735), tensor(-0.3818), tensor(-0.2957), tensor(-0.4273), tensor(-1.2542), tensor(-0.5744), tensor(-0.0251), tensor(-0.4145), tensor(-0.6538), tensor(-0.7697), tensor(-0.5341), tensor(-1.1700), tensor(-0.5462), tensor(-0.3879), tensor(-0.6385), tensor(-0.0480), tensor(-0.8195), tensor(-1.1944), tensor(-1.0016), tensor(-0.5685), tensor(-0.6034), tensor(-1.0751), tensor(-0.8746), tensor(-0.0821), tensor(-0.5585), tensor(-0.7885), tensor(-0.1358), tensor(-0.5539), tensor(-0.7873), tensor(-0.3964), tensor

183it [2:26:16, 47.57s/it]


Start Epoch 183
Rewards: [tensor(-0.6772), tensor(0.1438), tensor(-1.2470), tensor(-0.8201), tensor(-0.7521), tensor(-0.4188), tensor(-0.7387), tensor(-0.6821), tensor(-0.2155), tensor(-0.4069), tensor(0.0206), tensor(-0.0803), tensor(0.3516), tensor(-0.1742), tensor(-0.6540), tensor(-0.8476), tensor(0.1909), tensor(-0.8369), tensor(-0.5259), tensor(-0.1791), tensor(-0.4389), tensor(-0.4360), tensor(-0.8587), tensor(-1.0294), tensor(-0.6256), tensor(-0.7801), tensor(-0.2475), tensor(0.4175), tensor(0.0689), tensor(-0.9349), tensor(-1.0940), tensor(-0.3630), tensor(-0.7594), tensor(-0.7713), tensor(0.5131), tensor(-0.3332), tensor(-0.3152), tensor(-0.9412), tensor(-0.4579), tensor(-1.0033), tensor(-0.3267), tensor(-1.2950), tensor(-1.2093), tensor(-0.6752), tensor(0.2282), tensor(-0.4832), tensor(-0.6772), tensor(-0.2709), tensor(-0.6141), tensor(-1.1377), tensor(0.0252), tensor(0.0897), tensor(-0.7444), tensor(-0.0979), tensor(0.2752), tensor(-0.6821), tensor(-0.0265), tensor(-1.0847)

184it [2:27:03, 47.41s/it]


Start Epoch 184
Rewards: [tensor(-0.7295), tensor(-0.3708), tensor(-0.9349), tensor(-0.4360), tensor(-0.5611), tensor(-1.0777), tensor(-0.2409), tensor(-1.1737), tensor(-0.5308), tensor(-0.9713), tensor(-0.4397), tensor(-0.0399), tensor(-0.7733), tensor(-0.2259), tensor(0.0242), tensor(-0.6262), tensor(-0.2800), tensor(-0.1893), tensor(-0.7674), tensor(-0.4277), tensor(-0.3723), tensor(-0.8392), tensor(-0.1674), tensor(-1.2459), tensor(-0.9589), tensor(-0.7678), tensor(-0.8729), tensor(-0.3312), tensor(-0.9986), tensor(0.2203), tensor(-0.6385), tensor(-1.1175), tensor(-0.4341), tensor(-0.9617), tensor(-0.3301), tensor(-0.8742), tensor(-0.1461), tensor(0.3286), tensor(-0.2869), tensor(-0.1532), tensor(-0.3116), tensor(-0.0855), tensor(-0.4940), tensor(0.0585), tensor(-0.4249), tensor(-0.8319), tensor(0.0971), tensor(-0.5170), tensor(-0.4364), tensor(-0.9718), tensor(0.0531), tensor(-1.3268), tensor(0.1747), tensor(-0.0701), tensor(0.0678), tensor(0.1425), tensor(-1.2698), tensor(0.3292

185it [2:27:50, 47.23s/it]


Start Epoch 185
Rewards: [tensor(-0.1606), tensor(-0.5068), tensor(-1.1407), tensor(0.0074), tensor(-0.7594), tensor(-0.8992), tensor(0.1066), tensor(0.3475), tensor(-1.1864), tensor(-1.2002), tensor(-0.6201), tensor(-1.0042), tensor(-1.1812), tensor(-0.6118), tensor(-0.0450), tensor(0.4593), tensor(-0.3605), tensor(-0.2979), tensor(-0.6034), tensor(-0.2455), tensor(-1.2127), tensor(-0.5560), tensor(-0.7620), tensor(-0.0621), tensor(-0.1626), tensor(-0.5207), tensor(-0.7569), tensor(-0.4380), tensor(0.0698), tensor(-0.9344), tensor(0.4541), tensor(-0.0428), tensor(-0.6388), tensor(-0.6585), tensor(0.1309), tensor(0.3102), tensor(-0.7729), tensor(-1.0113), tensor(-1.2419), tensor(-1.1706), tensor(-0.2338), tensor(-1.0334), tensor(-0.1842), tensor(-0.3332), tensor(0.0532), tensor(0.1318), tensor(-0.5726), tensor(-0.3149), tensor(-0.9176), tensor(0.1257), tensor(-1.0128), tensor(-0.6070), tensor(-0.8794), tensor(0.0189), tensor(-0.2100), tensor(0.3344), tensor(-0.6659), tensor(-0.5840), 

186it [2:28:36, 46.79s/it]


Start Epoch 186
Rewards: [tensor(-0.6733), tensor(-0.4853), tensor(-1.3227), tensor(-0.1626), tensor(-0.8201), tensor(-0.9890), tensor(-0.0949), tensor(-1.1931), tensor(-0.3141), tensor(-0.2797), tensor(-0.0714), tensor(-1.0281), tensor(0.1169), tensor(-0.8705), tensor(0.4166), tensor(0.1493), tensor(-0.9624), tensor(-0.6385), tensor(-0.8711), tensor(-0.6451), tensor(0.0585), tensor(-1.2160), tensor(-0.1738), tensor(-1.2534), tensor(-0.4546), tensor(-0.2431), tensor(-0.1637), tensor(-0.3201), tensor(0.0765), tensor(-0.2843), tensor(-0.4436), tensor(-0.7149), tensor(0.0971), tensor(-0.8356), tensor(-1.3315), tensor(-0.1156), tensor(-1.0520), tensor(-0.0979), tensor(-0.5412), tensor(-0.4076), tensor(-0.2738), tensor(-1.3790), tensor(-0.6706), tensor(0.0296), tensor(-0.2270), tensor(-0.2952), tensor(-0.3633), tensor(-0.6282), tensor(-0.8756), tensor(-0.5695), tensor(-0.6743), tensor(-0.3816), tensor(-0.2382), tensor(-0.4273), tensor(-1.3227), tensor(-1.0483), tensor(-0.7500), tensor(-1.0

187it [2:29:26, 47.63s/it]


Start Epoch 187
Rewards: [tensor(0.1127), tensor(-1.0221), tensor(0.2140), tensor(-0.9866), tensor(-0.0730), tensor(-0.7360), tensor(-0.6532), tensor(-0.2997), tensor(-0.5611), tensor(-0.9879), tensor(-1.1915), tensor(-0.0220), tensor(-0.7138), tensor(-0.7518), tensor(-0.0384), tensor(-0.1913), tensor(-0.1882), tensor(-0.1922), tensor(-0.0474), tensor(-0.0867), tensor(-0.5023), tensor(-0.4140), tensor(-0.6889), tensor(-0.0648), tensor(-1.0564), tensor(-0.0901), tensor(-1.2072), tensor(-0.4210), tensor(-0.6751), tensor(-0.1011), tensor(0.3912), tensor(0.0765), tensor(-0.4351), tensor(-0.0595), tensor(-0.3875), tensor(-0.5926), tensor(-0.9317), tensor(-0.6385), tensor(-1.1500), tensor(-0.4715), tensor(-0.5762), tensor(-0.4300), tensor(-0.3739), tensor(-0.5669), tensor(-0.8367), tensor(-0.2428), tensor(-1.0908), tensor(-0.4476), tensor(-1.0281), tensor(-0.1674), tensor(-0.8784), tensor(-1.0853), tensor(0.0286), tensor(0.1413), tensor(-0.3216), tensor(-0.1565), tensor(-0.4457), tensor(-1.

188it [2:30:10, 46.77s/it]


Start Epoch 188
Rewards: [tensor(-0.6585), tensor(0.3365), tensor(-0.0859), tensor(-0.1456), tensor(-0.6199), tensor(-0.9556), tensor(0.1001), tensor(-0.5762), tensor(-0.8369), tensor(-0.5797), tensor(-1.0057), tensor(-0.5646), tensor(-1.1838), tensor(-0.6081), tensor(-0.2941), tensor(-0.2015), tensor(-1.2200), tensor(-0.2943), tensor(-0.3354), tensor(-0.2382), tensor(-0.2475), tensor(-0.9235), tensor(-0.9939), tensor(-0.5771), tensor(-0.7750), tensor(-0.8499), tensor(-1.1339), tensor(-0.2053), tensor(-0.5490), tensor(-1.1944), tensor(-0.0905), tensor(-0.2787), tensor(-1.1500), tensor(-0.9103), tensor(-0.5066), tensor(-0.5611), tensor(-0.0910), tensor(-0.0212), tensor(-0.5096), tensor(0.0828), tensor(-0.1787), tensor(-0.5571), tensor(-0.1347), tensor(-0.8992), tensor(-0.5154), tensor(-0.7868), tensor(-0.8293), tensor(-0.0936), tensor(-0.4684), tensor(-0.8289), tensor(-0.4050), tensor(-0.8890), tensor(0.1070), tensor(-0.2802), tensor(-0.6364), tensor(-0.0639), tensor(-0.9966), tensor(-

189it [2:30:57, 46.71s/it]


Start Epoch 189
Rewards: [tensor(-1.2347), tensor(-0.2788), tensor(-0.6488), tensor(-0.5867), tensor(-0.9937), tensor(-0.0949), tensor(-0.2419), tensor(-1.0097), tensor(-0.5020), tensor(-0.1821), tensor(-0.7684), tensor(-0.2343), tensor(-0.9952), tensor(-1.0390), tensor(-0.8366), tensor(-0.1293), tensor(-1.0221), tensor(0.1887), tensor(-1.2129), tensor(-0.8921), tensor(-0.9162), tensor(-1.1944), tensor(-1.2457), tensor(-1.0148), tensor(0.1254), tensor(-1.0758), tensor(-0.0847), tensor(-1.0581), tensor(-0.3913), tensor(-1.1195), tensor(-0.8913), tensor(-0.8436), tensor(-0.4092), tensor(-0.6262), tensor(-0.7543), tensor(-0.1646), tensor(-0.9257), tensor(0.3402), tensor(-0.3068), tensor(0.2915), tensor(-0.5244), tensor(-0.6385), tensor(-0.7342), tensor(-0.5973), tensor(-0.1347), tensor(-0.8746), tensor(-0.9599), tensor(-0.4590), tensor(-0.4650), tensor(-1.2795), tensor(0.1533), tensor(0.6373), tensor(-0.3987), tensor(-0.2272), tensor(-0.4541), tensor(-0.9294), tensor(-1.0359), tensor(-0.

190it [2:31:41, 45.98s/it]


Start Epoch 190
Rewards: [tensor(-0.3354), tensor(-0.5695), tensor(-0.6385), tensor(-0.5382), tensor(-0.8238), tensor(-0.8174), tensor(0.2603), tensor(-0.0854), tensor(-0.3343), tensor(-0.3818), tensor(-0.7316), tensor(-0.9759), tensor(-0.7496), tensor(-1.2152), tensor(0.0581), tensor(-0.8579), tensor(-0.9367), tensor(-0.0681), tensor(0.3971), tensor(-0.0847), tensor(-0.2301), tensor(-1.2247), tensor(-0.6385), tensor(0.2203), tensor(0.0581), tensor(-0.1717), tensor(-0.5074), tensor(-0.7956), tensor(-0.4762), tensor(-0.2180), tensor(-0.6877), tensor(-0.8097), tensor(-0.1108), tensor(-0.3995), tensor(-0.5425), tensor(-0.9269), tensor(-0.2368), tensor(-0.5615), tensor(-0.3258), tensor(0.1001), tensor(-1.0030), tensor(-0.3408), tensor(-0.9854), tensor(-0.3477), tensor(-1.1106), tensor(-0.9506), tensor(-0.2997), tensor(-0.0238), tensor(-0.8386), tensor(-1.2040), tensor(-0.7418), tensor(-0.3766), tensor(-0.6878), tensor(-0.9890), tensor(-0.3135), tensor(-0.7809), tensor(-0.8351), tensor(-1.

191it [2:32:31, 47.24s/it]


Start Epoch 191
Rewards: [tensor(-0.0526), tensor(-1.1484), tensor(-0.5953), tensor(-0.2180), tensor(-1.1500), tensor(-0.5578), tensor(-0.1985), tensor(-0.1057), tensor(-0.2218), tensor(-0.3615), tensor(-1.2127), tensor(-0.6853), tensor(-0.6540), tensor(-0.3765), tensor(0.0828), tensor(-1.1090), tensor(-0.2174), tensor(-0.2428), tensor(-0.1283), tensor(-0.2462), tensor(0.1491), tensor(-0.5142), tensor(-0.3007), tensor(-1.2028), tensor(-0.2712), tensor(-0.4730), tensor(-0.7152), tensor(-1.3203), tensor(0.0435), tensor(-0.2862), tensor(0.1052), tensor(-0.1921), tensor(0.0131), tensor(-0.8540), tensor(-0.3549), tensor(-1.0395), tensor(-0.4921), tensor(-0.4450), tensor(-0.9360), tensor(-0.9093), tensor(-0.1316), tensor(-0.0336), tensor(-0.1516), tensor(-0.4436), tensor(-0.1960), tensor(-0.0492), tensor(-0.0574), tensor(-1.0209), tensor(-0.2475), tensor(-0.5082), tensor(-0.3461), tensor(-0.2743), tensor(0.0843), tensor(-0.3321), tensor(-1.0340), tensor(-1.1502), tensor(-0.6385), tensor(-0.

192it [2:33:17, 46.74s/it]


Start Epoch 192
Rewards: [tensor(-0.4433), tensor(-0.9412), tensor(-0.9866), tensor(-0.9092), tensor(-0.2859), tensor(-0.7102), tensor(-1.0240), tensor(-0.6838), tensor(-0.5549), tensor(0.0879), tensor(-0.2205), tensor(0.0897), tensor(-0.1013), tensor(-0.5126), tensor(-0.4332), tensor(-0.6351), tensor(-0.2221), tensor(-0.0632), tensor(-0.4907), tensor(-1.2028), tensor(0.1222), tensor(-0.1220), tensor(-0.1633), tensor(-0.9155), tensor(-0.2777), tensor(-0.7143), tensor(-1.3268), tensor(-1.1399), tensor(-0.9759), tensor(-0.3321), tensor(-0.8549), tensor(-0.6058), tensor(-1.1166), tensor(-0.5480), tensor(-0.1996), tensor(-0.2142), tensor(0.1405), tensor(-0.5669), tensor(-0.4750), tensor(0.0506), tensor(-0.5482), tensor(-0.5214), tensor(-0.3833), tensor(-0.4185), tensor(-0.0760), tensor(-1.2846), tensor(-0.1996), tensor(-0.3345), tensor(-0.1057), tensor(-0.5490), tensor(-0.5664), tensor(-0.2462), tensor(-1.0045), tensor(-0.3407), tensor(-1.2601), tensor(-1.0783), tensor(-0.1456), tensor(-1

193it [2:34:02, 46.35s/it]


Start Epoch 193
Rewards: [tensor(-0.8022), tensor(-0.9787), tensor(-0.6752), tensor(-0.4891), tensor(-0.6538), tensor(-0.6466), tensor(-0.5474), tensor(-0.5428), tensor(-0.1468), tensor(-0.2251), tensor(-0.9091), tensor(0.0074), tensor(-1.0128), tensor(-0.8543), tensor(-0.6664), tensor(-0.4762), tensor(-0.4103), tensor(-1.0651), tensor(-0.4793), tensor(0.2985), tensor(-1.2980), tensor(-0.2368), tensor(0.0425), tensor(-0.9007), tensor(0.0828), tensor(-0.5226), tensor(0.1909), tensor(-1.2644), tensor(-0.3771), tensor(-0.5850), tensor(-0.5241), tensor(-0.2390), tensor(-0.7098), tensor(-1.2912), tensor(-0.1624), tensor(-0.7028), tensor(-0.5192), tensor(-0.4324), tensor(-0.4501), tensor(-0.2236), tensor(-0.3014), tensor(-0.8431), tensor(-0.9890), tensor(-0.3209), tensor(-0.2285), tensor(-1.1749), tensor(-0.1283), tensor(-0.3652), tensor(0.0549), tensor(-0.6733), tensor(-0.9651), tensor(-1.0847), tensor(-0.9952), tensor(-0.5259), tensor(-0.7723), tensor(0.1405), tensor(-0.9771), tensor(-0.1

194it [2:34:49, 46.42s/it]


Start Epoch 194
Rewards: [tensor(0.0623), tensor(-1.1262), tensor(-0.6160), tensor(-0.7769), tensor(-1.1229), tensor(-0.9729), tensor(-0.2174), tensor(-0.5694), tensor(-0.4864), tensor(-0.2455), tensor(-0.1637), tensor(0.0793), tensor(-0.1302), tensor(-0.7005), tensor(-0.5445), tensor(-0.4100), tensor(-0.6716), tensor(-0.8113), tensor(-0.3660), tensor(-1.0460), tensor(-0.3665), tensor(-0.5774), tensor(-0.6987), tensor(-1.0256), tensor(-0.9020), tensor(-0.8869), tensor(-0.0251), tensor(0.1730), tensor(-0.4501), tensor(-0.3240), tensor(-0.6728), tensor(-0.7018), tensor(-0.6449), tensor(-0.2109), tensor(0.1077), tensor(-0.6863), tensor(-0.1663), tensor(-0.6635), tensor(-0.7347), tensor(0.2108), tensor(-0.8831), tensor(-0.5368), tensor(-0.5553), tensor(-0.9968), tensor(-1.0729), tensor(-0.8253), tensor(0.0012), tensor(-1.0519), tensor(0.0332), tensor(-0.0853), tensor(-0.0905), tensor(-0.5514), tensor(0.1405), tensor(-1.1427), tensor(-0.6050), tensor(-0.6201), tensor(-0.0218), tensor(-0.78

195it [2:35:35, 46.18s/it]


Start Epoch 195
Rewards: [tensor(-0.1269), tensor(-0.0121), tensor(-0.0526), tensor(-0.6910), tensor(-0.2308), tensor(-0.5611), tensor(-0.5726), tensor(-0.8334), tensor(-0.1059), tensor(-0.8386), tensor(-0.8307), tensor(-0.1133), tensor(-0.8831), tensor(-0.2357), tensor(-1.1749), tensor(-1.2142), tensor(-0.0428), tensor(0.3988), tensor(0.2955), tensor(-0.6385), tensor(-0.6779), tensor(-0.4140), tensor(-1.2181), tensor(-0.5049), tensor(-0.4921), tensor(-0.1657), tensor(-0.6346), tensor(-0.9787), tensor(-0.5578), tensor(-1.2466), tensor(-0.7488), tensor(-0.4016), tensor(-0.5823), tensor(-0.2614), tensor(-0.4304), tensor(-0.5480), tensor(-0.6987), tensor(-0.2962), tensor(-0.8917), tensor(-0.1214), tensor(-0.0474), tensor(-0.2114), tensor(-0.9052), tensor(0.0132), tensor(-0.0950), tensor(-0.0633), tensor(-0.5551), tensor(-0.2884), tensor(-0.3264), tensor(-0.9150), tensor(-1.1306), tensor(-0.5309), tensor(-0.3875), tensor(-0.4782), tensor(0.0158), tensor(-0.8307), tensor(-0.1913), tensor(-

196it [2:36:20, 47.86s/it]


In [22]:
device = ppo_trainer.accelerator.device
if ppo_trainer.accelerator.num_processes == 1:
    device = 0 if torch.cuda.is_available() else "cpu"  # to avoid a `pipeline` bug

In [28]:
#### get a batch from the dataset
bs = 16
game_data = dict()
ppo_train_dataset.set_format("pandas")
df_batch = ppo_train_dataset[:].sample(bs)
game_data["query"] = df_batch["query"].tolist()
query_tensors = df_batch["input_ids"].tolist()

response_tensors_ref, response_tensors = [], []

#### get response from gpt2 and gpt2_ref
for i in range(bs):
    # gen_len = output_length_sampler()
    generation_kwargs["max_new_tokens"] = 32
    output = ref_model.generate(
        torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device), **generation_kwargs
    ).squeeze()[-32:]
    response_tensors_ref.append(output)
    output = model.generate(
        torch.tensor(query_tensors[i]).unsqueeze(dim=0).to(device), **generation_kwargs
    ).squeeze()[-32:]
    response_tensors.append(output)

#### decode responses
game_data["response (before)"] = [tokenizer.decode(response_tensors_ref[i]) for i in range(bs)]
game_data["response (after)"] = [tokenizer.decode(response_tensors[i]) for i in range(bs)]

#### sentiment analysis of query/response pairs before/after
texts = [q + r for q, r in zip(game_data["query"], game_data["response (before)"])]
game_data["rewards (before)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]

texts = [q + r for q, r in zip(game_data["query"], game_data["response (after)"])]
game_data["rewards (after)"] = [output[1]["score"] for output in sentiment_pipe(texts, **sent_kwargs)]

# store results in a dataframe
df_results = pd.DataFrame(game_data)
df_results

Unnamed: 0,query,response (before),response (after),rewards (before),rewards (after)
0,"#<unk>##<unk>#<unk>,<unk>,<unk>,<unk>,<unk>,<u...",<pad> <unk></s>,"<pad> <unk>,<unk>,<unk>,<unk>,<unk>,<unk>,<unk...",-0.820149,-0.820149
1,"<unk>?#<unk>#<unk>,<unk>,<unk>,<unk>“<unk>”!#<...",<pad> <unk></s>,"<pad> <unk>,<unk>,<unk></s>",-0.113685,-0.113685
2,"<unk>,<unk>..L<unk>#<unk>##<unk>#</s>",<pad> <unk></s>,"<pad> <unk>,<unk>?</s>",-0.961194,-0.770309
3,"#<unk>WTT<unk>#<unk>!<unk>,WTT<unk>, #<unk>#,<...",<pad> <unk>!!!<unk></s>,<pad> <unk> <unk> <unk> <unk> <unk> <unk></s>,-1.202836,-1.202836
4,"<unk>,<unk>#<unk>#L<unk>bot</s>",<pad> <unk></s>,"<pad> <unk>,<unk></s>",-1.191703,-1.107892
5,#<unk>900<unk>##<unk>900<unk>#:<unk>6<unk>25<u...,"<pad> <unk>,<unk>,<unk>,<unk></s>","<pad> <unk>,900<unk> <unk></s>",-0.665839,-0.665839
6,"#<unk>#4<unk>8<unk>,#<unk>#<unk>,<unk>7<unk>,<...",<pad> <unk> <unk> <unk> <unk> <unk> <unk> <unk...,"<pad> <unk>,<unk>,<unk>,<unk>,<unk>,<unk></s>",-1.178802,-1.178802
7,#<unk>#<unk>:<unk>!#<unk>#<unk>......<unk>“<un...,"<pad> <unk>,<unk></s>","<pad> <unk>,<unk></s>",-0.162369,-0.162369
8,"#<unk>#<unk>2<unk>26<unk>,<unk>,<unk>,<unk>27<...","<pad> <unk>,<unk></s>",<pad> <unk></s>,-0.915403,-0.915403
9,"TTG<unk>#TTG<unk>eStar#<unk>,<unk>,<unk>,<unk>...",<pad> <unk></s>,"<pad> <unk>,<unk>,<unk>,<unk></s>",-0.624466,-0.624466


In [29]:
print("mean:")
display(df_results[["rewards (before)", "rewards (after)"]].mean())
print()
print("median:")
display(df_results[["rewards (before)", "rewards (after)"]].median())

mean:


rewards (before)   -0.668355
rewards (after)    -0.651187
dtype: float64


median:


rewards (before)   -0.660078
rewards (after)    -0.660078
dtype: float64

In [30]:
model.save_pretrained("ppo-tuned-t5-small")
tokenizer.save_pretrained("ppo-tuned-t5-small")

('ppo-tuned-t5-small/tokenizer_config.json',
 'ppo-tuned-t5-small/special_tokens_map.json',
 'ppo-tuned-t5-small/spiece.model',
 'ppo-tuned-t5-small/added_tokens.json',
 'ppo-tuned-t5-small/tokenizer.json')