In [1]:
import gymnasium as gym
import math
import random
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count, chain
import tqdm
import copy

import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions.categorical import Categorical
from torch.autograd import Variable


from diffusers import DDIMScheduler, DPMSolverMultistepScheduler, EulerAncestralDiscreteScheduler

from nosaveddata.nsd_utils.save_hypers import Hypers, nsd_Module
from nosaveddata.nsd_utils.nsd_csv import add_to_csv
from nosaveddata.nsd_utils.networks import params_count, params_and_grad_norm, seed_np_torch
from nosaveddata.nsd_utils.einstein import Rearrange

from nosaveddata.builders.mlp import *
from nosaveddata.builders.weight_init import *
from nosaveddata.builders.resnet import IMPALA_Resnet, DQN_Conv, IMPALA_YY

from nosaveddata import *


from utils.experience_replay import *


import locale
locale.getpreferredencoding = lambda: "UTF-8"

import wandb


# Environment configuration.
# Swap the commented pair in to run a different game/seed.
#env_name = 'Kangaroo'
#SEED = 8712

env_name = 'Assault'
SEED = 7783


# Start the Weights & Biases run that `optimize` later logs into.
wandb.init(
    project="Atari-100k-BBF",
    name=f"BBF-Diffusion-{env_name}",

    # Uncomment to resume an existing run instead of starting a new one.
    #id='rotdmtc5',
    #resume='must',

    config={
        # NOTE(review): informational only; keep in sync with `lr` defined below.
        "learning_rate": 1e-4,
        "architecture": "BBF",
        # Derived from env_name so the logged dataset cannot drift from the
        # game that is actually being trained on.
        "dataset": env_name,
        "epochs": 100,
    },

    reinit=False
)



# Optimization
batch_size = 32
lr=1e-4            # learning rate of the DQN optimizer
lr_diffusion=1e-4  # learning rate of the diffusion-model optimizer
lr_energy=1e-4     # learning rate of the energy-model optimizer
M=16               # samples drawn per state by `denoise` inside `train_energy`
energy_beta=3      # multiplier applied to target energies before the softmax in `train_energy`
HIDDENS=2048       # width of the DQN latent; also the state part of the diffusion latent

eps=1e-8           # added inside log(...) calls to avoid log(0)


# Target network EMA rate
critic_ema_decay=0.995


# Return function
# Discounts are stored as log(1 - gamma); `optimize` interpolates linearly in that
# space and maps back with 1 - exp(.), i.e. gamma is annealed from 0.97 to 0.997.
initial_gamma=torch.tensor(1-0.97).log()
final_gamma=torch.tensor(1-0.997).log()

# n-step horizon, annealed geometrically from initial_n down to final_n.
initial_n = 10
final_n = 3

# Number of atoms of the categorical value distribution.
# NOTE(review): the network heads are sized with this constant while their supports
# are built from the `n_atoms` constructor defaults (51) -- the two must stay equal.
num_buckets=51


# Reset Schedule and Buffer
reset_every=40000 # counted in gradient steps, not environment steps.
schedule_max_step=reset_every//4  # gradient steps over which gamma and n are annealed
total_steps=102000

prefetch_cap=1 # actually, no prefetch is being done


Transition = namedtuple('Transition',
                        ('state', 'reward', 'action', 'c_flag'))
# Replay buffer class comes from utils.experience_replay (star import above).
memory = PrioritizedReplay_nSteps_Sqrt(total_steps+5, total_steps=schedule_max_step, prefetch_cap=prefetch_cap)



def save_checkpoint(net, model_target, optimizer, step, path):
    """Write a training checkpoint to `path` with `torch.save`.

    The saved dictionary holds the state dicts of the online network, the
    target network and the optimizer, plus the current `step` value.
    """
    checkpoint = dict(
        model_state_dict=net.state_dict(),
        model_target_state_dict=model_target.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
        step=step,
    )
    torch.save(checkpoint, path)


  from .autonotebook import tqdm as notebook_tqdm
  torch.utils._pytree._register_pytree_node(
[34m[1mwandb[0m: Currently logged in as: [33msnykralafk[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
# Adapted from: https://github.com/weipu-zhang/STORM/blob/main/env_wrapper.py
class MaxLast2FrameSkipWrapper(Hypers, gym.Wrapper):
    """Frame-skip wrapper that returns the pixel-wise max of the last two frames.

    Each call to `step` repeats the action for up to `skip` underlying frames,
    sums the rewards and max-pools the last two observations.

    `skip`, `noops` and `seed` are never assigned explicitly in `__init__` but are
    read back as attributes below, so the `Hypers` base presumably stores the
    constructor arguments on `self` -- TODO confirm against nosaveddata.
    """

    def __init__(self, env, skip=4, noops=30, seed=0):
        super().__init__(env=env)
        self.env.action_space.seed(seed)

    def reset(self, **kwargs):
        """Reset the wrapped env, always forcing `seed=self.seed`.

        NOTE(review): the same seed is passed on every reset; confirm that
        re-seeding each episode is intended.
        """
        kwargs["seed"] = self.seed
        obs, info = self.env.reset(**kwargs)

        return obs, info

    def noop_steps(self, states):
        """Take a random number (0..self.noops) of action-0 steps.

        Every resulting observation is run through the module-level `preprocess`
        and appended to `states`, which is returned.
        """
        noops = random.randint(0, self.noops)

        for _ in range(noops):
            state = self.step(np.array([0]))[0]
            state = preprocess(state)
            states.append(state)
        return states

    def step(self, action):
        """Repeat `action` for up to `self.skip` frames.

        Returns:
            (obs, total_reward, done, truncated, info) where `obs` is the
            element-wise max over the last two frames (or the single frame if the
            episode ended on the first one), `total_reward` is the sum of the
            per-frame rewards, and the remaining values come from the last
            underlying step.
        """
        total_reward = 0
        self.obs_buffer = deque(maxlen=2)
        for _ in range(self.skip):
            obs, reward, done, truncated, info = self.env.step(action)
            self.obs_buffer.append(obs)
            total_reward += reward

            # Stop repeating the action as soon as the episode ends.
            if done or truncated:
                break
        if len(self.obs_buffer) == 1:
            obs = self.obs_buffer[0]
        else:
            obs = np.max(np.stack(self.obs_buffer), axis=0)
        return obs, total_reward, done, truncated, info
        # Life loss is calculated on the training code

# A vectorised env with a single sub-env is used, so observations, rewards and
# flags carry a leading axis of size 1.
env = gym.vector.make(f"{env_name}NoFrameskip-v4", num_envs=1)
env = MaxLast2FrameSkipWrapper(env,seed=SEED)


# The vector env exposes one action space per sub-env; take the first.
#n_actions = env.action_space.n
n_actions = env.action_space[0].n

state, info = env.reset()
# NOTE(review): with a vector env, len(state) is presumably the number of
# sub-envs (1) rather than an observation size -- confirm before relying on it.
n_observations = len(state)

# Seeds numpy / torch via the nosaveddata helper (after the env was created).
seed_np_torch(SEED)

  gym.logger.warn(


In [3]:

def renormalize(tensor, has_batch=False):
    """Min-max rescale every entry along the first axis independently.

    All trailing dimensions are flattened, the per-row minimum is subtracted
    and the result is divided by (max - min + 1e-5). The output has the shape
    of the input. `has_batch` is accepted but not used.
    """
    original_shape = tensor.shape
    flat = tensor.view(original_shape[0], -1)
    row_max = torch.max(flat, -1, keepdim=True).values
    row_min = torch.min(flat, -1, keepdim=True).values
    scaled = (flat - row_min) / (row_max - row_min + 1e-5)
    return scaled.view(original_shape)


class DQN(nn.Module):
    """Distributional dueling Q-network with a latent transition model.

    Components built in `__init__`:
      * `encoder_cnn`  -- convolutional encoder (IMPALA_Resnet).
      * `projection`   -- MLP from the flattened encoder output to `hiddens`.
      * `prediction`   -- MLP applied on top of projected *predicted* latents.
      * `transition`   -- two conv layers mapping (latent + one-hot action planes)
                          to the next latent.
      * `a` / `v`      -- advantage and value heads producing categorical
                          distributions over `num_buckets` atoms.

    NOTE(review): the heads are sized with the module-level `num_buckets` and
    several methods read the module-level `n_actions`, while `self.support` uses
    the `n_atoms` argument -- these must agree. `mlp_layers` is accepted but unused.
    """
    def __init__(self, n_actions, hiddens=HIDDENS, mlp_layers=1, scale_width=4,
                 n_atoms=51, Vmin=-10, Vmax=10):
        super().__init__()
        # Atom locations of the value distribution. Created directly on the GPU as a
        # plain attribute (not a registered buffer).
        self.support = torch.linspace(Vmin, Vmax, n_atoms).cuda()
        
        self.hiddens=hiddens
        self.scale_width=scale_width
        self.act = nn.ReLU()
        
        
        self.encoder_cnn = IMPALA_Resnet(scale_width=scale_width, norm=False, init=init_xavier, act=self.act)
        
        

        # Single layer dense that maps the flattened encoded representation into hiddens.
        # 13824 is the flattened encoder output size -- presumably
        # (32*scale_width) channels x 12 x 9 for 96x72 inputs; TODO confirm.
        self.projection = MLP(13824, med_hiddens=hiddens, out_hiddens=hiddens,
                              last_init=init_xavier, layers=1)
        self.prediction = MLP(hiddens, out_hiddens=hiddens, layers=1, last_init=init_xavier)
                                              
        # Input channels = latent channels + one plane per action (see get_transition).
        self.transition = nn.Sequential(DQN_Conv(32*scale_width+n_actions, 32*scale_width, 3, 1, 1, norm=False, init=init_xavier, act=self.act),
                                        DQN_Conv(32*scale_width, 32*scale_width, 3, 1, 1, norm=False, init=init_xavier, act=self.act))

        # Single layer dense that maps hiddens into the output dim according to:
        # 1. https://arxiv.org/pdf/1707.06887.pdf -- Distributional Reinforcement Learning
        # 2. https://arxiv.org/pdf/1511.06581.pdf -- Dueling DQN
        self.a = MLP(hiddens, out_hiddens=n_actions*num_buckets, layers=1, in_act=self.act, last_init=init_xavier)
        self.v = MLP(hiddens, out_hiddens=num_buckets, layers=1, in_act=self.act, last_init=init_xavier)
    
        params_count(self, 'DQN')
    
    def forward(self, X, y_action):
        """Encode `X`, compute Q-distributions and roll the latent forward.

        Args:
            X: observations whose first two axes are (batch, seq).
            y_action: integer actions used for the 5-step latent rollout.

        Returns:
            (q, action, projected latents from index 1 on along the seq axis
            (cloned and detached), z_pred).
        """
        X, z = self.encode(X)
        
        
        q, action = self.q_head(X)
        z_pred = self.get_transition(z, y_action)

        # The slice is empty when the input has a single sequence step.
        return q, action, X[:,1:].clone().detach(), z_pred
    

    def env_step(self, X):
        """Return the greedy action for `X` without tracking gradients."""
        with torch.no_grad():
            X, _ = self.encode(X)
            _, action = self.q_head(X)
            
            return action.detach()
    

    def encode(self, X):
        """Encode observations into projected features and spatial latents.

        Side effect: stores the leading two sizes of `X` in `self.batch` and
        `self.seq`; `dueling_dqn` and `get_transition` later reshape with
        `self.batch`.

        Returns:
            (X, z): `X` is the projection of the flattened, renormalised encoder
            output; `z` is a clone of the renormalised encoder output before
            flattening.
        """
        batch, seq = X.shape[:2]
        self.batch = batch
        self.seq = seq
        # Fold (batch, seq) into one axis for the CNN.
        X = self.encoder_cnn(X.contiguous().view(self.batch*self.seq, *(X.shape[2:])))
        X = renormalize(X).contiguous().view(self.batch, self.seq, *X.shape[-3:])
        # NOTE(review): this second view repeats the reshape done on the previous line.
        X = X.contiguous().view(self.batch, self.seq, *X.shape[-3:])
        z = X.clone()
        X = X.flatten(-3,-1)
        X = self.projection(X)
        return X, z

    def get_transition(self, z, action):
        """Roll the spatial latent forward 5 steps under the given actions.

        Each step concatenates the current latent with the one-hot action broadcast
        over the spatial dims, applies `self.transition` and renormalises. The five
        predictions are stacked, projected and passed through `self.prediction`.

        NOTE(review): the horizon (5) is hard-coded, and the final view uses
        `self.batch` from the most recent `encode` call.
        """
        z = z.contiguous().view(-1, *z.shape[-3:])
        
        action = F.one_hot(action.clone(), n_actions).view(-1, n_actions)
        # One constant plane per action, expanded to the latent's spatial size.
        action = action.view(-1, 5, n_actions, 1, 1).expand(-1, 5, n_actions, *z.shape[-2:])

        z_pred = torch.cat( (z, action[:,0]), 1)
        z_pred = self.transition(z_pred)
        z_pred = renormalize(z_pred)
        
        z_preds=[z_pred.clone()]
        

        # Remaining 4 steps feed the previous prediction back in.
        for k in range(4):
            z_pred = torch.cat( (z_pred, action[:,k+1]), 1)
            z_pred = self.transition(z_pred)
            z_pred = renormalize(z_pred)
            
            z_preds.append(z_pred)
        
        
        z_pred = torch.stack(z_preds,1)

        z_pred = self.projection(z_pred.flatten(-3,-1)).view(self.batch,5,-1)
        z_pred = self.prediction(z_pred)
        
        return z_pred

    
    def q_head(self, X):
        """Return (q, action): atom probabilities and the argmax of their expected value."""
        q = self.dueling_dqn(X)
        action = (q*self.support).sum(-1).argmax(-1)
        
        return q, action

    def get_max_action(self, X):
        """Greedy action (argmax of expected value) for raw observations, without gradients."""
        with torch.no_grad():
            X, _ = self.encode(X)
            q = self.dueling_dqn(X)
            
            action = (q*self.support).sum(-1).argmax(-1)
            return action

    def evaluate(self, X, action):
        """Return the atom distribution of `action` for observations `X`, without gradients."""
        with torch.no_grad():
            X, _ = self.encode(X)
            
            q = self.dueling_dqn(X)
            
            # Build an index with a singleton action axis and the atom axis expanded,
            # so gather(-2) selects the distribution of the chosen action.
            action = action[:,:,None,None].expand_as(q)[:,:,0][:,:,None]
            q = q.gather(-2,action)
            
            return q

    def dueling_dqn(self, X):
        """Dueling head: softmax over atoms of v + a - mean_over_actions(a).

        Reshapes with `self.batch`, i.e. the batch size seen by the last `encode`.
        """
        X = F.relu(X)
        
        a = self.a(X).view(self.batch, -1, n_actions, num_buckets)
        v = self.v(X).view(self.batch, -1, 1, num_buckets)
        
        q = v + a - a.mean(-2,keepdim=True)
        q = F.softmax(q,-1)
        
        return q
    
    def network_ema(self, rand_network, target_network, alpha=0.5):
        """Overwrite `target_network` parameters with alpha*target + (1-alpha)*rand."""
        for param, param_target in zip(rand_network.parameters(), target_network.parameters()):
            param_target.data = alpha * param_target.data + (1 - alpha) * param.data.clone()

    def hard_reset(self, random_model, alpha=0.5):
        """Partially reset this network towards `random_model`.

        The encoder and transition model are interpolated with weight `alpha` on the
        current parameters; projection, prediction and both heads are called with
        alpha=0, i.e. fully replaced by `random_model`'s parameters.
        """
        with torch.no_grad():
            
            self.network_ema(random_model.encoder_cnn, self.encoder_cnn, alpha)
            self.network_ema(random_model.transition, self.transition, alpha)

            self.network_ema(random_model.projection, self.projection, 0)
            self.network_ema(random_model.prediction, self.prediction, 0)

            self.network_ema(random_model.a, self.a, 0)
            self.network_ema(random_model.v, self.v, 0)



def copy_states(source, target):
    """Copy Adam-style per-parameter state from optimizer `source` into `target`.

    For every parameter key, `exp_avg_sq`, `exp_avg` and `step` are deep-copied
    from `source`. Keys are taken from `source` and paired with `target`'s keys
    via `zip`, so nothing is copied when `target` has no state yet.

    The writes go into the per-parameter dicts returned inside
    `state_dict()['state']`; like the previous implementation this relies on
    those inner dicts being the optimizer's own state objects.
    """
    # state_dict() rebuilds its outer dictionaries on every call, so build each
    # one once instead of up to six times per parameter.
    source_state = source.state_dict()['state']
    target_state = target.state_dict()['state']

    for key, _ in zip(source_state.keys(), target_state.keys()):
        for field in ('exp_avg_sq', 'exp_avg', 'step'):
            target_state[key][field] = copy.deepcopy(source_state[key][field])
        
def target_model_ema(model, model_target, decay=None):
    """Exponential-moving-average update of `model_target` towards `model`.

    Every target parameter becomes decay * target + (1 - decay) * online.

    Args:
        model: online network providing the new parameter values.
        model_target: network whose parameters are overwritten.
        decay: EMA rate; defaults to the module-level `critic_ema_decay` so
            existing two-argument callers behave as before.
    """
    if decay is None:
        decay = critic_ema_decay

    with torch.no_grad():
        for param, param_target in zip(model.parameters(), model_target.parameters()):
            param_target.data = decay * param_target.data + (1.0 - decay) * param.data.clone()


class EnergyModel(nsd_Module):
    """Time-conditioned dueling distributional head over noisy diffusion latents.

    `forward` concatenates the latent, a timestep embedding and a conditioning
    vector, then produces per-action atom distributions like `DQN.dueling_dqn`.

    NOTE(review): `self.act` and `self.t_dim` are read but never assigned here;
    presumably `nsd_Module` stores constructor arguments as attributes -- confirm.
    The heads use the module-level `n_actions` / `num_buckets` in `forward`.
    """
    def __init__(self, hiddens, n_actions, condition_dim, t_dim=32, Vmin=-10, Vmax=10, n_atoms=51, act=nn.ReLU()):
        super().__init__()
        # Atom locations, created directly on the GPU (not a registered buffer).
        self.support = torch.linspace(Vmin, Vmax, n_atoms).cuda()
        self.t_emb = MLP(t_dim, out_hiddens=t_dim, last_init=init_xavier, out_act=nn.Identity())
        
        # Input width = latent state + action part + time embedding + condition.
        self.a = MLP(hiddens+n_actions+t_dim+condition_dim, out_hiddens=n_actions*num_buckets, layers=1, in_act=self.act, last_init=init_xavier)
        self.v = MLP(hiddens+n_actions+t_dim+condition_dim, out_hiddens=num_buckets, layers=1, in_act=self.act, last_init=init_xavier)
        
        params_count(self, 'Energy Model')
        
    def forward(self, energy, t, condition):
        """Return (q, action) for noisy latents `energy` at timesteps `t`.

        Args:
            energy: latent tensor; its second-to-last axis is the sequence axis
                over which `t` and `condition` are repeated.
            t: timesteps, one per leading entry of `energy`.
            condition: conditioning vector, one per leading entry of `energy`.

        Returns:
            q: softmax over atoms of v + a - mean_over_actions(a).
            action: argmax over actions of the expected value of `q`.
        """
        # The reshape batch is 1 for a single latent, otherwise the *global*
        # `batch_size`; other leading sizes are folded into the second axis.
        self.batch = 1 if energy.shape[0]==1 else batch_size
        t_emb = sinusoidal_embedding(t, self.t_dim)[:,None].repeat_interleave(energy.shape[-2], -2)
        t_emb = self.t_emb(t_emb)
        condition = condition[:,None].repeat_interleave(energy.shape[-2], -2)
        
        X = torch.cat((energy, t_emb, condition), -1)
        X = F.relu(X)
        
        a = self.a(X).view(self.batch, -1, n_actions, num_buckets)
        v = self.v(X).view(self.batch, -1, 1, num_buckets)
        
        
        q = v + a - a.mean(-2,keepdim=True)
        q = F.softmax(q,-1)
        
        action = (q*self.support).sum(-1).argmax(-1)
        
        return q, action

    def hard_reset(self, random_model, alpha=0):
        """Blend the heads and the time embedding with `random_model` via `network_ema`.

        NOTE(review): this calls a module-level `network_ema` (not defined in this
        notebook; presumably from the nosaveddata star import) with `self` first,
        the opposite argument order of `DQN.network_ema` -- confirm which argument
        that helper actually updates.
        """
        with torch.no_grad():
            
            network_ema(self.a, random_model.a, alpha)
            network_ema(self.v, random_model.v, alpha)
            network_ema(self.t_emb, random_model.t_emb, alpha)


class Diffusion(nsd_Module):
    """Noise-prediction network over latent/action sequences, conditioned on s0.

    Wraps a `UNet_DiT_1D` together with an MLP that embeds the conditioning
    state. `self.zero_cond_prob` is read in `forward` but not assigned here;
    presumably `nsd_Module` stores constructor arguments -- TODO confirm.
    """

    def __init__(self, d_z, n_actions, d_model, num_blks=2, seq_len=5,
                 zero_cond_prob=0.25):
        super().__init__()

        heads = d_model // 64
        self.unet = UNet_DiT_1D(d_z + n_actions, d_model, num_blks, heads, seq_len=seq_len)
        self.s0_proj = MLP(d_z, out_hiddens=d_model, last_init=init_xavier, out_act=nn.SiLU())

    def forward(self, X, t, s0):
        """Training pass: zero out s0 for a random subset of rows, then denoise.

        Each row keeps its conditioning with probability 1 - zero_cond_prob.
        """
        keep_prob = torch.ones(s0.shape[0]) * (1 - self.zero_cond_prob)
        keep_mask = torch.bernoulli(keep_prob).cuda()
        return self.denoise(X, t, s0 * keep_mask[:, None])

    def denoise(self, X, t, s0):
        """Run the UNet on `X` at timestep `t`, conditioned on the embedded `s0`."""
        conditioning = self.s0_proj(s0)
        return self.unet(X, t, conditioning)

    def hard_reset(self, random_model, alpha=0.5):
        """Blend the UNet and the s0 projection with `random_model` via `network_ema`."""
        with torch.no_grad():
            for submodule in ('unet', 's0_proj'):
                network_ema(getattr(self, submodule), getattr(random_model, submodule), alpha)




# Online and target Q-networks, the diffusion model over (latent, action) sequences,
# and the energy model that guides sampling. All are moved to the GPU.
model=DQN(n_actions).cuda()
model_target=DQN(n_actions).cuda()
diffusion_model = Diffusion(HIDDENS, n_actions, 512, num_blks=6).cuda()
energy_model = EnergyModel(HIDDENS, n_actions, HIDDENS).cuda()

# Start the target network as an exact copy of the online network.
model_target.load_state_dict(model.state_dict())



# Testing only: shape smoke tests, kept for reference.
#with torch.no_grad():
#    q, action, X, z_pred = model(torch.randn(4,1,12,96,72, device='cuda', dtype=torch.float), torch.randint(0,n_actions,(4,5),device='cuda').long())
#z = model.encode(torch.randn(4,5,12,96,72, device='cuda'))[0]

# I believe the authors miscalculated the parameter count reported in the paper.
# My training time is lower than theirs despite having more parameters, and the
# architecture is the same as in their original code.

IMPALA ResNet Parameters: 1.56M
DQN Parameters: 35.21M
IMPALA ResNet Parameters: 1.56M
DQN Parameters: 35.21M
UNet DIT Parameters: 233.96M
Energy Model Parameters: 1.69M


<All keys matched successfully>

In [4]:
# Parameter groups: "world-model" parts (encoder + transition) and "actor" parts
# (projection / prediction / Q heads). Both end up in the same optimizer.
perception_modules = [model.encoder_cnn, model.transition]
actor_modules = [model.prediction, model.projection, model.a, model.v]

# Only trainable parameters are collected (currently all of them require grad).
params_wm = [
    param
    for module in perception_modules
    for param in module.parameters()
    if param.requires_grad
]
params_ac = [
    param
    for module in actor_modules
    for param in module.parameters()
    if param.requires_grad
]


optimizer = torch.optim.AdamW(
    chain(params_wm, params_ac),
    lr=lr, weight_decay=0.1, eps=1.5e-4,
)
optimizer_diffusion = torch.optim.AdamW(
    diffusion_model.parameters(),
    lr=lr_diffusion, weight_decay=0.,
)
optimizer_energy = torch.optim.AdamW(
    energy_model.parameters(),
    lr=lr_energy, weight_decay=0.,
)

In [5]:
import torchvision.transforms as transforms

# Resize every frame to 96x72 (height x width).
train_tfms = transforms.Compose([transforms.Resize((96, 72))])


def preprocess(state):
    """Turn a batch of channels-last frames into resized float tensors on the GPU.

    The input is converted to a float CUDA tensor, divided by 255, permuted from
    (N, H, W, C) to (N, C, H, W) and passed through `train_tfms`.
    """
    frames = torch.tensor(state, dtype=torch.float, device='cuda') / 255
    channels_first = frames.permute(0, 3, 1, 2)
    return train_tfms(channels_first)

# https://github.com/google/dopamine/blob/master/dopamine/jax/agents/dqn/dqn_agent.py
def linearly_decaying_epsilon(decay_period, step, warmup_steps, epsilon):
    """Linearly anneal exploration from 1.0 down to `epsilon`.

    The value stays at 1.0 for the first `warmup_steps`, then decreases linearly
    over `decay_period` steps until it reaches `epsilon`, where it remains.
    """
    remaining = decay_period + warmup_steps - step
    extra = np.clip((1.0 - epsilon) * remaining / decay_period, 0., 1. - epsilon)
    return epsilon + extra


def epsilon_greedy(states, step, grad_step, actions_to_step, final_eps=0, num_envs=1):
    """Choose the next action(s) to execute in the environment.

    Two regimes:
      * `grad_step >= 5500`: plan with the diffusion model. The current state is
        encoded with `model_target`, a 5-step (latent, action) sequence is sampled
        with `denoise`, and the argmax over the action part of every step is
        returned as a fresh list of tensors (`actions_to_step` is not used).
      * otherwise: epsilon-greedy on `model_target`, with epsilon annealed by
        `linearly_decaying_epsilon(2001, step, 2000, final_eps)`. The chosen
        action is appended to `actions_to_step`, which is returned.

    Args:
        states: iterable of frame tensors that are concatenated along dim -3.
        step: value used for the epsilon schedule.
        grad_step: number of gradient steps taken so far.
        actions_to_step: list that receives the chosen action.
        final_eps: epsilon floor of the schedule.
        num_envs: number of parallel environments.

    Returns:
        A list of int64 action tensors.
    """
    if grad_step>=5500:
        # Acting never back-propagates, so do not record an autograd graph through
        # the encoder and the whole sampling loop. `calculate_guidance` re-enables
        # gradients locally for the one derivative it needs.
        with torch.no_grad():
            s0, _ = model_target.encode(torch.cat(list(states),-3).unsqueeze(0))
            s0 = s0[:,0]

            latent = denoise(s0, std, samples=1)
        latent = latent.view(-1, *latent.shape[-2:])

        # The last n_actions features of each step hold the action part of the latent.
        action = latent[...,-n_actions:].argmax(-1).cpu().squeeze()

        return list(action.to(torch.int64).split(1))

    epsilon = linearly_decaying_epsilon(2001, step, 2000, final_eps)

    if random.random() < epsilon:
        action = torch.randint(0, n_actions, (num_envs,), dtype=torch.int64).squeeze(0)
        actions_to_step.append(action)
    else:
        Q_action = model_target.env_step(torch.cat(list(states),-3).unsqueeze(0))
        action = Q_action.view(num_envs).squeeze(0).to(torch.int64).cpu()
        actions_to_step.append(action)

    return actions_to_step


In [6]:
# Shared diffusers scheduler, used both for adding noise during training and for
# sampling in `denoise`. `denoise` temporarily switches it to fewer timesteps and
# resets it to 1000 afterwards, because the training code indexes
# `noise_scheduler.timesteps` with values in [0, 1000).
#noise_scheduler = EulerAncestralDiscreteScheduler(num_train_timesteps=1000, beta_start=0.00085, beta_end=0.012)
noise_scheduler = DPMSolverMultistepScheduler(num_train_timesteps=1000, beta_start=0.00085, beta_end=0.012, use_karras_sigmas=False, solver_order=2)

def calculate_guidance(latent, t, s0, guidance_scale, samples, t_stopgrad=2, verbose=False):
    """Gradient of the energy model's expected value w.r.t. `latent`.

    The energy model is evaluated on `latent` at timestep `t` conditioned on `s0`.
    The distribution of the action encoded in the latent (argmax over its last
    `n_actions` features) is selected, its expected value under
    `model_target.support` is summed, and the gradient of that scalar with respect
    to the latent is returned, scaled by `guidance_scale`.

    Args:
        latent: tensor to differentiate with respect to; it is not modified.
        t: current scheduler timestep.
        s0: conditioning passed to the energy model.
        guidance_scale: multiplier applied to the gradient.
        samples: unused; kept for call compatibility.
        t_stopgrad: the gradient is zeroed when `t < t_stopgrad`.
        verbose: print the summed expected value (debugging aid, off by default).

    Returns:
        Tensor with the shape of `latent`.
    """
    with torch.enable_grad():
        # Differentiate with respect to a detached copy: this works whether or not
        # the caller runs under no_grad, and it does not leave the caller's tensor
        # flagged as requiring grad.
        latent_in = latent.detach().requires_grad_(True)

        Q_t, _ = energy_model(latent_in, torch.tensor([t]*latent_in.shape[0], device='cuda'), s0)

        # Index with a singleton action axis and the atom axis expanded, so that
        # gather(-2) selects the distribution of the latent's own action.
        latent_action = latent_in[...,-n_actions:].argmax(-1).view(Q_t.shape[0],-1)[...,None,None].expand_as(Q_t)[:,:,0][:,:,None]
        Q_t = Q_t.gather(-2, latent_action)
        if verbose:
            print(f"{(Q_t*model_target.support).sum(-1).sum()}")

        grad = guidance_scale * torch.autograd.grad(torch.sum(Q_t*model_target.support), latent_in)[0]

        # No guidance on the very last denoising steps.
        grad[t<t_stopgrad] = 0
        return grad
    

def denoise(s0, std, inference_timesteps=30, guidance_scale=0.1, samples=1):
    """Sample 5-step (latent, action) sequences from the diffusion model.

    Starts from Gaussian noise of shape (rows, 5, HIDDENS + n_actions) and runs
    `inference_timesteps` scheduler steps conditioned on `s0`. When
    `guidance_scale` is non-zero, the model output is shifted by the energy
    gradient from `calculate_guidance`, scaled by sqrt(1 - alphas_cumprod[t]).

    Args:
        s0: conditioning states; each row is repeated `samples` times.
        std: scale the result is multiplied by (training divides latents by `std`
            before adding noise).
        inference_timesteps: number of sampling steps.
        guidance_scale: strength of the energy guidance; 0 disables it.
        samples: number of samples drawn per row of `s0`.

    Returns:
        The samples times `std`, reshaped to (-1, samples, 5, HIDDENS + n_actions)
        when `samples > 1`, otherwise (rows, 5, HIDDENS + n_actions).
    """
    s0 = s0.repeat_interleave(samples, 0)

    z = torch.randn(s0.shape[0], 5, HIDDENS+n_actions, device='cuda') * noise_scheduler.init_noise_sigma

    noise_scheduler.set_timesteps(inference_timesteps)
    try:
        for t in noise_scheduler.timesteps:
            z_input = noise_scheduler.scale_model_input(z, t)
            model_output = diffusion_model.denoise(z_input, torch.tensor([t]*z.shape[0], device='cuda'), s0)

            if guidance_scale!=0:
                sched_scale = (1-noise_scheduler.alphas_cumprod[t]).sqrt()
                model_output = model_output + sched_scale*calculate_guidance(model_output, t, s0, guidance_scale, samples)

            # Compute the previous sample: x_t -> x_{t-1}
            z = noise_scheduler.step(
                    model_output, t, z, return_dict=False
                )[0]
    finally:
        # The training code indexes noise_scheduler.timesteps with values in
        # [0, 1000), so always restore the full schedule -- even if sampling failed.
        noise_scheduler.set_timesteps(1000)

    return (z.view(-1,samples,*z.shape[1:]) if samples>1 else z)*std




def train_diffusion(latent, s0, same_traj, std, is_w, logs):
    """Run one noise-prediction training step of the diffusion model.

    A random timestep is drawn per row, `latent / std` is noised with the shared
    scheduler, and the model is trained with a squared error against the noise
    that was added. `same_traj` and `is_w` are accepted but not used.

    Returns:
        `logs` with the detached loss stored under 'diffusion loss'.
    """
    diffusion_model.train()

    # Autocast is explicitly disabled; the context is kept as a switch.
    with torch.autocast(device_type='cuda', dtype=torch.bfloat16, enabled=False):
        timestep_idx = torch.randint(0, 1000, (latent.shape[0],), device='cuda')
        noise = torch.randn_like(latent)
        timesteps = noise_scheduler.timesteps.cuda()[timestep_idx]

        noisy_latent = noise_scheduler.add_noise(latent / std, noise, timesteps)
        noise_pred = diffusion_model(noisy_latent, timesteps, s0)

        # Sum the squared error over dim -2, average the rest per row, then over rows.
        per_row = mse(noise_pred, noise).sum(-2).view(batch_size, -1).mean(-1)
        loss = per_row.mean()

    loss.backward()
    torch.nn.utils.clip_grad_norm_(diffusion_model.parameters(), max_norm=10)
    optimizer_diffusion.step()
    optimizer_diffusion.zero_grad()

    logs['diffusion loss'] = loss.detach().cpu()
    return logs


def train_energy(latent, s0, same_traj, std, is_w, logs):
    """Run one training step of the energy model.

    For every state, M sequences are sampled from the (unguided) diffusion model
    and scored with the target Q-network. A softmax over the M scores (scaled by
    `energy_beta`) is the target distribution; the energy model, evaluated on noised
    versions of the same samples, is trained with a cross-entropy against it.

    The incoming `latent` is overwritten by the sampled latents right away;
    `same_traj` and `is_w` are accepted but not used.

    Returns:
        `logs` with the detached loss stored under 'energy'.
    """
    
    #diffusion_model.eval()
    # Autocast is explicitly disabled; the context is kept as a switch.
    with torch.autocast(device_type='cuda', dtype=torch.bfloat16, enabled=False):
        with torch.no_grad():
            
            # M unguided samples per state; flattened to (batch*M, 5, features).
            latent = denoise(s0, std=std, guidance_scale=0, samples=M)
            #latent = denoise(s0, guidance_scale=0, samples=M)
            latent = latent.view(-1, *latent.shape[-2:])
            s0=s0.repeat_interleave(M,0)
            
            #energy, action = model_target.q_head(latent, alpha=3, seq=SEQ_LEN-1)
            # Score the state part of the samples with the target Q head.
            # NOTE(review): q_head reshapes with model_target.batch from its last
            # encode() call -- presumably batch_size here; confirm.
            energy, action = model_target.q_head(latent[...,:HIDDENS])
            # Action encoded in each sampled step (argmax over the action features),
            # shaped as a gather index over the action axis.
            latent_action = latent[...,-n_actions:].argmax(-1).view(batch_size,-1)[...,None,None].expand_as(energy)[:,:,0][:,:,None]
            
            # NOTE(review): this greedy-action index is not used below.
            action = action[:,:,None,None].expand_as(energy)[:,:,0][:,:,None]
            energy = energy.gather(-2,latent_action)
            # Expected value per step, summed over the steps of each sample.
            energy = (energy*model_target.support).sum(-1).view(batch_size,M,-1).sum(-1)
            
            # Target distribution over the M samples of each state.
            energy = F.softmax(energy*energy_beta, -1)
    
        
        
        # One random timestep per state, shared by its M samples.
        t = torch.randint(0,1000, (batch_size,), device='cuda')[:,None].repeat_interleave(M, 0)
        
        latent = latent.view(batch_size*M, 5, -1)
        noise = torch.randn_like(latent)
        t = noise_scheduler.timesteps.cuda()[t]
        
        perturbed_latent = noise_scheduler.add_noise(latent/std, noise, t)
        
        energy_pred, action = energy_model(perturbed_latent, t.squeeze(), s0)#.squeeze()
        
        # Same action selection as above, taken from the clean samples.
        latent_action = latent[...,-n_actions:].argmax(-1).view(batch_size,-1)[...,None,None].expand_as(energy_pred)[:,:,0][:,:,None]
        energy_pred = energy_pred.gather(-2,latent_action)#.view(batch_size, M, -1).sum(-1)
        energy_pred = (energy_pred*model_target.support).sum(-1).view(batch_size, M, -1).sum(-1)
        
        # NOTE(review): unlike the target, no energy_beta scaling is applied here.
        energy_pred = F.softmax(energy_pred, -1)
        
        # Cross-entropy over the M samples; one value per state after the sum.
        loss = -(energy*torch.log(energy_pred+eps)).sum(-1) # (bz,)
        loss = loss.mean()
        #loss = (loss*is_w).mean()
    
    loss.backward()
    #scaler_energy.scale(loss).backward()
    #scaler_energy.unscale_(optimizer_energy)
    #torch.nn.utils.clip_grad_norm_(energy_model.parameters(), max_norm=10)
    #scaler_energy.step(optimizer_energy)
    #scaler_energy.update()
    optimizer_energy.step()
    optimizer_energy.zero_grad()

    
    logs['energy'] = loss.detach().cpu()
    return logs   

In [7]:
# https://github.com/google/dopamine/blob/master/dopamine/jax/agents/rainbow/rainbow_agent.py
def project_distribution(supports, weights, target_support):
    """Project a categorical distribution onto a fixed, evenly spaced support.

    Implements Eq. 7 of the C51 paper (https://arxiv.org/abs/1707.06887), following
    the Dopamine Rainbow agent.

    Args:
        supports: (batch, n) atom locations of the source distribution.
        weights: (batch, n) probabilities attached to `supports`.
        target_support: (N,) evenly spaced atom locations to project onto.

    Returns:
        The projected probabilities, (batch, N) with singleton dimensions squeezed
        out. Computed under `torch.no_grad()`.
    """
    with torch.no_grad():
        v_min, v_max = target_support[0], target_support[-1]
        # `N` in Eq7.
        num_dims = target_support.shape[-1]
        # delta_z = `\Delta z` in Eq7. Derived from the support that was passed in,
        # not from a module-level atom count, so any support length works.
        delta_z = (v_max - v_min) / (num_dims - 1)
        # clipped_support = `[\hat{T}_{z_j}]^{V_max}_{V_min}` in Eq7.
        clipped_support = supports.clip(v_min, v_max)
        # numerator = `|clipped_support - z_i|` in Eq7; broadcasts to (batch, N, n).
        numerator = (clipped_support[:, None] - target_support[None, :, None]).abs()
        quotient = 1 - (numerator / delta_z)
        # clipped_quotient = `[1 - numerator / (\Delta z)]_0^1` in Eq7.
        clipped_quotient = quotient.clip(0, 1)
        # inner_prod = `\sum_{j=0}^{N-1} clipped_quotient * p_j(x', \pi(x'))` in Eq7.
        inner_prod = (clipped_quotient * weights[:, None]).sum(-1)
        return inner_prod.squeeze()


# Element-wise squared error; the callers do their own reductions.
mse = torch.nn.MSELoss(reduction='none')
# Running scale estimate updated inside `optimize` (declared `global` there) and
# used to divide latents before noising / multiply samples after denoising.
std = 1

# NOTE(review): every use of this GradScaler below is commented out.
scaler = torch.cuda.amp.GradScaler()
def optimize(step, grad_step, n):
    """Run one gradient step of the agent and log the metrics to wandb.

    Samples a prioritised batch, builds the n-step distributional target with the
    target network (double-Q action selection by the online network), adds the
    latent self-prediction loss, updates the DQN, refreshes the replay priorities,
    and -- after warm-up thresholds on `grad_step` -- also trains the diffusion and
    energy models on the target network's latents.

    Args:
        step: not used inside this function.
        grad_step: number of gradient steps so far; drives the gamma schedule and
            the diffusion/energy warm-up thresholds.
        n: n-step return horizon for this update.
    """
    global std
    
    model.train()
    model_target.train()

    # Autocast is explicitly disabled; the context is kept as a switch.
    with torch.autocast(device_type='cuda', dtype=torch.bfloat16, enabled=False):
        with torch.no_grad():
            states, next_states, rewards, action, c_flag, idxs, is_w = memory.sample(n, batch_size, grad_step)
            # Exponential running average of the batch statistic used as `std`.
            # NOTE(review): this is computed from the sampled `states`, not from
            # the latents it later rescales -- confirm that is intended.
            std = 0.99*std + 0.01*states.std(-1).mean()
            # Keep the untouched action sequence for the diffusion latents below.
            diffusion_actions = action
        terminal=1-c_flag
    
    
        # Online pass on the first state only, plus the 5-step latent rollout.
        q, max_action, _, z_pred = model(states[:,0][:,None], action[:,:5].long())
        # Target latents for the first 6 states (index 0 and the 5 rollout targets).
        z = model_target.encode(states[:,:6])[0].detach()
        
        
        # Double Q-learning: online net picks the action, target net evaluates it.
        max_action  = model.get_max_action(next_states[:,n-1][:,None])
        next_values = model_target.evaluate(next_states[:,n-1][:,None].contiguous(), max_action)
        

        # Gather index selecting the taken action's distribution (all atoms).
        action = action[:,0,None].expand(batch_size,num_buckets)
        action=action[:,None]
        with torch.no_grad():
            gammas_one=torch.ones(batch_size,n,1,dtype=torch.float,device='cuda')
            # Interpolate log(1-gamma) from initial_gamma to final_gamma over
            # schedule_max_step gradient steps, then map back to gamma.
            gamma_step = 1-torch.tensor(( (schedule_max_step - min(grad_step, schedule_max_step)) / schedule_max_step) * (initial_gamma-final_gamma) + final_gamma).exp()
            gammas=gammas_one*gamma_step

            
            # Discounted reward sums starting at every offset t < n; the running
            # product of c_flag drops rewards past a discontinuity.
            returns = []
            for t in range(n):
                ret = 0
                for u in reversed(range(t, n)):
                    ret += torch.prod(c_flag[:,t+1:u+1],-2)*torch.prod(gammas[:,t:u],-2)*rewards[:,u+1]
                returns.append(ret)
            returns = torch.stack(returns,1)
        
        # Logged only.
        plot_vs = returns.clone().sum(-1)
        
        # 1 when the whole n-step window stays inside one trajectory.
        same_traj = (torch.prod(c_flag[:,:n],-2)).squeeze()
        
        # Target atoms: n-step return + (product of the first min(n, 10) gammas) *
        # support, with the bootstrap term masked out when the window crosses a
        # trajectory boundary.
        returns = returns[:,0]
        returns = returns + torch.prod(gammas[0,:10],-2).squeeze()*same_traj[:,None]*model.support[None,:]
        returns = returns.squeeze()
        
        next_values = next_values[:,0]

        log_probs = torch.log(q[:,0].gather(-2, action)[:,None] + eps).contiguous()
        
        
        # Project the shifted target distribution back onto the fixed support.
        dist = project_distribution(returns, next_values.squeeze(), model.support)
        
        # Cross-entropy between projected target and predicted distribution.
        loss = -(dist*(log_probs.squeeze())).sum(-1).view(batch_size,-1).sum(-1)
        dqn_loss = loss.clone().mean()
        # Cross-entropy minus target entropy, i.e. the KL divergence (logged only).
        td_error = (loss + torch.nan_to_num((dist*torch.log(dist))).sum(-1)).mean()

        
        # Per-sample DQN loss, used for the replay priorities below.
        batched_loss = loss.clone()
        
        
        # Self-predictive loss between L2-normalised predicted and target latents.
        z_tgt = F.normalize(z[:,1:], 2, dim=-1, eps=1e-5)
        z_pred = F.normalize(z_pred, 2, dim=-1, eps=1e-5)

        
        recon_loss = (mse(z_pred.contiguous().view(-1,HIDDENS), z_tgt.contiguous().view(-1,HIDDENS))).sum(-1)
        recon_loss = 5*(recon_loss.view(batch_size, -1).mean(-1))*same_traj
        
        
        loss += recon_loss
        
        # Importance-sampling weights from the prioritised replay.
        loss = (loss*is_w).mean() # mean across batch axis

    loss.backward()

    param_norm, grad_norm = params_and_grad_norm(model)
    #scaler.scale(loss).backward()
    #scaler.unscale_(optimizer)
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
    #scaler.step(optimizer)
    #scaler.update()
    
    optimizer.step()
    optimizer.zero_grad()
    
    #memory.set_priority(idxs, batched_loss)
    memory.set_priority(idxs, batched_loss, same_traj)
    

    lr = optimizer.param_groups[0]['lr']
    
    logs = {'loss': loss, 'dqn_loss': dqn_loss, 'recon_loss': recon_loss.mean(), 'lr': lr, 'returns': plot_vs.mean(),
               'buffer rewards': rewards.mean(0).sum(), 'is_w': is_w.mean(),
               'gamma': gamma_step, 'td_error': td_error, 'param_norm': param_norm.sum(), 'grad_norm': grad_norm.sum()}

    # Diffusion latent = target-network latents of the first 5 states concatenated
    # with the one-hot actions taken at those states.
    z = z[:,:5]
    diffusion_actions = F.one_hot(diffusion_actions[:,:5], n_actions)
    latent = torch.cat((z, diffusion_actions), -1)

    # Warm-up: the diffusion model starts training after 1500 gradient steps and
    # the energy model after 3000.
    if grad_step>1500:
        logs = train_diffusion(latent, z[:,0], same_traj, std, is_w, logs)
    if grad_step>3000:
        logs = train_energy(latent, z[:,0], same_traj, std, is_w, logs)

    
    wandb.log(logs)




# Training-loop state, initialised before the loop cell runs.
scores=[]
memory.free()  # replay-buffer method from utils.experience_replay; presumably clears it -- TODO confirm
step=0         # environment-step counter
#model.share_memory()
grad_step=0    # gradient-step counter passed to `optimize` / `epsilon_greedy`

In [14]:
import torch
import math

# Scratch check: floor(n_sim / (m * log2(m))) for m=8, n_sim=16.
n_sim = 16
m = torch.tensor(8)

j = math.floor(n_sim / (m * torch.log2(m)))

j

0

In [33]:
# Scratch check: flag steps that fall in the first 40% or the last 20% of a
# reset cycle.
reset_every = 40000
idx = torch.tensor([24000, 4000, 35000, 20000, 16000, 15000, 41000, 55000, 56000, 9000, 96000])

# Position of every index inside its reset cycle.
steps = idx % reset_every
early = steps < reset_every*0.4
late = steps > reset_every*0.8
mask = (early | late).float()
mask

tensor([0., 1., 1., 0., 0., 1., 1., 1., 0., 1., 0.])

In [37]:
import torch

# Scratch check: arg-max over the last axis of three rows of 7 values
# (each row sums to ~1, so they look like probability vectors).
# The device falls back to the CPU when CUDA is unavailable, so this standalone
# sanity check also runs on a machine without a GPU; on a CUDA machine the
# tensor is still created on cuda:0 exactly as before.
a = torch.tensor(
    [[0.0945, 0.4497, 0.1342, 0.0141, 0.2003, 0.0612, 0.0460],
     [0.0865, 0.5353, 0.1180, 0.0130, 0.1491, 0.0535, 0.0445],
     [0.1096, 0.4248, 0.1380, 0.0168, 0.1932, 0.0678, 0.0498]],
    device='cuda:0' if torch.cuda.is_available() else 'cpu',
)

a.argmax(-1)

tensor([1, 1, 1], device='cuda:0')

In [8]:
# ---------------------------------------------------------------------------
# Main interaction / training loop.
#
# Steps the environment until `total_steps` environment steps have been taken.
# Every step it: picks an action, stores a transition in the replay memory,
# steps the environment, runs two gradient updates once the memory holds more
# than 2000 entries, periodically checkpoints, and re-initialises the networks
# whenever `grad_step` exceeds `reset_every`.
#
# Relies on names defined in earlier cells: env, preprocess, memory, model,
# model_target, diffusion_model, energy_model, optimizer, epsilon_greedy,
# optimize, target_model_ema, save_checkpoint, copy_states, scores, grad_step.
# ---------------------------------------------------------------------------
step = 0

# The context manager closes the progress bar even when the run is interrupted
# or raises; `progress_bar` stays bound as a global afterwards, as before.
with tqdm.tqdm(total=total_steps) as progress_bar:

    # The inner loop only exits once step >= total_steps, so this outer loop
    # body runs a single time per execution of the cell.
    while step < 10:
        state, info = env.reset()
        state = preprocess(state)

        # Frame stack: start with four references to the first preprocessed frame.
        states = deque(maxlen=4)
        for i in range(4):
            states.append(state)

        # Queue of actions still to be executed; refilled by epsilon_greedy.
        actions_to_step = []

        # Running sum of raw (unclipped) rewards since the last finished episode.
        eps_reward = torch.tensor([0], dtype=torch.float)

        # Values stored with the first transition, before any env.step has run.
        reward = np.array([0])
        done_flag = np.array([False])
        terminated = np.array([False])

        last_lives = np.array([0])
        life_loss = np.array([0])
        resetted = np.array([0])

        # NOTE(review): not read anywhere in this cell - confirm it is unused elsewhere.
        last_grad_update = 0
        while step < total_steps:
            progress_bar.update(1)
            model_target.train()

            # Buffer length *before* this step's push; reused below as an index.
            len_memory = len(memory)

            #if resetted[0]>0:
            #    states = env.noop_steps(states)

            # Only query the policy when the action queue has run dry.
            if len(actions_to_step) == 0:
                actions_to_step = epsilon_greedy(torch.cat(list(states), -3).unsqueeze(0), len_memory, grad_step, actions_to_step)
            action = actions_to_step.pop(0).squeeze()

            # Pushed before env.step: the current frame stack and chosen action are
            # stored together with the reward / done / life-loss values produced by
            # the previous step.
            memory.push(torch.cat(list(states), -3).detach().cpu(), torch.tensor(reward, dtype=torch.float), action,
                        torch.tensor(np.logical_or(done_flag, life_loss), dtype=torch.bool))

            state, reward, terminated, truncated, info = env.step([action.numpy()])
            state = preprocess(state)
            states.append(state)

            # Accumulate the raw reward for logging, then clip what gets stored.
            eps_reward += reward
            reward = reward.clip(-1, 1)

            done_flag = np.logical_or(terminated, truncated)
            lives = info['lives']
            life_loss = (last_lives - lives).clip(min=0)   # > 0 when the life count dropped
            resetted = (lives - last_lives).clip(min=0)    # > 0 when the life count went up
            last_lives = lives

            # `n` (forwarded to optimize) is annealed geometrically from initial_n to
            # final_n over the first schedule_max_step gradient steps. int() already
            # yields a plain Python int, so no numpy round-trip is needed.
            n = int(initial_n * (final_n / initial_n) ** (min(grad_step, schedule_max_step) / schedule_max_step))

            # NOTE(review): presumably index len_memory is the slot of the transition
            # pushed above, which receives the current maximum priority - confirm
            # against the replay-buffer implementation.
            memory.priority[len_memory] = memory.max_priority()

            # Two gradient updates (each followed by a target EMA update) per
            # environment step once the buffer holds more than 2000 entries.
            if len_memory > 2000:
                for i in range(2):
                    optimize(step, grad_step, n)
                    target_model_ema(model, model_target)
                    grad_step += 1

            if ((step + 1) % 10000) == 0:
                save_checkpoint(model, model_target, optimizer, step,
                                'checkpoints/atari_last.pth')

            # Periodic reset, driven by the gradient-step counter (not env steps).
            if grad_step > reset_every:
                print('Reseting on step', step, grad_step)

                # Each network is handed a freshly constructed network of the same
                # class; what exactly gets re-initialised is decided by hard_reset.
                random_model = DQN(n_actions).cuda()
                model.hard_reset(random_model)

                random_model = DQN(n_actions).cuda()
                model_target.hard_reset(random_model)

                random_model = Diffusion(n_actions).cuda()
                diffusion_model.hard_reset(random_model)

                random_model = EnergyModel(n_actions).cuda()
                energy_model.hard_reset(random_model)

                # NOTE(review): this re-seeds with the same SEED at every reset - confirm intended.
                seed_np_torch(SEED)

                random_model = None   # drop the reference to the temporary network
                grad_step = 0

                actor_modules = [model.prediction, model.projection, model.a, model.v]
                params_ac = []
                for module in actor_modules:
                    for param in module.parameters():
                        params_ac.append(param)

                perception_modules = [model.encoder_cnn, model.transition]
                params_wm = []
                for module in perception_modules:
                    for param in module.parameters():
                        params_wm.append(param)

                # Rebuild the main optimizer over perception + actor parameters.
                # NOTE(review): presumably the two copy_states calls carry the optimizer
                # state of the perception parameters across the rebuild via
                # optimizer_aux - confirm copy_states' argument order.
                optimizer_aux = torch.optim.AdamW(params_wm, lr=lr, weight_decay=0.1, eps=1.5e-4)
                copy_states(optimizer, optimizer_aux)
                optimizer = torch.optim.AdamW(chain(params_wm, params_ac),
                                    lr=lr, weight_decay=0.1, eps=1.5e-4)
                copy_states(optimizer_aux, optimizer)

                # NOTE(review): the diffusion model is reset above, but only the energy
                # optimizer is rebuilt in this cell - confirm that is intentional.
                optimizer_energy = torch.optim.AdamW(energy_model.parameters(),
                                    lr=lr_energy, weight_decay=0.)

            step += 1

            # Indices of environments whose episode finished on this step.
            log_t = done_flag.astype(float).nonzero()[0]

            if len(log_t) > 0:
                for log in log_t:
                    wandb.log({'eps_reward': eps_reward[log].sum()})
                    scores.append(eps_reward[log].clone())
                eps_reward[log_t] = 0

# Final checkpoint; only reached when the loop above finishes without raising.
save_checkpoint(model, model_target, optimizer, step, f'checkpoints/{env_name}_diff.pth')


  gamma_step = 1-torch.tensor(( (schedule_max_step - min(grad_step, schedule_max_step)) / schedule_max_step) * (initial_gamma-final_gamma) + final_gamma).exp()
  y = torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=self.dropout if self.training else 0, is_causal=is_causal)
  3%|▎         | 3503/102000 [07:00<17:02:38,  1.61it/s]





  3%|▎         | 3504/102000 [07:03<31:32:36,  1.15s/it]





  3%|▎         | 3505/102000 [07:05<41:54:56,  1.53s/it]





  3%|▎         | 3506/102000 [07:07<49:12:55,  1.80s/it]





  3%|▎         | 3507/102000 [07:10<54:23:03,  1.99s/it]





  3%|▎         | 3508/102000 [07:12<58:09:04,  2.13s/it]





  3%|▎         | 3509/102000 [07:15<60:25:33,  2.21s/it]





  3%|▎         | 3510/102000 [07:17<62:14:57,  2.28s/it]





  3%|▎         | 3511/102000 [07:20<63:16:28,  2.31s/it]





  3%|▎         | 3512/102000 [07:22<64:16:47,  2.35s/it]





  3%|▎         | 3513/102000 [07:24<64:55:04,  2.37s/it]





  3%|▎         | 3514/102000 [07:27<65:21:43,  2.39s/it]





  3%|▎         | 3515/102000 [07:29<65:21:13,  2.39s/it]





  3%|▎         | 3516/102000 [07:32<65:41:54,  2.40s/it]





  3%|▎         | 3517/102000 [07:34<65:57:17,  2.41s/it]





  3%|▎         | 3518/102000 [07:37<65:53:11,  2.41s/it]





  3%|▎         | 3519/102000 [07:39<65:41:45,  2.40s/it]





  3%|▎         | 3520/102000 [07:41<66:06:00,  2.42s/it]





  3%|▎         | 3521/102000 [07:44<66:07:36,  2.42s/it]





  3%|▎         | 3522/102000 [07:46<66:03:05,  2.41s/it]





  3%|▎         | 3523/102000 [07:49<66:11:40,  2.42s/it]





  3%|▎         | 3524/102000 [07:51<66:19:14,  2.42s/it]





  3%|▎         | 3525/102000 [07:54<66:33:58,  2.43s/it]





  3%|▎         | 3526/102000 [07:56<66:20:30,  2.43s/it]





  3%|▎         | 3527/102000 [07:58<66:15:38,  2.42s/it]





  3%|▎         | 3528/102000 [08:01<66:26:01,  2.43s/it]





  3%|▎         | 3529/102000 [08:03<66:29:34,  2.43s/it]





  3%|▎         | 3530/102000 [08:06<66:29:41,  2.43s/it]





  3%|▎         | 3531/102000 [08:08<66:28:51,  2.43s/it]





  3%|▎         | 3532/102000 [08:10<66:30:14,  2.43s/it]





  3%|▎         | 3533/102000 [08:13<65:59:58,  2.41s/it]





  3%|▎         | 3534/102000 [08:15<66:03:45,  2.42s/it]





  3%|▎         | 3535/102000 [08:18<65:56:22,  2.41s/it]





  3%|▎         | 3536/102000 [08:20<66:00:14,  2.41s/it]





  3%|▎         | 3537/102000 [08:23<66:16:02,  2.42s/it]





  3%|▎         | 3538/102000 [08:25<66:11:53,  2.42s/it]





  3%|▎         | 3539/102000 [08:27<66:21:39,  2.43s/it]





  3%|▎         | 3540/102000 [08:30<66:36:37,  2.44s/it]





  3%|▎         | 3541/102000 [08:32<66:26:24,  2.43s/it]





  3%|▎         | 3542/102000 [08:35<66:34:37,  2.43s/it]





  3%|▎         | 3543/102000 [08:37<66:43:19,  2.44s/it]





  3%|▎         | 3544/102000 [08:40<66:02:29,  2.41s/it]





  3%|▎         | 3545/102000 [08:42<66:19:15,  2.43s/it]





  3%|▎         | 3546/102000 [08:44<66:16:37,  2.42s/it]





  3%|▎         | 3547/102000 [08:47<66:18:47,  2.42s/it]





  3%|▎         | 3548/102000 [08:49<66:24:38,  2.43s/it]





  3%|▎         | 3549/102000 [08:52<66:37:51,  2.44s/it]





  3%|▎         | 3550/102000 [08:54<66:29:29,  2.43s/it]





  3%|▎         | 3551/102000 [08:57<66:28:30,  2.43s/it]





  3%|▎         | 3552/102000 [08:59<66:28:42,  2.43s/it]





  3%|▎         | 3553/102000 [09:01<66:25:10,  2.43s/it]





  3%|▎         | 3554/102000 [09:04<66:28:25,  2.43s/it]





  3%|▎         | 3555/102000 [09:06<66:36:09,  2.44s/it]





  3%|▎         | 3556/102000 [09:09<66:43:36,  2.44s/it]





  3%|▎         | 3557/102000 [09:11<66:39:04,  2.44s/it]





  3%|▎         | 3558/102000 [09:14<66:31:43,  2.43s/it]





  3%|▎         | 3559/102000 [09:16<66:43:40,  2.44s/it]





  3%|▎         | 3560/102000 [09:19<66:45:40,  2.44s/it]





  3%|▎         | 3561/102000 [09:21<66:44:04,  2.44s/it]





  3%|▎         | 3562/102000 [09:23<66:44:28,  2.44s/it]





  3%|▎         | 3563/102000 [09:26<66:57:32,  2.45s/it]





  3%|▎         | 3564/102000 [09:28<66:55:26,  2.45s/it]





  3%|▎         | 3565/102000 [09:31<66:22:52,  2.43s/it]





  3%|▎         | 3566/102000 [09:33<66:30:58,  2.43s/it]





  3%|▎         | 3567/102000 [09:36<66:35:35,  2.44s/it]





  3%|▎         | 3568/102000 [09:38<66:42:56,  2.44s/it]





  3%|▎         | 3569/102000 [09:40<66:30:34,  2.43s/it]





  4%|▎         | 3570/102000 [09:43<66:12:28,  2.42s/it]





  4%|▎         | 3571/102000 [09:45<66:09:54,  2.42s/it]





  4%|▎         | 3572/102000 [09:48<66:18:11,  2.43s/it]





  4%|▎         | 3573/102000 [09:50<66:16:33,  2.42s/it]





  4%|▎         | 3574/102000 [09:53<66:13:36,  2.42s/it]





  4%|▎         | 3575/102000 [09:55<66:28:41,  2.43s/it]





  4%|▎         | 3576/102000 [09:57<66:40:00,  2.44s/it]





  4%|▎         | 3577/102000 [10:00<66:34:45,  2.44s/it]





  4%|▎         | 3578/102000 [10:02<66:29:37,  2.43s/it]





  4%|▎         | 3579/102000 [10:05<66:37:18,  2.44s/it]





  4%|▎         | 3580/102000 [10:07<66:38:38,  2.44s/it]





  4%|▎         | 3581/102000 [10:10<66:42:57,  2.44s/it]





  4%|▎         | 3582/102000 [10:12<66:45:36,  2.44s/it]





  4%|▎         | 3583/102000 [10:14<66:38:25,  2.44s/it]





  4%|▎         | 3584/102000 [10:17<66:27:07,  2.43s/it]





  4%|▎         | 3585/102000 [10:19<66:32:09,  2.43s/it]





  4%|▎         | 3586/102000 [10:22<66:33:41,  2.43s/it]





  4%|▎         | 3587/102000 [10:24<66:42:13,  2.44s/it]





  4%|▎         | 3588/102000 [10:27<66:49:16,  2.44s/it]





  4%|▎         | 3589/102000 [10:29<66:41:25,  2.44s/it]





  4%|▎         | 3590/102000 [10:32<66:31:00,  2.43s/it]





  4%|▎         | 3591/102000 [10:34<66:23:14,  2.43s/it]





  4%|▎         | 3592/102000 [10:36<66:28:16,  2.43s/it]





  4%|▎         | 3593/102000 [10:39<66:34:08,  2.44s/it]





  4%|▎         | 3594/102000 [10:41<66:11:11,  2.42s/it]





  4%|▎         | 3595/102000 [10:44<66:05:38,  2.42s/it]





  4%|▎         | 3596/102000 [10:46<66:07:25,  2.42s/it]





  4%|▎         | 3597/102000 [10:48<65:59:13,  2.41s/it]





  4%|▎         | 3598/102000 [10:51<66:08:34,  2.42s/it]





  4%|▎         | 3599/102000 [10:53<66:11:16,  2.42s/it]





  4%|▎         | 3600/102000 [10:56<66:15:13,  2.42s/it]





  4%|▎         | 3601/102000 [10:58<66:17:58,  2.43s/it]





  4%|▎         | 3602/102000 [11:01<65:53:36,  2.41s/it]





  4%|▎         | 3603/102000 [11:03<66:01:47,  2.42s/it]





  4%|▎         | 3604/102000 [11:05<66:13:56,  2.42s/it]





  4%|▎         | 3605/102000 [11:08<66:13:45,  2.42s/it]





  4%|▎         | 3606/102000 [11:10<66:14:13,  2.42s/it]





  4%|▎         | 3607/102000 [11:13<65:49:16,  2.41s/it]





  4%|▎         | 3608/102000 [11:15<65:55:42,  2.41s/it]





  4%|▎         | 3609/102000 [11:17<65:55:44,  2.41s/it]





  4%|▎         | 3610/102000 [11:20<65:56:03,  2.41s/it]





  4%|▎         | 3611/102000 [11:22<66:14:02,  2.42s/it]





  4%|▎         | 3612/102000 [11:25<66:14:18,  2.42s/it]





  4%|▎         | 3613/102000 [11:27<65:57:59,  2.41s/it]





  4%|▎         | 3614/102000 [11:30<65:53:20,  2.41s/it]





  4%|▎         | 3615/102000 [11:32<65:44:41,  2.41s/it]





  4%|▎         | 3616/102000 [11:34<65:54:34,  2.41s/it]





  4%|▎         | 3617/102000 [11:37<66:02:11,  2.42s/it]





  4%|▎         | 3618/102000 [11:39<66:02:37,  2.42s/it]





  4%|▎         | 3619/102000 [11:42<66:10:33,  2.42s/it]





  4%|▎         | 3620/102000 [11:44<66:13:48,  2.42s/it]





  4%|▎         | 3621/102000 [11:47<66:26:24,  2.43s/it]





  4%|▎         | 3622/102000 [11:49<66:29:30,  2.43s/it]





  4%|▎         | 3623/102000 [11:51<66:27:11,  2.43s/it]





  4%|▎         | 3624/102000 [11:54<66:00:17,  2.42s/it]





  4%|▎         | 3625/102000 [11:56<66:10:25,  2.42s/it]





  4%|▎         | 3626/102000 [11:59<65:59:22,  2.41s/it]





  4%|▎         | 3627/102000 [12:01<65:54:21,  2.41s/it]





  4%|▎         | 3628/102000 [12:03<65:49:37,  2.41s/it]





  4%|▎         | 3629/102000 [12:06<66:10:40,  2.42s/it]





  4%|▎         | 3630/102000 [12:08<66:12:21,  2.42s/it]





  4%|▎         | 3631/102000 [12:11<66:21:42,  2.43s/it]





  4%|▎         | 3632/102000 [12:13<66:03:27,  2.42s/it]





  4%|▎         | 3633/102000 [12:16<66:08:11,  2.42s/it]





  4%|▎         | 3634/102000 [12:18<66:16:28,  2.43s/it]





  4%|▎         | 3635/102000 [12:20<65:56:10,  2.41s/it]





  4%|▎         | 3636/102000 [12:23<65:53:46,  2.41s/it]





  4%|▎         | 3637/102000 [12:25<65:52:30,  2.41s/it]





  4%|▎         | 3638/102000 [12:28<66:11:34,  2.42s/it]





  4%|▎         | 3639/102000 [12:30<66:26:45,  2.43s/it]





  4%|▎         | 3640/102000 [12:33<66:31:38,  2.43s/it]





  4%|▎         | 3641/102000 [12:35<66:41:21,  2.44s/it]





  4%|▎         | 3642/102000 [12:37<66:04:30,  2.42s/it]





  4%|▎         | 3643/102000 [12:40<66:00:34,  2.42s/it]





  4%|▎         | 3644/102000 [12:42<65:58:19,  2.41s/it]





  4%|▎         | 3645/102000 [12:45<66:18:53,  2.43s/it]





  4%|▎         | 3646/102000 [12:47<66:11:46,  2.42s/it]





  4%|▎         | 3647/102000 [12:50<66:23:16,  2.43s/it]





  4%|▎         | 3648/102000 [12:52<66:28:53,  2.43s/it]





  4%|▎         | 3649/102000 [12:54<66:32:21,  2.44s/it]





  4%|▎         | 3650/102000 [12:57<66:33:56,  2.44s/it]





  4%|▎         | 3651/102000 [12:59<66:44:41,  2.44s/it]





  4%|▎         | 3652/102000 [13:02<66:46:05,  2.44s/it]





  4%|▎         | 3653/102000 [13:04<66:57:30,  2.45s/it]





  4%|▎         | 3654/102000 [13:07<66:47:38,  2.45s/it]





  4%|▎         | 3655/102000 [13:09<66:53:54,  2.45s/it]





  4%|▎         | 3656/102000 [13:12<66:58:03,  2.45s/it]





  4%|▎         | 3657/102000 [13:14<67:00:22,  2.45s/it]





  4%|▎         | 3658/102000 [13:16<67:01:15,  2.45s/it]





  4%|▎         | 3659/102000 [13:19<66:57:14,  2.45s/it]





  4%|▎         | 3660/102000 [13:21<66:48:31,  2.45s/it]





  4%|▎         | 3661/102000 [13:24<66:58:34,  2.45s/it]





  4%|▎         | 3662/102000 [13:26<66:53:33,  2.45s/it]





  4%|▎         | 3663/102000 [13:29<66:53:54,  2.45s/it]





  4%|▎         | 3664/102000 [13:31<66:47:27,  2.45s/it]





  4%|▎         | 3665/102000 [13:34<66:42:27,  2.44s/it]





  4%|▎         | 3666/102000 [13:36<66:38:55,  2.44s/it]





  4%|▎         | 3667/102000 [13:38<66:39:20,  2.44s/it]





  4%|▎         | 3668/102000 [13:41<66:36:57,  2.44s/it]





  4%|▎         | 3669/102000 [13:43<66:14:41,  2.43s/it]





  4%|▎         | 3670/102000 [13:46<66:25:45,  2.43s/it]





  4%|▎         | 3671/102000 [13:48<66:30:04,  2.43s/it]





  4%|▎         | 3672/102000 [13:51<66:33:31,  2.44s/it]





  4%|▎         | 3673/102000 [13:53<66:38:52,  2.44s/it]





  4%|▎         | 3674/102000 [13:55<66:39:03,  2.44s/it]





  4%|▎         | 3675/102000 [13:58<66:15:03,  2.43s/it]





  4%|▎         | 3676/102000 [14:00<66:26:07,  2.43s/it]





  4%|▎         | 3677/102000 [14:03<66:32:35,  2.44s/it]





  4%|▎         | 3678/102000 [14:05<66:31:08,  2.44s/it]





  4%|▎         | 3679/102000 [14:08<66:36:53,  2.44s/it]





  4%|▎         | 3680/102000 [14:10<66:41:53,  2.44s/it]





  4%|▎         | 3681/102000 [14:13<66:21:16,  2.43s/it]





  4%|▎         | 3682/102000 [14:15<66:25:08,  2.43s/it]





  4%|▎         | 3683/102000 [14:17<66:21:37,  2.43s/it]





  4%|▎         | 3684/102000 [14:20<66:19:58,  2.43s/it]





  4%|▎         | 3685/102000 [14:22<66:30:40,  2.44s/it]





  4%|▎         | 3686/102000 [14:25<66:30:47,  2.44s/it]





  4%|▎         | 3687/102000 [14:27<66:53:03,  2.45s/it]





  4%|▎         | 3688/102000 [14:30<66:50:07,  2.45s/it]





  4%|▎         | 3689/102000 [14:32<66:49:02,  2.45s/it]





  4%|▎         | 3690/102000 [14:34<66:25:58,  2.43s/it]





  4%|▎         | 3691/102000 [14:37<66:10:59,  2.42s/it]





  4%|▎         | 3692/102000 [14:39<66:25:48,  2.43s/it]





  4%|▎         | 3693/102000 [14:42<66:11:43,  2.42s/it]





  4%|▎         | 3694/102000 [14:44<66:18:05,  2.43s/it]





  4%|▎         | 3695/102000 [14:47<66:01:24,  2.42s/it]





  4%|▎         | 3696/102000 [14:49<66:04:42,  2.42s/it]





  4%|▎         | 3697/102000 [14:51<65:45:38,  2.41s/it]





  4%|▎         | 3698/102000 [14:54<65:56:00,  2.41s/it]





  4%|▎         | 3699/102000 [14:56<65:55:38,  2.41s/it]





  4%|▎         | 3700/102000 [14:59<66:14:21,  2.43s/it]





  4%|▎         | 3701/102000 [15:01<66:39:59,  2.44s/it]





  4%|▎         | 3702/102000 [15:04<66:41:21,  2.44s/it]





  4%|▎         | 3703/102000 [15:06<66:36:27,  2.44s/it]





  4%|▎         | 3704/102000 [15:08<66:16:16,  2.43s/it]





  4%|▎         | 3705/102000 [15:11<66:16:56,  2.43s/it]





  4%|▎         | 3706/102000 [15:13<66:24:44,  2.43s/it]





  4%|▎         | 3707/102000 [15:16<66:10:00,  2.42s/it]





  4%|▎         | 3708/102000 [15:18<66:25:32,  2.43s/it]





  4%|▎         | 3709/102000 [15:21<66:25:00,  2.43s/it]





  4%|▎         | 3710/102000 [15:23<66:14:13,  2.43s/it]





  4%|▎         | 3711/102000 [15:25<65:56:17,  2.42s/it]





  4%|▎         | 3712/102000 [15:28<65:57:08,  2.42s/it]





  4%|▎         | 3713/102000 [15:30<66:12:14,  2.42s/it]





  4%|▎         | 3714/102000 [15:33<66:20:39,  2.43s/it]





  4%|▎         | 3715/102000 [15:35<66:20:06,  2.43s/it]





  4%|▎         | 3716/102000 [15:38<66:31:07,  2.44s/it]





  4%|▎         | 3717/102000 [15:40<66:28:58,  2.44s/it]





  4%|▎         | 3718/102000 [15:42<66:17:07,  2.43s/it]





  4%|▎         | 3719/102000 [15:45<66:27:42,  2.43s/it]





  4%|▎         | 3720/102000 [15:47<66:29:35,  2.44s/it]





  4%|▎         | 3721/102000 [15:50<66:23:58,  2.43s/it]





  4%|▎         | 3722/102000 [15:52<66:23:37,  2.43s/it]





  4%|▎         | 3723/102000 [15:55<66:23:25,  2.43s/it]





  4%|▎         | 3724/102000 [15:57<66:22:04,  2.43s/it]





  4%|▎         | 3725/102000 [15:59<66:21:05,  2.43s/it]





  4%|▎         | 3726/102000 [16:02<66:08:18,  2.42s/it]





  4%|▎         | 3727/102000 [16:04<66:12:01,  2.43s/it]





  4%|▎         | 3728/102000 [16:07<66:19:21,  2.43s/it]





  4%|▎         | 3729/102000 [16:09<66:30:49,  2.44s/it]





  4%|▎         | 3730/102000 [16:12<66:21:46,  2.43s/it]





  4%|▎         | 3731/102000 [16:14<66:17:46,  2.43s/it]





  4%|▎         | 3732/102000 [16:16<66:17:30,  2.43s/it]





  4%|▎         | 3733/102000 [16:19<66:10:46,  2.42s/it]





  4%|▎         | 3734/102000 [16:21<66:00:53,  2.42s/it]





  4%|▎         | 3735/102000 [16:24<66:16:10,  2.43s/it]





  4%|▎         | 3736/102000 [16:26<66:22:07,  2.43s/it]





  4%|▎         | 3737/102000 [16:29<66:46:54,  2.45s/it]





  4%|▎         | 3738/102000 [16:31<66:44:48,  2.45s/it]





  4%|▎         | 3739/102000 [16:33<66:35:39,  2.44s/it]





  4%|▎         | 3740/102000 [16:36<66:31:44,  2.44s/it]





  4%|▎         | 3741/102000 [16:38<66:15:25,  2.43s/it]





  4%|▎         | 3742/102000 [16:41<66:26:05,  2.43s/it]





  4%|▎         | 3743/102000 [16:43<66:32:36,  2.44s/it]





  4%|▎         | 3744/102000 [16:46<66:14:29,  2.43s/it]





  4%|▎         | 3745/102000 [16:48<66:25:32,  2.43s/it]





  4%|▎         | 3746/102000 [16:50<66:20:27,  2.43s/it]





  4%|▎         | 3747/102000 [16:53<66:23:04,  2.43s/it]





  4%|▎         | 3748/102000 [16:55<66:19:10,  2.43s/it]





  4%|▎         | 3749/102000 [16:58<66:18:13,  2.43s/it]





  4%|▎         | 3750/102000 [17:00<66:24:45,  2.43s/it]





  4%|▎         | 3751/102000 [17:03<66:28:13,  2.44s/it]





  4%|▎         | 3752/102000 [17:05<66:31:49,  2.44s/it]





  4%|▎         | 3753/102000 [17:08<66:50:36,  2.45s/it]





  4%|▎         | 3754/102000 [17:10<67:08:46,  2.46s/it]





  4%|▎         | 3755/102000 [17:12<66:56:04,  2.45s/it]





  4%|▎         | 3756/102000 [17:15<67:12:45,  2.46s/it]





  4%|▎         | 3757/102000 [17:17<66:53:00,  2.45s/it]





  4%|▎         | 3758/102000 [17:20<66:48:33,  2.45s/it]





  4%|▎         | 3759/102000 [17:22<66:45:17,  2.45s/it]





  4%|▎         | 3760/102000 [17:25<66:33:23,  2.44s/it]





  4%|▎         | 3761/102000 [17:27<66:27:06,  2.44s/it]





  4%|▎         | 3762/102000 [17:30<66:23:44,  2.43s/it]





  4%|▎         | 3763/102000 [17:32<66:24:34,  2.43s/it]





  4%|▎         | 3764/102000 [17:34<66:08:18,  2.42s/it]





  4%|▎         | 3765/102000 [17:37<66:02:46,  2.42s/it]





  4%|▎         | 3766/102000 [17:39<66:17:20,  2.43s/it]





  4%|▎         | 3767/102000 [17:42<66:29:19,  2.44s/it]





  4%|▎         | 3768/102000 [17:44<66:20:27,  2.43s/it]





  4%|▎         | 3769/102000 [17:47<66:30:27,  2.44s/it]





  4%|▎         | 3770/102000 [17:49<66:11:05,  2.43s/it]





  4%|▎         | 3771/102000 [17:51<66:15:29,  2.43s/it]





  4%|▎         | 3772/102000 [17:54<66:18:51,  2.43s/it]





  4%|▎         | 3773/102000 [17:56<66:24:19,  2.43s/it]





  4%|▎         | 3774/102000 [17:59<66:28:21,  2.44s/it]





  4%|▎         | 3775/102000 [18:01<66:30:55,  2.44s/it]





  4%|▎         | 3776/102000 [18:04<66:34:48,  2.44s/it]





  4%|▎         | 3777/102000 [18:06<66:30:50,  2.44s/it]





  4%|▎         | 3778/102000 [18:09<66:40:10,  2.44s/it]





  4%|▎         | 3779/102000 [18:11<66:43:33,  2.45s/it]





  4%|▎         | 3780/102000 [18:13<66:23:28,  2.43s/it]





  4%|▎         | 3781/102000 [18:16<65:46:14,  2.41s/it]





  4%|▎         | 3782/102000 [18:18<66:13:00,  2.43s/it]





  4%|▎         | 3783/102000 [18:21<66:21:20,  2.43s/it]





  4%|▎         | 3784/102000 [18:23<66:30:21,  2.44s/it]





  4%|▎         | 3785/102000 [18:26<66:24:43,  2.43s/it]





  4%|▎         | 3786/102000 [18:28<66:26:32,  2.44s/it]





  4%|▎         | 3787/102000 [18:30<66:02:22,  2.42s/it]





  4%|▎         | 3788/102000 [18:33<65:54:16,  2.42s/it]





  4%|▎         | 3789/102000 [18:35<65:54:12,  2.42s/it]





  4%|▎         | 3790/102000 [18:38<66:00:56,  2.42s/it]





  4%|▎         | 3791/102000 [18:40<66:01:41,  2.42s/it]





  4%|▎         | 3792/102000 [18:42<66:08:17,  2.42s/it]





  4%|▎         | 3793/102000 [18:45<66:18:26,  2.43s/it]





  4%|▎         | 3794/102000 [18:47<66:12:54,  2.43s/it]





  4%|▎         | 3795/102000 [18:50<66:27:44,  2.44s/it]





  4%|▎         | 3796/102000 [18:52<66:17:48,  2.43s/it]





  4%|▎         | 3797/102000 [18:55<66:20:15,  2.43s/it]





  4%|▎         | 3798/102000 [18:57<66:29:10,  2.44s/it]





  4%|▎         | 3799/102000 [18:59<66:18:41,  2.43s/it]





  4%|▎         | 3800/102000 [19:02<66:10:02,  2.43s/it]





  4%|▎         | 3801/102000 [19:04<66:05:45,  2.42s/it]





  4%|▎         | 3802/102000 [19:07<65:33:21,  2.40s/it]





  4%|▎         | 3803/102000 [19:09<66:03:31,  2.42s/it]





  4%|▎         | 3804/102000 [19:12<66:01:35,  2.42s/it]





  4%|▎         | 3805/102000 [19:14<66:02:40,  2.42s/it]





  4%|▎         | 3806/102000 [19:16<66:06:04,  2.42s/it]





  4%|▎         | 3807/102000 [19:19<66:05:08,  2.42s/it]





  4%|▎         | 3808/102000 [19:21<66:08:06,  2.42s/it]





  4%|▎         | 3809/102000 [19:24<66:17:41,  2.43s/it]





  4%|▎         | 3810/102000 [19:26<66:15:04,  2.43s/it]





  4%|▎         | 3811/102000 [19:29<66:23:17,  2.43s/it]





  4%|▎         | 3812/102000 [19:31<66:28:24,  2.44s/it]





  4%|▎         | 3813/102000 [19:33<66:27:11,  2.44s/it]





  4%|▎         | 3814/102000 [19:36<66:08:00,  2.42s/it]





  4%|▎         | 3815/102000 [19:38<66:31:35,  2.44s/it]





  4%|▎         | 3816/102000 [19:41<66:24:22,  2.43s/it]





  4%|▎         | 3817/102000 [19:43<66:33:04,  2.44s/it]





  4%|▎         | 3818/102000 [19:46<66:20:42,  2.43s/it]





  4%|▎         | 3819/102000 [19:48<66:42:04,  2.45s/it]





  4%|▎         | 3820/102000 [19:51<66:40:47,  2.44s/it]





  4%|▎         | 3821/102000 [19:53<66:31:25,  2.44s/it]





  4%|▎         | 3822/102000 [19:55<66:32:30,  2.44s/it]





  4%|▎         | 3823/102000 [19:58<66:24:48,  2.44s/it]





  4%|▎         | 3824/102000 [20:00<66:21:19,  2.43s/it]





  4%|▍         | 3825/102000 [20:03<66:25:52,  2.44s/it]





  4%|▍         | 3826/102000 [20:05<66:13:08,  2.43s/it]





  4%|▍         | 3827/102000 [20:08<66:06:15,  2.42s/it]





  4%|▍         | 3828/102000 [20:10<66:07:16,  2.42s/it]





  4%|▍         | 3829/102000 [20:12<66:16:03,  2.43s/it]





  4%|▍         | 3830/102000 [20:15<66:24:40,  2.44s/it]





  4%|▍         | 3831/102000 [20:17<66:06:24,  2.42s/it]





  4%|▍         | 3832/102000 [20:20<66:22:35,  2.43s/it]





  4%|▍         | 3833/102000 [20:22<66:29:26,  2.44s/it]





  4%|▍         | 3834/102000 [20:25<66:08:03,  2.43s/it]





  4%|▍         | 3835/102000 [20:27<66:22:54,  2.43s/it]





  4%|▍         | 3836/102000 [20:29<66:33:09,  2.44s/it]





  4%|▍         | 3837/102000 [20:32<66:41:41,  2.45s/it]





  4%|▍         | 3838/102000 [20:34<66:22:52,  2.43s/it]





  4%|▍         | 3839/102000 [20:37<66:19:16,  2.43s/it]





  4%|▍         | 3840/102000 [20:39<66:29:30,  2.44s/it]





  4%|▍         | 3841/102000 [20:42<66:37:17,  2.44s/it]





  4%|▍         | 3842/102000 [20:44<66:38:20,  2.44s/it]





  4%|▍         | 3843/102000 [20:46<66:04:11,  2.42s/it]





  4%|▍         | 3844/102000 [20:49<66:15:00,  2.43s/it]





  4%|▍         | 3845/102000 [20:51<66:25:04,  2.44s/it]





  4%|▍         | 3846/102000 [20:54<66:09:55,  2.43s/it]





  4%|▍         | 3847/102000 [20:56<65:51:11,  2.42s/it]





  4%|▍         | 3848/102000 [20:59<65:55:17,  2.42s/it]





  4%|▍         | 3849/102000 [21:01<66:03:54,  2.42s/it]





  4%|▍         | 3850/102000 [21:03<66:21:20,  2.43s/it]





  4%|▍         | 3851/102000 [21:06<66:15:11,  2.43s/it]





  4%|▍         | 3852/102000 [21:08<66:05:55,  2.42s/it]





  4%|▍         | 3853/102000 [21:11<66:14:26,  2.43s/it]





  4%|▍         | 3854/102000 [21:13<66:27:32,  2.44s/it]





  4%|▍         | 3855/102000 [21:16<66:29:32,  2.44s/it]





  4%|▍         | 3856/102000 [21:18<66:39:46,  2.45s/it]





  4%|▍         | 3857/102000 [21:21<66:29:29,  2.44s/it]





  4%|▍         | 3858/102000 [21:23<66:36:45,  2.44s/it]





  4%|▍         | 3859/102000 [21:25<66:21:25,  2.43s/it]





  4%|▍         | 3860/102000 [21:28<66:13:44,  2.43s/it]





  4%|▍         | 3861/102000 [21:30<66:26:56,  2.44s/it]





  4%|▍         | 3862/102000 [21:33<66:30:36,  2.44s/it]





  4%|▍         | 3863/102000 [21:35<66:33:53,  2.44s/it]





  4%|▍         | 3864/102000 [21:38<66:30:05,  2.44s/it]





  4%|▍         | 3865/102000 [21:40<66:14:01,  2.43s/it]





  4%|▍         | 3866/102000 [21:42<66:25:27,  2.44s/it]





  4%|▍         | 3867/102000 [21:45<66:38:47,  2.44s/it]





  4%|▍         | 3868/102000 [21:47<66:28:35,  2.44s/it]





  4%|▍         | 3869/102000 [21:50<66:28:52,  2.44s/it]





  4%|▍         | 3870/102000 [21:52<66:35:12,  2.44s/it]





  4%|▍         | 3871/102000 [21:55<66:37:33,  2.44s/it]





  4%|▍         | 3872/102000 [21:57<66:22:17,  2.43s/it]





  4%|▍         | 3873/102000 [22:00<66:03:19,  2.42s/it]





  4%|▍         | 3874/102000 [22:02<66:16:11,  2.43s/it]





  4%|▍         | 3875/102000 [22:04<66:26:42,  2.44s/it]





  4%|▍         | 3876/102000 [22:07<65:44:52,  2.41s/it]





  4%|▍         | 3877/102000 [22:09<66:01:11,  2.42s/it]





  4%|▍         | 3878/102000 [22:12<66:24:31,  2.44s/it]





  4%|▍         | 3879/102000 [22:14<66:28:01,  2.44s/it]





  4%|▍         | 3880/102000 [22:17<66:31:55,  2.44s/it]





  4%|▍         | 3881/102000 [22:19<66:37:48,  2.44s/it]





  4%|▍         | 3882/102000 [22:21<66:42:26,  2.45s/it]





  4%|▍         | 3883/102000 [22:24<66:37:49,  2.44s/it]





  4%|▍         | 3884/102000 [22:26<66:22:51,  2.44s/it]





  4%|▍         | 3885/102000 [22:29<66:22:48,  2.44s/it]





  4%|▍         | 3886/102000 [22:31<66:17:16,  2.43s/it]





  4%|▍         | 3887/102000 [22:34<66:25:06,  2.44s/it]





  4%|▍         | 3888/102000 [22:36<66:58:12,  2.46s/it]





  4%|▍         | 3889/102000 [22:39<67:16:40,  2.47s/it]





  4%|▍         | 3890/102000 [22:41<67:17:47,  2.47s/it]





  4%|▍         | 3891/102000 [22:44<67:10:49,  2.47s/it]





  4%|▍         | 3892/102000 [22:46<67:00:19,  2.46s/it]





  4%|▍         | 3893/102000 [22:48<66:43:30,  2.45s/it]





  4%|▍         | 3894/102000 [22:51<66:17:18,  2.43s/it]





  4%|▍         | 3895/102000 [22:53<66:04:38,  2.42s/it]





  4%|▍         | 3896/102000 [22:56<65:55:38,  2.42s/it]





  4%|▍         | 3897/102000 [22:58<65:40:46,  2.41s/it]





  4%|▍         | 3898/102000 [23:01<66:24:40,  2.44s/it]





  4%|▍         | 3899/102000 [23:03<66:20:06,  2.43s/it]





  4%|▍         | 3900/102000 [23:05<66:07:16,  2.43s/it]





  4%|▍         | 3901/102000 [23:08<66:15:03,  2.43s/it]





  4%|▍         | 3902/102000 [23:10<65:58:48,  2.42s/it]





  4%|▍         | 3903/102000 [23:13<66:04:10,  2.42s/it]





  4%|▍         | 3904/102000 [23:15<66:33:53,  2.44s/it]





  4%|▍         | 3905/102000 [23:18<66:44:08,  2.45s/it]





  4%|▍         | 3906/102000 [23:20<66:26:00,  2.44s/it]





  4%|▍         | 3907/102000 [23:22<66:03:47,  2.42s/it]





  4%|▍         | 3908/102000 [23:25<66:11:46,  2.43s/it]





  4%|▍         | 3909/102000 [23:27<66:15:43,  2.43s/it]





  4%|▍         | 3910/102000 [23:30<66:02:15,  2.42s/it]





  4%|▍         | 3911/102000 [23:32<66:06:18,  2.43s/it]





  4%|▍         | 3912/102000 [23:35<66:28:40,  2.44s/it]





  4%|▍         | 3913/102000 [23:37<66:28:39,  2.44s/it]





  4%|▍         | 3914/102000 [23:39<66:30:33,  2.44s/it]





  4%|▍         | 3915/102000 [23:42<66:16:49,  2.43s/it]





  4%|▍         | 3916/102000 [23:44<66:29:26,  2.44s/it]





  4%|▍         | 3917/102000 [23:47<66:31:01,  2.44s/it]





  4%|▍         | 3918/102000 [23:49<66:02:06,  2.42s/it]





  4%|▍         | 3919/102000 [23:52<66:09:18,  2.43s/it]





  4%|▍         | 3920/102000 [23:54<66:20:49,  2.44s/it]





  4%|▍         | 3921/102000 [23:56<65:52:16,  2.42s/it]





  4%|▍         | 3922/102000 [23:59<66:06:18,  2.43s/it]





  4%|▍         | 3923/102000 [24:01<65:39:44,  2.41s/it]





  4%|▍         | 3924/102000 [24:04<65:41:04,  2.41s/it]





  4%|▍         | 3925/102000 [24:06<65:36:20,  2.41s/it]





  4%|▍         | 3926/102000 [24:08<65:43:19,  2.41s/it]





  4%|▍         | 3927/102000 [24:11<65:55:03,  2.42s/it]





  4%|▍         | 3928/102000 [24:13<66:11:17,  2.43s/it]





  4%|▍         | 3929/102000 [24:16<66:15:23,  2.43s/it]





  4%|▍         | 3930/102000 [24:18<66:22:01,  2.44s/it]





  4%|▍         | 3931/102000 [24:21<66:29:43,  2.44s/it]





  4%|▍         | 3932/102000 [24:23<66:06:37,  2.43s/it]





  4%|▍         | 3933/102000 [24:26<66:15:48,  2.43s/it]





  4%|▍         | 3934/102000 [24:28<66:20:46,  2.44s/it]





  4%|▍         | 3935/102000 [24:30<66:22:36,  2.44s/it]





  4%|▍         | 3936/102000 [24:33<66:13:01,  2.43s/it]





  4%|▍         | 3937/102000 [24:35<66:25:43,  2.44s/it]





  4%|▍         | 3938/102000 [24:38<66:28:56,  2.44s/it]





  4%|▍         | 3939/102000 [24:40<66:20:43,  2.44s/it]





  4%|▍         | 3940/102000 [24:43<66:22:23,  2.44s/it]





  4%|▍         | 3941/102000 [24:45<66:29:02,  2.44s/it]





  4%|▍         | 3942/102000 [24:47<66:22:35,  2.44s/it]





  4%|▍         | 3943/102000 [24:50<66:42:53,  2.45s/it]





  4%|▍         | 3944/102000 [24:52<66:22:44,  2.44s/it]





  4%|▍         | 3945/102000 [24:55<66:26:47,  2.44s/it]





  4%|▍         | 3946/102000 [24:57<66:43:03,  2.45s/it]





  4%|▍         | 3947/102000 [25:00<66:33:36,  2.44s/it]





  4%|▍         | 3948/102000 [25:02<66:25:27,  2.44s/it]





  4%|▍         | 3949/102000 [25:05<66:35:01,  2.44s/it]





  4%|▍         | 3950/102000 [25:07<66:31:35,  2.44s/it]





  4%|▍         | 3951/102000 [25:09<66:28:06,  2.44s/it]





  4%|▍         | 3952/102000 [25:12<66:24:57,  2.44s/it]





  4%|▍         | 3953/102000 [25:14<66:26:10,  2.44s/it]





  4%|▍         | 3954/102000 [25:17<66:30:43,  2.44s/it]





  4%|▍         | 3955/102000 [25:19<66:36:33,  2.45s/it]





  4%|▍         | 3956/102000 [25:22<66:41:10,  2.45s/it]





  4%|▍         | 3957/102000 [25:24<66:41:32,  2.45s/it]





  4%|▍         | 3958/102000 [25:27<66:48:01,  2.45s/it]





  4%|▍         | 3959/102000 [25:29<66:37:57,  2.45s/it]





  4%|▍         | 3960/102000 [25:32<66:56:41,  2.46s/it]





  4%|▍         | 3961/102000 [25:34<66:52:59,  2.46s/it]





  4%|▍         | 3962/102000 [25:36<66:37:20,  2.45s/it]





  4%|▍         | 3963/102000 [25:39<66:26:50,  2.44s/it]





  4%|▍         | 3964/102000 [25:41<66:30:48,  2.44s/it]





  4%|▍         | 3965/102000 [25:44<66:25:34,  2.44s/it]





  4%|▍         | 3966/102000 [25:46<66:22:27,  2.44s/it]





  4%|▍         | 3967/102000 [25:49<66:25:41,  2.44s/it]





  4%|▍         | 3968/102000 [25:51<66:32:17,  2.44s/it]





  4%|▍         | 3969/102000 [25:53<66:13:30,  2.43s/it]





  4%|▍         | 3970/102000 [25:56<66:18:48,  2.44s/it]





  4%|▍         | 3971/102000 [25:58<66:28:59,  2.44s/it]





  4%|▍         | 3972/102000 [26:01<66:28:51,  2.44s/it]





  4%|▍         | 3973/102000 [26:03<66:30:07,  2.44s/it]





  4%|▍         | 3974/102000 [26:06<66:31:30,  2.44s/it]





  4%|▍         | 3975/102000 [26:08<66:21:31,  2.44s/it]





  4%|▍         | 3976/102000 [26:10<65:54:45,  2.42s/it]





  4%|▍         | 3977/102000 [26:13<65:46:01,  2.42s/it]





  4%|▍         | 3978/102000 [26:15<65:38:33,  2.41s/it]





  4%|▍         | 3979/102000 [26:18<65:38:50,  2.41s/it]





  4%|▍         | 3980/102000 [26:20<65:49:01,  2.42s/it]





  4%|▍         | 3981/102000 [26:23<66:10:19,  2.43s/it]





  4%|▍         | 3982/102000 [26:25<65:57:59,  2.42s/it]





  4%|▍         | 3983/102000 [26:27<66:05:21,  2.43s/it]





  4%|▍         | 3984/102000 [26:30<66:12:57,  2.43s/it]





  4%|▍         | 3985/102000 [26:32<66:06:31,  2.43s/it]





  4%|▍         | 3986/102000 [26:35<66:02:48,  2.43s/it]





  4%|▍         | 3987/102000 [26:37<66:08:33,  2.43s/it]





  4%|▍         | 3988/102000 [26:40<66:14:03,  2.43s/it]





  4%|▍         | 3989/102000 [26:42<66:01:22,  2.43s/it]





  4%|▍         | 3990/102000 [26:44<65:47:49,  2.42s/it]





  4%|▍         | 3991/102000 [26:47<65:44:53,  2.42s/it]





  4%|▍         | 3992/102000 [26:49<65:56:26,  2.42s/it]





  4%|▍         | 3993/102000 [26:52<66:03:54,  2.43s/it]





  4%|▍         | 3994/102000 [26:54<65:53:45,  2.42s/it]





  4%|▍         | 3995/102000 [26:57<65:46:51,  2.42s/it]





  4%|▍         | 3996/102000 [26:59<65:50:53,  2.42s/it]





  4%|▍         | 3997/102000 [27:01<65:52:52,  2.42s/it]





  4%|▍         | 3998/102000 [27:04<65:38:18,  2.41s/it]





  4%|▍         | 3999/102000 [27:06<65:56:06,  2.42s/it]





  4%|▍         | 4000/102000 [27:09<66:06:59,  2.43s/it]





  4%|▍         | 4001/102000 [27:11<65:54:28,  2.42s/it]





  4%|▍         | 4002/102000 [27:13<66:06:50,  2.43s/it]





  4%|▍         | 4003/102000 [27:16<65:56:01,  2.42s/it]





  4%|▍         | 4004/102000 [27:18<65:32:18,  2.41s/it]





  4%|▍         | 4005/102000 [27:21<65:50:21,  2.42s/it]





  4%|▍         | 4006/102000 [27:23<65:45:11,  2.42s/it]





  4%|▍         | 4007/102000 [27:26<65:57:16,  2.42s/it]





  4%|▍         | 4008/102000 [27:28<65:55:34,  2.42s/it]





  4%|▍         | 4009/102000 [27:30<65:51:04,  2.42s/it]





  4%|▍         | 4010/102000 [27:33<65:51:18,  2.42s/it]





  4%|▍         | 4011/102000 [27:35<65:44:05,  2.42s/it]





  4%|▍         | 4012/102000 [27:38<65:50:44,  2.42s/it]





  4%|▍         | 4013/102000 [27:40<65:54:56,  2.42s/it]





  4%|▍         | 4014/102000 [27:43<66:07:45,  2.43s/it]





  4%|▍         | 4015/102000 [27:45<66:00:25,  2.43s/it]





  4%|▍         | 4016/102000 [27:47<66:09:18,  2.43s/it]





  4%|▍         | 4017/102000 [27:50<66:01:44,  2.43s/it]





  4%|▍         | 4018/102000 [27:52<66:13:08,  2.43s/it]





  4%|▍         | 4019/102000 [27:55<66:02:18,  2.43s/it]





  4%|▍         | 4020/102000 [27:57<66:16:58,  2.44s/it]





  4%|▍         | 4021/102000 [28:00<66:15:49,  2.43s/it]





  4%|▍         | 4022/102000 [28:02<66:21:44,  2.44s/it]





  4%|▍         | 4023/102000 [28:04<66:29:00,  2.44s/it]





  4%|▍         | 4024/102000 [28:07<66:30:03,  2.44s/it]





  4%|▍         | 4025/102000 [28:09<66:26:38,  2.44s/it]





  4%|▍         | 4026/102000 [28:12<66:26:21,  2.44s/it]





  4%|▍         | 4027/102000 [28:14<66:27:37,  2.44s/it]





  4%|▍         | 4028/102000 [28:17<66:27:58,  2.44s/it]





  4%|▍         | 4029/102000 [28:19<66:22:45,  2.44s/it]





  4%|▍         | 4030/102000 [28:22<66:23:24,  2.44s/it]





  4%|▍         | 4031/102000 [28:24<66:19:48,  2.44s/it]





  4%|▍         | 4032/102000 [28:26<66:16:01,  2.44s/it]





  4%|▍         | 4033/102000 [28:29<65:57:51,  2.42s/it]





  4%|▍         | 4034/102000 [28:31<66:03:40,  2.43s/it]





  4%|▍         | 4035/102000 [28:34<65:58:25,  2.42s/it]





  4%|▍         | 4036/102000 [28:36<65:37:56,  2.41s/it]





  4%|▍         | 4037/102000 [28:38<65:44:32,  2.42s/it]





  4%|▍         | 4038/102000 [28:41<65:56:22,  2.42s/it]





  4%|▍         | 4039/102000 [28:43<65:40:49,  2.41s/it]





  4%|▍         | 4040/102000 [28:46<65:59:41,  2.43s/it]





  4%|▍         | 4041/102000 [28:48<65:47:06,  2.42s/it]





  4%|▍         | 4042/102000 [28:51<66:03:27,  2.43s/it]





  4%|▍         | 4043/102000 [28:53<66:16:39,  2.44s/it]





  4%|▍         | 4044/102000 [28:55<65:57:01,  2.42s/it]





  4%|▍         | 4045/102000 [28:58<65:58:24,  2.42s/it]





  4%|▍         | 4046/102000 [29:00<66:17:47,  2.44s/it]





  4%|▍         | 4047/102000 [29:03<66:08:08,  2.43s/it]





  4%|▍         | 4048/102000 [29:05<66:08:56,  2.43s/it]





  4%|▍         | 4049/102000 [29:08<66:20:53,  2.44s/it]





  4%|▍         | 4050/102000 [29:10<66:28:01,  2.44s/it]





  4%|▍         | 4051/102000 [29:13<66:31:44,  2.45s/it]





  4%|▍         | 4052/102000 [29:15<66:34:15,  2.45s/it]





  4%|▍         | 4053/102000 [29:17<66:38:06,  2.45s/it]





  4%|▍         | 4054/102000 [29:20<66:54:03,  2.46s/it]





  4%|▍         | 4055/102000 [29:22<66:52:23,  2.46s/it]





  4%|▍         | 4056/102000 [29:25<66:45:33,  2.45s/it]





  4%|▍         | 4057/102000 [29:27<66:45:37,  2.45s/it]





  4%|▍         | 4058/102000 [29:30<66:36:42,  2.45s/it]





  4%|▍         | 4059/102000 [29:32<66:36:57,  2.45s/it]





  4%|▍         | 4060/102000 [29:35<66:33:31,  2.45s/it]





  4%|▍         | 4061/102000 [29:37<66:38:04,  2.45s/it]





  4%|▍         | 4062/102000 [29:39<66:27:59,  2.44s/it]





  4%|▍         | 4063/102000 [29:42<66:27:46,  2.44s/it]





  4%|▍         | 4064/102000 [29:44<66:11:02,  2.43s/it]





  4%|▍         | 4065/102000 [29:47<66:21:07,  2.44s/it]





  4%|▍         | 4066/102000 [29:49<66:27:20,  2.44s/it]





  4%|▍         | 4067/102000 [29:52<66:22:37,  2.44s/it]





  4%|▍         | 4068/102000 [29:54<66:03:17,  2.43s/it]





  4%|▍         | 4069/102000 [29:57<66:07:41,  2.43s/it]





  4%|▍         | 4070/102000 [29:59<66:31:24,  2.45s/it]





  4%|▍         | 4071/102000 [30:01<66:36:56,  2.45s/it]





  4%|▍         | 4072/102000 [30:04<66:29:59,  2.44s/it]





  4%|▍         | 4073/102000 [30:06<66:07:52,  2.43s/it]





  4%|▍         | 4074/102000 [30:09<66:00:07,  2.43s/it]





  4%|▍         | 4075/102000 [30:11<66:12:02,  2.43s/it]





  4%|▍         | 4076/102000 [30:14<66:17:02,  2.44s/it]





  4%|▍         | 4077/102000 [30:16<66:24:35,  2.44s/it]





  4%|▍         | 4078/102000 [30:18<66:26:49,  2.44s/it]





  4%|▍         | 4079/102000 [30:21<66:38:41,  2.45s/it]





  4%|▍         | 4080/102000 [30:23<66:31:17,  2.45s/it]





  4%|▍         | 4081/102000 [30:26<66:17:13,  2.44s/it]





  4%|▍         | 4082/102000 [30:28<66:17:38,  2.44s/it]





  4%|▍         | 4083/102000 [30:31<66:07:13,  2.43s/it]





  4%|▍         | 4084/102000 [30:33<65:52:03,  2.42s/it]





  4%|▍         | 4085/102000 [30:35<65:58:52,  2.43s/it]





  4%|▍         | 4086/102000 [30:38<66:13:10,  2.43s/it]





  4%|▍         | 4087/102000 [30:40<66:20:33,  2.44s/it]





  4%|▍         | 4088/102000 [30:43<65:59:16,  2.43s/it]





  4%|▍         | 4089/102000 [30:45<66:07:56,  2.43s/it]





  4%|▍         | 4090/102000 [30:48<66:21:11,  2.44s/it]





  4%|▍         | 4091/102000 [30:50<66:03:58,  2.43s/it]





  4%|▍         | 4092/102000 [30:53<66:08:58,  2.43s/it]





  4%|▍         | 4093/102000 [30:55<66:04:38,  2.43s/it]





  4%|▍         | 4094/102000 [30:57<66:08:55,  2.43s/it]





  4%|▍         | 4095/102000 [31:00<66:04:47,  2.43s/it]





  4%|▍         | 4096/102000 [31:02<65:52:34,  2.42s/it]





  4%|▍         | 4097/102000 [31:05<65:53:29,  2.42s/it]





  4%|▍         | 4098/102000 [31:07<65:49:50,  2.42s/it]





  4%|▍         | 4099/102000 [31:10<66:12:38,  2.43s/it]





  4%|▍         | 4100/102000 [31:12<66:16:04,  2.44s/it]





  4%|▍         | 4101/102000 [31:14<66:23:52,  2.44s/it]





  4%|▍         | 4102/102000 [31:17<66:24:58,  2.44s/it]





  4%|▍         | 4103/102000 [31:19<66:29:25,  2.45s/it]





  4%|▍         | 4104/102000 [31:22<66:34:58,  2.45s/it]





  4%|▍         | 4105/102000 [31:24<66:46:32,  2.46s/it]





  4%|▍         | 4106/102000 [31:27<66:27:43,  2.44s/it]





  4%|▍         | 4107/102000 [31:29<65:53:36,  2.42s/it]





  4%|▍         | 4108/102000 [31:31<66:00:15,  2.43s/it]





  4%|▍         | 4109/102000 [31:34<65:52:18,  2.42s/it]





  4%|▍         | 4110/102000 [31:36<65:51:53,  2.42s/it]





  4%|▍         | 4111/102000 [31:39<65:48:09,  2.42s/it]





  4%|▍         | 4112/102000 [31:41<66:04:42,  2.43s/it]





  4%|▍         | 4113/102000 [31:44<66:10:11,  2.43s/it]





  4%|▍         | 4114/102000 [31:46<66:12:41,  2.44s/it]





  4%|▍         | 4115/102000 [31:49<66:18:20,  2.44s/it]





  4%|▍         | 4116/102000 [31:51<66:13:43,  2.44s/it]





  4%|▍         | 4117/102000 [31:53<66:01:51,  2.43s/it]





  4%|▍         | 4118/102000 [31:56<66:05:57,  2.43s/it]





  4%|▍         | 4119/102000 [31:58<66:01:05,  2.43s/it]





  4%|▍         | 4120/102000 [32:01<66:02:45,  2.43s/it]





  4%|▍         | 4121/102000 [32:03<66:10:52,  2.43s/it]





  4%|▍         | 4122/102000 [32:05<65:42:44,  2.42s/it]





  4%|▍         | 4123/102000 [32:08<65:53:57,  2.42s/it]





  4%|▍         | 4124/102000 [32:10<65:58:24,  2.43s/it]





  4%|▍         | 4125/102000 [32:13<66:00:18,  2.43s/it]





  4%|▍         | 4126/102000 [32:15<65:54:57,  2.42s/it]





  4%|▍         | 4127/102000 [32:18<65:38:07,  2.41s/it]





  4%|▍         | 4128/102000 [32:20<65:24:36,  2.41s/it]





  4%|▍         | 4129/102000 [32:22<65:29:40,  2.41s/it]





  4%|▍         | 4130/102000 [32:25<65:30:37,  2.41s/it]





  4%|▍         | 4131/102000 [32:27<65:09:40,  2.40s/it]





  4%|▍         | 4132/102000 [32:30<65:15:50,  2.40s/it]





  4%|▍         | 4133/102000 [32:32<65:22:41,  2.40s/it]





  4%|▍         | 4134/102000 [32:34<65:15:33,  2.40s/it]





  4%|▍         | 4135/102000 [32:37<65:29:45,  2.41s/it]





  4%|▍         | 4136/102000 [32:39<65:23:02,  2.41s/it]





  4%|▍         | 4137/102000 [32:42<65:37:55,  2.41s/it]





  4%|▍         | 4138/102000 [32:44<65:39:59,  2.42s/it]





  4%|▍         | 4139/102000 [32:47<65:54:42,  2.42s/it]





  4%|▍         | 4140/102000 [32:49<65:56:55,  2.43s/it]





  4%|▍         | 4141/102000 [32:51<66:02:53,  2.43s/it]





  4%|▍         | 4142/102000 [32:54<65:37:27,  2.41s/it]





  4%|▍         | 4143/102000 [32:56<65:36:17,  2.41s/it]





  4%|▍         | 4144/102000 [32:59<65:46:44,  2.42s/it]





  4%|▍         | 4145/102000 [33:01<65:53:12,  2.42s/it]





  4%|▍         | 4146/102000 [33:03<66:04:11,  2.43s/it]





  4%|▍         | 4147/102000 [33:06<66:06:23,  2.43s/it]





  4%|▍         | 4148/102000 [33:08<65:52:03,  2.42s/it]





  4%|▍         | 4149/102000 [33:11<65:51:44,  2.42s/it]





  4%|▍         | 4150/102000 [33:13<65:42:47,  2.42s/it]





  4%|▍         | 4151/102000 [33:16<65:38:06,  2.41s/it]





  4%|▍         | 4152/102000 [33:18<65:43:36,  2.42s/it]





  4%|▍         | 4153/102000 [33:20<65:52:52,  2.42s/it]





  4%|▍         | 4154/102000 [33:23<66:03:38,  2.43s/it]





  4%|▍         | 4155/102000 [33:25<66:04:59,  2.43s/it]





  4%|▍         | 4156/102000 [33:28<66:05:33,  2.43s/it]





  4%|▍         | 4157/102000 [33:30<65:50:38,  2.42s/it]





  4%|▍         | 4158/102000 [33:33<65:44:56,  2.42s/it]





  4%|▍         | 4159/102000 [33:35<65:49:38,  2.42s/it]





  4%|▍         | 4160/102000 [33:37<65:53:41,  2.42s/it]





  4%|▍         | 4161/102000 [33:40<66:02:53,  2.43s/it]





  4%|▍         | 4162/102000 [33:42<66:08:35,  2.43s/it]





  4%|▍         | 4163/102000 [33:45<66:04:58,  2.43s/it]





  4%|▍         | 4164/102000 [33:47<66:05:41,  2.43s/it]





  4%|▍         | 4165/102000 [33:50<66:05:22,  2.43s/it]





  4%|▍         | 4166/102000 [33:52<65:54:24,  2.43s/it]





  4%|▍         | 4167/102000 [33:54<65:51:24,  2.42s/it]





  4%|▍         | 4168/102000 [33:57<65:53:33,  2.42s/it]





  4%|▍         | 4169/102000 [33:59<65:51:08,  2.42s/it]





  4%|▍         | 4170/102000 [34:02<65:46:04,  2.42s/it]





  4%|▍         | 4171/102000 [34:04<65:49:27,  2.42s/it]





  4%|▍         | 4172/102000 [34:07<65:58:15,  2.43s/it]





  4%|▍         | 4173/102000 [34:09<66:02:34,  2.43s/it]





  4%|▍         | 4174/102000 [34:11<66:03:00,  2.43s/it]





  4%|▍         | 4175/102000 [34:14<65:59:06,  2.43s/it]





  4%|▍         | 4176/102000 [34:16<65:58:34,  2.43s/it]





  4%|▍         | 4177/102000 [34:19<66:04:35,  2.43s/it]





  4%|▍         | 4178/102000 [34:21<66:00:42,  2.43s/it]





  4%|▍         | 4179/102000 [34:24<66:02:30,  2.43s/it]





  4%|▍         | 4180/102000 [34:26<65:50:37,  2.42s/it]





  4%|▍         | 4181/102000 [34:28<65:30:18,  2.41s/it]





  4%|▍         | 4182/102000 [34:31<65:39:52,  2.42s/it]





  4%|▍         | 4183/102000 [34:33<65:34:37,  2.41s/it]





  4%|▍         | 4184/102000 [34:36<65:30:03,  2.41s/it]





  4%|▍         | 4185/102000 [34:38<65:34:51,  2.41s/it]





  4%|▍         | 4186/102000 [34:40<65:47:55,  2.42s/it]





  4%|▍         | 4187/102000 [34:43<65:52:19,  2.42s/it]





  4%|▍         | 4188/102000 [34:45<65:41:46,  2.42s/it]





  4%|▍         | 4189/102000 [34:48<65:41:12,  2.42s/it]





  4%|▍         | 4190/102000 [34:50<65:32:49,  2.41s/it]





  4%|▍         | 4191/102000 [34:53<65:34:51,  2.41s/it]





  4%|▍         | 4192/102000 [34:55<65:48:58,  2.42s/it]





  4%|▍         | 4193/102000 [34:57<65:54:55,  2.43s/it]





  4%|▍         | 4194/102000 [35:00<65:50:28,  2.42s/it]





  4%|▍         | 4195/102000 [35:02<65:44:17,  2.42s/it]





  4%|▍         | 4196/102000 [35:05<65:52:03,  2.42s/it]





  4%|▍         | 4197/102000 [35:07<65:54:25,  2.43s/it]





  4%|▍         | 4198/102000 [35:10<66:01:37,  2.43s/it]





  4%|▍         | 4199/102000 [35:12<66:06:13,  2.43s/it]





  4%|▍         | 4200/102000 [35:14<65:58:57,  2.43s/it]





  4%|▍         | 4201/102000 [35:17<66:10:29,  2.44s/it]





  4%|▍         | 4202/102000 [35:19<66:15:06,  2.44s/it]





  4%|▍         | 4203/102000 [35:22<66:00:35,  2.43s/it]





  4%|▍         | 4204/102000 [35:24<65:59:51,  2.43s/it]





  4%|▍         | 4205/102000 [35:27<66:09:56,  2.44s/it]





  4%|▍         | 4206/102000 [35:29<65:44:50,  2.42s/it]





  4%|▍         | 4207/102000 [35:31<65:35:41,  2.41s/it]





  4%|▍         | 4208/102000 [35:34<65:33:21,  2.41s/it]





  4%|▍         | 4209/102000 [35:36<65:38:25,  2.42s/it]





  4%|▍         | 4210/102000 [35:39<65:30:55,  2.41s/it]





  4%|▍         | 4211/102000 [35:41<65:28:29,  2.41s/it]





  4%|▍         | 4212/102000 [35:43<65:31:26,  2.41s/it]





  4%|▍         | 4213/102000 [35:46<65:39:26,  2.42s/it]





  4%|▍         | 4214/102000 [35:48<65:33:32,  2.41s/it]





  4%|▍         | 4215/102000 [35:51<65:07:30,  2.40s/it]





  4%|▍         | 4216/102000 [35:53<65:29:33,  2.41s/it]





  4%|▍         | 4217/102000 [35:55<65:43:01,  2.42s/it]





  4%|▍         | 4218/102000 [35:58<65:52:26,  2.43s/it]





  4%|▍         | 4219/102000 [36:00<65:41:03,  2.42s/it]





  4%|▍         | 4220/102000 [36:03<65:31:28,  2.41s/it]





  4%|▍         | 4221/102000 [36:05<65:18:38,  2.40s/it]





  4%|▍         | 4222/102000 [36:07<65:08:50,  2.40s/it]





  4%|▍         | 4223/102000 [36:10<65:15:23,  2.40s/it]





  4%|▍         | 4224/102000 [36:12<65:18:49,  2.40s/it]





  4%|▍         | 4225/102000 [36:15<64:57:14,  2.39s/it]





  4%|▍         | 4226/102000 [36:17<65:01:16,  2.39s/it]





  4%|▍         | 4227/102000 [36:19<65:06:13,  2.40s/it]





  4%|▍         | 4228/102000 [36:22<65:06:08,  2.40s/it]





  4%|▍         | 4229/102000 [36:24<65:14:54,  2.40s/it]





  4%|▍         | 4230/102000 [36:27<65:34:17,  2.41s/it]





  4%|▍         | 4231/102000 [36:29<65:47:44,  2.42s/it]





  4%|▍         | 4232/102000 [36:32<65:46:29,  2.42s/it]





  4%|▍         | 4233/102000 [36:34<66:03:14,  2.43s/it]





  4%|▍         | 4234/102000 [36:36<66:05:18,  2.43s/it]





  4%|▍         | 4235/102000 [36:39<65:59:21,  2.43s/it]





  4%|▍         | 4236/102000 [36:41<66:03:53,  2.43s/it]





  4%|▍         | 4237/102000 [36:44<66:07:16,  2.43s/it]





  4%|▍         | 4238/102000 [36:46<66:00:50,  2.43s/it]





  4%|▍         | 4239/102000 [36:49<65:48:29,  2.42s/it]





  4%|▍         | 4240/102000 [36:51<66:04:10,  2.43s/it]





  4%|▍         | 4241/102000 [36:53<65:56:16,  2.43s/it]





  4%|▍         | 4242/102000 [36:56<66:02:44,  2.43s/it]





  4%|▍         | 4243/102000 [36:58<66:14:48,  2.44s/it]





  4%|▍         | 4244/102000 [37:01<66:16:08,  2.44s/it]





  4%|▍         | 4245/102000 [37:03<66:16:25,  2.44s/it]





  4%|▍         | 4246/102000 [37:06<66:26:00,  2.45s/it]





  4%|▍         | 4247/102000 [37:08<66:21:10,  2.44s/it]





  4%|▍         | 4248/102000 [37:11<66:20:35,  2.44s/it]





  4%|▍         | 4249/102000 [37:13<66:24:43,  2.45s/it]





  4%|▍         | 4250/102000 [37:15<66:16:41,  2.44s/it]





  4%|▍         | 4251/102000 [37:18<65:44:55,  2.42s/it]





  4%|▍         | 4252/102000 [37:20<65:52:01,  2.43s/it]





  4%|▍         | 4253/102000 [37:23<65:42:05,  2.42s/it]





  4%|▍         | 4254/102000 [37:25<65:52:14,  2.43s/it]





  4%|▍         | 4255/102000 [37:28<65:56:40,  2.43s/it]





  4%|▍         | 4256/102000 [37:30<66:07:25,  2.44s/it]





  4%|▍         | 4257/102000 [37:32<66:00:52,  2.43s/it]





  4%|▍         | 4258/102000 [37:35<66:03:53,  2.43s/it]





  4%|▍         | 4259/102000 [37:37<66:08:43,  2.44s/it]





  4%|▍         | 4260/102000 [37:40<66:10:43,  2.44s/it]





  4%|▍         | 4261/102000 [37:42<66:03:34,  2.43s/it]





  4%|▍         | 4262/102000 [37:45<65:38:34,  2.42s/it]





  4%|▍         | 4263/102000 [37:47<65:44:51,  2.42s/it]





  4%|▍         | 4264/102000 [37:49<65:53:46,  2.43s/it]





  4%|▍         | 4265/102000 [37:52<65:58:32,  2.43s/it]





  4%|▍         | 4266/102000 [37:54<65:43:12,  2.42s/it]





  4%|▍         | 4267/102000 [37:57<65:49:11,  2.42s/it]





  4%|▍         | 4268/102000 [37:59<66:10:34,  2.44s/it]





  4%|▍         | 4269/102000 [38:02<66:09:57,  2.44s/it]





  4%|▍         | 4270/102000 [38:04<66:07:52,  2.44s/it]





  4%|▍         | 4271/102000 [38:06<65:57:07,  2.43s/it]





  4%|▍         | 4272/102000 [38:09<65:40:42,  2.42s/it]





  4%|▍         | 4273/102000 [38:11<65:33:52,  2.42s/it]





  4%|▍         | 4274/102000 [38:14<65:44:31,  2.42s/it]





  4%|▍         | 4275/102000 [38:16<65:38:14,  2.42s/it]





  4%|▍         | 4276/102000 [38:19<65:43:11,  2.42s/it]





  4%|▍         | 4277/102000 [38:21<65:47:10,  2.42s/it]





  4%|▍         | 4278/102000 [38:23<65:55:18,  2.43s/it]





  4%|▍         | 4279/102000 [38:26<65:31:28,  2.41s/it]





  4%|▍         | 4280/102000 [38:28<65:46:56,  2.42s/it]





  4%|▍         | 4281/102000 [38:31<65:42:44,  2.42s/it]





  4%|▍         | 4282/102000 [38:33<65:47:22,  2.42s/it]





  4%|▍         | 4283/102000 [38:35<65:31:16,  2.41s/it]





  4%|▍         | 4284/102000 [38:38<65:35:30,  2.42s/it]





  4%|▍         | 4285/102000 [38:40<65:25:43,  2.41s/it]





  4%|▍         | 4286/102000 [38:43<65:35:07,  2.42s/it]





  4%|▍         | 4287/102000 [38:45<65:30:47,  2.41s/it]





  4%|▍         | 4288/102000 [38:48<65:22:08,  2.41s/it]





  4%|▍         | 4289/102000 [38:50<65:30:35,  2.41s/it]





  4%|▍         | 4290/102000 [38:52<65:22:46,  2.41s/it]





  4%|▍         | 4291/102000 [38:55<65:37:50,  2.42s/it]





  4%|▍         | 4292/102000 [38:57<65:29:20,  2.41s/it]





  4%|▍         | 4293/102000 [39:00<65:29:56,  2.41s/it]





  4%|▍         | 4294/102000 [39:02<65:31:40,  2.41s/it]





  4%|▍         | 4295/102000 [39:04<65:17:50,  2.41s/it]





  4%|▍         | 4296/102000 [39:07<65:16:04,  2.40s/it]





  4%|▍         | 4297/102000 [39:09<65:18:27,  2.41s/it]





  4%|▍         | 4298/102000 [39:12<65:32:32,  2.42s/it]





  4%|▍         | 4299/102000 [39:14<65:36:26,  2.42s/it]





  4%|▍         | 4300/102000 [39:16<65:30:41,  2.41s/it]





  4%|▍         | 4301/102000 [39:19<65:20:42,  2.41s/it]





  4%|▍         | 4302/102000 [39:21<65:26:48,  2.41s/it]





  4%|▍         | 4303/102000 [39:24<65:41:36,  2.42s/it]





  4%|▍         | 4304/102000 [39:26<65:50:36,  2.43s/it]





  4%|▍         | 4305/102000 [39:29<66:02:06,  2.43s/it]





  4%|▍         | 4306/102000 [39:31<65:53:11,  2.43s/it]





  4%|▍         | 4307/102000 [39:33<65:26:07,  2.41s/it]





  4%|▍         | 4308/102000 [39:36<65:21:45,  2.41s/it]





  4%|▍         | 4309/102000 [39:38<65:38:41,  2.42s/it]





  4%|▍         | 4310/102000 [39:41<65:37:58,  2.42s/it]





  4%|▍         | 4311/102000 [39:43<65:45:24,  2.42s/it]





  4%|▍         | 4312/102000 [39:46<65:49:55,  2.43s/it]





  4%|▍         | 4313/102000 [39:48<65:37:22,  2.42s/it]





  4%|▍         | 4314/102000 [39:50<65:30:33,  2.41s/it]





  4%|▍         | 4315/102000 [39:53<65:41:59,  2.42s/it]





  4%|▍         | 4316/102000 [39:55<65:48:19,  2.43s/it]





  4%|▍         | 4317/102000 [39:58<65:58:33,  2.43s/it]





  4%|▍         | 4318/102000 [40:00<65:55:39,  2.43s/it]





  4%|▍         | 4319/102000 [40:03<65:46:44,  2.42s/it]





  4%|▍         | 4320/102000 [40:05<65:52:12,  2.43s/it]





  4%|▍         | 4321/102000 [40:07<65:55:59,  2.43s/it]





  4%|▍         | 4322/102000 [40:10<65:47:13,  2.42s/it]





  4%|▍         | 4323/102000 [40:12<65:57:57,  2.43s/it]





  4%|▍         | 4324/102000 [40:15<65:53:55,  2.43s/it]





  4%|▍         | 4325/102000 [40:17<65:42:08,  2.42s/it]





  4%|▍         | 4326/102000 [40:19<65:33:23,  2.42s/it]





  4%|▍         | 4327/102000 [40:22<65:27:38,  2.41s/it]





  4%|▍         | 4328/102000 [40:24<65:34:55,  2.42s/it]





  4%|▍         | 4329/102000 [40:27<65:51:17,  2.43s/it]





  4%|▍         | 4330/102000 [40:29<65:49:02,  2.43s/it]





  4%|▍         | 4331/102000 [40:32<65:56:06,  2.43s/it]





  4%|▍         | 4332/102000 [40:34<66:00:36,  2.43s/it]





  4%|▍         | 4333/102000 [40:36<66:01:34,  2.43s/it]





  4%|▍         | 4334/102000 [40:39<65:35:27,  2.42s/it]





  4%|▍         | 4335/102000 [40:41<65:44:29,  2.42s/it]





  4%|▍         | 4336/102000 [40:44<65:34:02,  2.42s/it]





  4%|▍         | 4337/102000 [40:46<65:42:29,  2.42s/it]





  4%|▍         | 4338/102000 [40:49<65:43:35,  2.42s/it]





  4%|▍         | 4339/102000 [40:51<65:48:47,  2.43s/it]





  4%|▍         | 4340/102000 [40:53<65:40:05,  2.42s/it]





  4%|▍         | 4341/102000 [40:56<65:43:31,  2.42s/it]





  4%|▍         | 4342/102000 [40:58<65:49:59,  2.43s/it]





  4%|▍         | 4343/102000 [41:01<65:51:49,  2.43s/it]





  4%|▍         | 4344/102000 [41:03<65:46:39,  2.42s/it]





  4%|▍         | 4345/102000 [41:06<65:55:43,  2.43s/it]





  4%|▍         | 4346/102000 [41:08<65:35:03,  2.42s/it]





  4%|▍         | 4347/102000 [41:10<65:35:22,  2.42s/it]





  4%|▍         | 4348/102000 [41:13<65:37:09,  2.42s/it]





  4%|▍         | 4349/102000 [41:15<65:42:43,  2.42s/it]





  4%|▍         | 4350/102000 [41:18<65:51:19,  2.43s/it]





  4%|▍         | 4351/102000 [41:20<65:55:53,  2.43s/it]





  4%|▍         | 4352/102000 [41:23<66:03:04,  2.44s/it]





  4%|▍         | 4353/102000 [41:25<65:57:18,  2.43s/it]





  4%|▍         | 4354/102000 [41:27<65:56:21,  2.43s/it]





  4%|▍         | 4355/102000 [41:30<65:59:19,  2.43s/it]





  4%|▍         | 4356/102000 [41:32<65:56:25,  2.43s/it]





  4%|▍         | 4357/102000 [41:35<65:58:46,  2.43s/it]





  4%|▍         | 4358/102000 [41:37<66:00:41,  2.43s/it]





  4%|▍         | 4359/102000 [41:40<65:55:56,  2.43s/it]





  4%|▍         | 4360/102000 [41:42<65:58:47,  2.43s/it]





  4%|▍         | 4361/102000 [41:44<65:53:31,  2.43s/it]





  4%|▍         | 4362/102000 [41:47<66:11:34,  2.44s/it]





  4%|▍         | 4363/102000 [41:49<65:54:58,  2.43s/it]





  4%|▍         | 4364/102000 [41:52<65:43:04,  2.42s/it]





  4%|▍         | 4365/102000 [41:54<65:44:29,  2.42s/it]





  4%|▍         | 4366/102000 [41:57<65:49:27,  2.43s/it]





  4%|▍         | 4367/102000 [41:59<65:59:17,  2.43s/it]





  4%|▍         | 4368/102000 [42:01<65:53:57,  2.43s/it]





  4%|▍         | 4369/102000 [42:04<65:41:53,  2.42s/it]





  4%|▍         | 4370/102000 [42:06<65:41:33,  2.42s/it]





  4%|▍         | 4371/102000 [42:09<65:26:49,  2.41s/it]





  4%|▍         | 4372/102000 [42:11<65:32:29,  2.42s/it]





  4%|▍         | 4373/102000 [42:13<65:29:47,  2.42s/it]





  4%|▍         | 4374/102000 [42:16<65:36:29,  2.42s/it]





  4%|▍         | 4375/102000 [42:18<65:45:16,  2.42s/it]





  4%|▍         | 4376/102000 [42:21<65:42:02,  2.42s/it]





  4%|▍         | 4377/102000 [42:23<65:46:50,  2.43s/it]





  4%|▍         | 4378/102000 [42:26<65:18:40,  2.41s/it]





  4%|▍         | 4379/102000 [42:28<65:22:41,  2.41s/it]





  4%|▍         | 4380/102000 [42:30<65:25:38,  2.41s/it]





  4%|▍         | 4381/102000 [42:33<65:30:39,  2.42s/it]





  4%|▍         | 4382/102000 [42:35<65:38:20,  2.42s/it]





  4%|▍         | 4383/102000 [42:38<65:55:11,  2.43s/it]





  4%|▍         | 4384/102000 [42:40<65:16:44,  2.41s/it]





  4%|▍         | 4385/102000 [42:42<65:14:05,  2.41s/it]





  4%|▍         | 4386/102000 [42:45<65:27:10,  2.41s/it]





  4%|▍         | 4387/102000 [42:47<65:38:04,  2.42s/it]





  4%|▍         | 4388/102000 [42:50<65:54:01,  2.43s/it]





  4%|▍         | 4389/102000 [42:52<65:43:00,  2.42s/it]





  4%|▍         | 4390/102000 [42:55<65:53:29,  2.43s/it]





  4%|▍         | 4391/102000 [42:57<65:44:50,  2.42s/it]





  4%|▍         | 4392/102000 [42:59<65:47:15,  2.43s/it]





  4%|▍         | 4393/102000 [43:02<65:38:05,  2.42s/it]





  4%|▍         | 4394/102000 [43:04<65:28:05,  2.41s/it]





  4%|▍         | 4395/102000 [43:07<65:35:55,  2.42s/it]





  4%|▍         | 4396/102000 [43:09<65:08:53,  2.40s/it]





  4%|▍         | 4397/102000 [43:12<65:29:36,  2.42s/it]





  4%|▍         | 4398/102000 [43:14<65:41:12,  2.42s/it]





  4%|▍         | 4399/102000 [43:16<65:43:42,  2.42s/it]





  4%|▍         | 4400/102000 [43:19<65:41:53,  2.42s/it]





  4%|▍         | 4401/102000 [43:21<65:31:17,  2.42s/it]





  4%|▍         | 4402/102000 [43:24<65:30:02,  2.42s/it]





  4%|▍         | 4403/102000 [43:26<65:45:41,  2.43s/it]





  4%|▍         | 4404/102000 [43:29<65:37:13,  2.42s/it]





  4%|▍         | 4405/102000 [43:31<65:39:47,  2.42s/it]





  4%|▍         | 4406/102000 [43:33<65:35:47,  2.42s/it]





  4%|▍         | 4407/102000 [43:36<65:24:02,  2.41s/it]





  4%|▍         | 4408/102000 [43:38<65:21:59,  2.41s/it]





  4%|▍         | 4409/102000 [43:41<65:07:44,  2.40s/it]





  4%|▍         | 4410/102000 [43:43<65:28:01,  2.42s/it]





  4%|▍         | 4411/102000 [43:45<65:25:56,  2.41s/it]





  4%|▍         | 4412/102000 [43:48<65:32:13,  2.42s/it]





  4%|▍         | 4413/102000 [43:50<65:37:47,  2.42s/it]





  4%|▍         | 4414/102000 [43:53<65:39:26,  2.42s/it]





  4%|▍         | 4415/102000 [43:55<65:41:19,  2.42s/it]





  4%|▍         | 4416/102000 [43:58<65:36:55,  2.42s/it]





  4%|▍         | 4417/102000 [44:00<65:36:48,  2.42s/it]





  4%|▍         | 4418/102000 [44:02<65:30:57,  2.42s/it]





  4%|▍         | 4419/102000 [44:05<65:27:35,  2.41s/it]





  4%|▍         | 4420/102000 [44:07<65:19:12,  2.41s/it]





  4%|▍         | 4421/102000 [44:10<65:30:32,  2.42s/it]





  4%|▍         | 4422/102000 [44:12<65:21:33,  2.41s/it]





  4%|▍         | 4423/102000 [44:14<65:23:06,  2.41s/it]





  4%|▍         | 4424/102000 [44:17<65:31:46,  2.42s/it]





  4%|▍         | 4425/102000 [44:19<65:21:15,  2.41s/it]





  4%|▍         | 4426/102000 [44:22<65:17:46,  2.41s/it]





  4%|▍         | 4427/102000 [44:24<65:21:24,  2.41s/it]





  4%|▍         | 4428/102000 [44:26<65:37:31,  2.42s/it]





  4%|▍         | 4429/102000 [44:29<65:37:56,  2.42s/it]





  4%|▍         | 4430/102000 [44:31<65:33:42,  2.42s/it]





  4%|▍         | 4431/102000 [44:34<65:44:48,  2.43s/it]





  4%|▍         | 4432/102000 [44:36<65:56:05,  2.43s/it]





  4%|▍         | 4433/102000 [44:39<65:51:59,  2.43s/it]





  4%|▍         | 4434/102000 [44:41<65:54:56,  2.43s/it]





  4%|▍         | 4435/102000 [44:44<66:01:48,  2.44s/it]





  4%|▍         | 4436/102000 [44:46<66:06:40,  2.44s/it]





  4%|▍         | 4437/102000 [44:48<65:53:24,  2.43s/it]





  4%|▍         | 4438/102000 [44:51<65:58:08,  2.43s/it]





  4%|▍         | 4439/102000 [44:53<66:00:53,  2.44s/it]





  4%|▍         | 4440/102000 [44:56<65:52:55,  2.43s/it]





  4%|▍         | 4441/102000 [44:58<65:42:56,  2.42s/it]





  4%|▍         | 4442/102000 [45:00<65:29:12,  2.42s/it]





  4%|▍         | 4443/102000 [45:03<65:48:23,  2.43s/it]





  4%|▍         | 4444/102000 [45:05<65:56:52,  2.43s/it]





  4%|▍         | 4445/102000 [45:08<65:55:38,  2.43s/it]





  4%|▍         | 4446/102000 [45:10<66:00:37,  2.44s/it]





  4%|▍         | 4447/102000 [45:13<65:55:16,  2.43s/it]





  4%|▍         | 4448/102000 [45:15<65:30:22,  2.42s/it]





  4%|▍         | 4449/102000 [45:17<65:37:42,  2.42s/it]





  4%|▍         | 4450/102000 [45:20<65:35:00,  2.42s/it]





  4%|▍         | 4451/102000 [45:22<65:46:29,  2.43s/it]





  4%|▍         | 4452/102000 [45:25<65:54:54,  2.43s/it]





  4%|▍         | 4453/102000 [45:27<65:51:13,  2.43s/it]





  4%|▍         | 4454/102000 [45:30<65:49:18,  2.43s/it]





  4%|▍         | 4455/102000 [45:32<65:46:17,  2.43s/it]





  4%|▍         | 4456/102000 [45:35<65:44:47,  2.43s/it]





  4%|▍         | 4457/102000 [45:37<65:42:35,  2.43s/it]





  4%|▍         | 4458/102000 [45:39<65:48:06,  2.43s/it]





  4%|▍         | 4459/102000 [45:42<65:48:02,  2.43s/it]





  4%|▍         | 4460/102000 [45:44<65:49:27,  2.43s/it]





  4%|▍         | 4461/102000 [45:47<65:39:17,  2.42s/it]





  4%|▍         | 4462/102000 [45:49<65:40:33,  2.42s/it]





  4%|▍         | 4463/102000 [45:51<65:26:15,  2.42s/it]





  4%|▍         | 4464/102000 [45:54<65:36:15,  2.42s/it]





  4%|▍         | 4465/102000 [45:56<65:37:23,  2.42s/it]





  4%|▍         | 4466/102000 [45:59<65:44:32,  2.43s/it]





  4%|▍         | 4467/102000 [46:01<65:44:19,  2.43s/it]





  4%|▍         | 4468/102000 [46:04<65:46:45,  2.43s/it]





  4%|▍         | 4469/102000 [46:06<65:52:29,  2.43s/it]





  4%|▍         | 4470/102000 [46:08<65:39:21,  2.42s/it]





  4%|▍         | 4471/102000 [46:11<65:43:23,  2.43s/it]





  4%|▍         | 4472/102000 [46:13<65:46:51,  2.43s/it]





  4%|▍         | 4473/102000 [46:16<65:54:54,  2.43s/it]





  4%|▍         | 4474/102000 [46:18<65:49:29,  2.43s/it]





  4%|▍         | 4475/102000 [46:21<65:33:52,  2.42s/it]





  4%|▍         | 4476/102000 [46:23<65:21:24,  2.41s/it]





  4%|▍         | 4477/102000 [46:25<65:17:26,  2.41s/it]





  4%|▍         | 4478/102000 [46:28<65:04:54,  2.40s/it]





  4%|▍         | 4479/102000 [46:30<64:47:19,  2.39s/it]





  4%|▍         | 4480/102000 [46:33<64:57:19,  2.40s/it]





  4%|▍         | 4481/102000 [46:35<65:08:57,  2.41s/it]





  4%|▍         | 4482/102000 [46:37<65:05:49,  2.40s/it]





  4%|▍         | 4483/102000 [46:40<64:48:48,  2.39s/it]





  4%|▍         | 4484/102000 [46:42<64:48:32,  2.39s/it]





  4%|▍         | 4485/102000 [46:44<64:31:42,  2.38s/it]





  4%|▍         | 4486/102000 [46:47<64:57:38,  2.40s/it]





  4%|▍         | 4487/102000 [46:49<65:29:57,  2.42s/it]





  4%|▍         | 4488/102000 [46:52<65:38:30,  2.42s/it]





  4%|▍         | 4489/102000 [46:54<65:27:22,  2.42s/it]





  4%|▍         | 4490/102000 [46:57<65:06:53,  2.40s/it]





  4%|▍         | 4491/102000 [46:59<65:01:12,  2.40s/it]





  4%|▍         | 4492/102000 [47:01<64:51:15,  2.39s/it]





  4%|▍         | 4493/102000 [47:04<65:01:27,  2.40s/it]





  4%|▍         | 4494/102000 [47:06<64:58:35,  2.40s/it]





  4%|▍         | 4495/102000 [47:09<65:10:12,  2.41s/it]





  4%|▍         | 4496/102000 [47:11<65:12:50,  2.41s/it]





  4%|▍         | 4497/102000 [47:13<64:31:12,  2.38s/it]





  4%|▍         | 4498/102000 [47:16<64:04:29,  2.37s/it]





  4%|▍         | 4499/102000 [47:18<63:58:23,  2.36s/it]





  4%|▍         | 4500/102000 [47:20<63:27:19,  2.34s/it]





  4%|▍         | 4501/102000 [47:23<63:34:04,  2.35s/it]





  4%|▍         | 4502/102000 [47:25<64:04:03,  2.37s/it]





  4%|▍         | 4503/102000 [47:27<64:10:52,  2.37s/it]





KeyboardInterrupt



In [None]:
# Flip to True to restore saved weights into `model` and `model_target`
# before running evaluation instead of using the in-memory networks.
load_to_eval=False

if load_to_eval:
    # Deserialize the checkpoint file once and reuse it for both networks;
    # the file holds both state dicts under separate keys.
    eval_checkpoint = torch.load(f'checkpoints/{env_name}_diff.pth')
    model.load_state_dict(eval_checkpoint['model_state_dict'])
    model_target.load_state_dict(eval_checkpoint['model_target_state_dict'])
    # Drop the temporary reference so the loaded tensors are not kept alive
    # in the notebook namespace after the weights have been copied.
    del eval_checkpoint

In [None]:
import matplotlib.pyplot as plt

# Number of parallel evaluation environments.
num_envs = 1

# Build the vectorised Atari environment and wrap it in a single expression.
# To watch the agent play, additionally pass render_mode='human' to gym.vector.make.
env = MaxLast2FrameSkipWrapper(
    gym.vector.make(f"{env_name}NoFrameskip-v4", num_envs=num_envs),
    seed=SEED,
)

def eval_phase(eval_runs=50, max_eval_steps=27000, num_envs=1):
    """Play evaluation episodes and collect their undiscounted returns.

    Relies on notebook-level names: ``env``, ``preprocess``, ``epsilon_greedy``,
    ``model_target``, ``len_memory`` and ``SEED``.

    Actions are taken from ``actions_to_step``; whenever it is empty it is
    refilled by calling ``epsilon_greedy`` on the stack of the last four
    preprocessed frames.

    Args:
        eval_runs: Number of episode returns to record before stopping.
        max_eval_steps: Maximum number of environment steps per episode. An
            episode that exceeds it is recorded with the return accumulated so
            far, and the environment is reset so the next run starts fresh.
        num_envs: Number of parallel environments held by ``env``. Only
            ``num_envs == 1`` is exercised by ``eval``; the step cap resets
            every environment at once.

    Returns:
        A list of 0-d float tensors, one return per recorded episode.
    """
    def fresh_frame_stack():
        """Reset ``env``; return the first frame and a stack holding it four times."""
        first_state, _ = env.reset()
        first_state = preprocess(first_state)
        return first_state, deque([first_state]*4, maxlen=4)

    scores = []

    state, states = fresh_frame_stack()
    print(f"init state {state.shape}")

    actions_to_step = []
    eps_reward = torch.tensor([0]*num_envs, dtype=torch.float)  # running return per env
    step = np.array([0]*num_envs)                               # steps in the current episode
    eval_run = 0

    # The context manager closes the bar even if evaluation is interrupted.
    with tqdm.tqdm(total=eval_runs) as progress_bar:
        while eval_run<eval_runs:
            #seed_np_torch(SEED+eval_run)
            env.seed = SEED+eval_run
            # NOTE(review): the target network is switched to train mode during
            # evaluation -- confirm this is intentional.
            model_target.train()

            if len(actions_to_step)==0:
                actions_to_step = epsilon_greedy(torch.cat(list(states),-3).unsqueeze(0), len_memory, grad_step=35000, actions_to_step=actions_to_step).cpu()
            action = actions_to_step.pop(0).squeeze()

            state, reward, terminated, truncated, info = env.step([action.numpy()] if num_envs==1 else action.numpy())
            states.append(preprocess(state))

            eps_reward += reward
            step += 1

            # Episodes that ended on this step (terminated or truncated).
            done_envs = np.logical_or(terminated, truncated).astype(float).nonzero()[0]
            if len(done_envs)>0:
                for idx in done_envs:
                    #wandb.log({'eval_eps_reward': eps_reward[idx].sum()})
                    scores.append(eps_reward[idx].clone())
                    eval_run += 1
                    step[idx] = 0
                eps_reward[done_envs] = 0
                progress_bar.update(len(done_envs))

            # Enforce the per-episode step cap on every step. Previously this
            # check only ran right after an episode had ended and its step
            # counter had been zeroed, so it could never trigger.
            capped_envs = (step>max_eval_steps).nonzero()[0]
            if len(capped_envs)>0:
                for idx in capped_envs:
                    scores.append(eps_reward[idx].clone())
                    eval_run += 1
                progress_bar.update(len(capped_envs))

                # Start the next run from a fresh episode: reset the env, the
                # frame stack, any queued actions and the per-env counters.
                _, states = fresh_frame_stack()
                actions_to_step = []
                eps_reward[:] = 0
                step[:] = 0

    return scores



def eval(eval_runs=50, max_eval_steps=27000, num_envs=1):
    """Run the evaluation phase, report summary statistics and persist them.

    Collects episode returns with ``eval_phase``, sorts them, and computes the
    mean over all runs plus the mean and standard deviation over the middle
    half of the sorted returns (the lowest and highest quarter are dropped).
    The statistics are printed, the sorted returns are plotted, a row is
    appended to ``results.csv`` via ``add_to_csv`` and a text summary is
    written under ``results/``.

    Args:
        eval_runs: Number of evaluation episodes to run.
        max_eval_steps: Per-episode step cap forwarded to ``eval_phase``.
        num_envs: Number of parallel environments; must be 1.

    Returns:
        1-D tensor with the episode returns sorted in ascending order.

    Raises:
        AssertionError: If ``num_envs`` is not 1.
    """
    import os  # local import so this cell does not depend on an import elsewhere

    assert num_envs==1, 'The code for num eval envs > 1 is messed up.'

    scores = eval_phase(eval_runs, max_eval_steps, num_envs)
    scores = torch.stack(scores)
    scores, _ = scores.sort()

    # Drop the lowest and highest quarter. The end index is explicit because
    # ``scores[k:-k]`` with k == 0 (fewer than 4 runs) is an empty slice, which
    # would turn the statistics below into NaN.
    n_scores = len(scores)
    trim = n_scores//4
    iq = scores[trim:n_scores-trim]
    iqm = iq.mean()
    iqs = iq.std()  # NaN when only one score remains (sample std is undefined)

    print(f"Scores Mean {scores.mean()}")
    print(f"Inter Quantile Mean {iqm}")
    print(f"Inter Quantile STD {iqs}")


    plt.xlabel('Episode (Sorted by Reward)')
    plt.ylabel('Reward')
    plt.plot(scores)

    new_row = {'env_name': env_name, 'mean': scores.mean().item(), 'iqm': iqm.item(), 'std': iqs.item(), 'seed': SEED}
    add_to_csv('results.csv', new_row)

    # Make sure the output directory exists so a long evaluation is not lost to
    # a FileNotFoundError at the very end.
    os.makedirs('results', exist_ok=True)
    with open(f'results/{env_name}-{SEED}.txt', 'w') as f:
        f.write(f" Scores Mean {scores.mean()}\n Inter Quantile Mean {iqm}\n Inter Quantile STD {iqs}")


    return scores

# Run the evaluation over 100 episodes on a single environment; `scores` holds
# the episode returns sorted in ascending order.
scores = eval(eval_runs=100, num_envs=1)

In [None]:
'''
import pandas as pd
new_row = {'env_name': "Amidar", 'mean': 11.0, 'iqm': 11.0, 'std': 11.0, 'seed': 000}

df = pd.read_csv('results.csv',sep=',')
df.loc[len(df.index)] = new_row    
#df.to_csv('results.csv', index=False)

df
'''
# NOTE: add_to_csv suddenly stopped working; the disabled snippet above is a manual pandas fallback for appending a row to results.csv.