In [1]:
import os
import sys
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.distributions.normal import Normal
from tqdm import tqdm
from collections import deque

import time
import psutil
import datetime
import subprocess
# import torch
import torchvision
from tensorboard import program
import webbrowser
import torchvision.transforms as transforms
from torch.utils.tensorboard import SummaryWriter

# ANSI escape sequences used to colour console messages.
RED = "\033[31m"
GREEN = "\033[32m"
YELLOW = "\033[33m"
BLUE = "\033[34m"
MAGENTA = "\033[35m"
CYAN = "\033[36m"
RESET = "\033[0m"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---------------------------------------------------------------------------
# Hyper-parameters
# ---------------------------------------------------------------------------
# Learning rate meant for the actor (policy) network.
# NOTE(review): PPO.__init__ passes its own literal learning rates to Adam and
# does not read lr_actor / lr_critic / l2_rate -- confirm which values are intended.
lr_actor = 3e-4
# Learning rate meant for the critic (state-value) network.
lr_critic = 3e-4
# Number of training iterations (one rollout + one PPO update each).
Iter = 100_000
# Maximum number of environment steps in a single episode.
MAX_STEP = 1_000
# Discount factor applied to future rewards.
gamma = 0.98
# GAE lambda: trades bias against variance in the advantage estimate.
lambd = 0.95
# Minibatch size used for each gradient step.
batch_size = 64
# PPO clipping range: the probability ratio is clamped to [1 - epsilon, 1 + epsilon].
epsilon = 0.2
# Weight-decay coefficient meant for the critic's Adam optimizer.
l2_rate = 1e-3

# Save a checkpoint every `save_freq` iterations.
save_freq = 100

# Module-level default of the "save now" flag.
save_flag = False

[33m[MODEL/TENSORBOARD][0m The data will be saved in [33m../runs/20240712_04-24-57/[0m directory!


In [2]:
# Actor class: Used to choose actions of a continuous action space.

class Actor(nn.Module):
    """Gaussian policy network for a continuous action space.

    Two tanh hidden layers of 64 units feed two linear heads: ``mu`` (the mean
    of each action dimension) and ``sigma`` (the log standard deviation, which
    ``forward`` exponentiates). Actions are sampled from the resulting
    independent Normal distributions.
    """

    def __init__(self, N_S, N_A, chkpt_dir):
        """Build the network and move it to the available device.

        Args:
            N_S: Size of the state (observation) vector.
            N_A: Size of the action vector.
            chkpt_dir: Directory in which the ``_actor`` checkpoint file lives.
        """
        super(Actor, self).__init__()
        self.fc1 = nn.Linear(N_S, 64)
        self.fc2 = nn.Linear(64, 64)
        self.sigma = nn.Linear(64, N_A)
        self.mu = nn.Linear(64, N_A)
        # Shrink the mean head's initial weights and zero its bias so the
        # initial action means start close to zero.
        self.mu.weight.data.mul_(0.1)
        self.mu.bias.data.mul_(0.0)
        # Actions are drawn from a Gaussian; another distribution class could
        # be plugged in here.
        self.distribution = torch.distributions.Normal

        self.checkpoint_dir = chkpt_dir
        self.checkpoint_file = os.path.join(self.checkpoint_dir, '_actor')

        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

    def set_init(self, layers):
        """Re-initialise ``layers`` with N(0, 0.1) weights and zero biases.

        Not called by the code visible in this notebook.
        """
        for layer in layers:
            nn.init.normal_(layer.weight, mean=0., std=0.1)
            nn.init.constant_(layer.bias, 0.)

    def forward(self, s):
        """Return ``(mu, sigma)`` of the action distribution for states ``s``."""
        # tanh is bounded in [-1, 1] and keeps the hidden activations stable.
        x = torch.tanh(self.fc1(s))
        x = torch.tanh(self.fc2(x))
        mu = self.mu(x)
        # The head predicts log(sigma); exponentiating guarantees sigma > 0.
        log_sigma = self.sigma(x)
        sigma = torch.exp(log_sigma)
        return mu, sigma

    def choose_action(self, s):
        """Sample one action for a single state.

        Args:
            s: Array-like state of length ``N_S``.

        Returns:
            A numpy array of length ``N_A`` sampled from the current policy.
        """
        s = torch.from_numpy(np.array(s).astype(np.float32)).unsqueeze(0).to(self.device)
        # Acting never needs gradients; skip building the autograd graph.
        with torch.no_grad():
            mu, sigma = self.forward(s)
            Pi = self.distribution(mu, sigma)
            action = Pi.sample()
        return action.cpu().numpy().squeeze(0)

    def save_model(self):
        """Write the network weights to ``self.checkpoint_file``."""
        torch.save(self.state_dict(), self.checkpoint_file)

    def load_model(self):
        """Load the network weights from ``self.checkpoint_file``.

        ``map_location`` remaps the stored tensors onto this instance's device
        so a checkpoint saved on a GPU can be restored on a CPU-only machine.
        """
        self.load_state_dict(torch.load(self.checkpoint_file, map_location=self.device))

In [3]:
# Critic class : Used to estimate V(state) the state value function through a NN.
class Critic(nn.Module):
    """State-value network: estimates V(s) with two tanh hidden layers."""

    def __init__(self, N_S, chkpt_dir):
        """Build the network and move it to the available device.

        Args:
            N_S: Size of the state (observation) vector.
            chkpt_dir: Directory in which the ``_critic`` checkpoint file lives.
        """
        super(Critic, self).__init__()
        self.fc1 = nn.Linear(N_S, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 1)
        # Scale the initial output weights by 0.1 for more stable early
        # training (a common trick when using tanh/sigmoid activations).
        self.fc3.weight.data.mul_(0.1)
        # Set every element of the output bias to zero.
        self.fc3.bias.data.mul_(0.0)

        self.checkpoint_dir = chkpt_dir
        self.checkpoint_file = os.path.join(self.checkpoint_dir, '_critic')

        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.to(self.device)

    def set_init(self, layers):
        """Re-initialise ``layers`` with N(0, 0.1) weights and zero biases.

        Not called by the code visible in this notebook.
        """
        for layer in layers:
            nn.init.normal_(layer.weight, mean=0., std=0.1)
            nn.init.constant_(layer.bias, 0.)

    def forward(self, s):
        """Return the estimated state values, one per row of ``s``."""
        x = torch.tanh(self.fc1(s))
        x = torch.tanh(self.fc2(x))
        values = self.fc3(x)
        return values

    def save_model(self):
        """Write the network weights to ``self.checkpoint_file``."""
        torch.save(self.state_dict(), self.checkpoint_file)

    def load_model(self):
        """Load the network weights from ``self.checkpoint_file``.

        ``map_location`` remaps the stored tensors onto this instance's device
        so a checkpoint saved on a GPU can be restored on a CPU-only machine.
        """
        self.load_state_dict(torch.load(self.checkpoint_file, map_location=self.device))

In [4]:
class PPO:
    """Proximal Policy Optimization agent using the clipped surrogate objective.

    Holds an ``Actor`` (Gaussian policy) and a ``Critic`` (state-value network)
    together with their Adam optimizers, and updates both from a rollout buffer.
    """

    def __init__(self, N_S, N_A, log_dir):
        """Create the networks and optimizers.

        Args:
            N_S: Size of the state vector.
            N_A: Size of the action vector.
            log_dir: Directory handed to the networks for their checkpoints.
        """
        self.log_dir = log_dir

        self.actor_net = Actor(N_S, N_A, log_dir)
        self.critic_net = Critic(N_S, log_dir)
        # NOTE(review): these literals differ from the module-level
        # lr_actor / lr_critic / l2_rate settings -- confirm which are intended.
        self.actor_optim = optim.Adam(self.actor_net.parameters(), lr=1e-4)
        self.critic_optim = optim.Adam(self.critic_net.parameters(), lr=1e-3, weight_decay=1e-3)
        self.critic_loss_func = torch.nn.MSELoss()

        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    def train(self, memory):
        """Run one PPO update over the collected transitions.

        Args:
            memory: Sequence of ``[state, action, reward, mask]`` entries in
                time order; ``mask`` is 0 on the last step of an episode and
                1 otherwise.
        """
        if len(memory) == 0:
            # Nothing was collected, so there is nothing to learn from.
            return

        states = torch.tensor(np.array([m[0] for m in memory]), dtype=torch.float32).to(self.device)
        actions = torch.tensor(np.array([m[1] for m in memory]), dtype=torch.float32).to(self.device)
        rewards = torch.tensor(np.array([m[2] for m in memory]), dtype=torch.float32).to(self.device)
        masks = torch.tensor(np.array([m[3] for m in memory]), dtype=torch.float32).to(self.device)

        # Everything computed from the pre-update networks is a fixed target,
        # so no autograd graph is needed for it.
        with torch.no_grad():
            # Current state-value estimates V(s).
            values = self.critic_net(states)
            # Discounted returns and normalized GAE advantages.
            returns, advants = self.get_gae(rewards, masks, values)
            # Log-probability of the taken actions under the old policy.
            old_mu, old_std = self.actor_net(states)
            old_pi = self.actor_net.distribution(old_mu, old_std)
            old_log_prob = old_pi.log_prob(actions).sum(1, keepdim=True)

        n = len(states)
        arr = np.arange(n)
        for epoch in range(1):
            np.random.shuffle(arr)
            # Trailing samples that do not fill a whole minibatch are skipped.
            for i in range(n // batch_size):
                b_index = arr[batch_size * i:batch_size * (i + 1)]
                b_states = states[b_index]
                b_advants = advants[b_index].unsqueeze(1)
                b_actions = actions[b_index]
                b_returns = returns[b_index].unsqueeze(1)

                # Log-probability of the same actions under the current policy.
                mu, std = self.actor_net(b_states)
                pi = self.actor_net.distribution(mu, std)
                new_prob = pi.log_prob(b_actions).sum(1, keepdim=True)
                old_prob = old_log_prob[b_index]
                # A fixed KL penalty (see kl_divergence) was tried instead of
                # clipping and empirically worked less well.
                ratio = torch.exp(new_prob - old_prob)
                surrogate_loss = ratio * b_advants

                # Critic update: MSE between V(s) and the discounted return.
                b_values = self.critic_net(b_states)
                critic_loss = self.critic_loss_func(b_values, b_returns)

                self.critic_optim.zero_grad()
                critic_loss.backward()
                self.critic_optim.step()

                # Clipped surrogate objective.
                clipped_ratio = torch.clamp(ratio, 1.0 - epsilon, 1.0 + epsilon)
                clipped_loss = clipped_ratio * b_advants
                actor_loss = -torch.min(surrogate_loss, clipped_loss).mean()

                # NOTE(review): the barrier below is evaluated on recorded
                # states only, so it does not depend on the actor's parameters
                # and adds no gradient to the policy update -- confirm intent.
                walker_xvel = torch.tensor(
                    [get_walker_x_velocity(state) for state in b_states],
                    dtype=torch.float32,
                ).to(self.device)
                actor_loss = augmented_objective(actor_loss, walker_xvel, 3, 20)

                # Actor update.
                self.actor_optim.zero_grad()
                actor_loss.backward()
                self.actor_optim.step()

    def kl_divergence(self, old_mu, old_sigma, mu, sigma):
        """KL(old || new) between diagonal Gaussians, summed over actions.

        The old parameters are detached, so gradients flow only through the
        new ``mu`` / ``sigma``. Could be used as a penalty term in the
        objective instead of clipping.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        old_mu = old_mu.detach()
        old_sigma = old_sigma.detach()

        # KL(N(mu_o, s_o) || N(mu_n, s_n))
        #   = log(s_n / s_o) + (s_o^2 + (mu_o - mu_n)^2) / (2 s_n^2) - 1/2
        kl = torch.log(sigma) - torch.log(old_sigma) + (old_sigma.pow(2) + (old_mu - mu).pow(2)) / \
             (2.0 * sigma.pow(2)) - 0.5
        return kl.sum(1, keepdim=True)

    def get_gae(self, rewards, masks, values):
        """Compute discounted returns and normalized GAE advantages.

        Args:
            rewards: Per-step rewards, in time order.
            masks: 0 where an episode ended at that step, 1 otherwise.
            values: Critic estimates for the same steps; any shape holding one
                value per step (it is flattened).

        Returns:
            ``(returns, advants)``, both 1-D tensors with one entry per step.
        """
        rewards = torch.as_tensor(rewards, dtype=torch.float32, device=self.device)
        masks = torch.as_tensor(masks, dtype=torch.float32, device=self.device)
        # Values are targets here, never differentiated through.
        values = torch.as_tensor(values, dtype=torch.float32, device=self.device).detach().reshape(-1)

        returns = torch.zeros_like(rewards)
        advants = torch.zeros_like(rewards)

        running_returns = 0
        previous_value = 0
        running_advants = 0
        # Walk backwards so each step can reuse the quantities of its successor;
        # the mask cuts the recursion at episode boundaries.
        for t in reversed(range(0, len(rewards))):
            # Discounted return.
            running_returns = rewards[t] + gamma * running_returns * masks[t]
            # TD error: r_t + gamma * V(s_{t+1}) - V(s_t).
            running_tderror = rewards[t] + gamma * previous_value * masks[t] - values[t]
            # GAE recursion.
            running_advants = running_tderror + gamma * lambd * running_advants * masks[t]

            returns[t] = running_returns
            previous_value = values[t]
            advants[t] = running_advants

        # Standardize the advantages; the epsilon avoids a division by zero
        # when they are all equal, and a single sample has no defined std.
        if advants.numel() > 1:
            advants = (advants - advants.mean()) / (advants.std() + 1e-8)
        return returns, advants

    def save(self, filename):
        """Save both networks and both optimizers under the ``filename`` prefix."""
        filename = str(filename)
        torch.save(self.actor_net.state_dict(), filename + "_actor")
        torch.save(self.critic_net.state_dict(), filename + "_critic")
        torch.save(self.actor_optim.state_dict(), filename + "_actor_optimizer")
        torch.save(self.critic_optim.state_dict(), filename + "_critic_optimizer")

    def load(self, filename):
        """Restore what ``save`` wrote, remapping tensors onto ``self.device``."""
        filename = str(filename)
        self.actor_net.load_state_dict(torch.load(filename + "_actor", map_location=self.device))
        self.critic_net.load_state_dict(torch.load(filename + "_critic", map_location=self.device))
        self.actor_optim.load_state_dict(torch.load(filename + "_actor_optimizer", map_location=self.device))
        self.critic_optim.load_state_dict(torch.load(filename + "_critic_optimizer", map_location=self.device))

In [5]:
# Creation of a class to normalize the states
class Normalize:
    """Running mean/std normalizer for state vectors.

    In training mode every call folds the sample into running statistics
    (Welford's online update) before normalizing; otherwise the stored
    statistics are applied unchanged. Outputs are clipped to [-5, 5].
    """

    def __init__(self, N_S, chkpt_dir, train_mode=True):
        """
        Args:
            N_S: Size of the state vector.
            chkpt_dir: Directory in which ``_normalize.npy`` is stored.
            train_mode: When True, each call updates the running statistics.
        """
        self.mean = np.zeros((N_S,))
        self.std = np.zeros((N_S, ))
        # Running sum of squared deviations from the mean (Welford's M2).
        self.stdd = np.zeros((N_S, ))
        # Number of samples folded into the statistics so far.
        self.n = 0

        self.train_mode = train_mode

        self.checkpoint_dir = chkpt_dir
        self.checkpoint_file = os.path.join(self.checkpoint_dir, '_normalize.npy')

        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    def __call__(self, x):
        """Normalize ``x`` and, in training mode, update the statistics first.

        Returns:
            ``(x - mean) / (std + 1e-8)`` clipped to [-5, 5].
        """
        x = np.asarray(x)
        if self.train_mode:
            self.n += 1
            if self.n == 1:
                # Copy so later in-place changes to the caller's array cannot
                # silently alter the stored mean.
                self.mean = x.copy()
                # A single sample has no spread; the numerator below is zero
                # for it, so the output is zero regardless.
                self.std = np.zeros(x.shape)
            else:
                old_mean = self.mean.copy()
                self.mean = old_mean + (x - old_mean) / self.n
                self.stdd = self.stdd + (x - old_mean) * (x - self.mean)
                # Sample standard deviation (n - 1 in the denominator).
                self.std = np.sqrt(self.stdd / (self.n - 1))

        x = x - self.mean
        x = x / (self.std + 1e-8)
        x = np.clip(x, -5, +5)
        return x

    def update(self, x):
        """Replace mean/std with the column statistics of the batch ``x``."""
        self.mean = np.mean(x, axis=0)
        self.std = np.std(x, axis=0) + 1e-8

    def save_params(self):
        """Save the statistics to ``self.checkpoint_file``.

        ``n`` and ``stdd`` are stored alongside ``mean`` and ``std`` so that a
        resumed training run can continue the running update.
        """
        np.save(
            self.checkpoint_file,
            {'mean': self.mean, 'std': self.std, 'stdd': self.stdd, 'n': self.n},
        )

    def load_params(self):
        """Load the statistics written by ``save_params``.

        Files that only contain ``mean`` and ``std`` are still accepted; the
        running counters are then left as they are.
        """
        # NOTE: allow_pickle=True unpickles the file's contents; only load
        # checkpoint files from a trusted source.
        params = np.load(self.checkpoint_file, allow_pickle=True).item()
        self.mean = params['mean']
        self.std = params['std']
        if 'stdd' in params and 'n' in params:
            self.stdd = params['stdd']
            self.n = int(params['n'])

In [6]:
def get_walker_x_velocity(state):
    """Return element 8 of ``state``.

    NOTE(review): per the function name this is the walker's x-velocity;
    confirm index 8 against the environment's observation layout.
    """
    return state[8]

def logarithmic_barrier(state, constraint_max):
    """Log-barrier value ``-log(constraint_max - state)``, element-wise.

    The result grows without bound as ``state`` approaches ``constraint_max``
    from below; no check is made that ``state`` stays below the limit.
    """
    slack = constraint_max - state
    return torch.log(slack).neg()

def augmented_objective(actor_loss, state, constraint_max, t):
    """Add the mean log-barrier penalty, scaled by ``1 / t``, to ``actor_loss``.

    Args:
        actor_loss: Loss value to augment.
        state: Tensor of constrained quantities, passed to ``logarithmic_barrier``.
        constraint_max: Upper limit used by the barrier.
        t: Divisor of the barrier; a larger ``t`` means a weaker penalty.
    """
    scaled_barrier = logarithmic_barrier(state, constraint_max) / t
    penalty = scaled_barrier.mean()
    return actor_loss + penalty

In [7]:
def main():
    """Train a PPO agent on ``Walker2d-v4``.

    Each iteration collects whole episodes until at least 2048 steps are in
    the rollout buffer, prints the average episode score and the average of
    state element 8, periodically saves checkpoints, then runs one PPO update.
    Reads the module-level ``log_dir`` for all output paths.
    """
    env = gym.make('Walker2d-v4', render_mode='rgb_array')
    try:
        # Sizes of the state and action vectors.
        N_S = env.observation_space.shape[0]
        N_A = env.action_space.shape[0]

        ppo = PPO(N_S, N_A, log_dir)

        # Running normalization of the states, for training stability.
        normalize = Normalize(N_S, log_dir)
        episodes = 0
        episode_data = []

        for iteration in tqdm(range(Iter)):
            memory = deque()
            scores = []
            steps = 0
            xvel = []
            while steps < 2048:  # rollout horizon
                episodes += 1
                state, _ = env.reset()
                s = normalize(state)
                score = 0
                for _ in range(MAX_STEP):
                    steps += 1
                    # Sample an action (array of N_A values) from the policy.
                    a = ppo.actor_net.choose_action(s)
                    # Gymnasium's step returns
                    # (observation, reward, terminated, truncated, info).
                    s_, r, terminated, truncated, info = env.step(a)
                    s_ = normalize(s_)
                    done = terminated or truncated

                    # mask is 0 on the last step of an episode, 1 otherwise.
                    mask = 0 if done else 1
                    memory.append([s, a, r, mask])
                    # Element 8 of the *normalized* state.
                    xvel.append(s[8])
                    score += r
                    s = s_

                    if done:
                        break
                scores.append(score)
            score_avg = np.mean(scores)
            xvel_avg = np.mean(xvel)
            print('{} episode score is {:.2f}, average_xvel is {:.3f}'.format(episodes, score_avg, xvel_avg))
            episode_data.append([iteration + 1, score_avg])
            if (iteration + 1) % save_freq == 0:
                ppo.actor_net.save_model()
                ppo.critic_net.save_model()
                normalize.save_params()
                print(f"{GREEN} >> Successfully saved models! {RESET}")

                np.save(log_dir + "reward.npy", episode_data)

            ppo.train(memory)
    finally:
        # Release the simulator and renderer even if training is interrupted.
        env.close()
        
if __name__ == "__main__":
    # Every run writes into its own timestamped directory; main() reads the
    # module-level log_dir defined here.
    current_time = datetime.datetime.now().strftime("%Y%m%d_%H-%M-%S")
    log_dir = f"../runs/{current_time}/"
    os.makedirs(log_dir, exist_ok=True)
    writer = SummaryWriter(log_dir)
    print(f"{YELLOW}[MODEL/TENSORBOARD]{RESET} The data will be saved in {YELLOW}{log_dir}{RESET} directory!")

    # Optional: launch TensorBoard and open it in a browser.
    # tb = program.TensorBoard()
    # tb.configure(argv=[None, '--logdir', f"../runs/franka_cabinet/{current_time}", '--port', '6300'])
    # url = tb.launch()
    # webbrowser.open_new(url)

    try:
        main()
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()

  0%|          | 0/100000 [00:00<?, ?it/s]

102 episode score is -0.19, average_xvel is 0.056


  0%|          | 1/100000 [00:04<112:51:44,  4.06s/it]

203 episode score is 0.11, average_xvel is 0.021


  0%|          | 2/100000 [00:07<101:19:54,  3.65s/it]

302 episode score is 1.63, average_xvel is 0.104


  0%|          | 3/100000 [00:10<97:11:13,  3.50s/it] 

404 episode score is 0.99, average_xvel is 0.042


  0%|          | 4/100000 [00:14<95:07:44,  3.42s/it]

496 episode score is 2.89, average_xvel is 0.137


  0%|          | 5/100000 [00:17<94:09:40,  3.39s/it]

589 episode score is 3.63, average_xvel is 0.162


  0%|          | 6/100000 [00:20<93:40:18,  3.37s/it]

673 episode score is 4.37, average_xvel is 0.161


  0%|          | 7/100000 [00:24<93:31:39,  3.37s/it]

756 episode score is 4.42, average_xvel is 0.141


  0%|          | 8/100000 [00:27<93:15:14,  3.36s/it]

832 episode score is 7.87, average_xvel is 0.282


  0%|          | 9/100000 [00:30<92:59:02,  3.35s/it]

909 episode score is 5.82, average_xvel is 0.156


  0%|          | 10/100000 [00:34<92:47:41,  3.34s/it]

977 episode score is 8.69, average_xvel is 0.234


  0%|          | 11/100000 [00:37<92:43:15,  3.34s/it]

1047 episode score is 9.42, average_xvel is 0.261


  0%|          | 12/100000 [00:40<92:42:57,  3.34s/it]

1110 episode score is 11.83, average_xvel is 0.299


  0%|          | 13/100000 [00:44<92:39:20,  3.34s/it]

1162 episode score is 13.16, average_xvel is 0.229


  0%|          | 14/100000 [00:47<92:43:57,  3.34s/it]

1216 episode score is 13.38, average_xvel is 0.238


  0%|          | 15/100000 [00:50<92:59:36,  3.35s/it]

1257 episode score is 26.42, average_xvel is 0.456


  0%|          | 16/100000 [00:54<93:11:35,  3.36s/it]

1293 episode score is 24.65, average_xvel is 0.295


  0%|          | 17/100000 [00:57<93:24:07,  3.36s/it]

1333 episode score is 29.74, average_xvel is 0.460


  0%|          | 18/100000 [01:00<94:11:04,  3.39s/it]

1367 episode score is 24.63, average_xvel is 0.222


  0%|          | 19/100000 [01:04<93:57:13,  3.38s/it]

1395 episode score is 41.03, average_xvel is 0.404


  0%|          | 20/100000 [01:07<93:54:31,  3.38s/it]

1422 episode score is 45.75, average_xvel is 0.454


  0%|          | 21/100000 [01:11<93:27:31,  3.37s/it]

1444 episode score is 74.90, average_xvel is 0.619


  0%|          | 22/100000 [01:14<95:27:23,  3.44s/it]

1462 episode score is 99.74, average_xvel is 0.709


  0%|          | 23/100000 [01:18<95:43:12,  3.45s/it]

1477 episode score is 148.38, average_xvel is 0.936


  0%|          | 24/100000 [01:21<95:38:06,  3.44s/it]

1493 episode score is 165.81, average_xvel is 0.999


  0%|          | 25/100000 [01:25<97:56:41,  3.53s/it]

1506 episode score is 210.26, average_xvel is 1.111


  0%|          | 26/100000 [01:28<98:07:07,  3.53s/it]

1521 episode score is 150.63, average_xvel is 0.757


  0%|          | 27/100000 [01:32<97:31:03,  3.51s/it]

1537 episode score is 145.03, average_xvel is 0.696


  0%|          | 28/100000 [01:35<97:54:34,  3.53s/it]

1550 episode score is 209.02, average_xvel is 0.887


  0%|          | 29/100000 [01:39<98:17:44,  3.54s/it]

1564 episode score is 173.00, average_xvel is 0.737


  0%|          | 30/100000 [01:42<97:24:40,  3.51s/it]

1578 episode score is 183.70, average_xvel is 0.817


  0%|          | 31/100000 [01:46<96:19:32,  3.47s/it]

1592 episode score is 199.10, average_xvel is 0.894


  0%|          | 32/100000 [01:49<95:43:57,  3.45s/it]

1606 episode score is 217.27, average_xvel is 0.914


  0%|          | 33/100000 [01:53<96:17:49,  3.47s/it]

1618 episode score is 271.27, average_xvel is 1.054


  0%|          | 34/100000 [01:56<95:40:58,  3.45s/it]

1629 episode score is 288.81, average_xvel is 0.862


  0%|          | 35/100000 [02:00<97:15:23,  3.50s/it]

1644 episode score is 229.63, average_xvel is 0.910


  0%|          | 36/100000 [02:03<98:58:13,  3.56s/it]

1658 episode score is 244.24, average_xvel is 1.029


  0%|          | 37/100000 [02:07<97:21:46,  3.51s/it]

1671 episode score is 270.95, average_xvel is 1.025


  0%|          | 38/100000 [02:10<96:22:17,  3.47s/it]

1684 episode score is 237.35, average_xvel is 0.750


  0%|          | 39/100000 [02:14<96:27:42,  3.47s/it]

1697 episode score is 265.82, average_xvel is 0.856


  0%|          | 40/100000 [02:17<97:34:18,  3.51s/it]

1708 episode score is 225.50, average_xvel is 0.460


  0%|          | 41/100000 [02:21<96:08:32,  3.46s/it]

1722 episode score is 229.14, average_xvel is 0.708


  0%|          | 42/100000 [02:24<96:59:18,  3.49s/it]

1735 episode score is 230.86, average_xvel is 0.598


  0%|          | 43/100000 [02:28<97:26:18,  3.51s/it]

1746 episode score is 276.79, average_xvel is 0.692


  0%|          | 44/100000 [02:31<95:46:24,  3.45s/it]

1758 episode score is 255.56, average_xvel is 0.568


  0%|          | 45/100000 [02:35<97:21:02,  3.51s/it]

1770 episode score is 273.53, average_xvel is 0.715


  0%|          | 46/100000 [02:38<97:07:32,  3.50s/it]

1781 episode score is 247.84, average_xvel is 0.466


  0%|          | 47/100000 [02:42<96:18:15,  3.47s/it]

1795 episode score is 229.82, average_xvel is 0.731


  0%|          | 48/100000 [02:45<95:05:29,  3.42s/it]

1809 episode score is 262.99, average_xvel is 0.821


  0%|          | 49/100000 [02:48<96:16:02,  3.47s/it]

1822 episode score is 265.06, average_xvel is 0.725


  0%|          | 50/100000 [02:52<96:26:39,  3.47s/it]

1834 episode score is 287.09, average_xvel is 0.746


  0%|          | 51/100000 [02:55<95:54:39,  3.45s/it]

1845 episode score is 282.16, average_xvel is 0.575


  0%|          | 52/100000 [02:59<95:31:26,  3.44s/it]

1856 episode score is 246.06, average_xvel is 0.376


  0%|          | 53/100000 [03:02<95:08:02,  3.43s/it]

1869 episode score is 278.26, average_xvel is 0.715


  0%|          | 54/100000 [03:06<96:20:18,  3.47s/it]

1881 episode score is 251.20, average_xvel is 0.428


  0%|          | 55/100000 [03:09<97:19:49,  3.51s/it]

1893 episode score is 241.79, average_xvel is 0.419


  0%|          | 56/100000 [03:13<97:00:20,  3.49s/it]

1905 episode score is 268.86, average_xvel is 0.566


  0%|          | 57/100000 [03:16<96:36:07,  3.48s/it]

1916 episode score is 304.70, average_xvel is 0.653


  0%|          | 58/100000 [03:20<95:42:40,  3.45s/it]

1928 episode score is 278.48, average_xvel is 0.557


  0%|          | 59/100000 [03:23<97:23:21,  3.51s/it]

1941 episode score is 248.35, average_xvel is 0.466


  0%|          | 60/100000 [03:27<98:19:35,  3.54s/it]

1952 episode score is 289.61, average_xvel is 0.537


  0%|          | 61/100000 [03:30<96:37:34,  3.48s/it]

1963 episode score is 292.37, average_xvel is 0.530


  0%|          | 62/100000 [03:34<95:56:37,  3.46s/it]

1975 episode score is 296.21, average_xvel is 0.603


  0%|          | 63/100000 [03:37<96:39:31,  3.48s/it]

1987 episode score is 269.20, average_xvel is 0.549


  0%|          | 64/100000 [03:40<95:20:32,  3.43s/it]

1999 episode score is 250.78, average_xvel is 0.437


  0%|          | 65/100000 [03:44<94:11:23,  3.39s/it]

2010 episode score is 271.71, average_xvel is 0.418


  0%|          | 66/100000 [03:47<93:27:08,  3.37s/it]

2022 episode score is 271.42, average_xvel is 0.511


  0%|          | 67/100000 [03:50<93:35:40,  3.37s/it]

2035 episode score is 258.11, average_xvel is 0.497


  0%|          | 68/100000 [03:54<94:49:53,  3.42s/it]

2047 episode score is 289.56, average_xvel is 0.599


  0%|          | 69/100000 [03:57<94:28:09,  3.40s/it]

2058 episode score is 303.57, average_xvel is 0.519


  0%|          | 70/100000 [04:01<94:57:16,  3.42s/it]

2072 episode score is 223.65, average_xvel is 0.452


  0%|          | 71/100000 [04:04<94:14:56,  3.40s/it]

2085 episode score is 265.74, average_xvel is 0.517


  0%|          | 72/100000 [04:08<95:38:43,  3.45s/it]

2095 episode score is 307.51, average_xvel is 0.272


  0%|          | 73/100000 [04:11<97:42:42,  3.52s/it]

2107 episode score is 270.43, average_xvel is 0.405


  0%|          | 74/100000 [04:15<98:11:59,  3.54s/it]

2121 episode score is 264.50, average_xvel is 0.649


  0%|          | 75/100000 [04:18<97:53:28,  3.53s/it]

2134 episode score is 253.13, average_xvel is 0.389


  0%|          | 76/100000 [04:22<98:25:15,  3.55s/it]

2145 episode score is 286.09, average_xvel is 0.375


  0%|          | 77/100000 [04:26<97:51:44,  3.53s/it]

2158 episode score is 252.62, average_xvel is 0.368


  0%|          | 78/100000 [04:29<98:38:46,  3.55s/it]

2170 episode score is 260.05, average_xvel is 0.406


  0%|          | 79/100000 [04:33<97:08:45,  3.50s/it]

2181 episode score is 256.77, average_xvel is 0.258


  0%|          | 80/100000 [04:36<95:54:09,  3.46s/it]

2194 episode score is 274.49, average_xvel is 0.612


  0%|          | 81/100000 [04:39<95:32:52,  3.44s/it]

2207 episode score is 276.83, average_xvel is 0.588


  0%|          | 82/100000 [04:43<95:45:09,  3.45s/it]

2220 episode score is 260.41, average_xvel is 0.462


  0%|          | 83/100000 [04:46<95:56:58,  3.46s/it]

2231 episode score is 261.39, average_xvel is 0.260


  0%|          | 84/100000 [04:50<95:06:06,  3.43s/it]

2241 episode score is 308.38, average_xvel is 0.302


  0%|          | 85/100000 [04:53<95:15:16,  3.43s/it]

2254 episode score is 240.69, average_xvel is 0.323


  0%|          | 86/100000 [04:56<95:38:45,  3.45s/it]

2264 episode score is 320.41, average_xvel is 0.395


  0%|          | 87/100000 [05:00<95:24:59,  3.44s/it]

2275 episode score is 264.30, average_xvel is 0.243


  0%|          | 88/100000 [05:03<95:05:32,  3.43s/it]

2288 episode score is 220.29, average_xvel is 0.134


  0%|          | 89/100000 [05:07<96:31:45,  3.48s/it]

2301 episode score is 244.29, average_xvel is 0.323


  0%|          | 90/100000 [05:10<96:43:44,  3.49s/it]

2314 episode score is 265.56, average_xvel is 0.424


  0%|          | 91/100000 [05:14<97:17:53,  3.51s/it]

2327 episode score is 260.81, average_xvel is 0.405


  0%|          | 92/100000 [05:17<97:20:39,  3.51s/it]

2340 episode score is 272.24, average_xvel is 0.537


  0%|          | 93/100000 [05:21<96:37:34,  3.48s/it]

2353 episode score is 257.27, average_xvel is 0.422


  0%|          | 94/100000 [05:24<96:06:28,  3.46s/it]

2366 episode score is 262.99, average_xvel is 0.420


  0%|          | 95/100000 [05:28<96:24:36,  3.47s/it]

2378 episode score is 295.92, average_xvel is 0.465


  0%|          | 96/100000 [05:31<96:56:42,  3.49s/it]

2391 episode score is 270.34, average_xvel is 0.491


  0%|          | 97/100000 [05:35<96:30:25,  3.48s/it]

2403 episode score is 232.80, average_xvel is 0.132


  0%|          | 98/100000 [05:38<95:59:23,  3.46s/it]

2418 episode score is 246.11, average_xvel is 0.534


  0%|          | 99/100000 [05:42<96:15:42,  3.47s/it]

2430 episode score is 275.34, average_xvel is 0.353
[32m >> Successfully saved normalization parameters! Mean: [ 1.17062917  0.04600893 -0.05197994 -0.04930541  0.62093054 -0.03586666
 -0.05597222  0.66294592  0.21849592 -0.38591171 -0.45941083 -0.59394457
 -0.47295524  0.13175865 -0.59010359 -0.46753628  0.13755514], Std: [0.08591458 0.33378179 0.17481732 0.21199368 0.33106957 0.17148914
 0.21852836 0.32190651 0.93345871 0.68277493 3.42036936 3.65021816
 3.46420607 6.10246456 3.55955822 3.45990153 5.60618119] [0m
[32m >> Successfully saved models! [0m


  0%|          | 100/100000 [05:45<96:45:45,  3.49s/it]

2443 episode score is 273.93, average_xvel is 0.530


  0%|          | 101/100000 [05:49<95:51:14,  3.45s/it]

2456 episode score is 278.53, average_xvel is 0.516


  0%|          | 102/100000 [05:52<95:49:56,  3.45s/it]

2468 episode score is 267.53, average_xvel is 0.315


  0%|          | 103/100000 [05:56<95:49:41,  3.45s/it]

2482 episode score is 267.42, average_xvel is 0.544


  0%|          | 104/100000 [05:59<96:17:43,  3.47s/it]

2494 episode score is 268.64, average_xvel is 0.325


  0%|          | 105/100000 [06:02<95:54:50,  3.46s/it]

2505 episode score is 279.55, average_xvel is 0.255


  0%|          | 106/100000 [06:06<95:39:32,  3.45s/it]

2519 episode score is 263.06, average_xvel is 0.569


  0%|          | 107/100000 [06:09<95:30:33,  3.44s/it]

2531 episode score is 289.45, average_xvel is 0.432


  0%|          | 108/100000 [06:13<95:35:15,  3.44s/it]

2544 episode score is 285.48, average_xvel is 0.455


  0%|          | 109/100000 [06:16<97:04:33,  3.50s/it]

2556 episode score is 286.43, average_xvel is 0.435


  0%|          | 110/100000 [06:20<96:05:25,  3.46s/it]

2569 episode score is 279.39, average_xvel is 0.462


  0%|          | 111/100000 [06:23<96:31:50,  3.48s/it]

2582 episode score is 280.22, average_xvel is 0.448


  0%|          | 112/100000 [06:27<97:14:28,  3.50s/it]

2595 episode score is 283.57, average_xvel is 0.514


  0%|          | 113/100000 [06:30<97:18:02,  3.51s/it]

2608 episode score is 276.36, average_xvel is 0.425


  0%|          | 114/100000 [06:34<97:42:16,  3.52s/it]

2621 episode score is 279.73, average_xvel is 0.473


  0%|          | 115/100000 [06:37<97:03:52,  3.50s/it]

2633 episode score is 295.07, average_xvel is 0.396


  0%|          | 116/100000 [06:41<97:12:32,  3.50s/it]

2646 episode score is 270.41, average_xvel is 0.349


  0%|          | 117/100000 [06:44<97:54:41,  3.53s/it]

2660 episode score is 271.08, average_xvel is 0.493


  0%|          | 118/100000 [06:48<100:11:49,  3.61s/it]

2673 episode score is 276.10, average_xvel is 0.406


  0%|          | 119/100000 [06:52<103:12:35,  3.72s/it]

2686 episode score is 289.39, average_xvel is 0.454


  0%|          | 120/100000 [06:56<102:07:03,  3.68s/it]

2698 episode score is 288.56, average_xvel is 0.403


  0%|          | 121/100000 [06:59<99:33:33,  3.59s/it] 

2711 episode score is 278.65, average_xvel is 0.452


  0%|          | 122/100000 [07:03<98:10:33,  3.54s/it]

2724 episode score is 284.03, average_xvel is 0.423


  0%|          | 123/100000 [07:06<98:10:13,  3.54s/it]

2738 episode score is 276.58, average_xvel is 0.494


  0%|          | 124/100000 [07:10<98:21:57,  3.55s/it]

2751 episode score is 272.65, average_xvel is 0.456


  0%|          | 125/100000 [07:13<96:33:07,  3.48s/it]

2762 episode score is 307.39, average_xvel is 0.323


  0%|          | 126/100000 [07:16<95:43:12,  3.45s/it]

2774 episode score is 300.52, average_xvel is 0.406


  0%|          | 127/100000 [07:20<95:51:17,  3.46s/it]

2785 episode score is 315.18, average_xvel is 0.313


  0%|          | 128/100000 [07:23<96:24:31,  3.48s/it]

2798 episode score is 278.25, average_xvel is 0.414


  0%|          | 129/100000 [07:27<96:18:05,  3.47s/it]

2811 episode score is 266.71, average_xvel is 0.308


  0%|          | 130/100000 [07:30<96:35:13,  3.48s/it]

2825 episode score is 265.89, average_xvel is 0.483


  0%|          | 131/100000 [07:34<95:49:47,  3.45s/it]

2838 episode score is 290.02, average_xvel is 0.451


  0%|          | 132/100000 [07:37<96:14:50,  3.47s/it]

2851 episode score is 282.00, average_xvel is 0.458


  0%|          | 133/100000 [07:41<95:31:45,  3.44s/it]

2864 episode score is 293.83, average_xvel is 0.461


  0%|          | 134/100000 [07:44<96:08:16,  3.47s/it]

2878 episode score is 263.27, average_xvel is 0.448


  0%|          | 135/100000 [07:48<95:54:37,  3.46s/it]

2891 episode score is 267.06, average_xvel is 0.346


  0%|          | 136/100000 [07:51<95:22:37,  3.44s/it]

2903 episode score is 280.33, average_xvel is 0.250


  0%|          | 137/100000 [07:54<95:26:02,  3.44s/it]

2917 episode score is 258.75, average_xvel is 0.371


  0%|          | 138/100000 [07:58<95:57:05,  3.46s/it]

2930 episode score is 281.92, average_xvel is 0.470


  0%|          | 139/100000 [08:01<95:06:02,  3.43s/it]

2944 episode score is 270.85, average_xvel is 0.362


  0%|          | 140/100000 [08:05<97:06:03,  3.50s/it]

2956 episode score is 302.28, average_xvel is 0.347


  0%|          | 141/100000 [08:09<97:21:54,  3.51s/it]

2969 episode score is 284.33, average_xvel is 0.476


  0%|          | 142/100000 [08:12<96:04:17,  3.46s/it]

2982 episode score is 256.13, average_xvel is 0.242


  0%|          | 143/100000 [08:15<95:42:51,  3.45s/it]

2994 episode score is 302.63, average_xvel is 0.409


  0%|          | 144/100000 [08:19<95:23:07,  3.44s/it]

3006 episode score is 302.44, average_xvel is 0.430


  0%|          | 145/100000 [08:22<94:38:59,  3.41s/it]

3019 episode score is 291.35, average_xvel is 0.454


  0%|          | 146/100000 [08:26<95:17:58,  3.44s/it]

3031 episode score is 309.68, average_xvel is 0.463


  0%|          | 147/100000 [08:29<94:32:44,  3.41s/it]

3043 episode score is 313.00, average_xvel is 0.393


  0%|          | 148/100000 [08:32<95:35:33,  3.45s/it]

3055 episode score is 310.88, average_xvel is 0.457


  0%|          | 149/100000 [08:36<95:04:04,  3.43s/it]

3067 episode score is 320.97, average_xvel is 0.488


  0%|          | 150/100000 [08:39<95:07:23,  3.43s/it]

3080 episode score is 298.73, average_xvel is 0.486


  0%|          | 151/100000 [08:43<95:33:34,  3.45s/it]

3092 episode score is 315.22, average_xvel is 0.487


  0%|          | 152/100000 [08:46<95:02:58,  3.43s/it]

3105 episode score is 315.32, average_xvel is 0.529


  0%|          | 153/100000 [08:50<96:06:04,  3.46s/it]

3117 episode score is 329.90, average_xvel is 0.517


  0%|          | 154/100000 [08:53<96:15:49,  3.47s/it]

3127 episode score is 363.09, average_xvel is 0.417


  0%|          | 155/100000 [08:56<95:10:50,  3.43s/it]

3138 episode score is 347.70, average_xvel is 0.485


  0%|          | 156/100000 [09:00<94:47:04,  3.42s/it]

3149 episode score is 349.81, average_xvel is 0.538


  0%|          | 157/100000 [09:03<94:03:58,  3.39s/it]

3159 episode score is 365.53, average_xvel is 0.267


  0%|          | 158/100000 [09:07<95:50:09,  3.46s/it]

3170 episode score is 346.36, average_xvel is 0.493


  0%|          | 159/100000 [09:10<94:50:04,  3.42s/it]

3180 episode score is 385.64, average_xvel is 0.481


  0%|          | 160/100000 [09:14<94:51:20,  3.42s/it]

3190 episode score is 285.41, average_xvel is -0.020


  0%|          | 161/100000 [09:17<94:10:47,  3.40s/it]

3201 episode score is 360.20, average_xvel is 0.496


  0%|          | 162/100000 [09:20<94:37:33,  3.41s/it]

3211 episode score is 407.91, average_xvel is 0.412


  0%|          | 163/100000 [09:24<97:08:09,  3.50s/it]

3222 episode score is 375.49, average_xvel is 0.535


  0%|          | 164/100000 [09:28<97:35:39,  3.52s/it]

3233 episode score is 369.56, average_xvel is 0.526


  0%|          | 165/100000 [09:31<97:27:12,  3.51s/it]

3244 episode score is 377.84, average_xvel is 0.516


  0%|          | 166/100000 [09:35<98:12:50,  3.54s/it]

3256 episode score is 357.40, average_xvel is 0.634


  0%|          | 167/100000 [09:38<97:57:44,  3.53s/it]

3267 episode score is 356.25, average_xvel is 0.388


  0%|          | 168/100000 [09:42<98:23:55,  3.55s/it]

3277 episode score is 393.48, average_xvel is 0.527


  0%|          | 169/100000 [09:45<96:59:57,  3.50s/it]

3288 episode score is 387.27, average_xvel is 0.536


  0%|          | 170/100000 [09:49<97:55:54,  3.53s/it]

3298 episode score is 390.87, average_xvel is 0.501


  0%|          | 171/100000 [09:52<96:37:46,  3.48s/it]

3308 episode score is 389.08, average_xvel is 0.450


  0%|          | 172/100000 [09:56<96:17:17,  3.47s/it]

3318 episode score is 391.00, average_xvel is 0.527


  0%|          | 173/100000 [09:59<95:03:38,  3.43s/it]

3329 episode score is 363.95, average_xvel is 0.500


  0%|          | 174/100000 [10:02<95:11:40,  3.43s/it]

3341 episode score is 334.16, average_xvel is 0.423


  0%|          | 175/100000 [10:06<96:27:29,  3.48s/it]

3352 episode score is 325.32, average_xvel is 0.243


  0%|          | 176/100000 [10:09<96:39:42,  3.49s/it]

3362 episode score is 391.45, average_xvel is 0.514


  0%|          | 177/100000 [10:13<96:39:28,  3.49s/it]

3372 episode score is 382.05, average_xvel is 0.441


  0%|          | 178/100000 [10:17<99:49:01,  3.60s/it]

3382 episode score is 399.78, average_xvel is 0.528


  0%|          | 179/100000 [10:20<97:57:10,  3.53s/it]

3392 episode score is 405.56, average_xvel is 0.487


  0%|          | 180/100000 [10:24<97:36:20,  3.52s/it]

3401 episode score is 375.27, average_xvel is 0.197


  0%|          | 181/100000 [10:27<96:17:24,  3.47s/it]

3411 episode score is 392.16, average_xvel is 0.513


  0%|          | 182/100000 [10:30<95:18:58,  3.44s/it]

3423 episode score is 349.80, average_xvel is 0.519


  0%|          | 183/100000 [10:34<96:12:47,  3.47s/it]

3434 episode score is 361.49, average_xvel is 0.520


  0%|          | 184/100000 [10:37<95:05:07,  3.43s/it]

3444 episode score is 360.49, average_xvel is 0.299


  0%|          | 185/100000 [10:41<95:11:35,  3.43s/it]

3454 episode score is 374.52, average_xvel is 0.383


  0%|          | 186/100000 [10:44<94:54:13,  3.42s/it]

3464 episode score is 371.41, average_xvel is 0.360


  0%|          | 187/100000 [10:48<94:36:47,  3.41s/it]

3473 episode score is 390.57, average_xvel is 0.214


  0%|          | 188/100000 [10:51<94:55:11,  3.42s/it]

3483 episode score is 378.60, average_xvel is 0.384


  0%|          | 189/100000 [10:54<94:50:12,  3.42s/it]

3493 episode score is 411.83, average_xvel is 0.479


  0%|          | 190/100000 [10:58<95:28:17,  3.44s/it]

3503 episode score is 394.39, average_xvel is 0.327


  0%|          | 191/100000 [11:02<97:00:23,  3.50s/it]

3514 episode score is 379.08, average_xvel is 0.382


  0%|          | 192/100000 [11:05<98:45:49,  3.56s/it]

3524 episode score is 409.12, average_xvel is 0.536


  0%|          | 193/100000 [11:09<97:23:56,  3.51s/it]

3533 episode score is 433.91, average_xvel is 0.430


  0%|          | 194/100000 [11:12<96:36:32,  3.48s/it]

3543 episode score is 403.51, average_xvel is 0.503


  0%|          | 195/100000 [11:15<95:42:30,  3.45s/it]

3554 episode score is 395.78, average_xvel is 0.555


  0%|          | 196/100000 [11:19<97:03:58,  3.50s/it]

3564 episode score is 409.02, average_xvel is 0.475


  0%|          | 197/100000 [11:23<96:52:38,  3.49s/it]

3574 episode score is 406.91, average_xvel is 0.497


  0%|          | 198/100000 [11:26<96:15:43,  3.47s/it]

3584 episode score is 404.77, average_xvel is 0.447


  0%|          | 199/100000 [11:29<96:25:20,  3.48s/it]

3595 episode score is 371.75, average_xvel is 0.379
[32m >> Successfully saved normalization parameters! Mean: [ 1.15748779  0.16419504 -0.02449769 -0.02568664  0.57899764 -0.01045864
 -0.0275986   0.59798346  0.49297722 -0.34567647  0.01861044 -0.34752125
 -0.31598368 -0.00237315 -0.33539991 -0.31997349 -0.06373321], Std: [0.09257139 0.33793925 0.13392978 0.16260415 0.39009866 0.13013739
 0.16717424 0.41123327 0.89134391 0.60434683 2.66725922 2.84491419
 2.85557196 5.93584331 2.74837394 2.73366059 5.27061569] [0m
[32m >> Successfully saved models! [0m


  0%|          | 200/100000 [11:33<98:05:03,  3.54s/it]

3605 episode score is 417.84, average_xvel is 0.572


  0%|          | 201/100000 [11:37<96:53:41,  3.50s/it]

3616 episode score is 398.88, average_xvel is 0.558


  0%|          | 202/100000 [11:40<97:31:45,  3.52s/it]

3625 episode score is 429.85, average_xvel is 0.417


  0%|          | 203/100000 [11:43<96:16:28,  3.47s/it]

3635 episode score is 422.34, average_xvel is 0.481


  0%|          | 204/100000 [11:47<97:01:09,  3.50s/it]

3645 episode score is 411.40, average_xvel is 0.538


  0%|          | 205/100000 [11:50<96:00:28,  3.46s/it]

3656 episode score is 377.77, average_xvel is 0.417


  0%|          | 206/100000 [11:54<97:01:57,  3.50s/it]

3665 episode score is 445.12, average_xvel is 0.426


  0%|          | 207/100000 [11:57<96:22:33,  3.48s/it]

3675 episode score is 423.14, average_xvel is 0.539


  0%|          | 208/100000 [12:01<96:03:51,  3.47s/it]

3686 episode score is 376.67, average_xvel is 0.510


  0%|          | 209/100000 [12:04<95:43:47,  3.45s/it]

3697 episode score is 387.97, average_xvel is 0.612


  0%|          | 210/100000 [12:08<95:08:47,  3.43s/it]

3708 episode score is 400.54, average_xvel is 0.606


  0%|          | 211/100000 [12:11<95:44:11,  3.45s/it]

3719 episode score is 389.56, average_xvel is 0.639


  0%|          | 212/100000 [12:15<94:57:41,  3.43s/it]

3729 episode score is 430.46, average_xvel is 0.496


  0%|          | 213/100000 [12:18<96:06:25,  3.47s/it]

3739 episode score is 414.65, average_xvel is 0.529


  0%|          | 214/100000 [12:21<95:30:57,  3.45s/it]

3748 episode score is 406.93, average_xvel is 0.272


  0%|          | 215/100000 [12:25<94:53:41,  3.42s/it]

3759 episode score is 408.47, average_xvel is 0.627


  0%|          | 216/100000 [12:28<95:25:52,  3.44s/it]

3770 episode score is 392.18, average_xvel is 0.345


  0%|          | 217/100000 [12:32<98:46:47,  3.56s/it]

3781 episode score is 404.37, average_xvel is 0.557


  0%|          | 218/100000 [12:36<98:40:29,  3.56s/it]

3793 episode score is 365.46, average_xvel is 0.503


  0%|          | 219/100000 [12:39<99:09:20,  3.58s/it]

3804 episode score is 390.43, average_xvel is 0.589


  0%|          | 220/100000 [12:43<97:27:54,  3.52s/it]

3815 episode score is 414.10, average_xvel is 0.590


  0%|          | 221/100000 [12:46<97:48:49,  3.53s/it]

3825 episode score is 421.31, average_xvel is 0.496


  0%|          | 222/100000 [12:50<97:03:36,  3.50s/it]

3836 episode score is 397.52, average_xvel is 0.604


  0%|          | 223/100000 [12:53<96:49:02,  3.49s/it]

3847 episode score is 393.63, average_xvel is 0.408


  0%|          | 224/100000 [12:57<98:29:05,  3.55s/it]

3858 episode score is 408.09, average_xvel is 0.599


  0%|          | 225/100000 [13:00<97:55:26,  3.53s/it]

3869 episode score is 402.99, average_xvel is 0.573


  0%|          | 226/100000 [13:04<97:32:33,  3.52s/it]

3879 episode score is 411.25, average_xvel is 0.378


  0%|          | 227/100000 [13:07<97:33:48,  3.52s/it]

3891 episode score is 389.15, average_xvel is 0.678


  0%|          | 228/100000 [13:11<97:27:06,  3.52s/it]

3902 episode score is 407.33, average_xvel is 0.681


  0%|          | 229/100000 [13:14<96:01:40,  3.46s/it]

3912 episode score is 432.41, average_xvel is 0.534


  0%|          | 230/100000 [13:18<95:58:18,  3.46s/it]

3924 episode score is 395.75, average_xvel is 0.688


  0%|          | 231/100000 [13:21<96:33:11,  3.48s/it]

3935 episode score is 406.04, average_xvel is 0.559


  0%|          | 232/100000 [13:25<96:49:00,  3.49s/it]

3945 episode score is 434.64, average_xvel is 0.519


  0%|          | 233/100000 [13:28<96:40:38,  3.49s/it]

3956 episode score is 395.90, average_xvel is 0.439


  0%|          | 234/100000 [13:32<97:25:10,  3.52s/it]

3967 episode score is 409.08, average_xvel is 0.622


  0%|          | 235/100000 [13:35<96:58:37,  3.50s/it]

3978 episode score is 382.54, average_xvel is 0.460


  0%|          | 236/100000 [13:39<96:38:47,  3.49s/it]

3989 episode score is 405.20, average_xvel is 0.657


  0%|          | 237/100000 [13:42<95:39:29,  3.45s/it]

3999 episode score is 434.02, average_xvel is 0.545


  0%|          | 238/100000 [13:46<96:27:22,  3.48s/it]

4009 episode score is 433.29, average_xvel is 0.471


  0%|          | 239/100000 [13:49<96:39:18,  3.49s/it]

4020 episode score is 377.61, average_xvel is 0.393


  0%|          | 240/100000 [13:53<100:07:23,  3.61s/it]

4031 episode score is 421.84, average_xvel is 0.579


  0%|          | 241/100000 [13:57<102:43:11,  3.71s/it]

4041 episode score is 437.68, average_xvel is 0.521


  0%|          | 242/100000 [14:00<100:47:35,  3.64s/it]

4052 episode score is 407.77, average_xvel is 0.601


  0%|          | 243/100000 [14:04<98:56:02,  3.57s/it] 

4063 episode score is 417.24, average_xvel is 0.619


  0%|          | 244/100000 [14:07<97:52:58,  3.53s/it]

4074 episode score is 393.83, average_xvel is 0.563


  0%|          | 245/100000 [14:11<96:11:41,  3.47s/it]

4085 episode score is 413.46, average_xvel is 0.515


  0%|          | 246/100000 [14:14<97:06:19,  3.50s/it]

4096 episode score is 432.12, average_xvel is 0.566


  0%|          | 247/100000 [14:18<98:22:40,  3.55s/it]

4108 episode score is 398.89, average_xvel is 0.644


  0%|          | 248/100000 [14:21<98:28:53,  3.55s/it]

4118 episode score is 427.88, average_xvel is 0.530


  0%|          | 249/100000 [14:25<96:28:30,  3.48s/it]

4129 episode score is 414.53, average_xvel is 0.586


  0%|          | 250/100000 [14:28<96:16:58,  3.47s/it]

4139 episode score is 390.83, average_xvel is 0.310


  0%|          | 251/100000 [14:32<95:30:21,  3.45s/it]

4150 episode score is 421.15, average_xvel is 0.577


  0%|          | 252/100000 [14:35<96:21:39,  3.48s/it]

4161 episode score is 411.94, average_xvel is 0.571


  0%|          | 253/100000 [14:39<96:16:38,  3.47s/it]

4173 episode score is 395.22, average_xvel is 0.620


  0%|          | 254/100000 [14:42<96:48:59,  3.49s/it]

4185 episode score is 390.89, average_xvel is 0.642


  0%|          | 255/100000 [14:46<96:30:07,  3.48s/it]

4196 episode score is 386.62, average_xvel is 0.382


  0%|          | 256/100000 [14:49<96:48:47,  3.49s/it]

4207 episode score is 427.43, average_xvel is 0.601


  0%|          | 257/100000 [14:53<97:03:09,  3.50s/it]

4218 episode score is 416.33, average_xvel is 0.628


  0%|          | 258/100000 [14:56<96:16:40,  3.47s/it]

4230 episode score is 391.28, average_xvel is 0.649


  0%|          | 259/100000 [15:00<96:17:21,  3.48s/it]

4242 episode score is 399.25, average_xvel is 0.704


  0%|          | 260/100000 [15:03<96:08:35,  3.47s/it]

4254 episode score is 408.26, average_xvel is 0.691


  0%|          | 261/100000 [15:07<96:49:43,  3.49s/it]

4266 episode score is 393.06, average_xvel is 0.642


  0%|          | 262/100000 [15:10<96:17:08,  3.48s/it]

4278 episode score is 392.56, average_xvel is 0.615


  0%|          | 263/100000 [15:14<96:42:17,  3.49s/it]

4290 episode score is 399.68, average_xvel is 0.677


  0%|          | 264/100000 [15:17<96:33:12,  3.49s/it]

4302 episode score is 381.04, average_xvel is 0.431


  0%|          | 265/100000 [15:21<97:57:04,  3.54s/it]

4314 episode score is 411.32, average_xvel is 0.688


  0%|          | 266/100000 [15:24<98:02:53,  3.54s/it]

4325 episode score is 432.11, average_xvel is 0.625


  0%|          | 267/100000 [15:28<97:33:33,  3.52s/it]

4337 episode score is 399.50, average_xvel is 0.675


  0%|          | 268/100000 [15:31<96:44:26,  3.49s/it]

4349 episode score is 344.70, average_xvel is 0.324


  0%|          | 269/100000 [15:35<96:34:03,  3.49s/it]

4361 episode score is 409.85, average_xvel is 0.622


  0%|          | 270/100000 [15:38<97:45:23,  3.53s/it]

4373 episode score is 398.89, average_xvel is 0.629


  0%|          | 271/100000 [15:42<97:33:45,  3.52s/it]

4384 episode score is 411.85, average_xvel is 0.595


  0%|          | 272/100000 [15:45<96:09:14,  3.47s/it]

4395 episode score is 387.97, average_xvel is 0.408


  0%|          | 273/100000 [15:48<95:34:27,  3.45s/it]

4407 episode score is 389.88, average_xvel is 0.652


  0%|          | 274/100000 [15:52<94:52:24,  3.42s/it]

4419 episode score is 394.40, average_xvel is 0.653


  0%|          | 275/100000 [15:55<94:28:43,  3.41s/it]

4430 episode score is 422.48, average_xvel is 0.537


  0%|          | 276/100000 [15:59<95:11:19,  3.44s/it]

4442 episode score is 403.86, average_xvel is 0.610


  0%|          | 277/100000 [16:02<95:57:25,  3.46s/it]

4454 episode score is 410.51, average_xvel is 0.597


  0%|          | 278/100000 [16:06<97:07:51,  3.51s/it]

4466 episode score is 396.17, average_xvel is 0.659


  0%|          | 279/100000 [16:09<96:00:42,  3.47s/it]

4478 episode score is 410.69, average_xvel is 0.615


  0%|          | 280/100000 [16:13<96:55:20,  3.50s/it]

4490 episode score is 413.38, average_xvel is 0.630


  0%|          | 281/100000 [16:16<97:24:03,  3.52s/it]

4502 episode score is 412.77, average_xvel is 0.598


  0%|          | 282/100000 [16:20<98:07:30,  3.54s/it]

4514 episode score is 409.31, average_xvel is 0.646


  0%|          | 283/100000 [16:23<97:42:25,  3.53s/it]

4526 episode score is 406.79, average_xvel is 0.700


  0%|          | 284/100000 [16:27<96:30:59,  3.48s/it]

4538 episode score is 403.13, average_xvel is 0.672


  0%|          | 285/100000 [16:30<95:39:40,  3.45s/it]

4551 episode score is 401.93, average_xvel is 0.727


  0%|          | 286/100000 [16:34<96:58:24,  3.50s/it]

4563 episode score is 399.43, average_xvel is 0.678


  0%|          | 287/100000 [16:37<95:43:39,  3.46s/it]

4575 episode score is 402.37, average_xvel is 0.633


  0%|          | 288/100000 [16:41<95:49:05,  3.46s/it]

4588 episode score is 393.51, average_xvel is 0.699


  0%|          | 289/100000 [16:44<96:40:12,  3.49s/it]

4600 episode score is 404.68, average_xvel is 0.710


  0%|          | 290/100000 [16:48<95:24:37,  3.44s/it]

4612 episode score is 384.40, average_xvel is 0.521


  0%|          | 291/100000 [16:51<95:21:08,  3.44s/it]

4624 episode score is 410.08, average_xvel is 0.725


  0%|          | 292/100000 [16:54<94:52:39,  3.43s/it]

4637 episode score is 401.77, average_xvel is 0.731


  0%|          | 293/100000 [16:58<96:12:59,  3.47s/it]

4649 episode score is 411.18, average_xvel is 0.689


  0%|          | 294/100000 [17:01<95:32:13,  3.45s/it]

4662 episode score is 400.80, average_xvel is 0.705


  0%|          | 295/100000 [17:05<96:46:44,  3.49s/it]

4675 episode score is 402.05, average_xvel is 0.705


  0%|          | 296/100000 [17:09<97:42:45,  3.53s/it]

4688 episode score is 400.70, average_xvel is 0.736


  0%|          | 297/100000 [17:12<97:42:10,  3.53s/it]

4700 episode score is 414.81, average_xvel is 0.691


  0%|          | 298/100000 [17:15<96:50:16,  3.50s/it]

4712 episode score is 407.79, average_xvel is 0.719


  0%|          | 299/100000 [17:19<95:29:34,  3.45s/it]

4724 episode score is 412.85, average_xvel is 0.747
[32m >> Successfully saved normalization parameters! Mean: [ 1.14458893e+00  2.41602590e-01 -1.48743987e-02 -1.79151803e-02
  5.69954965e-01  1.62618563e-04 -2.65805947e-02  4.98204274e-01
  7.08109224e-01 -3.31116692e-01  1.67532611e-01 -2.82426882e-01
 -2.75199286e-01 -1.21046647e-01 -2.27105174e-01 -3.75607326e-01
 -2.74301501e-01], Std: [0.09748596 0.35212448 0.11500057 0.14093722 0.42848407 0.10795929
 0.15940977 0.52005529 0.94545015 0.57019443 2.27236393 2.45006005
 2.55251083 5.52821373 2.30902471 2.47346849 4.81653447] [0m
[32m >> Successfully saved models! [0m


  0%|          | 300/100000 [17:22<94:39:42,  3.42s/it]

4736 episode score is 416.63, average_xvel is 0.707


  0%|          | 301/100000 [17:26<95:03:49,  3.43s/it]

4749 episode score is 408.93, average_xvel is 0.752


  0%|          | 302/100000 [17:29<96:08:06,  3.47s/it]

4761 episode score is 414.96, average_xvel is 0.727


  0%|          | 303/100000 [17:33<95:19:15,  3.44s/it]

4774 episode score is 417.43, average_xvel is 0.758


  0%|          | 304/100000 [17:36<96:50:37,  3.50s/it]

4786 episode score is 420.17, average_xvel is 0.728


  0%|          | 305/100000 [17:40<96:12:11,  3.47s/it]

4798 episode score is 418.01, average_xvel is 0.709


  0%|          | 306/100000 [17:43<95:54:05,  3.46s/it]

4811 episode score is 413.21, average_xvel is 0.755


  0%|          | 307/100000 [17:47<96:19:16,  3.48s/it]

4823 episode score is 418.10, average_xvel is 0.699


  0%|          | 308/100000 [17:50<95:27:11,  3.45s/it]

4836 episode score is 413.99, average_xvel is 0.739


  0%|          | 309/100000 [17:54<96:42:40,  3.49s/it]

4849 episode score is 397.60, average_xvel is 0.793


  0%|          | 310/100000 [17:57<95:43:29,  3.46s/it]

4861 episode score is 423.84, average_xvel is 0.705


  0%|          | 311/100000 [18:00<95:22:17,  3.44s/it]

4874 episode score is 393.47, average_xvel is 0.768


  0%|          | 312/100000 [18:04<94:38:47,  3.42s/it]

4887 episode score is 404.63, average_xvel is 0.722


  0%|          | 313/100000 [18:07<95:56:17,  3.46s/it]

4900 episode score is 406.83, average_xvel is 0.756


  0%|          | 314/100000 [18:11<96:10:44,  3.47s/it]

4913 episode score is 400.38, average_xvel is 0.778


  0%|          | 315/100000 [18:14<95:21:36,  3.44s/it]

4926 episode score is 396.58, average_xvel is 0.783


  0%|          | 316/100000 [18:17<94:45:21,  3.42s/it]

4939 episode score is 397.96, average_xvel is 0.762


  0%|          | 317/100000 [18:21<94:29:06,  3.41s/it]

4952 episode score is 407.21, average_xvel is 0.722


  0%|          | 318/100000 [18:24<95:18:32,  3.44s/it]

4964 episode score is 414.84, average_xvel is 0.691


  0%|          | 319/100000 [18:28<94:22:45,  3.41s/it]

4977 episode score is 408.06, average_xvel is 0.766


  0%|          | 320/100000 [18:31<94:46:02,  3.42s/it]

4991 episode score is 394.58, average_xvel is 0.771


  0%|          | 321/100000 [18:35<96:01:51,  3.47s/it]

5005 episode score is 390.20, average_xvel is 0.744


  0%|          | 322/100000 [18:38<96:47:57,  3.50s/it]

5018 episode score is 395.70, average_xvel is 0.753


  0%|          | 323/100000 [18:42<95:24:22,  3.45s/it]

5031 episode score is 396.79, average_xvel is 0.777


  0%|          | 324/100000 [18:45<94:23:41,  3.41s/it]

5045 episode score is 387.13, average_xvel is 0.755


  0%|          | 325/100000 [18:48<95:07:42,  3.44s/it]

5059 episode score is 388.44, average_xvel is 0.761


  0%|          | 326/100000 [18:52<95:44:41,  3.46s/it]

5073 episode score is 388.42, average_xvel is 0.745


  0%|          | 327/100000 [18:56<96:35:56,  3.49s/it]

5086 episode score is 399.32, average_xvel is 0.749


  0%|          | 328/100000 [18:59<95:39:32,  3.46s/it]

5099 episode score is 400.04, average_xvel is 0.761


  0%|          | 329/100000 [19:02<94:50:14,  3.43s/it]

5112 episode score is 398.89, average_xvel is 0.751


  0%|          | 330/100000 [19:06<94:17:36,  3.41s/it]

5126 episode score is 393.51, average_xvel is 0.769


  0%|          | 331/100000 [19:09<95:28:17,  3.45s/it]

5140 episode score is 389.99, average_xvel is 0.728


  0%|          | 332/100000 [19:13<97:29:50,  3.52s/it]

5153 episode score is 393.36, average_xvel is 0.728


  0%|          | 333/100000 [19:17<99:32:27,  3.60s/it]

5167 episode score is 396.34, average_xvel is 0.751


  0%|          | 334/100000 [19:20<100:02:24,  3.61s/it]

5179 episode score is 423.79, average_xvel is 0.637


  0%|          | 335/100000 [19:24<98:38:36,  3.56s/it] 

5193 episode score is 393.10, average_xvel is 0.771


  0%|          | 336/100000 [19:27<98:06:03,  3.54s/it]

5206 episode score is 399.66, average_xvel is 0.735


  0%|          | 337/100000 [19:31<96:36:43,  3.49s/it]

5219 episode score is 408.87, average_xvel is 0.718


  0%|          | 338/100000 [19:34<96:07:35,  3.47s/it]

5233 episode score is 395.37, average_xvel is 0.752


  0%|          | 339/100000 [19:38<96:51:25,  3.50s/it]

5246 episode score is 401.31, average_xvel is 0.739


  0%|          | 340/100000 [19:41<95:51:06,  3.46s/it]

5259 episode score is 396.50, average_xvel is 0.727


  0%|          | 341/100000 [19:44<94:41:18,  3.42s/it]

5272 episode score is 399.50, average_xvel is 0.744


  0%|          | 342/100000 [19:48<94:03:12,  3.40s/it]

5285 episode score is 408.92, average_xvel is 0.744


  0%|          | 343/100000 [19:51<94:08:38,  3.40s/it]

5298 episode score is 410.99, average_xvel is 0.730


  0%|          | 344/100000 [19:54<94:38:54,  3.42s/it]

5312 episode score is 402.00, average_xvel is 0.773


  0%|          | 345/100000 [19:58<96:05:29,  3.47s/it]

5326 episode score is 400.33, average_xvel is 0.766


  0%|          | 346/100000 [20:02<96:48:05,  3.50s/it]

5339 episode score is 410.21, average_xvel is 0.752


  0%|          | 347/100000 [20:05<95:57:53,  3.47s/it]

5352 episode score is 412.15, average_xvel is 0.725


  0%|          | 348/100000 [20:08<95:47:06,  3.46s/it]

5365 episode score is 411.39, average_xvel is 0.773


  0%|          | 349/100000 [20:12<94:58:00,  3.43s/it]

5378 episode score is 413.86, average_xvel is 0.737


  0%|          | 350/100000 [20:15<95:01:47,  3.43s/it]

5391 episode score is 423.42, average_xvel is 0.747


  0%|          | 351/100000 [20:19<95:16:13,  3.44s/it]

5404 episode score is 406.59, average_xvel is 0.753


  0%|          | 352/100000 [20:22<94:34:44,  3.42s/it]

5418 episode score is 405.82, average_xvel is 0.756


  0%|          | 353/100000 [20:26<96:06:25,  3.47s/it]

5432 episode score is 397.63, average_xvel is 0.772


  0%|          | 354/100000 [20:29<96:12:50,  3.48s/it]

5445 episode score is 418.34, average_xvel is 0.742


  0%|          | 355/100000 [20:33<95:59:17,  3.47s/it]

5458 episode score is 419.09, average_xvel is 0.731


  0%|          | 356/100000 [20:36<96:03:48,  3.47s/it]

5471 episode score is 415.92, average_xvel is 0.741


  0%|          | 357/100000 [20:40<95:40:37,  3.46s/it]

5484 episode score is 415.31, average_xvel is 0.749


  0%|          | 358/100000 [20:43<95:07:16,  3.44s/it]

5497 episode score is 413.10, average_xvel is 0.750


  0%|          | 359/100000 [20:46<94:40:52,  3.42s/it]

5511 episode score is 407.91, average_xvel is 0.754


  0%|          | 360/100000 [20:50<96:15:36,  3.48s/it]

5524 episode score is 426.01, average_xvel is 0.759


  0%|          | 361/100000 [20:53<96:17:23,  3.48s/it]

5537 episode score is 422.30, average_xvel is 0.769


  0%|          | 362/100000 [20:57<95:45:38,  3.46s/it]

5550 episode score is 412.43, average_xvel is 0.771


  0%|          | 363/100000 [21:00<94:49:56,  3.43s/it]

5563 episode score is 414.05, average_xvel is 0.732


  0%|          | 364/100000 [21:04<94:35:48,  3.42s/it]

5576 episode score is 417.51, average_xvel is 0.734


  0%|          | 365/100000 [21:07<94:46:44,  3.42s/it]

5589 episode score is 420.61, average_xvel is 0.768


  0%|          | 366/100000 [21:10<94:40:46,  3.42s/it]

5602 episode score is 419.52, average_xvel is 0.770


  0%|          | 367/100000 [21:14<94:33:06,  3.42s/it]

5615 episode score is 414.64, average_xvel is 0.745


  0%|          | 368/100000 [21:17<94:07:49,  3.40s/it]

5628 episode score is 415.93, average_xvel is 0.762


  0%|          | 369/100000 [21:21<93:30:11,  3.38s/it]

5641 episode score is 422.01, average_xvel is 0.762


  0%|          | 370/100000 [21:24<93:43:06,  3.39s/it]

5654 episode score is 424.74, average_xvel is 0.748


  0%|          | 371/100000 [21:27<94:12:03,  3.40s/it]

5667 episode score is 420.06, average_xvel is 0.758


  0%|          | 372/100000 [21:31<94:07:49,  3.40s/it]

5680 episode score is 420.35, average_xvel is 0.745


  0%|          | 373/100000 [21:34<93:55:52,  3.39s/it]

5693 episode score is 437.01, average_xvel is 0.737


  0%|          | 374/100000 [21:38<95:22:04,  3.45s/it]

5706 episode score is 432.89, average_xvel is 0.727


  0%|          | 375/100000 [21:41<96:48:11,  3.50s/it]

5719 episode score is 417.41, average_xvel is 0.735


  0%|          | 376/100000 [21:45<95:53:09,  3.46s/it]

5732 episode score is 427.93, average_xvel is 0.756


  0%|          | 377/100000 [21:48<95:37:33,  3.46s/it]

5746 episode score is 419.10, average_xvel is 0.761


  0%|          | 378/100000 [21:52<96:48:06,  3.50s/it]

5759 episode score is 431.21, average_xvel is 0.738


  0%|          | 379/100000 [21:55<96:40:43,  3.49s/it]

5772 episode score is 426.20, average_xvel is 0.730


  0%|          | 380/100000 [21:59<96:15:32,  3.48s/it]

5785 episode score is 418.56, average_xvel is 0.730


  0%|          | 381/100000 [22:02<95:27:44,  3.45s/it]

5798 episode score is 420.88, average_xvel is 0.728


  0%|          | 382/100000 [22:05<95:04:11,  3.44s/it]

5811 episode score is 423.07, average_xvel is 0.718


  0%|          | 383/100000 [22:09<94:58:29,  3.43s/it]

5824 episode score is 435.38, average_xvel is 0.675


  0%|          | 384/100000 [22:12<96:14:28,  3.48s/it]

5837 episode score is 424.28, average_xvel is 0.740


  0%|          | 385/100000 [22:16<95:25:24,  3.45s/it]

5850 episode score is 423.46, average_xvel is 0.714


  0%|          | 386/100000 [22:19<95:09:49,  3.44s/it]

5863 episode score is 422.30, average_xvel is 0.707


  0%|          | 387/100000 [22:23<94:59:01,  3.43s/it]

5876 episode score is 432.88, average_xvel is 0.734


  0%|          | 388/100000 [22:26<95:16:56,  3.44s/it]

5889 episode score is 429.53, average_xvel is 0.738


  0%|          | 389/100000 [22:30<95:03:24,  3.44s/it]

5902 episode score is 440.20, average_xvel is 0.719


  0%|          | 390/100000 [22:33<95:53:42,  3.47s/it]

5915 episode score is 433.12, average_xvel is 0.727


  0%|          | 391/100000 [22:37<96:00:08,  3.47s/it]

5928 episode score is 444.83, average_xvel is 0.737


  0%|          | 392/100000 [22:40<96:47:20,  3.50s/it]

5941 episode score is 436.97, average_xvel is 0.736


  0%|          | 393/100000 [22:44<96:36:32,  3.49s/it]

5954 episode score is 438.56, average_xvel is 0.728


  0%|          | 394/100000 [22:47<96:41:49,  3.49s/it]

5967 episode score is 437.70, average_xvel is 0.724


  0%|          | 395/100000 [22:51<96:42:00,  3.50s/it]

5980 episode score is 436.38, average_xvel is 0.723


  0%|          | 396/100000 [22:54<96:33:24,  3.49s/it]

5993 episode score is 440.42, average_xvel is 0.719


  0%|          | 397/100000 [22:58<96:54:12,  3.50s/it]

6006 episode score is 429.48, average_xvel is 0.729


  0%|          | 398/100000 [23:01<96:09:16,  3.48s/it]

6019 episode score is 434.15, average_xvel is 0.710


  0%|          | 399/100000 [23:05<96:24:48,  3.48s/it]

6032 episode score is 441.03, average_xvel is 0.728
[32m >> Successfully saved normalization parameters! Mean: [ 1.1285438   0.31262799 -0.01033344 -0.0133515   0.54776802  0.00655258
 -0.04311744  0.41310322  0.91897338 -0.33683917  0.29113736 -0.23117479
 -0.25615101 -0.24654145 -0.16677003 -0.48325432 -0.41619561], Std: [0.10427151 0.3659931  0.1023917  0.12680019 0.47541196 0.09446751
 0.18249462 0.5953786  1.02635334 0.5491791  2.0276562  2.19715251
 2.37141671 5.17691082 2.01517368 2.36540703 4.56627658] [0m
[32m >> Successfully saved models! [0m


  0%|          | 400/100000 [23:08<97:01:53,  3.51s/it]

6045 episode score is 444.90, average_xvel is 0.710


  0%|          | 401/100000 [23:12<97:23:14,  3.52s/it]

6058 episode score is 447.45, average_xvel is 0.712


  0%|          | 402/100000 [23:15<97:36:22,  3.53s/it]

6070 episode score is 453.63, average_xvel is 0.705


  0%|          | 403/100000 [23:19<96:08:57,  3.48s/it]

6083 episode score is 454.89, average_xvel is 0.747


  0%|          | 404/100000 [23:22<98:33:55,  3.56s/it]

6096 episode score is 444.84, average_xvel is 0.717


  0%|          | 405/100000 [23:26<103:23:37,  3.74s/it]

6109 episode score is 445.11, average_xvel is 0.716


  0%|          | 406/100000 [23:30<101:18:38,  3.66s/it]

6122 episode score is 447.53, average_xvel is 0.749


  0%|          | 407/100000 [23:33<99:59:30,  3.61s/it] 

6134 episode score is 457.97, average_xvel is 0.714


  0%|          | 408/100000 [23:37<97:11:21,  3.51s/it]

6147 episode score is 455.69, average_xvel is 0.741


  0%|          | 409/100000 [23:40<97:38:06,  3.53s/it]

6159 episode score is 459.33, average_xvel is 0.711


  0%|          | 410/100000 [23:44<96:08:28,  3.48s/it]

6172 episode score is 449.17, average_xvel is 0.725


  0%|          | 411/100000 [23:47<96:30:24,  3.49s/it]

6185 episode score is 446.37, average_xvel is 0.721


  0%|          | 412/100000 [23:51<96:30:12,  3.49s/it]

6198 episode score is 453.08, average_xvel is 0.732


  0%|          | 413/100000 [23:54<96:29:25,  3.49s/it]

6211 episode score is 452.01, average_xvel is 0.714


  0%|          | 414/100000 [23:58<97:10:17,  3.51s/it]

6224 episode score is 455.37, average_xvel is 0.726


  0%|          | 415/100000 [24:01<97:30:29,  3.52s/it]

6237 episode score is 460.22, average_xvel is 0.730


  0%|          | 416/100000 [24:05<97:54:38,  3.54s/it]

6249 episode score is 460.98, average_xvel is 0.716


  0%|          | 417/100000 [24:08<96:07:40,  3.48s/it]

6261 episode score is 465.71, average_xvel is 0.724


  0%|          | 418/100000 [24:12<95:00:56,  3.43s/it]

6273 episode score is 462.81, average_xvel is 0.724


  0%|          | 419/100000 [24:15<94:07:24,  3.40s/it]

6286 episode score is 465.05, average_xvel is 0.727


  0%|          | 420/100000 [24:18<95:47:02,  3.46s/it]

6298 episode score is 462.91, average_xvel is 0.727


  0%|          | 421/100000 [24:22<94:37:59,  3.42s/it]

6311 episode score is 457.19, average_xvel is 0.728


  0%|          | 422/100000 [24:26<100:51:42,  3.65s/it]

6324 episode score is 461.71, average_xvel is 0.717


  0%|          | 423/100000 [24:30<101:18:44,  3.66s/it]

6336 episode score is 458.09, average_xvel is 0.681


  0%|          | 424/100000 [24:33<100:24:13,  3.63s/it]

6349 episode score is 457.11, average_xvel is 0.701


  0%|          | 425/100000 [24:37<104:08:17,  3.76s/it]

6361 episode score is 463.77, average_xvel is 0.707


  0%|          | 426/100000 [24:41<100:43:31,  3.64s/it]

6373 episode score is 463.34, average_xvel is 0.716


  0%|          | 427/100000 [24:44<98:06:56,  3.55s/it] 

6385 episode score is 466.58, average_xvel is 0.708


  0%|          | 428/100000 [24:47<96:32:18,  3.49s/it]

6398 episode score is 459.48, average_xvel is 0.726


  0%|          | 429/100000 [24:51<97:14:18,  3.52s/it]

6411 episode score is 460.35, average_xvel is 0.716


  0%|          | 430/100000 [24:54<98:02:27,  3.54s/it]

6424 episode score is 449.04, average_xvel is 0.695


  0%|          | 431/100000 [24:58<97:59:09,  3.54s/it]

6437 episode score is 451.31, average_xvel is 0.710


  0%|          | 432/100000 [25:02<97:39:32,  3.53s/it]

6450 episode score is 439.58, average_xvel is 0.692


  0%|          | 433/100000 [25:05<97:07:44,  3.51s/it]

6462 episode score is 463.86, average_xvel is 0.702


  0%|          | 434/100000 [25:08<96:09:49,  3.48s/it]

6475 episode score is 460.80, average_xvel is 0.703


  0%|          | 435/100000 [25:12<97:07:39,  3.51s/it]

6488 episode score is 452.21, average_xvel is 0.706


  0%|          | 436/100000 [25:15<96:54:55,  3.50s/it]

6500 episode score is 468.93, average_xvel is 0.694


  0%|          | 437/100000 [25:19<95:18:56,  3.45s/it]

6513 episode score is 454.74, average_xvel is 0.671


  0%|          | 438/100000 [25:22<96:01:39,  3.47s/it]

6526 episode score is 459.20, average_xvel is 0.683


  0%|          | 439/100000 [25:26<96:38:05,  3.49s/it]

6539 episode score is 458.23, average_xvel is 0.677


  0%|          | 440/100000 [25:29<97:04:47,  3.51s/it]

6551 episode score is 462.80, average_xvel is 0.674


  0%|          | 441/100000 [25:33<95:23:06,  3.45s/it]

6563 episode score is 468.71, average_xvel is 0.655


  0%|          | 442/100000 [25:36<94:45:43,  3.43s/it]

6576 episode score is 463.83, average_xvel is 0.687


  0%|          | 443/100000 [25:40<95:56:45,  3.47s/it]

6589 episode score is 451.68, average_xvel is 0.668


  0%|          | 444/100000 [25:43<96:07:14,  3.48s/it]

6602 episode score is 450.50, average_xvel is 0.656


  0%|          | 445/100000 [25:47<96:25:06,  3.49s/it]

6615 episode score is 457.43, average_xvel is 0.659


  0%|          | 446/100000 [25:50<96:18:20,  3.48s/it]

6627 episode score is 463.98, average_xvel is 0.681


  0%|          | 447/100000 [25:53<94:57:07,  3.43s/it]

6640 episode score is 446.14, average_xvel is 0.669


  0%|          | 448/100000 [25:57<95:21:50,  3.45s/it]

6653 episode score is 451.44, average_xvel is 0.668


  0%|          | 449/100000 [26:00<95:53:05,  3.47s/it]

6666 episode score is 463.73, average_xvel is 0.630


  0%|          | 450/100000 [26:04<97:21:30,  3.52s/it]

6679 episode score is 459.07, average_xvel is 0.674


  0%|          | 451/100000 [26:08<97:49:35,  3.54s/it]

6692 episode score is 457.95, average_xvel is 0.661


  0%|          | 452/100000 [26:11<98:09:44,  3.55s/it]

6704 episode score is 460.40, average_xvel is 0.650


  0%|          | 453/100000 [26:15<96:14:00,  3.48s/it]

6716 episode score is 459.98, average_xvel is 0.642


  0%|          | 454/100000 [26:18<94:39:54,  3.42s/it]

6729 episode score is 453.74, average_xvel is 0.651


  0%|          | 455/100000 [26:21<95:24:05,  3.45s/it]

6742 episode score is 459.02, average_xvel is 0.648


  0%|          | 456/100000 [26:25<96:16:58,  3.48s/it]

6755 episode score is 451.86, average_xvel is 0.648


  0%|          | 457/100000 [26:28<96:17:01,  3.48s/it]

6768 episode score is 462.22, average_xvel is 0.673


  0%|          | 458/100000 [26:32<96:42:00,  3.50s/it]

6780 episode score is 472.47, average_xvel is 0.666


  0%|          | 459/100000 [26:35<95:23:50,  3.45s/it]

6793 episode score is 455.90, average_xvel is 0.652


  0%|          | 460/100000 [26:39<95:55:46,  3.47s/it]

6805 episode score is 460.64, average_xvel is 0.620


  0%|          | 461/100000 [26:42<95:16:28,  3.45s/it]

6818 episode score is 457.07, average_xvel is 0.647


  0%|          | 462/100000 [26:46<95:44:01,  3.46s/it]

6831 episode score is 460.90, average_xvel is 0.647


  0%|          | 463/100000 [26:49<96:26:16,  3.49s/it]

6844 episode score is 458.32, average_xvel is 0.644


  0%|          | 464/100000 [26:53<96:47:43,  3.50s/it]

6857 episode score is 457.07, average_xvel is 0.657


  0%|          | 465/100000 [26:56<96:51:34,  3.50s/it]

6870 episode score is 453.35, average_xvel is 0.645


  0%|          | 466/100000 [27:00<96:46:44,  3.50s/it]

6883 episode score is 453.56, average_xvel is 0.643


  0%|          | 467/100000 [27:03<96:32:45,  3.49s/it]

6896 episode score is 446.99, average_xvel is 0.633


  0%|          | 468/100000 [27:07<96:00:42,  3.47s/it]

6909 episode score is 456.29, average_xvel is 0.639


  0%|          | 469/100000 [27:10<96:10:00,  3.48s/it]

6921 episode score is 463.77, average_xvel is 0.614


  0%|          | 470/100000 [27:13<94:54:32,  3.43s/it]

6934 episode score is 453.06, average_xvel is 0.645


  0%|          | 471/100000 [27:17<95:08:17,  3.44s/it]

6947 episode score is 457.44, average_xvel is 0.637


  0%|          | 472/100000 [27:20<95:46:32,  3.46s/it]

6959 episode score is 460.96, average_xvel is 0.603


  0%|          | 473/100000 [27:24<94:32:57,  3.42s/it]

6972 episode score is 456.22, average_xvel is 0.623


  0%|          | 474/100000 [27:27<95:21:55,  3.45s/it]

6984 episode score is 460.43, average_xvel is 0.607


  0%|          | 475/100000 [27:31<94:07:51,  3.40s/it]

6996 episode score is 464.28, average_xvel is 0.608


  0%|          | 476/100000 [27:34<93:30:42,  3.38s/it]

7009 episode score is 455.21, average_xvel is 0.633


  0%|          | 477/100000 [27:37<94:21:14,  3.41s/it]

7022 episode score is 454.71, average_xvel is 0.634


  0%|          | 478/100000 [27:41<94:57:36,  3.43s/it]

7034 episode score is 463.96, average_xvel is 0.626


  0%|          | 479/100000 [27:44<93:46:15,  3.39s/it]

7047 episode score is 443.01, average_xvel is 0.614


  0%|          | 480/100000 [27:48<94:06:13,  3.40s/it]

7060 episode score is 456.61, average_xvel is 0.625


  0%|          | 481/100000 [27:51<94:57:30,  3.44s/it]

7073 episode score is 451.95, average_xvel is 0.612


  0%|          | 482/100000 [27:55<95:26:47,  3.45s/it]

7086 episode score is 463.95, average_xvel is 0.631


  0%|          | 483/100000 [27:58<96:14:50,  3.48s/it]

7099 episode score is 458.71, average_xvel is 0.615


  0%|          | 484/100000 [28:02<96:40:02,  3.50s/it]

7112 episode score is 457.19, average_xvel is 0.615


  0%|          | 485/100000 [28:05<97:10:04,  3.52s/it]

7125 episode score is 449.82, average_xvel is 0.613


  0%|          | 486/100000 [28:09<96:48:52,  3.50s/it]

7138 episode score is 461.79, average_xvel is 0.627


  0%|          | 487/100000 [28:12<97:14:21,  3.52s/it]

7151 episode score is 459.09, average_xvel is 0.609


  0%|          | 488/100000 [28:16<97:15:43,  3.52s/it]

7164 episode score is 458.03, average_xvel is 0.601


  0%|          | 489/100000 [28:20<96:08:36,  3.48s/it]


KeyboardInterrupt: 

In [None]:
# Final report cell: show the two summary values left in the kernel namespace.
# NOTE(review): score_avg and xvel_avg are not defined in this cell; presumably
# they are assigned by the training loop in an earlier cell -- confirm that the
# loop ran far enough to define them before executing this cell.
score_line = ('average score: ', score_avg)
print(*score_line)
# Built and printed separately so the first line is emitted before the second
# value is looked up.
xvel_line = ('average xvel:  ', xvel_avg)
print(*xvel_line)