In [1]:
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as T
from torch.nn.init import kaiming_uniform_
from torch.distributions import Normal

In [2]:
# Scratch check of torch.normal with per-element parameters: element i is drawn
# with mean[i] and std[i]. No seed is set here, so the printed values change on
# every run.
torch.normal(mean=torch.arange(1., 6.), std=torch.arange(1., 6.))

tensor([0.7506, 1.1183, 5.6112, 3.3301, 2.9043])

In [3]:
import numpy as np
import gym
from tqdm import tqdm
import random as rand
from itertools import count

In [4]:
class ReplayMemory():
    """Fixed-capacity replay buffer that overwrites its oldest slots when full.

    Experiences are stored in a plain list. Once ``capacity`` entries exist,
    ``push`` overwrites slot ``push_count % capacity``.
    """

    def __init__(self,capacity):
        self.capacity = capacity
        self.memory = []
        self.push_count = 0  # total number of pushes so far (never wraps)

    def push(self, experience):
        """Store ``experience``, overwriting an existing slot once the buffer is full."""
        if len(self.memory) < self.capacity:
            self.memory.append(experience)
        else:
            self.memory[self.push_count%self.capacity] = experience
        self.push_count+=1

    def sample(self, batch_size):
        """Return ``batch_size`` distinct experiences drawn uniformly at random.

        Raises ValueError (from ``random.sample``) when fewer than
        ``batch_size`` experiences are stored.
        """
        return rand.sample(self.memory,batch_size)

    def can_provide_sample(self, batch_size):
        """Return True when at least ``batch_size`` experiences are stored."""
        return len(self.memory)>=batch_size

    def update_td_error(self, sampled_experiences):
        """Replace stored experiences with updated copies, matched by ``timestep``.

        Each element of ``sampled_experiences`` must expose a ``timestep``
        attribute. The first stored experience with an equal timestep is
        overwritten by the sampled one; sampled experiences whose timestep is
        not in the buffer are ignored.
        """
        # Build the timestep -> slot index once per call instead of scanning the
        # whole buffer for every sampled experience. setdefault keeps the first
        # slot for a timestep, matching a front-to-back scan that stops at the
        # first hit.
        slot_by_timestep = {}
        for slot, stored in enumerate(self.memory):
            slot_by_timestep.setdefault(stored.timestep, slot)

        for sampled in sampled_experiences:
            slot = slot_by_timestep.get(sampled.timestep)
            if slot is not None:
                self.memory[slot] = sampled

    def get_memory_values(self):
        """Return the underlying list of stored experiences (not a copy)."""
        return self.memory

In [5]:
def extract_tensors(experiences):
    """Transpose a sequence of experiences into one stacked array per field.

    Args:
        experiences: iterable of 7-field records in ``Xp`` field order.

    Returns:
        Tuple ``(state, action, next_state, reward, done, abs_td_error,
        timestep)`` where each entry is ``np.stack`` of that field over all
        experiences.
    """
    # zip(*...) turns a list of records into per-field columns.
    columns = Xp(*zip(*experiences))
    field_order = ('state', 'action', 'next_state', 'reward',
                   'done', 'abs_td_error', 'timestep')
    return tuple(np.stack(getattr(columns, field)) for field in field_order)

In [6]:
def rebuild_experiences(state, action, next_state, reward, done, abs_error, timestep):
    """Zip per-field batches back into a list of ``Xp`` records.

    Row ``i`` of every argument becomes the i-th experience. The number of
    records produced equals ``len(state)``.
    """
    return [
        Xp(state[row], action[row], next_state[row], reward[row],
           done[row], abs_error[row], timestep[row])
        for row in range(len(state))
    ]

In [7]:
from collections import namedtuple
# Record type stored in the replay buffer; the field order is relied upon by
# extract_tensors / rebuild_experiences.
Xp = namedtuple(
    'Experience',
    ['state', 'action', 'next_state', 'reward', 'done', 'abs_td_error', 'timestep'],
)
# Small smoke-test instance so the cell displays the record layout.
Xp_points = Xp(state=5, action=6, next_state=7, reward=8, done=9,
               abs_td_error=10, timestep=11)
Xp_points

Experience(state=5, action=6, next_state=7, reward=8, done=9, abs_td_error=10, timestep=11)

In [8]:
def prioritize_samples(experience_samples, alpha, beta):
    """Compute rank-based importance weights for a sampled batch.

    The stored absolute TD errors are sorted from largest to smallest. Rank
    ``k`` (1-based) gets priority ``(1/k)**alpha``; priorities are normalised
    to probabilities, turned into weights ``(N * p)**-beta`` and divided by
    the largest weight.

    Returns:
        (weights, indices_): ``weights`` is a NumPy array with one row per
        rank (row 0 = largest TD error); ``indices_`` is the torch index
        tensor produced by the descending sort, i.e. which sampled experience
        holds each rank.
    """
    # Only the TD-error column (position 5 of the extracted tuple) is needed.
    stored_errors = extract_tensors(experience_samples)[5]
    error_column = torch.tensor(np.expand_dims(stored_errors, axis=1))
    sorted_errors, indices_ = error_column.sort(0, descending=True)  # big to small

    ranks = np.arange(1, len(sorted_errors) + 1)
    priorities = np.expand_dims((1.0 / ranks) ** alpha, axis=1)
    probabilities = priorities / np.sum(priorities, axis=0)
    assert np.isclose(probabilities.sum(), 1.0)  # probabilities must sum to 1

    batch = len(probabilities)
    raw_weights = (batch * probabilities) ** -beta
    # Scale so the largest weight is exactly 1.
    return raw_weights / np.max(raw_weights), indices_

In [9]:
class linearApproximator_FCGSAP(nn.Module):
    """Fully connected Gaussian policy with tanh-squashed actions.

    Two linear heads on a shared ReLU trunk produce the mean and the (clamped)
    log standard deviation of a diagonal Normal. The module also owns the
    learnable log-temperature ``log_alpha`` and its Adam optimizer;
    ``log_alpha`` is a plain tensor, so it is not part of ``parameters()``.
    """

    def __init__(self, state_shape, outputs, hidden_dims=(32, 32), log_entropy_lr=0.0001,
                 log_std_dev_min=-20, log_std_dev_max=2, epsilon=1e-6):
        super().__init__()
        self.input_size = state_shape
        self.out = outputs
        self.log_std_dev_min = log_std_dev_min
        self.log_std_dev_max = log_std_dev_max
        self.epsilon = epsilon
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Trunk: input layer followed by one Linear per consecutive pair of
        # hidden sizes.
        self.fc1 = nn.Linear(self.input_size, hidden_dims[0])
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out)
             for fan_in, fan_out in zip(hidden_dims[:-1], hidden_dims[1:])]
        )

        # Heads: log-std ("distribution") and mean.
        self.output_layer_distribution = nn.Linear(hidden_dims[-1], self.out)
        self.output_layer_mean = nn.Linear(hidden_dims[-1], self.out)

        # Entropy target is minus the action dimensionality.
        self.target_entropy = -float(self.out)
        # log(alpha) is optimised directly by its own Adam instance.
        self.log_alpha = torch.zeros(1, requires_grad=True, device=self.device)
        self.log_alpha_optimizer = torch.optim.Adam([self.log_alpha], lr=log_entropy_lr)

        self.to(self.device)

    def forward(self, state_shape):
        """Return ``(mean, log_std)`` for the given batch of states.

        Non-tensor input is converted to a float32 tensor; the input is moved
        to the module's device. ``log_std`` is clamped to
        ``[log_std_dev_min, log_std_dev_max]``.
        """
        observations = state_shape
        if not isinstance(observations, torch.Tensor):
            observations = torch.tensor(observations, dtype=torch.float32)
        features = F.relu(self.fc1(observations.to(self.device)))
        for layer in self.hidden_layers:
            features = F.relu(layer(features))

        log_std = self.output_layer_distribution(features)
        mean = self.output_layer_mean(features)
        return mean, torch.clamp(log_std, self.log_std_dev_min, self.log_std_dev_max)

    def full_pass(self, state):
        """Sample squashed actions and their log-probabilities.

        Returns ``(actions, log_probs, mean)`` where ``actions`` are
        reparameterised samples passed through tanh, and ``log_probs`` is
        summed over dim 1 with ``keepdim=True`` (so a 2-D batch is required).
        """
        mean, log_std = self.forward(state)
        policy = Normal(mean, log_std.exp())
        raw_actions = policy.rsample()
        squashed = torch.tanh(raw_actions)  # actions in [-1, 1]
        # Change-of-variables correction for the tanh squashing; epsilon keeps
        # the log finite when |action| reaches 1.
        tanh_correction = torch.log((1 - squashed.pow(2)).clamp(0, 1) + self.epsilon)
        log_probs = (policy.log_prob(raw_actions) - tanh_correction).sum(dim=1, keepdim=True)
        return squashed, log_probs, mean

In [10]:
class linearApproximator_FCQV(nn.Module):#Q value of state action pair
    """Fully connected critic estimating Q(state, action).

    The state goes through ``fc1`` alone; the action is concatenated to that
    hidden representation before the first hidden layer, whose input width is
    therefore ``hidden_dims[0] + action_outputs_size``.

    Raises:
        ValueError: if ``hidden_dims`` has fewer than two entries. With a
            single entry no hidden layer exists to receive the action, so the
            critic would silently ignore it.
    """

    def __init__(self,state_shape,action_outputs_size,hidden_dims=(32,32)):
        super(linearApproximator_FCQV, self).__init__()
        if len(hidden_dims) < 2:
            raise ValueError(
                "hidden_dims needs at least two entries so the action can be "
                "injected before the first hidden layer")
        self.input_size = state_shape
        self.action_outputs_size = action_outputs_size
        self.device = torch.device("cuda" if torch.cuda.is_available()\
                                   else "cpu")

        self.fc1  = nn.Linear(self.input_size,hidden_dims[0])
        self.hidden_layers = nn.ModuleList()
        for i in range(len(hidden_dims)-1):
            hidden_input_layer = hidden_dims[i]
            if i == 0:
                # First hidden layer also receives the action vector.
                hidden_input_layer += self.action_outputs_size
            hidden_layer = nn.Linear(\
                                hidden_input_layer, hidden_dims[i+1])
            self.hidden_layers.append(hidden_layer)

        self.output_layer  = nn.Linear(hidden_dims[-1],1)
        self.to(self.device)

    def forward(self, state_shape, action_shape):
        """Return the Q-value for each (state, action) row.

        Both inputs must be 2-D batches (they are concatenated along dim 1).
        Non-tensor inputs are converted to float32 tensors. Every input,
        tensor or not, is moved to the module's device so callers may pass
        CPU tensors to a model living on another device.
        """
        if not isinstance(state_shape, torch.Tensor):
            state_shape = torch.tensor(state_shape, dtype=torch.float32)
        if not isinstance(action_shape, torch.Tensor):
            action_shape = torch.tensor(action_shape, dtype=torch.float32)
        state_shape = state_shape.to(self.device)
        action_shape = action_shape.to(self.device)

        x = F.relu(self.fc1(state_shape))
        # The constructor guarantees at least one hidden layer, so the action
        # is always injected exactly once, right before it.
        x = torch.cat((x, action_shape), dim=1)
        for hidden_layer in self.hidden_layers:
            x = F.relu(hidden_layer(x))

        q_value = self.output_layer(x)
        return q_value

In [11]:
def update_networks(online_q_network_a, online_q_network_b,
                    offline_q_network_a, offline_q_network_b, tau):
    """Blend the online critics into their target (offline) copies in place.

    Every target parameter becomes ``(1 - tau) * target + tau * online``.
    Parameters are paired positionally via ``parameters()``.

    Returns:
        ``(offline_q_network_a, offline_q_network_b)`` - the same objects that
        were passed in, after the update.
    """
    network_pairs = ((offline_q_network_a, online_q_network_a),
                     (offline_q_network_b, online_q_network_b))
    for target_net, online_net in network_pairs:
        for target_param, online_param in zip(target_net.parameters(),
                                              online_net.parameters()):
            blended = (1.0 - tau) * target_param.data + tau * online_param.data
            target_param.data.copy_(blended)

    return offline_q_network_a, offline_q_network_b

In [21]:
def update_online_model(experience_samples,\
                        online_policy_network, online_q_network_a, online_q_network_b,\
                        online_policy_optimizer, online_q_optimizer_a, online_q_optimizer_b,\
                        offline_q_network_a, offline_q_network_b,\
                        gamma, weighted_importance, indices):
    """Run one SAC update (temperature, twin critics, policy) on a replay batch.

    Args:
        experience_samples: sampled ``Xp`` records.
        online_policy_network: policy exposing ``full_pass``, ``log_alpha``,
            ``log_alpha_optimizer`` and ``target_entropy``.
        online_q_network_a / online_q_network_b: critics being trained.
        online_policy_optimizer / online_q_optimizer_a / online_q_optimizer_b:
            optimizers for the policy and the two critics.
        offline_q_network_a / offline_q_network_b: target critics.
        gamma: discount factor.
        weighted_importance: importance-sampling weights, one row per rank
            (row ``k`` belongs to the experience ``indices[k]``).
        indices: permutation of the batch mapping rank -> position in
            ``experience_samples``; assumed to cover every batch position
            exactly once.

    Returns:
        List of ``Xp`` records in the same order as ``experience_samples``,
        each carrying its freshly computed absolute TD error.

    Notes on the update rules:
        * Critic target: ``r + gamma * (1 - done) * (min Q_target(s', a') -
          alpha * log pi(a'|s'))``; the entropy bonus is masked at terminal
          transitions together with the bootstrap value.
        * Importance weights scale the per-sample critic loss, not the Q
          values or the target.
        * The policy loss ``alpha * log pi - min Q(s, a_pi)`` keeps the graph
          through the reparameterised actions so the critics' gradient reaches
          the policy.
        * New TD errors use Q at the replayed actions and are scattered back
          to the original sample order before the experiences are rebuilt.
    """
    states, actions, next_states, rewards, done, _, timesteps = extract_tensors(experience_samples)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Flatten the sort indices to 1-D; reshape(-1) also copes with batch size 1.
    order = np.array(indices).reshape(-1)
    batch_size = len(order)

    def as_ranked_batch(values):
        """Reorder rows by rank and return a float tensor shaped (batch, -1)."""
        ranked = np.asarray(values)[order].reshape(batch_size, -1)
        return torch.tensor(ranked).float().to(device)

    states_t = as_ranked_batch(states)
    actions_t = as_ranked_batch(actions)
    next_states_t = as_ranked_batch(next_states)
    rewards_t = as_ranked_batch(rewards)   # (batch, 1) for scalar rewards
    done_t = as_ranked_batch(done)         # (batch, 1) for scalar flags
    weights_t = torch.as_tensor(weighted_importance).float().to(device).reshape(batch_size, 1)

    # --- temperature (alpha) update -------------------------------------
    current_actions, log_pi, _ = online_policy_network.full_pass(states_t)
    entropy_gap = (log_pi + online_policy_network.target_entropy).detach()
    alpha_loss = -(online_policy_network.log_alpha * entropy_gap).mean()
    online_policy_network.log_alpha_optimizer.zero_grad()
    alpha_loss.backward()
    online_policy_network.log_alpha_optimizer.step()
    alpha = online_policy_network.log_alpha.exp().detach()

    # --- critic target (no gradients needed) ----------------------------
    with torch.no_grad():
        next_actions, log_pi_next, _ = online_policy_network.full_pass(next_states_t)
        q_next = torch.min(offline_q_network_a(next_states_t, next_actions),
                           offline_q_network_b(next_states_t, next_actions))
        td_target = rewards_t + gamma * (1.0 - done_t) * (q_next - alpha * log_pi_next)

    # --- critic update, importance weights applied per sample -----------
    loss_func = torch.nn.SmoothL1Loss(reduction='none')
    q_sa_online_a = online_q_network_a(states_t, actions_t)
    q_sa_online_b = online_q_network_b(states_t, actions_t)

    # TD errors for the priorities, taken before the critics are stepped.
    abs_a = (td_target - q_sa_online_a).abs()
    abs_b = (td_target - q_sa_online_b).abs()
    td_by_rank = ((abs_a + abs_b) / 2).detach().cpu().numpy()

    q_online_value_loss_a = (weights_t * loss_func(q_sa_online_a, td_target)).mean()
    q_online_value_loss_b = (weights_t * loss_func(q_sa_online_b, td_target)).mean()
    online_q_optimizer_a.zero_grad()
    q_online_value_loss_a.backward()
    online_q_optimizer_a.step()
    online_q_optimizer_b.zero_grad()
    q_online_value_loss_b.backward()
    online_q_optimizer_b.step()

    # --- policy update ---------------------------------------------------
    # The critics are re-evaluated AFTER their optimizer steps: backpropagating
    # through a forward pass made with the pre-step weights would fail because
    # those weights were modified in place. Gradients that this backward pass
    # leaves on the critic parameters are discarded by the zero_grad() calls
    # of the next critic update.
    q_pi = torch.min(online_q_network_a(states_t, current_actions),
                     online_q_network_b(states_t, current_actions))
    policy_loss = (alpha * log_pi - q_pi).mean()
    online_policy_optimizer.zero_grad()
    policy_loss.backward()
    online_policy_optimizer.step()

    # Row k of td_by_rank belongs to sample order[k]; scatter the errors back
    # so they line up with the untouched, original-order arrays.
    td_by_sample = np.empty_like(td_by_rank)
    td_by_sample[order] = td_by_rank
    experiences_rebuilded = rebuild_experiences(states, actions, next_states, rewards, done,
                                                td_by_sample, timesteps)
    return experiences_rebuilded

In [22]:
def query_error(online_policy_network, offline_q_network_a, offline_q_network_b,\
                online_q_network_a, online_q_network_b, state, action, next_state, reward, gamma):
    """Estimate the absolute TD error of a single transition.

    The target is ``reward + gamma * (min(Q_target_a, Q_target_b)(s', a') -
    alpha * log pi(a'|s'))`` with ``a'`` sampled from the policy at
    ``next_state``. The result is the mean of the two online critics' absolute
    deviations from that target.

    Args:
        state, next_state: single observations; a batch dimension is added.
        action: the action that was taken; a batch dimension is added.
        reward: scalar reward.
        gamma: discount factor.

    Returns:
        NumPy array holding the averaged absolute error, with the leading
        batch dimension of size 1 kept.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    state = torch.tensor(state).float().to(device).unsqueeze(0)
    next_state = torch.tensor(next_state).float().to(device).unsqueeze(0)
    action = np.expand_dims(action, axis=0)

    # Pure evaluation: nothing here is optimised, so skip graph construction.
    with torch.no_grad():
        alpha = online_policy_network.log_alpha.exp()

        ns_actions, log_pi_ns, _ = online_policy_network.full_pass(next_state)
        q_target_next_states_action_a = offline_q_network_a(next_state, ns_actions)
        q_target_next_states_action_b = offline_q_network_b(next_state, ns_actions)

        TWIN_target = torch.min(q_target_next_states_action_a, q_target_next_states_action_b)
        TWIN_target = TWIN_target - alpha * log_pi_ns
        TWIN_target = reward + (gamma * TWIN_target)

        # Only the action actually taken is scored; no policy sample at
        # `state` is required.
        abs_a = (TWIN_target - online_q_network_a(state, action)).abs()
        abs_b = (TWIN_target - online_q_network_b(state, action)).abs()
        ovr_abs_update = (abs_a + abs_b) / 2

    return ovr_abs_update.cpu().numpy()

In [23]:
def freeze_model(model):
    """Turn off gradient tracking for every parameter of ``model``.

    The model is modified in place and returned for convenience.
    """
    for weight in model.parameters():
        weight.requires_grad_(False)
    return model

In [24]:
def select_action(state, online_policy_network):
    """Sample one action from the policy for a single observation.

    The observation is converted to a float tensor with a leading batch
    dimension; the sampled action is returned as a NumPy array with all
    size-1 dimensions squeezed away.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    batched_state = torch.tensor(state).float().to(device).unsqueeze(0)
    with torch.no_grad():
        sampled, _log_prob, _mean = online_policy_network.full_pass(batched_state)
    return sampled.detach().cpu().numpy().squeeze()

In [25]:
def SAC_PER(env,
         gamma=0.99,
         alpha_pr=0.6,
         beta_pr=0.3,
         memory_size = 50000,
         tau = 0.01,
         offline_update = 1000,
         min_sample_size=200,
         batch_size = 64,
         n_ep=2000,
         max_steps = 100000,
         render=True
         ):
    """Train Soft Actor-Critic with rank-prioritised replay on ``env``.

    The environment is used through the classic gym interface: ``reset()``
    returns the observation and ``step()`` returns
    ``(next_state, reward, done, info)``.

    Args:
        env: environment with a box action space (``action_space.high``).
        gamma: discount factor.
        alpha_pr, beta_pr: exponents passed to ``prioritize_samples``.
        memory_size: replay buffer capacity.
        tau: mixing factor of the soft target update.
        offline_update: target critics are soft-updated every this many steps.
        min_sample_size: stored experiences required before learning starts.
        batch_size: number of experiences per update.
        n_ep: maximum number of episodes.
        max_steps: training stops once the global step count exceeds this.
        render: call ``env.render()`` every step (default keeps the previous
            behaviour; pass False for headless runs).

    Returns:
        List with the accumulated reward of every completed episode.
    """
    observation_space = len(env.reset())
    n_actions = len(env.action_space.high)
    online_policy_network = linearApproximator_FCGSAP(observation_space,n_actions,\
                                     hidden_dims=(128,64))
    online_q_network_a = linearApproximator_FCQV(observation_space,\
                                     n_actions,hidden_dims=(128,64))
    online_q_network_b = linearApproximator_FCQV(observation_space,\
                                     n_actions,hidden_dims=(128,64))

    offline_q_network_a = linearApproximator_FCQV(observation_space,\
                                     n_actions,hidden_dims=(128,64))
    offline_q_network_b = linearApproximator_FCQV(observation_space,\
                                     n_actions,hidden_dims=(128,64))

    # Start the target critics as exact copies of the online critics. Without
    # this they keep their own random initialisation and, with small tau and
    # infrequent soft updates, provide unrelated bootstrap targets.
    offline_q_network_a.load_state_dict(online_q_network_a.state_dict())
    offline_q_network_b.load_state_dict(online_q_network_b.state_dict())

    offline_q_network_a.eval()
    offline_q_network_a = freeze_model(offline_q_network_a)
    offline_q_network_b.eval()
    offline_q_network_b = freeze_model(offline_q_network_b)

    online_policy_optimizer    = torch.optim.Adam(online_policy_network.parameters(),lr=0.0008)
    online_q_optimizer_a = torch.optim.Adam(online_q_network_a.parameters(),lr=0.0008)
    online_q_optimizer_b = torch.optim.Adam(online_q_network_b.parameters(),lr=0.0008)

    memory = ReplayMemory(memory_size)

    # Learning may only start once a full batch can be drawn; otherwise
    # memory.sample(batch_size) raises when min_sample_size < batch_size.
    warmup_size = max(min_sample_size, batch_size)

    t_step = 0 # global step counter; also used as the experience's unique id
    reward_per_ep = []

    for e in tqdm(range(n_ep)):
        state = env.reset()
        reward_accumulated = 0

        while True:
            if render:
                env.render()
            action = select_action(state, online_policy_network)

            next_state, reward, done, info = env.step(action)
            # Initial priority of the new transition.
            td_error = query_error(online_policy_network, offline_q_network_a, offline_q_network_b,\
                online_q_network_a, online_q_network_b, state, action, next_state, reward, gamma)
            td_error = np.squeeze(td_error, axis = 0)
            reward_accumulated+=reward
            # A time-limit truncation is not a real terminal state, so it must
            # not cut the bootstrap.
            is_truncated = 'TimeLimit.truncated' in info and\
                                info['TimeLimit.truncated']
            is_failure = done and not is_truncated

            memory.push(Xp(state, action, next_state, reward, is_failure, td_error, t_step))
            state = next_state
            t_step+=1
            if memory.can_provide_sample(warmup_size):
                experience_samples = memory.sample(batch_size)
                weighted_importance, indices = prioritize_samples(experience_samples, alpha_pr, beta_pr)
                rebuilded_exp = update_online_model(experience_samples,\
                        online_policy_network, online_q_network_a, online_q_network_b,\
                        online_policy_optimizer, online_q_optimizer_a, online_q_optimizer_b,\
                        offline_q_network_a, offline_q_network_b,\
                        gamma, weighted_importance, indices)
                # Write the refreshed TD errors back into the buffer.
                memory.update_td_error(rebuilded_exp)

            if t_step%offline_update == 0:
                offline_q_network_a, offline_q_network_b = update_networks(online_q_network_a, online_q_network_b,\
                                                                    offline_q_network_a, offline_q_network_b, tau)
            if done:
                reward_per_ep.append(reward_accumulated)
                break
            if t_step > max_steps:
                env.close()
                return reward_per_ep
    env.close()
    return reward_per_ep

In [26]:
import gym
# Environment handed to SAC_PER below.
env = gym.make('BipedalWalker-v3')

In [29]:
# NOTE(review): this cell carries execution count 29 but sits before the
# training cell (In [28]); on a top-to-bottom run it closes the environment
# before SAC_PER uses it. Presumably it was run manually after training -
# consider moving it below the training cell.
env.close()

In [28]:
# Train with the default hyper-parameters; `rewards` holds one accumulated
# reward per completed episode.
rewards = SAC_PER(env)  

  0%|          | 0/2000 [00:00<?, ?it/s]

[ 0.31225717  0.89702725 -0.79252076 -0.44635463]
[-0.7432438  -0.8940234  -0.4491917   0.42728582]
[-0.79932886 -0.854982   -0.9551391  -0.9587367 ]
[-0.82561314 -0.05194125  0.81717324 -0.26043847]
[ 0.22441521 -0.37230337 -0.00641009 -0.79782015]
[ 0.91675013  0.46006736 -0.6129836   0.82974625]
[-0.26337644 -0.0582804   0.31584558 -0.8430401 ]
[-0.59974164  0.7967043  -0.48568317 -0.76293117]
[-0.09907963 -0.8899228   0.64167094  0.8945864 ]
[ 0.29777417 -0.16318825 -0.6858963   0.87014484]
[ 0.331387   -0.78806055 -0.2505248  -0.6301668 ]
[ 0.6513088  -0.5614964  -0.62408084  0.8819127 ]
[ 0.26891983  0.75908196 -0.4508458  -0.26026094]
[ 0.25109038 -0.23522618  0.435487   -0.00044958]
[ 0.748021   0.5204329 -0.8629143  0.7069999]
[ 0.32604787  0.6999279   0.9269354  -0.6119741 ]
[ 0.62071466 -0.91474396 -0.30140376 -0.33809263]
[ 0.92291385  0.0800845  -0.82218564  0.05405251]
[ 0.5191721  -0.7326735   0.6305177  -0.66391534]
[-0.48643303  0.85014284  0.74433684 -0.02755189]
[-0.

[ 0.46929437  0.09244566 -0.18194325 -0.6485461 ]
[-0.5646587  -0.60220456  0.71915007  0.50074166]
[-0.4472952   0.7664155   0.71983147 -0.2669641 ]
[ 0.41013837 -0.7216784   0.96333796 -0.59874785]
[ 0.9588553   0.60931253 -0.9262006  -0.8481347 ]
[ 0.553233   -0.9266383  -0.30905488 -0.04399695]
[ 0.70065486 -0.5263101   0.60632    -0.8342071 ]
[-0.25815257  0.31663632 -0.18773615 -0.2542248 ]
[-0.82333195  0.8902515  -0.83909875  0.23362584]
[ 0.36499986  0.08882324 -0.9567853  -0.35637525]
[-0.92543584 -0.2711756   0.25791404  0.27508685]
[-0.83575875 -0.2853313  -0.5918309   0.16523834]
[ 0.164723    0.42776415 -0.76163656 -0.5239291 ]
[ 0.92358285 -0.61245835 -0.43060362  0.04354104]
[ 0.26057747  0.38321474 -0.02818973  0.3944738 ]
[-0.7496264   0.85116357 -0.48021057  0.50318414]
[ 0.0592734   0.18562552 -0.14054944  0.77329016]
[-0.02984556  0.37899756 -0.947459    0.6205896 ]
[-0.6709941   0.20106319 -0.43797758 -0.7609483 ]
[0.22056925 0.21665254 0.35363418 0.21675661]
[-0.

[ 0.20590842 -0.703655   -0.28982306  0.9435683 ]
[-0.19456366 -0.3568477   0.71300113 -0.3749177 ]
[-0.355209   -0.9056148  -0.11369034 -0.30114037]
[ 0.0905792  -0.7572904   0.10344674  0.519364  ]
[ 0.33668172 -0.54940146 -0.84074277 -0.18966275]
[ 0.9145525  -0.9847863   0.69093573 -0.13034359]
[-0.24982554  0.92174727 -0.6842705  -0.7109461 ]
[0.21213782 0.6281527  0.8809123  0.71144617]
[-0.8337972  -0.69043016 -0.56214106  0.80856615]
[-0.53864783 -0.63756895  0.91733706  0.68397605]
[ 0.9072213  -0.7378766  -0.11882979 -0.9301195 ]
[-0.5532026  0.8016565  0.9731082 -0.5740516]
[0.81580055 0.42317155 0.87177235 0.6609159 ]
[ 0.89178807  0.46649218  0.5570138  -0.46686167]
[ 0.4182994   0.9048997   0.10698628 -0.83991706]
[-0.89483696  0.0432418  -0.9387371   0.16126318]
[ 0.96104187 -0.10978534 -0.4578757   0.35400385]
[ 0.12123009 -0.9563756  -0.8571943  -0.34925365]
[0.17102568 0.698953   0.6799754  0.78465015]
[ 0.4353387   0.25051776 -0.34419358  0.1860269 ]
[-0.11612336  0.

[-0.95086837  0.8716552  -0.27714926 -0.97078115]
[-0.9404232  -0.17526583 -0.54847735  0.39705673]
[-0.7862877   0.7976882  -0.09314978  0.03875855]
[-0.10637313  0.22446173  0.04330362 -0.64732707]
[0.54335344 0.41732976 0.83104175 0.20465341]
[-0.54391944 -0.6628015  -0.7060667   0.86104953]
[-0.9606271   0.8073592   0.07819515  0.39050764]
[-0.6194048   0.9184751  -0.8435751  -0.24493454]
[-0.19576168 -0.8865966   0.99435395 -0.17542866]
[-0.18370621 -0.494956   -0.47999105  0.8606797 ]
[-0.67511123 -0.78851825 -0.34006464  0.6463059 ]
[-0.09490444 -0.5910299  -0.22184964  0.1103148 ]
[-0.3876792   0.77409995 -0.3562525  -0.88321835]
[ 0.5180598   0.8473507   0.11077297 -0.82814616]
[-0.7026142  -0.21185648 -0.7549128  -0.44665438]
[ 0.2584338  -0.26324627  0.96502423  0.24921578]
[ 0.59147394  0.8720606   0.81912    -0.34009725]
[-0.1598643  -0.19351168  0.82693243 -0.449664  ]
[ 0.13281022 -0.2704377   0.45894548  0.78836256]
[ 0.28693527  0.9183649  -0.9725562   0.33790186]
[ 0.

[-0.3216951   0.7488352  -0.39675266 -0.69882774]
[ 0.16371016  0.3802911  -0.31353006 -0.6344045 ]
[-0.416608    0.8093554  -0.7766119  -0.04824497]
[ 0.19493249 -0.8187894   0.11700929  0.3612725 ]
[-0.78958815  0.1487642  -0.22024757  0.1407701 ]
[ 0.18661973 -0.6192901  -0.9555528  -0.8646679 ]
[-0.54846275 -0.53632635  0.6030365  -0.8921147 ]
[-0.42290506  0.36605266  0.44509616  0.23240279]
[ 0.78593504  0.9041752  -0.68574965 -0.7912392 ]
[ 0.5225765  -0.9170697  -0.04981992 -0.6725153 ]
[ 0.32353783 -0.48301896 -0.44688162 -0.17036529]
[ 0.18258683 -0.74998504  0.16315715  0.38671353]
[-0.6880263  -0.96430117  0.656607   -0.13683134]
[0.6561105  0.0489278  0.4212476  0.69889313]
[ 0.91438955  0.17491187  0.40528792 -0.00345519]
[-0.1540266   0.08519453 -0.671376    0.24230866]
[ 0.9676161  -0.5892172   0.4159435   0.49798596]
[-0.1153221  -0.2707702   0.7397634   0.69933164]
[0.13975653 0.5923786  0.34746346 0.5373806 ]
[-0.36980692  0.05976697 -0.28845808  0.43146807]
[0.68400

[ 0.06560151 -0.79697186 -0.32061017 -0.83702064]
[ 0.08351534 -0.7821331   0.08435952 -0.92922753]
[ 0.14672442  0.7864964  -0.90602297 -0.9627927 ]
[-0.7296175  -0.12966439  0.64148945 -0.3939132 ]
[-0.6999984  -0.15443279 -0.3313075   0.9577056 ]
[-0.613492   -0.39716324 -0.2971839  -0.72498107]
[-0.5786811  -0.06685457 -0.13822521 -0.31559935]
[-0.19344346 -0.06359739 -0.5568199   0.68164355]
[-0.40109    -0.20662728  0.8455356   0.40161896]
[-0.03759556  0.07660217  0.6416745  -0.01731515]
[-0.5091309  -0.20976444  0.9100885  -0.2764207 ]
[ 0.13689317 -0.74771154 -0.15910262  0.32013944]
[-0.31798849  0.35032442  0.00633025  0.37716034]
[ 0.66970897 -0.8183128   0.00225755 -0.02632003]
[ 0.91585165 -0.378369    0.12076058 -0.07439188]
[0.8459342  0.4509024  0.14349808 0.49913523]
[-0.47929367 -0.9355408   0.9526009   0.3090911 ]
[-0.31172335  0.12284152  0.2951104  -0.88618565]
[ 0.11900338 -0.20400605  0.5853088  -0.93289334]
[-0.3062288  -0.53225094  0.8059575   0.02748229]
[-0.

[-0.23266202 -0.8799859   0.58620924  0.8438355 ]
[ 0.28202602 -0.5790669  -0.3139305   0.6943578 ]
[-0.7461728  0.3367038  0.6504046 -0.0298145]
[ 0.40202072  0.67919636  0.73999596 -0.6438266 ]
[ 0.21692105 -0.6917244   0.24164425  0.8558749 ]
[-0.3653619  -0.67020154 -0.35492277  0.85367525]
[ 0.45532885  0.26700312  0.7003558  -0.29831   ]
[0.23202786 0.89578426 0.35310054 0.72167134]
[-0.35766068  0.7909778   0.2969858  -0.94700843]
[ 0.2874672 -0.7558595  0.6340374 -0.9200998]
[ 0.49314484 -0.40374207 -0.59689504 -0.06825776]
[-0.06850468 -0.21065754 -0.3841902  -0.70088625]
[-0.32470822 -0.00896012  0.46237305 -0.5859717 ]
[-0.9420009   0.6342078  -0.5048859  -0.09368158]
[ 0.08320205  0.6417815   0.90788454 -0.81955874]
[ 0.17482977  0.27634883  0.55364835 -0.04513018]
[0.19337258 0.44785413 0.8463781  0.0820772 ]
[-0.08751935  0.18587348 -0.14989181 -0.18898728]
[-0.70548356  0.48993856 -0.5811273   0.968068  ]
[-0.17325231 -0.24953397 -0.16756444  0.89222294]
[-0.8315463  -0.

[0.2188173  0.5493865  0.59377396 0.4169046 ]
[ 0.589163    0.5850842  -0.38444036 -0.43014348]
[-0.8249396   0.5890993  -0.6210653   0.84359264]
[ 4.0162608e-01  6.9340788e-02 -3.2896921e-04 -4.9394435e-01]
[ 0.40925562 -0.09402747  0.69564927  0.9142616 ]
[-0.8971815   0.1760334   0.28386706  0.14311284]
[-0.5893159   0.38047367 -0.4309544   0.8605276 ]
[-0.42047817  0.20070738 -0.146088    0.42307058]
[-0.92784417  0.27905482 -0.69929993 -0.5411026 ]
[-0.3056457  -0.37463412 -0.7079741  -0.42950052]
[ 0.46706283  0.88604903  0.10496511 -0.8160153 ]
[-0.32764652  0.41968334  0.9645387   0.27379838]
[ 0.60077    -0.4988997  -0.2015455   0.91223174]
[-0.45596075  0.20914875 -0.09018707 -0.595885  ]
[ 0.6207093  -0.6833724   0.36365807  0.75541925]
[-0.62395597 -0.9151087   0.6248019  -0.16522942]
[0.5928458  0.62832737 0.1860634  0.16589053]
[0.5901462 0.8672315 0.6917626 0.9656808]
[-0.78724915 -0.42398947  0.29346943  0.9173172 ]
[-0.22774886 -0.03514791  0.9393517   0.9205649 ]
[ 0.

[-0.97974294  0.52752525 -0.01622695  0.7211015 ]
[ 0.3878817   0.83110094  0.5707356  -0.4848351 ]
[0.22133335 0.42776716 0.9899307  0.86332786]
[-0.0363117   0.42440417 -0.07068409 -0.74169755]
[-0.36252886  0.84492874 -0.7400699  -0.8120501 ]
[ 0.13460158 -0.00844726 -0.28409094  0.4417069 ]
[ 0.38023525  0.74126166 -0.31366593  0.4236509 ]
[-7.7884501e-01  6.8720847e-01  3.2766587e-01 -6.1505235e-04]
[ 0.11042479  0.47777158 -0.682259   -0.80957544]
[ 0.4586597   0.6007643   0.644544   -0.32077572]
[-0.77166265 -0.17217337  0.8939221  -0.57769775]
[ 0.5356104  -0.1212319   0.96410096  0.83329433]
[ 0.48326233 -0.02832248 -0.14634338  0.62288386]
[ 0.06388144  0.4729148  -0.45141324 -0.27891827]
[ 0.3543685   0.79482985 -0.97542435 -0.01632124]
[ 0.58273697 -0.67325646 -0.6225359  -0.6986275 ]
[-0.94246984 -0.94820315 -0.41248077  0.80025965]
[-0.36841318  0.9489117  -0.2056557   0.6149454 ]
[ 0.9317323   0.84868133  0.8305035  -0.7717496 ]
[ 0.6911447   0.8693361  -0.3776855   0.26

  0%|          | 1/2000 [00:28<15:57:19, 28.73s/it]

[-0.5814644   0.38029692 -0.35580707  0.18197249]
[0.4896038  0.5407572  0.9057437  0.14669847]
[-0.5530714  -0.33007538 -0.00933824 -0.8806851 ]
[-0.19369768 -0.25354475  0.9358313  -0.12432594]
[0.47385168 0.23755674 0.46144885 0.9861336 ]
[0.16984016 0.3345393  0.8637066  0.5947342 ]
[-0.6817435  -0.30329296 -0.9237883   0.771949  ]
[-0.9821513  -0.73857975 -0.41413012  0.58965445]
[-0.07597184  0.67925096 -0.22413115 -0.58151746]
[-0.70869064  0.17837548  0.9711991  -0.57345206]
[ 0.7461085  -0.9595365   0.50748074 -0.86473185]
[ 0.6165446 -0.0768109 -0.6717277 -0.4873145]
[ 0.7247912  -0.32389292 -0.82599056  0.7159468 ]
[ 0.3559384   0.7564947  -0.08390865 -0.31331894]
[-0.09691967  0.75250006 -0.42020032 -0.27908203]
[ 0.55904377 -0.9907464   0.08323357  0.68339825]
[-0.31206265  0.05859253 -0.78309023  0.9009895 ]
[-0.10360932  0.97655666  0.2490016   0.6441667 ]
[ 0.12835689 -0.81412035 -0.5390476  -0.45135692]
[ 0.5193476 -0.9442135 -0.7212972  0.3080173]
[ 0.8541927  -0.3075

  0%|          | 2/2000 [00:29<6:58:25, 12.57s/it] 

[-0.08644549 -0.1056781  -0.36658767 -0.12062372]
[-0.9648789  -0.06405415  0.71227133  0.65350926]
[ 0.13193733  0.57229614 -0.13518707 -0.0520403 ]
[-0.82027     0.6889589   0.36416984  0.87038094]
[ 0.14313373 -0.00122198 -0.40531042  0.5861957 ]
[-0.15465021  0.65603775  0.32162362  0.39045304]
[-0.60479915 -0.17103012  0.18058537 -0.7130195 ]
[-0.7597209  -0.6698805   0.19886738  0.5084712 ]
[-0.98480463 -0.46109825 -0.19550684 -0.00940644]
[ 0.39643925  0.64124036  0.8176213  -0.14589731]
[ 0.4617051   0.5599909   0.26815116 -0.75863725]
[-0.60724366  0.63910866  0.9545557  -0.9921063 ]
[ 0.79057693  0.2480215   0.40650758 -0.6028819 ]
[ 0.7178905  -0.13839978  0.40445986  0.14436093]
[ 0.7748445  -0.7475295   0.3538424   0.04286246]
[-0.88585216  0.8377516  -0.90325236  0.7529191 ]
[ 0.5938601   0.182373    0.67036384 -0.9483553 ]
[-0.6312604  -0.1392546  -0.35510856 -0.07988106]
[ 0.22511405 -0.726455    0.8246296  -0.28587484]
[-0.74303377  0.18004568 -0.37249038  0.10861919]


  0%|          | 3/2000 [00:31<4:02:48,  7.30s/it]

[ 0.3140173   0.5653082  -0.9025165   0.31693363]
[-0.07530447 -0.32131454  0.8164552   0.6473    ]
[ 0.8009906   0.12517202 -0.3229409   0.3615326 ]
[-0.35964426  0.06517377  0.7796378   0.82067823]
[-0.00474954  0.42889586  0.79360473 -0.34965372]
[-0.82312346 -0.5698689   0.26107982 -0.5502007 ]
[ 0.3671256   0.61858857  0.9369505  -0.98522824]
[-0.7091584  -0.8454649   0.10518856  0.39937773]
[-0.8237181  -0.60232556 -0.02231099  0.705111  ]
[ 0.12755223 -0.4919327  -0.3972946  -0.77167356]
[0.14455062 0.4398488  0.2195132  0.75443786]
[ 0.09913145 -0.46818146  0.6409714   0.36108097]
[0.6310595  0.5170894  0.64526033 0.57180923]
[ 0.64706874 -0.6356806  -0.19626425  0.94512546]
[ 0.82503074  0.73782593 -0.69959813  0.86015004]
[ 0.51083076  0.5267367   0.75494313 -0.42708236]
[-0.60466135  0.00528677  0.6023803   0.06636927]
[-0.5322191   0.80046445 -0.6804918   0.09516881]
[ 0.99154055  0.5096468  -0.68042064 -0.9563144 ]
[-0.85282004 -0.544768   -0.90895694  0.5884448 ]
[-0.5758

  0%|          | 4/2000 [00:32<2:52:13,  5.18s/it]

[-0.13234942  0.02728325 -0.54922175  0.5017444 ]
[ 0.34812963  0.95886827  0.3574323  -0.34554076]
[-0.6863228  -0.31044662  0.329683   -0.5844536 ]
[0.6039994  0.7491413  0.4972983  0.66411376]
[-0.299658   -0.13255051 -0.55624807 -0.91978526]
[-0.9284658  -0.2845318   0.28524664 -0.7395893 ]
[ 0.6027061  0.7961672  0.5773865 -0.1803625]
[ 0.24661982 -0.5575044   0.09954242 -0.59239024]
[ 0.09174235 -0.02027185  0.0975858  -0.5321704 ]
[-0.5182487   0.09117073 -0.12389741  0.6438396 ]
[-0.21419427  0.3562088   0.18157731 -0.4749393 ]
[0.31995395 0.754868   0.87781054 0.18871103]
[-0.21588953  0.7724038  -0.88257474  0.29706416]
[ 0.17546222  0.05076166 -0.3094138   0.2754548 ]
[ 0.67701364 -0.48989448 -0.47897324  0.4016059 ]
[-0.16329956  0.6739985   0.75759435 -0.54539156]
[ 0.32194215 -0.00239372  0.8332428  -0.46800986]
[ 0.62870157 -0.47237843 -0.62746245 -0.25937244]
[-0.37854794 -0.46035528 -0.48639682  0.40148482]
[-0.58308744  0.39321434 -0.16031133  0.78294396]
[-0.55943257

[-0.3793562  -0.5949154  -0.06645074 -0.8569889 ]
[-0.566696    0.9009421  -0.10885368  0.6366358 ]
[ 0.0438498   0.5505118  -0.46562412 -0.5029734 ]
[ 0.47735435  0.42309994 -0.21639049 -0.87207675]
[-0.3118409  -0.49014044  0.63291305 -0.9596292 ]
[-0.81750035  0.40680844 -0.8815818  -0.35779005]
[ 0.6897379  -0.40729406 -0.9419644  -0.45491564]
[0.7868998  0.6868596  0.23274153 0.15456583]
[-0.9431343   0.6963538  -0.2604127  -0.20356777]
[-0.97520375 -0.26766738 -0.8864815  -0.6424276 ]
[ 0.7405094   0.84552187 -0.520561    0.255213  ]
[-0.9877695   0.7924442  -0.2778146  -0.41377598]
[-0.07014591  0.6004001  -0.76566386  0.457202  ]
[-0.35051155  0.9396362   0.10941284 -0.7005198 ]
[ 0.39450645 -0.96730304 -0.42852333  0.6157733 ]
[-0.52656776  0.8509162  -0.11800086 -0.77678466]
[-0.5731422   0.8780442  -0.5856725  -0.21555324]
[-0.0505672  -0.86283535  0.09612235 -0.43174875]
[ 0.7680261   0.5289271   0.92627144 -0.70144117]
[-0.70830035 -0.49036747 -0.7801441  -0.66440856]
[-0.

[ 0.14924216 -0.6907121   0.34500363  0.9433069 ]
[ 0.47805968 -0.8279373   0.9032765   0.51206905]
[ 0.89170635 -0.8301588  -0.8919812  -0.29264387]
[-0.9309045   0.71781063 -0.06868093 -0.4483896 ]
[-0.31554645 -0.7792035   0.55121005 -0.9229173 ]
[-0.8081018  -0.4380076  -0.01199528 -0.06774459]
[ 0.4551712  -0.6028273   0.37639448 -0.97604454]
[-0.3858342  -0.4037483  -0.6429795   0.92721975]
[-0.08477453  0.79569     0.079333   -0.30430448]
[ 0.49520484 -0.38733932  0.87602484 -0.9432478 ]
[ 0.43391562 -0.55659795 -0.8130389   0.10571001]
[ 0.55651665 -0.2360189  -0.6760032  -0.58553934]
[-0.78852886 -0.2010129   0.11995445 -0.8015437 ]
[ 0.7786004   0.38836533  0.23749292 -0.85810083]
[ 0.3369776  -0.5508939  -0.12537223 -0.13202101]
[-0.8771083  -0.47032693  0.67199063 -0.76747584]
[0.3600502  0.66976047 0.8024234  0.7188815 ]
[ 0.93143594 -0.21616034  0.07534894  0.31849885]
[ 0.30515525  0.81235904  0.6172138  -0.8949447 ]
[-0.48386133 -0.6361307   0.6814115  -0.87728477]
[-0.

[ 0.2606759  -0.8287542  -0.70492876  0.39866605]
[-0.84583503 -0.05160661  0.8980117   0.8795563 ]
[ 0.62714183  0.8972119  -0.62342095  0.4255357 ]
[-0.6396947  -0.8251087   0.31944937  0.43702155]
[-0.27302355  0.78549695 -0.8352231  -0.71170056]
[ 0.3165625  -0.9632164   0.78550655  0.5754051 ]
[ 0.27103573  0.6327061  -0.7164765  -0.40207255]
[ 0.78980696 -0.02722303  0.74166846  0.15274006]
[ 0.79904485  0.0929148   0.90444434 -0.6880461 ]
[ 0.98726696 -0.2677358  -0.78301615 -0.6824695 ]
[ 0.70361435 -0.73009944  0.43014252  0.04537912]
[ 0.26709908 -0.03257526 -0.45974186 -0.2889823 ]
[ 0.68829906 -0.7004032  -0.21685725  0.37338692]
[-0.82321423 -0.36437732 -0.9387089   0.9505673 ]
[ 0.84238046 -0.3551787   0.90822446 -0.06739426]
[-0.68252987 -0.33600634 -0.8211091  -0.9747129 ]
[ 0.8766975   0.74579895 -0.27892137  0.85071087]
[ 0.51032203 -0.37692246  0.48647442 -0.8290252 ]
[-0.9336338  -0.803143   -0.50631976 -0.83292925]
[ 0.8542576  -0.7154287   0.0961321   0.78350866]


[-0.7886806   0.72165823  0.25634533 -0.02283542]
[ 0.35497582  0.8887696  -0.9074197   0.8013591 ]
[-0.42546797  0.4101857  -0.7660345   0.17620963]
[-0.04401438 -0.10187878  0.9121488  -0.63148355]
[-0.25958842 -0.39513063 -0.00752743  0.73414254]
[-0.16539119  0.44348738 -0.94903266  0.0194135 ]
[-0.02465322 -0.7412646  -0.8371042  -0.8281735 ]
[ 0.426277    0.19921637  0.16082162 -0.10906882]
[0.83563244 0.43260226 0.9738005  0.6881107 ]
[-0.8593213  0.3295684  0.9905753  0.9125713]
[-0.3592004   0.44519567  0.5646467   0.91197616]
[ 0.1275452  -0.81739783  0.77636784 -0.22989509]
[ 0.9445916  -0.8893419   0.0318436  -0.71956134]
[ 0.3701578  0.6119686 -0.4926514 -0.6163048]
[ 0.2255925  -0.0651532  -0.5083488  -0.35418612]
[ 0.07510261  0.5664463  -0.0797507  -0.25475773]
[ 0.77810204 -0.33451486 -0.12013742 -0.36449087]
[-0.9707721   0.50175786 -0.88607925 -0.9305388 ]
[-0.27008048  0.10303056  0.28639546  0.9132833 ]
[0.7712364  0.28478682 0.73371255 0.9240612 ]
[ 0.93412507 -0.

[-0.23439322  0.38718393  0.5276576  -0.75464773]
[-0.6523399  -0.8185599  -0.49899805 -0.0897509 ]
[-0.26648566 -0.44669357  0.3893934   0.20637856]
[-0.12999174 -0.07924936  0.8329202  -0.40830284]
[ 0.5328727  -0.5580962  -0.87189335  0.98871547]
[ 0.9470115  0.6833459 -0.0525076  0.558972 ]
[-0.11615357  0.3812046   0.32196584  0.02753379]
[-0.5383544   0.6748061   0.09518535 -0.47364476]
[-0.26786923  0.847846    0.6276536   0.19418034]
[ 0.75487155  0.36203226 -0.21093805  0.32350323]
[ 0.34763563  0.44274628 -0.18210706 -0.9701294 ]
[-0.70390004  0.5099351  -0.7775809  -0.6810065 ]
[-0.5239077  -0.16705312  0.8862816  -0.69113076]
[ 0.33050218  0.13144808 -0.04445054  0.5788682 ]
[-0.16877998  0.03367679 -0.27069452 -0.38366908]
[ 0.40394494  0.11022362 -0.4268387   0.10116515]
[-0.74548745  0.628351    0.5064799  -0.9530797 ]
[ 0.06818102 -0.7549254   0.27396825  0.9215648 ]
[0.19681785 0.89394456 0.89624697 0.50475866]
[-0.9172069  -0.12226396  0.11341163  0.356854  ]
[ 0.4465

[ 0.9022292   0.71512496 -0.56693345  0.90350014]
[-0.33647296  0.8825402   0.51549375 -0.505125  ]
[ 0.32666492 -0.6805947  -0.66228354 -0.4046186 ]
[-0.01415153  0.11580275  0.20953164  0.7991331 ]
[ 0.6892463   0.16682868 -0.50742185  0.07214332]
[-0.26496398  0.25842616 -0.1737721   0.5211038 ]
[-0.21628802 -0.09480866  0.1669904   0.09125615]
[ 0.8698143  -0.52804166  0.5955882  -0.00568535]
[-0.35335216 -0.8476093   0.98440963 -0.14807768]
[ 0.3156338 -0.865215  -0.1471546  0.4035186]
[-0.42199877  0.3471695  -0.7871599  -0.3877952 ]
[ 0.44391668  0.5097932   0.3306852  -0.9623252 ]
[-0.1807623  -0.7145848   0.12048864  0.7983267 ]
[-0.6712514   0.30447367  0.6739277   0.22042526]
[-0.2845361   0.28255644 -0.52969265 -0.2897269 ]
[-0.24674864  0.00415656 -0.18967004 -0.29399475]
[-0.74101025  0.15578125 -0.16883546 -0.613341  ]
[-0.5095994  -0.8152598  -0.52161855 -0.5726179 ]
[-0.13699856  0.44228145 -0.78709066 -0.6915139 ]
[-0.32038656  0.39733705  0.49665946 -0.10771002]
[-0.

[-0.5714859   0.9075231   0.36603653  0.27689034]
[-0.84898674 -0.04309342  0.84937876 -0.90787876]
[ 0.4352188  -0.6405971  -0.8812474  -0.44089296]
[-0.47706106  0.78876674  0.6554003   0.78910255]
[ 0.22289304 -0.5462239  -0.352045    0.26444992]
[-0.34769756 -0.9576163  -0.13700405  0.38565063]
[-0.36369202 -0.29095572  0.87251174 -0.7907953 ]
[-0.98149616  0.95566213  0.5829387  -0.97718656]
[-0.75895137  0.2841941   0.77236056  0.6974447 ]
[-0.05510847  0.802162    0.6103606  -0.26928338]
[ 0.12244952 -0.61349905 -0.5047328  -0.6811767 ]
[-0.96724725  0.59156746 -0.34863707 -0.48284847]
[ 0.15860216  0.1505117  -0.47879887 -0.8531117 ]
[-0.273991   -0.5141905   0.90400124  0.7556591 ]
[-0.90582937 -0.87212825 -0.4749203  -0.12756583]
[-0.34287685 -0.81927764  0.877836   -0.95375216]
[-0.8111558  -0.92126036 -0.6420434   0.3456137 ]
[ 0.1753201   0.42721024  0.9325536  -0.5014007 ]
[ 0.35725442 -0.9334795  -0.47826108 -0.881638  ]
[ 0.17914514  0.78903437 -0.7574583   0.88104934]


[-0.40709838  0.6075038  -0.29280546  0.10249499]
[-0.08080877  0.43787748 -0.29447153 -0.972152  ]
[-0.04363754  0.16390803 -0.50477004  0.17907503]
[-0.4767814   0.7777873  -0.6242894  -0.67935085]
[-0.25932336 -0.5603188  -0.67498535  0.73221743]
[ 0.02193436 -0.32582545  0.66574895  0.46456063]
[-0.95027417  0.3616261   0.51472765 -0.22853819]
[ 0.4928623  -0.24075948 -0.3560547   0.7809235 ]
[ 0.37344438  0.0932861   0.68290794 -0.28408927]
[0.8716113  0.6314478  0.13757066 0.18857007]
[-0.91572624  0.1206245  -0.13418838 -0.2081957 ]
[-0.6194527  -0.5647023   0.21988049 -0.5293408 ]
[ 0.75025094 -0.75977093 -0.796083    0.7257786 ]
[ 0.06486359 -0.82236516  0.41662666  0.54014397]
[-0.38304755 -0.94353586 -0.36015305 -0.85375524]
[-0.27549902  0.5391302  -0.33831525 -0.01195315]
[-0.9550547  -0.09538128 -0.93042576  0.30195236]
[ 0.6481823  0.5285351 -0.2449481  0.9370578]
[ 0.1826429  -0.2567711   0.0615915  -0.40272683]
[-0.441929   0.5177274  0.5648514  0.6890865]
[-0.39639294

[-0.64460874 -0.13764384  0.98405516 -0.8174683 ]
[0.5461236  0.72020173 0.72535074 0.23126705]
[ 0.46851444  0.6952276   0.8659824  -0.49244043]
[ 0.85430604 -0.48746014 -0.8111892   0.38371897]
[-0.5402495  -0.5592574   0.8401588   0.08365995]
[ 0.00862446  0.46971735 -0.09977326  0.37441668]
[ 0.65894127 -0.18632741 -0.9364107   0.04439005]
[ 0.3722749  -0.7654321   0.18329242 -0.6404431 ]
[0.86153936 0.36461118 0.7476876  0.02161527]
[ 0.10041095 -0.8950768  -0.9838586   0.97720003]
[ 0.74867463 -0.70569044  0.04759393 -0.50397176]
[ 0.14861612 -0.5664456  -0.29999128  0.32833484]
[-0.36328915  0.05906827  0.9157965   0.7066952 ]
[ 0.2664351   0.67950004 -0.23756464  0.39357743]
[-0.21483672 -0.6666659  -0.6777919  -0.8027512 ]
[ 0.30442747 -0.5951052  -0.94104713  0.20069677]
[-0.5004067   0.43363112  0.06866836  0.61956525]
[-0.7823109  -0.56241596 -0.9229559  -0.86239135]
[ 0.3178865  -0.32747698  0.7521024   0.8554542 ]
[-0.5944052  -0.5320309  -0.37386578  0.40261927]
[-0.4980

  0%|          | 5/2000 [01:11<9:30:37, 17.16s/it]

[ 0.24749736 -0.3782776   0.21886754  0.42387238]
[ 0.7020108   0.7268176  -0.3960562  -0.40184188]
[ 0.5955522   0.2953594  -0.08318507  0.8192087 ]
[-0.248389  -0.9316916  0.959005  -0.6562366]
[ 0.46966618  0.82763684 -0.79439664 -0.15660919]
[-0.7434251  -0.7718268  -0.18254308  0.3432081 ]
[-0.12794288 -0.46148905  0.5159422  -0.7571599 ]
[ 0.6511542 -0.2540454 -0.9218334  0.3575103]
[-0.4927507  -0.577465   -0.17233437 -0.34034634]
[ 0.0505226  -0.28194126  0.5348267  -0.7081321 ]
[ 0.71250546  0.6387093   0.31730363 -0.31599003]
[-0.05240085 -0.45536005  0.6218039   0.56689084]
[ 0.7090719   0.79505974  0.5917373  -0.62686247]
[ 0.32546228  0.05862252 -0.756675   -0.08620515]
[ 0.8448848   0.2648141  -0.84463954 -0.74848497]
[ 0.06326819 -0.87535274  0.23687145 -0.6479652 ]
[-0.39201903 -0.73857397 -0.6690093   0.8710443 ]
[ 0.77526146 -0.789309    0.6133123  -0.33375138]
[ 0.78871715 -0.8376155  -0.875132   -0.07391761]
[-0.83485675 -0.30681786  0.881268   -0.7682721 ]
[ 0.7668

[ 0.8790287  -0.25648066 -0.07002785 -0.8814261 ]
[-0.07266226 -0.42443225 -0.9854506  -0.15457648]
[-0.45416132  0.90830094  0.81683695  0.6265864 ]
[-0.4809712   0.41181526  0.79775953 -0.9457072 ]
[-0.0251743  -0.46756455  0.6842115  -0.42153612]
[-0.02891252 -0.65805924 -0.19791698 -0.7293233 ]
[ 0.33332476 -0.62197304  0.16627967  0.784474  ]
[ 0.9003676   0.3357535  -0.29709318 -0.1536442 ]
[-0.6558417   0.48071665  0.98469913  0.21793054]
[-0.71117085 -0.4980366   0.98243815  0.0715596 ]
[-0.67431927  0.41976872  0.15814549 -0.14943644]
[ 0.66504693 -0.4963767   0.40314168  0.38102517]
[ 0.6337839   0.8172884   0.09174876 -0.7033617 ]
[ 0.7117152   0.9349725  -0.00790774  0.8495946 ]
[-0.6856854  -0.3235816  -0.49257404 -0.5047591 ]
[ 0.75546855 -0.29112378  0.6469358   0.26654628]
[-0.66264117  0.29375675  0.5394311   0.8144523 ]
[-0.82204425  0.78282344 -0.01964336  0.34913033]
[ 0.17655332  0.66265506 -0.10482918 -0.44673154]
[ 0.3326864  -0.3092138  -0.8381438   0.40393034]


[ 0.3997366   0.37258488 -0.54305124  0.54460937]
[0.15289061 0.6408688  0.33033004 0.5397264 ]
[-0.9151987   0.23138882 -0.52596825 -0.12919556]
[0.5845454 0.2077545 0.5123918 0.9577823]
[-0.8684866  -0.14868629 -0.53145015 -0.8519571 ]
[-0.5481008   0.48665687  0.062527    0.8388619 ]
[-0.7288005   0.5560316  -0.55085826 -0.7400038 ]
[-0.84301645 -0.19159782 -0.9543727  -0.20974919]
[-0.31703302 -0.15432471  0.11617073  0.84249634]
[ 0.18090144  0.72019506 -0.17939876 -0.69501245]
[-0.40586147 -0.75076926 -0.68534935 -0.97758013]
[ 0.8301193   0.96664727 -0.22947006  0.6312401 ]
[-0.46786764  0.26114997  0.28907937  0.8660114 ]
[ 0.06516182  0.680422   -0.4225396  -0.19122179]
[ 0.3181258   0.30988023 -0.26869845  0.4889509 ]
[-0.02094556 -0.10062318  0.35191178  0.52194774]
[-0.13888411  0.1365826   0.1907582  -0.90091705]
[0.9345518  0.48659876 0.2887488  0.5473181 ]
[-0.3040538  -0.43211842  0.12494766 -0.84441006]
[-0.32751113  0.18256731 -0.47543433 -0.36751208]
[-0.07616252  0.

[ 0.11277202 -0.8697605   0.70792085  0.54257596]
[-0.7168888  -0.9142541   0.28769282  0.94226885]
[ 0.28540173  0.11242782 -0.9566785  -0.80958104]
[ 0.40450066  0.85753167 -0.36152038 -0.9106687 ]
[-0.25915843  0.7446294  -0.18986529 -0.73998046]
[ 0.716816   -0.18014266  0.8670034  -0.18275522]
[ 0.8016263  -0.59121144  0.43975782  0.7026961 ]
[ 0.12959935  0.7899527  -0.31078124  0.16662714]
[ 0.88134307 -0.8810749   0.29203305 -0.10226852]
[ 0.5406065   0.09475177  0.7974442  -0.72191525]
[ 0.06611408  0.6434578  -0.37634796 -0.00292788]
[-0.39581597 -0.00451036  0.43304113 -0.7120865 ]
[ 0.30491823  0.2086436  -0.6234157   0.5892944 ]
[-0.7858998   0.17340423 -0.90783364  0.4898672 ]
[-0.99317497  0.07607022  0.31825268  0.14466965]
[0.00900759 0.27130032 0.9121083  0.71536934]
[ 0.63339365  0.9708013   0.93436337 -0.56153905]
[-0.92326856  0.82733345 -0.6553025  -0.05289417]
[ 0.0479726  -0.18857399 -0.14729322 -0.98327583]
[ 0.8637043   0.9505362  -0.76957387  0.5022455 ]
[-0.

[ 0.70969594 -0.9069844  -0.05810003  0.67115545]
[-0.6889826  -0.6719744   0.6337035   0.39744666]
[-0.2748766   0.33286977  0.3572014  -0.5719005 ]
[0.26774058 0.82073784 0.8895305  0.54142594]
[-0.437738   -0.73610175  0.6993389  -0.04391048]
[ 0.23000985 -0.3494003  -0.69633067 -0.69259095]
[ 0.12443449  0.52861154 -0.49137396  0.8429136 ]
[-0.9251547   0.64147174  0.7639609   0.21497515]
[ 0.3877316   0.25774956 -0.8628066  -0.5217164 ]
[-0.606115    0.18126243 -0.52074134 -0.22167765]
[ 0.1759513 -0.9953157  0.5147533 -0.764393 ]
[-0.96997136 -0.1370882   0.4418944   0.22141223]
[-0.74108285  0.13597308  0.9387893   0.6454823 ]
[0.7973311  0.69596136 0.07032727 0.04550397]
[ 0.81848717 -0.19282234  0.29039732  0.9788378 ]
[ 0.7576126   0.08406629  0.8774329  -0.6235442 ]
[-0.4129156   0.8755765   0.8710166   0.26108083]
[0.7847018  0.6530483  0.32025528 0.61884034]
[ 0.12541752  0.46873972  0.48384467 -0.4338462 ]
[ 0.04125191  0.9045527  -0.44750613  0.7390962 ]
[ 0.40800613 -0.

[-0.749249   -0.99819934 -0.21980095  0.26430002]
[ 0.09324928  0.36294547  0.3623985  -0.24235782]
[-0.30432215 -0.3425284  -0.6841272   0.78062356]
[-0.30370197 -0.7303108  -0.14936903  0.4424806 ]
[ 0.974954    0.35897323 -0.78018767 -0.08063941]
[ 0.7099482   0.361904    0.20497605 -0.45417368]
[-0.4822762   0.34681296  0.93253034  0.6753751 ]
[ 0.06826331  0.3414272   0.21726167 -0.7165971 ]
[-0.5371032   0.06272577  0.8520079   0.8376453 ]
[ 0.64402807  0.61836773  0.43431312 -0.5414948 ]
[0.3431048  0.1860003  0.82671803 0.86173546]
[ 0.38384816 -0.09359153  0.32619387  0.73516816]
[ 0.21961391  0.80198956 -0.6086892   0.6774145 ]
[ 0.77346116 -0.7436874   0.7571827  -0.8310054 ]
[ 0.22820172  0.28199947 -0.31155556  0.81684065]
[-0.8584087   0.31605682  0.17098133  0.324741  ]
[-0.5556696  -0.42290568 -0.22522949  0.7665375 ]
[-0.91133237  0.3783034   0.73933166 -0.34040496]
[-0.6086655   0.13458383 -0.21491997  0.15388708]
[ 0.76515496 -0.82980216 -0.68306303  0.6175707 ]
[ 0.

[-0.09223091 -0.9905532   0.04428082 -0.88803273]
[ 0.89487994 -0.7993561  -0.7765639   0.70558727]
[-0.10341125 -0.7880667  -0.21619052 -0.96104115]
[-0.8133301   0.785945   -0.3391987  -0.45564818]
[-0.28636444  0.83750516 -0.08484647 -0.05166501]
[-0.32849428 -0.1473605   0.87111473 -0.17829429]
[ 0.8507967   0.54691654  0.44953382 -0.59626687]
[ 0.29907972 -0.44573808  0.82352614  0.9429551 ]
[ 0.3654394   0.8957741   0.19213687 -0.36929062]
[-0.51146555  0.065012    0.16338253 -0.06310052]
[-0.7929772   0.91436684 -0.52267164 -0.31522903]
[-0.16485578  0.4407902  -0.6853565   0.15179151]
[-0.13691041 -0.7902566  -0.7942057  -0.38899368]
[ 0.35810474  0.24760723  0.4383594  -0.1572618 ]
[ 0.71090275 -0.1619328  -0.9363265   0.6180997 ]
[-0.25501478  0.43922463 -0.95834     0.17452547]
[-0.6063502   0.53588873  0.5800306  -0.2671994 ]
[ 0.39576578 -0.5227758   0.05737447  0.723506  ]
[ 0.51353335 -0.8822001   0.7156056  -0.6597376 ]
[-0.9788498   0.7673743  -0.1644709  -0.67492896]


[-0.8469831   0.6193687  -0.36121723  0.00878594]
[ 0.9304708   0.9590598  -0.04041778 -0.7735214 ]
[ 0.04521613  0.5515415   0.88772905 -0.05818241]
[-0.7128146  -0.83763844  0.49080366 -0.33325815]
[-0.1839217  -0.38067147  0.9277186   0.80644864]
[-0.33208564 -0.6571652  -0.82871073  0.57076746]
[-0.34975776 -0.38918382  0.65187883 -0.08190679]
[-0.15596394  0.3624167  -0.07095567 -0.2735121 ]
[0.4210275  0.06510654 0.14959043 0.9500714 ]
[-0.8783972  -0.35879856  0.62057257 -0.4533538 ]
[0.5252025  0.46031716 0.83389497 0.37925428]
[-0.67787147  0.6347184  -0.665246    0.25013638]
[0.12376712 0.64761966 0.81179345 0.00552421]
[-0.83579576  0.36049256  0.37896332  0.76545894]
[ 0.8829929  -0.9768873  -0.38781443  0.05494119]
[-0.84315526  0.18378428  0.22132759  0.97210336]
[ 0.97245395 -0.33700484 -0.07391659  0.56755626]
[-0.82472634 -0.8256987   0.6250845  -0.34560442]
[ 0.4273076   0.04373678 -0.8151967   0.27702075]
[ 0.3999912  -0.27626687 -0.6043546  -0.6714144 ]
[-0.7126399 

[ 0.8814738   0.9025409  -0.5661303  -0.72029895]
[-0.0432312   0.43416864  0.30374938  0.7463356 ]
[-0.24275182 -0.3458029  -0.4179728   0.23712023]
[ 0.5301634  -0.9385265  -0.15391517  0.2466422 ]
[-0.8900351  -0.0824773  -0.3187452   0.13196419]
[-0.21822898 -0.4782569  -0.9555384  -0.1083174 ]
[ 0.29668334  0.51676947  0.20514108 -0.73725545]
[0.03891153 0.8741808  0.48954883 0.405291  ]
[ 0.5485058   0.24710722 -0.49260613  0.54367733]
[ 0.6272806   0.59385437 -0.89660716  0.41802198]
[ 0.5922767   0.48602068 -0.90845466  0.11859708]
[ 0.83423114  0.9057782  -0.47467566  0.50771976]
[-0.8111366  -0.45673355  0.04193632 -0.7084329 ]
[-0.1128533  -0.7541508   0.00955142 -0.17616215]
[-0.15334609 -0.91169333 -0.6728413   0.9578382 ]
[ 0.7897248   0.64643335 -0.54549617 -0.64070934]
[-0.06101749 -0.9864154   0.172522   -0.08122513]
[ 0.4406176   0.98406273 -0.53638536  0.7219734 ]
[ 0.05201292 -0.20140794 -0.42176017 -0.54347056]
[0.6795765  0.4801408  0.33898544 0.07671895]
[-0.8326

[-0.89319354 -0.38680017  0.5050089  -0.35372475]
[-0.7785382   0.30216366 -0.07867738  0.43698615]
[-0.39930445  0.8449674  -0.62907296 -0.1398222 ]
[ 0.68923473  0.22059117  0.6261896  -0.44534567]
[-0.87620085 -0.20628907  0.2611731  -0.73787165]
[ 0.5641063  -0.08187351 -0.2446106  -0.32433817]
[-0.32059348 -0.8113762  -0.11125311  0.83053523]
[ 0.7809534   0.6214713  -0.7120398   0.73299515]
[ 0.81435335 -0.8607133  -0.39280957 -0.18713419]
[0.5104477  0.3375167  0.8880453  0.03374292]
[-0.29678458  0.00976466  0.5273079   0.8309325 ]
[ 0.95803267 -0.6041082   0.62384975  0.6860798 ]
[ 0.8820937  -0.797711    0.54576576  0.68667865]
[ 0.364152    0.06774659 -0.5059365  -0.912087  ]
[ 0.00147613  0.56365216  0.3148344  -0.76118076]
[-0.25274694  0.48293063 -0.55293036 -0.60012805]
[ 0.17459527  0.49717864 -0.91990703  0.11216138]
[0.7750177  0.7651678  0.69219714 0.5001782 ]
[0.10900382 0.06372924 0.53850484 0.3597948 ]
[-0.45613047 -0.23310693  0.37158513 -0.03677276]
[-0.5372499 

  0%|          | 5/2000 [01:54<12:42:02, 22.92s/it]

[-0.06910258 -0.5134864  -0.724423    0.9034041 ]





KeyboardInterrupt: 

In [None]:
# `scipy.ndimage.filters` is a deprecated alias namespace; the function lives
# directly in `scipy.ndimage`.
from scipy.ndimage import uniform_filter1d

# Width (in samples) of the moving-average window applied to `rewards`.
# A window of 1 leaves the values unchanged; raise it to actually smooth
# the curve.
SMOOTHING_WINDOW = 1

# uniform_filter1d keeps the dtype of its input, so integer rewards would be
# averaged with integer truncation once the window is larger than 1.
# Converting to float first keeps the moving average exact.
arr = uniform_filter1d(np.asarray(rewards, dtype=float), size=SMOOTHING_WINDOW)

In [None]:
from matplotlib import pyplot as plt
# Split `arr` into the two coordinate lists used for plotting.
# Mind the naming: `x` receives the values of `arr`, while `y` receives their
# positions (0, 1, 2, ...), which is why the plot call passes `y` first.
x = list(arr)
y = list(range(len(x)))

In [None]:
# Draw the filtered rewards: `y` (sample positions) is the horizontal axis and
# `x` (the values taken from `arr`) is the vertical axis.
fig, ax = plt.subplots()
ax.plot(y, x)
# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set(xlabel="index", ylabel="filtered reward", title="Filtered reward per index")
# Explicit show() renders the figure without echoing the Line2D repr.
plt.show()

In [None]:
# Negated element count of the action-space upper-bound array, i.e. minus the
# number of action components.
# NOTE(review): this looks like the usual SAC target-entropy heuristic
# (-|A|) — confirm against the agent code.
action_bound_shape = env.action_space.high.shape
-np.prod(action_bound_shape)