In [1]:
import torch
from torch import nn
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from collections import deque
import random
import gym

In [2]:
scalar_writer = SummaryWriter('TD3')

In [3]:
class Critic(nn.Module):
    def __init__(self, obs_dim ,action_dim):
        super(Critic, self).__init__()
        self.l1 = nn.Linear(obs_dim + action_dim, 256)
        self.l2 = nn.Linear(256, 256)
        self.l3 = nn.Linear(256, 1)
        
    def forward(self, obs, action):
        sa = torch.cat([obs, action], dim=1)
        q = torch.relu(self.l1(sa))
        q = torch.relu(self.l2(q))
        q = self.l3(q)
        return q

In [4]:
class Actor(nn.Module):
    '''一般环境的a的上下界对称，所以直接forward输出对应环境的动作'''
    def __init__(self, obs_dim, action_dim, max_action):
        super(Actor, self).__init__()
        
        self.l1 = nn.Linear(obs_dim, 256)
        self.l2 = nn.Linear(256, 256)
        self.l3 = nn.Linear(256, action_dim)
        
        self.max_action = max_action
        
    def forward(self, obs):
        '''返回值直接乘上action的范围高度，得到可以用到环境的动作'''
        a = torch.relu(self.l1(obs))
        a = torch.relu(self.l2(a))
        a = torch.tanh(self.l3(a))
        
        return self.max_action * a

In [5]:
class BasicBuffer:

    def __init__(self, max_size):
        self.max_size = max_size
        self.buffer = deque(maxlen=max_size)

    def push(self, state, action, reward, next_state, done):
        experience = (state, action, np.array([reward]), next_state, done)
        self.buffer.append(experience)

    def sample(self, batch_size):
        state_batch = []
        action_batch = []
        reward_batch = []
        next_state_batch = []
        done_batch = []

        batch = random.sample(self.buffer, batch_size)

        for experience in batch:
            state, action, reward, next_state, done = experience
            state_batch.append(state)
            action_batch.append(action)
            reward_batch.append(reward)
            next_state_batch.append(next_state)
            done_batch.append(done)

        return (state_batch, action_batch, reward_batch, next_state_batch, done_batch)

In [6]:
def update_net(model, target_model, tau=1.):
    '''更新目标网络'''
    for tar_param, param in zip(target_model.parameters(), model.parameters()):
        tar_param.data.copy_(param.data * tau + tar_param.data * (1.0 - tau))

In [7]:
class TD3:
    def __init__(self, env, gamma, tau, buffer_maxlen, delay_step, noise_std, noise_bound, critic_lr, actor_lr):
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        self.env = env
        self.obs_dim = env.observation_space.shape[0]
        self.action_dim = env.action_space.shape[0]
        self.max_action = float(env.action_space.high[0])
        
        self.gamma = gamma
        self.tau = tau
        self.noise_std = noise_std
        self.noise_bound = noise_bound
        self.update_step = 0
        self.delay_step = delay_step
        
        # 初始化6个网络
        self.actor = Actor(self.obs_dim, self.action_dim, self.max_action).to(self.device)
        self.actor_target = Actor(self.obs_dim, self.action_dim, self.max_action).to(self.device)
        
        self.critic1 = Critic(self.obs_dim, self.action_dim).to(self.device)
        self.critic2 = Critic(self.obs_dim, self.action_dim).to(self.device)
        self.critic1_target = Critic(self.obs_dim, self.action_dim).to(self.device)
        self.critic2_target = Critic(self.obs_dim, self.action_dim).to(self.device)
        
        #  初始化目标网络的权重
        update_net(self.actor, self.actor_target, tau=1.)
        update_net(self.critic1, self.critic1_target, tau=1.)
        update_net(self.critic2, self.critic2_target, tau=1.)
        
        # 初始化优化器
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=actor_lr)
        self.critic1_optimizer = torch.optim.Adam(self.critic1.parameters(), lr=critic_lr)
        self.critic2_optimizer = torch.optim.Adam(self.critic2.parameters(), lr=critic_lr)
        
        # 设置一个mse函数
        self.loss_fn = torch.nn.MSELoss()
        
        # 初始化经验池
        self.replay_buffer = BasicBuffer(buffer_maxlen)
        
        # 初始化记录scalar的字典
        self.summaries = {}
        
    def get_action(self, obs):
        '''因为网络输出的直接是满足动作区间的动作，所以不需要rescale'''
        state = torch.FloatTensor(obs).unsqueeze(0).to(self.device)
        action = self.actor(state)
        action = action.squeeze(0).cpu().detach().numpy()
        return action
    
    def update(self, batch_size):
        '''更新网络'''
        state_batch, action_batch, reward_batch, next_state_batch, done_batch = self.replay_buffer.sample(batch_size)
        state_batch = torch.FloatTensor(state_batch).to(self.device)
        action_batch = torch.FloatTensor(action_batch).to(self.device)
        reward_batch = torch.FloatTensor(reward_batch).to(self.device)
        next_state_batch = torch.FloatTensor(next_state_batch).to(self.device)
        done_batch = torch.FloatTensor(done_batch).to(self.device)
        done_batch = done_batch.view(-1, 1)  # 转换成 （batchsize, 1） 的形状，为了下面的相乘
        
        action_noise = self.generate_noise(action_batch) # 产生一批和一批动作一样形状的高斯噪声
        # actions加过噪音要裁剪到目标范围，这里为何是next_state??论文和论文的代码不符合啊，可能就是个形式？还是要根据Q里的s或者s'
        actions_hat = (self.actor_target(next_state_batch) + action_noise).clamp(-self.max_action, self.max_action)
        next_q1 = self.critic1_target(next_state_batch, actions_hat)
        next_q2 = self.critic2_target(next_state_batch, actions_hat)
        min_next_q = torch.min(next_q1, next_q2)
        y = (reward_batch + (1.-done_batch) * self.gamma * min_next_q).detach()
        curr_q1 = self.critic1(state_batch, action_batch)
        curr_q2 = self.critic2(state_batch, action_batch)

        loss_critic1 = self.loss_fn(curr_q1, y)
        loss_critic2 = self.loss_fn(curr_q2, y)
        self.summaries['critic_loss'] = loss_critic1.detach().item()
        
        # 更新两个critic网络
        self.critic1_optimizer.zero_grad()
        self.critic2_optimizer.zero_grad()
        
        loss_critic1.backward()
        loss_critic2.backward()
        
        self.critic1_optimizer.step()
        self.critic2_optimizer.step()
        
        # 延迟更新策略网络和目标网络
        if self.update_step % self.delay_step == 0:
            actor_loss = -self.critic1(state_batch, self.actor(state_batch)).mean()
            self.summaries['actor_loss'] = actor_loss.detach().item()
            self.actor_optimizer.zero_grad()
            actor_loss.backward()
            self.actor_optimizer.step()
            
            # 更新目标网络
            update_net(self.actor, self.actor_target, tau=self.tau)
            update_net(self.critic1, self.critic1_target, tau=self.tau)
            update_net(self.critic2, self.critic2_target, tau=self.tau)
        
        self.update_step += 1
        
    def generate_noise(self, action_batch):
        '''对一批动作产生同样维度的噪声，用于探索'''
        # torch.normal(mean, std) 他们只要有一个有形状就行
        noise = torch.normal(mean=torch.zeros(action_batch.size()), std=self.noise_std)
        noise = torch.clamp(noise, -self.noise_bound, self.noise_bound).to(self.device)
        return noise

In [8]:
def train(env, agent, max_episodes, max_steps, batch_size, std_train=0.1, render=False):
    '''按照episode训练, 可能会浪费一些步数，刚开始每回合训练不满，
    优点：可以自定义每回合最高步数限制，对一些非回合制游戏有效
    缺点：如果设置了每回合最大目标步数，智能体就会被限制住，到了这个目标就不会再继续增长了
    '''
    total_steps = 0
    
    for episode in range(max_episodes):
        state = env.reset()
        episode_reward = 0
        
        for step in range(max_steps):
            if render:
                env.render()
                
            if total_steps < 25e3:
                action = env.action_space.sample()
            else:
                action = (agent.get_action(state) + np.random.normal(0, agent.max_action * std_train, size=agent.action_dim))
               
            next_state, reward, done, _ = env.step(action)
            agent.replay_buffer.push(state, action, reward, next_state, done)
            episode_reward += reward
            total_steps += 1
            
            if total_steps > 25e3:
                agent.update(batch_size)     
            
            if done or step == max_steps -1:
                print('totle_step {}, episode_reward {}'.format(total_steps, episode_reward))
                break
            state = next_state

In [9]:
def train_step(env, agent, max_steps, begin_steps, batch_size, std_train=0.1, render=False):
    '''按照所有步数进行训练：
    优点：每回合步数没有上限，一直会达到环境默认上限。
    缺点：如果环境本身没有最高步数限制返回done，会陷入死循环，对一些非回合制游戏无效'''
    state, done = env.reset(), False
    episode_steps = 0
    episode_reward = 0
    episode_num = 0
    
    for t in range(max_steps):
        episode_steps += 1
        if render:
                env.render()
        if t < begin_steps:
            action = env.action_space.sample()
        else:
            action = (agent.get_action(state) + np.random.normal(0, agent.max_action * std_train, size=agent.action_dim))        
        
        next_state, reward, done, _ = env.step(action)
        agent.replay_buffer.push(state, action, reward, next_state, done)
        episode_reward += reward
        state = next_state

        if t >= begin_steps:
            agent.update(batch_size)     
            scalar_writer.add_scalar('loss/actor_loss',agent.summaries['actor_loss'], t)
            scalar_writer.add_scalar('loss/critic_loss',agent.summaries['critic_loss'], t)

        if done:
            print('totle_step {},episode_num {}, episode_steps {}, episode_reward {}'.format(t+1, episode_num, episode_steps, episode_reward))
            scalar_writer.add_scalar('main/episode_reward', episode_reward, episode_num)
            scalar_writer.add_scalar('main/episode_steps', episode_steps, episode_num)
            #  重置各种东西
            state, done = env.reset(), False
            episode_reward = 0
            episode_steps = 0
            episode_num += 1

In [10]:
env = gym.make("Hopper-v2")
gamma = 0.99
tau = 0.005
noise_std = 0.2
bound = 0.5
delay_step = 2
buffer_maxlen = int(1e6)
critic_lr = 3e-4
actor_lr = 3e-4

agent = TD3(env, gamma, tau, buffer_maxlen, delay_step, noise_std, bound, critic_lr, actor_lr)

In [8]:
# 按照回合训练
max_episodes = 10000
max_steps = 1000
batch_size = 64
train(env, agent, max_episodes, max_steps, batch_size, render=False)

NameError: name 'env' is not defined

In [11]:
# 按照总步数训练
max_steps = int(1e6)
begin_steps = 25e3
batch_size = 64
train_step(env, agent, max_steps, begin_steps, batch_size, std_train=0.1, render=False)

totle_step 16,episode_num 0, episode_steps 16, episode_reward 9.061600552603435
totle_step 27,episode_num 1, episode_steps 11, episode_reward 7.922251309152312
totle_step 50,episode_num 2, episode_steps 23, episode_reward 20.860664669114918
totle_step 62,episode_num 3, episode_steps 12, episode_reward 9.003672784115937
totle_step 76,episode_num 4, episode_steps 14, episode_reward 9.434077701868338
totle_step 88,episode_num 5, episode_steps 12, episode_reward 10.46061525112865
totle_step 107,episode_num 6, episode_steps 19, episode_reward 7.857916006203048
totle_step 127,episode_num 7, episode_steps 20, episode_reward 12.942361662663517
totle_step 166,episode_num 8, episode_steps 39, episode_reward 24.287719212860704
totle_step 185,episode_num 9, episode_steps 19, episode_reward 12.506631883251126
totle_step 228,episode_num 10, episode_steps 43, episode_reward 44.780346606582505
totle_step 239,episode_num 11, episode_steps 11, episode_reward 9.432184929865413
totle_step 250,episode_num 

totle_step 2386,episode_num 111, episode_steps 19, episode_reward 13.730941200560348
totle_step 2398,episode_num 112, episode_steps 12, episode_reward 8.406569475240248
totle_step 2416,episode_num 113, episode_steps 18, episode_reward 14.06948967949845
totle_step 2455,episode_num 114, episode_steps 39, episode_reward 17.61438114385788
totle_step 2475,episode_num 115, episode_steps 20, episode_reward 10.689338646610526
totle_step 2493,episode_num 116, episode_steps 18, episode_reward 8.913600881421532
totle_step 2533,episode_num 117, episode_steps 40, episode_reward 51.137980005855766
totle_step 2547,episode_num 118, episode_steps 14, episode_reward 10.752407157183807
totle_step 2567,episode_num 119, episode_steps 20, episode_reward 7.461349354958761
totle_step 2579,episode_num 120, episode_steps 12, episode_reward 9.808098273518874
totle_step 2629,episode_num 121, episode_steps 50, episode_reward 53.39057974818009
totle_step 2654,episode_num 122, episode_steps 25, episode_reward 26.026

totle_step 4827,episode_num 219, episode_steps 22, episode_reward 11.856777363466204
totle_step 4840,episode_num 220, episode_steps 13, episode_reward 10.746452927638861
totle_step 4860,episode_num 221, episode_steps 20, episode_reward 19.252165576776683
totle_step 4872,episode_num 222, episode_steps 12, episode_reward 9.93180845783039
totle_step 4907,episode_num 223, episode_steps 35, episode_reward 8.278668458072666
totle_step 4921,episode_num 224, episode_steps 14, episode_reward 9.915188263982655
totle_step 4943,episode_num 225, episode_steps 22, episode_reward 16.136432615828756
totle_step 4977,episode_num 226, episode_steps 34, episode_reward 41.53138384530211
totle_step 5003,episode_num 227, episode_steps 26, episode_reward 18.75668634311825
totle_step 5030,episode_num 228, episode_steps 27, episode_reward 31.352132035949708
totle_step 5046,episode_num 229, episode_steps 16, episode_reward 11.834130580594888
totle_step 5066,episode_num 230, episode_steps 20, episode_reward 13.43

totle_step 7086,episode_num 320, episode_steps 25, episode_reward 12.819220294143705
totle_step 7106,episode_num 321, episode_steps 20, episode_reward 22.074941946675114
totle_step 7120,episode_num 322, episode_steps 14, episode_reward 13.182445096060682
totle_step 7185,episode_num 323, episode_steps 65, episode_reward 84.5091559952887
totle_step 7200,episode_num 324, episode_steps 15, episode_reward 9.819825415878574
totle_step 7211,episode_num 325, episode_steps 11, episode_reward 8.980892416035374
totle_step 7230,episode_num 326, episode_steps 19, episode_reward 12.312748640117896
totle_step 7250,episode_num 327, episode_steps 20, episode_reward 14.017418526640597
totle_step 7262,episode_num 328, episode_steps 12, episode_reward 8.600285354146905
totle_step 7315,episode_num 329, episode_steps 53, episode_reward 40.293119513668906
totle_step 7328,episode_num 330, episode_steps 13, episode_reward 10.800154977663503
totle_step 7337,episode_num 331, episode_steps 9, episode_reward 7.620

totle_step 9405,episode_num 428, episode_steps 10, episode_reward 7.666466993986746
totle_step 9429,episode_num 429, episode_steps 24, episode_reward 14.005753702948873
totle_step 9480,episode_num 430, episode_steps 51, episode_reward 53.99448184819993
totle_step 9492,episode_num 431, episode_steps 12, episode_reward 5.375083424535862
totle_step 9510,episode_num 432, episode_steps 18, episode_reward 9.654508776011102
totle_step 9526,episode_num 433, episode_steps 16, episode_reward 10.58428526788342
totle_step 9540,episode_num 434, episode_steps 14, episode_reward 6.472599505910956
totle_step 9553,episode_num 435, episode_steps 13, episode_reward 7.110247127427456
totle_step 9590,episode_num 436, episode_steps 37, episode_reward 51.512153578808935
totle_step 9623,episode_num 437, episode_steps 33, episode_reward 34.86075994501852
totle_step 9642,episode_num 438, episode_steps 19, episode_reward 11.10545193771966
totle_step 9685,episode_num 439, episode_steps 43, episode_reward 51.58230

totle_step 11922,episode_num 539, episode_steps 15, episode_reward 8.565140710408249
totle_step 11955,episode_num 540, episode_steps 33, episode_reward 25.90606211021133
totle_step 11985,episode_num 541, episode_steps 30, episode_reward 26.912925977503267
totle_step 12017,episode_num 542, episode_steps 32, episode_reward 13.015271339040563
totle_step 12032,episode_num 543, episode_steps 15, episode_reward 11.365276717201228
totle_step 12047,episode_num 544, episode_steps 15, episode_reward 10.983961025716729
totle_step 12076,episode_num 545, episode_steps 29, episode_reward 33.89976247065478
totle_step 12098,episode_num 546, episode_steps 22, episode_reward 23.088133870953126
totle_step 12114,episode_num 547, episode_steps 16, episode_reward 9.812274518442655
totle_step 12147,episode_num 548, episode_steps 33, episode_reward 28.928406783161932
totle_step 12175,episode_num 549, episode_steps 28, episode_reward 30.867145569493367
totle_step 12204,episode_num 550, episode_steps 29, episod

totle_step 14527,episode_num 654, episode_steps 14, episode_reward 12.7918592206383
totle_step 14544,episode_num 655, episode_steps 17, episode_reward 16.260084166425194
totle_step 14557,episode_num 656, episode_steps 13, episode_reward 10.600945169474363
totle_step 14573,episode_num 657, episode_steps 16, episode_reward 6.6355285903726156
totle_step 14591,episode_num 658, episode_steps 18, episode_reward 11.697980399036913
totle_step 14618,episode_num 659, episode_steps 27, episode_reward 32.22626633375488
totle_step 14635,episode_num 660, episode_steps 17, episode_reward 11.250090607258063
totle_step 14655,episode_num 661, episode_steps 20, episode_reward 25.657917699470964
totle_step 14680,episode_num 662, episode_steps 25, episode_reward 26.11718212241961
totle_step 14691,episode_num 663, episode_steps 11, episode_reward 8.89443218717479
totle_step 14708,episode_num 664, episode_steps 17, episode_reward 12.629606564573773
totle_step 14728,episode_num 665, episode_steps 20, episode_

totle_step 17052,episode_num 762, episode_steps 13, episode_reward 9.522108867355067
totle_step 17063,episode_num 763, episode_steps 11, episode_reward 9.146057125082661
totle_step 17087,episode_num 764, episode_steps 24, episode_reward 15.051558088462087
totle_step 17102,episode_num 765, episode_steps 15, episode_reward 13.58985463129855
totle_step 17135,episode_num 766, episode_steps 33, episode_reward 27.692488600472526
totle_step 17167,episode_num 767, episode_steps 32, episode_reward 40.47508472454749
totle_step 17183,episode_num 768, episode_steps 16, episode_reward 8.476100980397096
totle_step 17242,episode_num 769, episode_steps 59, episode_reward 91.45679233713331
totle_step 17254,episode_num 770, episode_steps 12, episode_reward 10.950317226147403
totle_step 17270,episode_num 771, episode_steps 16, episode_reward 15.78370592880397
totle_step 17286,episode_num 772, episode_steps 16, episode_reward 13.458746889266651
totle_step 17303,episode_num 773, episode_steps 17, episode_r

totle_step 19488,episode_num 882, episode_steps 12, episode_reward 11.238981295889316
totle_step 19512,episode_num 883, episode_steps 24, episode_reward 17.6736867232986
totle_step 19521,episode_num 884, episode_steps 9, episode_reward 6.315569392675824
totle_step 19561,episode_num 885, episode_steps 40, episode_reward 35.34471785248635
totle_step 19587,episode_num 886, episode_steps 26, episode_reward 27.0296353792383
totle_step 19603,episode_num 887, episode_steps 16, episode_reward 11.852669084651428
totle_step 19619,episode_num 888, episode_steps 16, episode_reward 12.014636289079009
totle_step 19636,episode_num 889, episode_steps 17, episode_reward 12.989333273164872
totle_step 19649,episode_num 890, episode_steps 13, episode_reward 9.128278101911377
totle_step 19662,episode_num 891, episode_steps 13, episode_reward 8.929056392390327
totle_step 19695,episode_num 892, episode_steps 33, episode_reward 35.65352767745443
totle_step 19718,episode_num 893, episode_steps 23, episode_rewa

totle_step 21615,episode_num 991, episode_steps 18, episode_reward 13.046569828423289
totle_step 21628,episode_num 992, episode_steps 13, episode_reward 13.032941542236223
totle_step 21636,episode_num 993, episode_steps 8, episode_reward 5.150711574562196
totle_step 21649,episode_num 994, episode_steps 13, episode_reward 10.345982997152465
totle_step 21681,episode_num 995, episode_steps 32, episode_reward 15.590115607049084
totle_step 21700,episode_num 996, episode_steps 19, episode_reward 16.098900800664218
totle_step 21725,episode_num 997, episode_steps 25, episode_reward 26.095938159357523
totle_step 21740,episode_num 998, episode_steps 15, episode_reward 10.474132220131699
totle_step 21769,episode_num 999, episode_steps 29, episode_reward 16.96862187457151
totle_step 21798,episode_num 1000, episode_steps 29, episode_reward 13.05028637573932
totle_step 21820,episode_num 1001, episode_steps 22, episode_reward 17.67980836240885
totle_step 21854,episode_num 1002, episode_steps 34, epis

totle_step 24192,episode_num 1095, episode_steps 83, episode_reward 160.42104251187746
totle_step 24210,episode_num 1096, episode_steps 18, episode_reward 15.644446894811471
totle_step 24227,episode_num 1097, episode_steps 17, episode_reward 11.321096911242048
totle_step 24243,episode_num 1098, episode_steps 16, episode_reward 10.139171006496808
totle_step 24295,episode_num 1099, episode_steps 52, episode_reward 55.052885656926186
totle_step 24330,episode_num 1100, episode_steps 35, episode_reward 39.56918343162467
totle_step 24373,episode_num 1101, episode_steps 43, episode_reward 32.49754858188904
totle_step 24387,episode_num 1102, episode_steps 14, episode_reward 12.10911659284377
totle_step 24398,episode_num 1103, episode_steps 11, episode_reward 10.081128385411379
totle_step 24411,episode_num 1104, episode_steps 13, episode_reward 8.92217263895086
totle_step 24428,episode_num 1105, episode_steps 17, episode_reward 16.571346449549996
totle_step 24444,episode_num 1106, episode_steps

totle_step 26273,episode_num 1190, episode_steps 86, episode_reward 167.32844775991123
totle_step 26358,episode_num 1191, episode_steps 85, episode_reward 162.15908150168056
totle_step 26444,episode_num 1192, episode_steps 86, episode_reward 168.4897919301917
totle_step 26530,episode_num 1193, episode_steps 86, episode_reward 165.10916142256795
totle_step 26615,episode_num 1194, episode_steps 85, episode_reward 167.48638884216894
totle_step 26677,episode_num 1195, episode_steps 62, episode_reward 114.2708483540667
totle_step 26742,episode_num 1196, episode_steps 65, episode_reward 118.64706662527792
totle_step 26802,episode_num 1197, episode_steps 60, episode_reward 111.61457818438504
totle_step 26870,episode_num 1198, episode_steps 68, episode_reward 125.60008764004154
totle_step 26956,episode_num 1199, episode_steps 86, episode_reward 173.58793918004343
totle_step 27023,episode_num 1200, episode_steps 67, episode_reward 122.81328268644114
totle_step 27089,episode_num 1201, episode_st

totle_step 33102,episode_num 1285, episode_steps 85, episode_reward 180.66108790569075
totle_step 33187,episode_num 1286, episode_steps 85, episode_reward 178.0634968193544
totle_step 33274,episode_num 1287, episode_steps 87, episode_reward 170.71437437035777
totle_step 33360,episode_num 1288, episode_steps 86, episode_reward 173.2000388941608
totle_step 33448,episode_num 1289, episode_steps 88, episode_reward 174.49922072688895
totle_step 33535,episode_num 1290, episode_steps 87, episode_reward 173.09919830724292
totle_step 33621,episode_num 1291, episode_steps 86, episode_reward 172.15385885840968
totle_step 33707,episode_num 1292, episode_steps 86, episode_reward 176.22876691665184
totle_step 33794,episode_num 1293, episode_steps 87, episode_reward 171.58290334287034
totle_step 33880,episode_num 1294, episode_steps 86, episode_reward 176.0848429247641
totle_step 33965,episode_num 1295, episode_steps 85, episode_reward 173.14564706097207
totle_step 34051,episode_num 1296, episode_ste

totle_step 41619,episode_num 1380, episode_steps 91, episode_reward 190.21960959731143
totle_step 41697,episode_num 1381, episode_steps 78, episode_reward 168.929400270918
totle_step 41777,episode_num 1382, episode_steps 80, episode_reward 173.52540021116155
totle_step 41855,episode_num 1383, episode_steps 78, episode_reward 167.84092644226195
totle_step 41943,episode_num 1384, episode_steps 88, episode_reward 183.94481497107734
totle_step 42021,episode_num 1385, episode_steps 78, episode_reward 172.2840493392513
totle_step 42102,episode_num 1386, episode_steps 81, episode_reward 175.4911573537981
totle_step 42198,episode_num 1387, episode_steps 96, episode_reward 197.94521421711195
totle_step 42291,episode_num 1388, episode_steps 93, episode_reward 192.69921609527407
totle_step 42370,episode_num 1389, episode_steps 79, episode_reward 172.99436970119407
totle_step 42460,episode_num 1390, episode_steps 90, episode_reward 185.72929523508392
totle_step 42549,episode_num 1391, episode_step

totle_step 52777,episode_num 1475, episode_steps 167, episode_reward 478.1267329916435
totle_step 53021,episode_num 1476, episode_steps 244, episode_reward 621.2622705065406
totle_step 53270,episode_num 1477, episode_steps 249, episode_reward 667.6500530169675
totle_step 53513,episode_num 1478, episode_steps 243, episode_reward 620.9520605709745
totle_step 53810,episode_num 1479, episode_steps 297, episode_reward 850.2228738276107
totle_step 54099,episode_num 1480, episode_steps 289, episode_reward 736.2008066830267
totle_step 54391,episode_num 1481, episode_steps 292, episode_reward 835.0854263843119
totle_step 54674,episode_num 1482, episode_steps 283, episode_reward 712.3311998975582
totle_step 54807,episode_num 1483, episode_steps 133, episode_reward 304.24669881968214
totle_step 55061,episode_num 1484, episode_steps 254, episode_reward 676.1976906430988
totle_step 55281,episode_num 1485, episode_steps 220, episode_reward 509.71744112419924
totle_step 55527,episode_num 1486, episod

totle_step 68282,episode_num 1569, episode_steps 136, episode_reward 319.4919154578469
totle_step 68477,episode_num 1570, episode_steps 195, episode_reward 397.92449022385097
totle_step 68613,episode_num 1571, episode_steps 136, episode_reward 308.0516986608512
totle_step 68766,episode_num 1572, episode_steps 153, episode_reward 328.7007197684727
totle_step 68920,episode_num 1573, episode_steps 154, episode_reward 322.98617591632774
totle_step 69062,episode_num 1574, episode_steps 142, episode_reward 327.03642344929085
totle_step 69185,episode_num 1575, episode_steps 123, episode_reward 286.022020518254
totle_step 69307,episode_num 1576, episode_steps 122, episode_reward 283.46661710163596
totle_step 69505,episode_num 1577, episode_steps 198, episode_reward 486.53520834558304
totle_step 69632,episode_num 1578, episode_steps 127, episode_reward 298.87847575918573
totle_step 69774,episode_num 1579, episode_steps 142, episode_reward 301.983436058646
totle_step 69901,episode_num 1580, epis

totle_step 82572,episode_num 1663, episode_steps 155, episode_reward 422.3349960486446
totle_step 82708,episode_num 1664, episode_steps 136, episode_reward 306.43248859243204
totle_step 82849,episode_num 1665, episode_steps 141, episode_reward 351.6447647097959
totle_step 82993,episode_num 1666, episode_steps 144, episode_reward 341.026865342936
totle_step 83133,episode_num 1667, episode_steps 140, episode_reward 313.8046326472272
totle_step 83266,episode_num 1668, episode_steps 133, episode_reward 330.09806448108816
totle_step 83408,episode_num 1669, episode_steps 142, episode_reward 367.2464122083877
totle_step 83552,episode_num 1670, episode_steps 144, episode_reward 383.1483576635874
totle_step 83707,episode_num 1671, episode_steps 155, episode_reward 391.08814784662457
totle_step 83840,episode_num 1672, episode_steps 133, episode_reward 399.00253112404977
totle_step 83974,episode_num 1673, episode_steps 134, episode_reward 298.38299475611893
totle_step 84111,episode_num 1674, epis

totle_step 100684,episode_num 1757, episode_steps 156, episode_reward 334.00848086861134
totle_step 100848,episode_num 1758, episode_steps 164, episode_reward 421.73856897835316
totle_step 100967,episode_num 1759, episode_steps 119, episode_reward 271.7164831781837
totle_step 101189,episode_num 1760, episode_steps 222, episode_reward 532.6770825359366
totle_step 101483,episode_num 1761, episode_steps 294, episode_reward 610.2735864110077
totle_step 101780,episode_num 1762, episode_steps 297, episode_reward 597.7487717259945
totle_step 101895,episode_num 1763, episode_steps 115, episode_reward 258.6607974562352
totle_step 102125,episode_num 1764, episode_steps 230, episode_reward 540.0568254239292
totle_step 102191,episode_num 1765, episode_steps 66, episode_reward 126.27913460360595
totle_step 102349,episode_num 1766, episode_steps 158, episode_reward 387.9203934898652
totle_step 102519,episode_num 1767, episode_steps 170, episode_reward 444.43423527903605
totle_step 102675,episode_num

totle_step 117149,episode_num 1850, episode_steps 153, episode_reward 451.54258027789683
totle_step 117309,episode_num 1851, episode_steps 160, episode_reward 481.74697271970007
totle_step 117445,episode_num 1852, episode_steps 136, episode_reward 327.58318276688516
totle_step 117581,episode_num 1853, episode_steps 136, episode_reward 308.2585724542196
totle_step 117781,episode_num 1854, episode_steps 200, episode_reward 585.9211265843329
totle_step 117970,episode_num 1855, episode_steps 189, episode_reward 579.9281961614438
totle_step 118105,episode_num 1856, episode_steps 135, episode_reward 322.70756864350875
totle_step 118291,episode_num 1857, episode_steps 186, episode_reward 607.6135238513167
totle_step 118462,episode_num 1858, episode_steps 171, episode_reward 591.9588878580253
totle_step 118655,episode_num 1859, episode_steps 193, episode_reward 554.7154889789695
totle_step 118844,episode_num 1860, episode_steps 189, episode_reward 663.7849567140131
totle_step 119035,episode_nu

totle_step 132919,episode_num 1944, episode_steps 91, episode_reward 127.32508252640704
totle_step 132979,episode_num 1945, episode_steps 60, episode_reward 74.15750259393364
totle_step 133233,episode_num 1946, episode_steps 254, episode_reward 421.6074802599517
totle_step 133496,episode_num 1947, episode_steps 263, episode_reward 376.6726819231606
totle_step 133602,episode_num 1948, episode_steps 106, episode_reward 161.82197817499198
totle_step 133709,episode_num 1949, episode_steps 107, episode_reward 154.29341816004742
totle_step 133914,episode_num 1950, episode_steps 205, episode_reward 277.1773590194297
totle_step 134351,episode_num 1951, episode_steps 437, episode_reward 943.6775126842476
totle_step 134468,episode_num 1952, episode_steps 117, episode_reward 176.8763665863165
totle_step 134787,episode_num 1953, episode_steps 319, episode_reward 746.3994670686627
totle_step 134913,episode_num 1954, episode_steps 126, episode_reward 213.69907058422152
totle_step 135071,episode_num 

totle_step 159027,episode_num 2037, episode_steps 258, episode_reward 724.6789977262745
totle_step 159268,episode_num 2038, episode_steps 241, episode_reward 608.9966532407198
totle_step 159473,episode_num 2039, episode_steps 205, episode_reward 590.6782804565238
totle_step 159682,episode_num 2040, episode_steps 209, episode_reward 680.8084506174712
totle_step 159912,episode_num 2041, episode_steps 230, episode_reward 662.1543545004187
totle_step 160233,episode_num 2042, episode_steps 321, episode_reward 912.1456404482722
totle_step 160392,episode_num 2043, episode_steps 159, episode_reward 416.71143947016475
totle_step 160741,episode_num 2044, episode_steps 349, episode_reward 952.9453149667498
totle_step 161075,episode_num 2045, episode_steps 334, episode_reward 970.9868477385339
totle_step 161367,episode_num 2046, episode_steps 292, episode_reward 859.2979775742697
totle_step 161500,episode_num 2047, episode_steps 133, episode_reward 340.20703186114145
totle_step 161759,episode_num 

totle_step 194010,episode_num 2130, episode_steps 216, episode_reward 481.2609783684249
totle_step 194342,episode_num 2131, episode_steps 332, episode_reward 989.9550821820222
totle_step 195269,episode_num 2132, episode_steps 927, episode_reward 2740.093935180458
totle_step 196269,episode_num 2133, episode_steps 1000, episode_reward 2993.097676385822
totle_step 196677,episode_num 2134, episode_steps 408, episode_reward 1273.2166820907316
totle_step 197000,episode_num 2135, episode_steps 323, episode_reward 934.3264824088135
totle_step 197435,episode_num 2136, episode_steps 435, episode_reward 1198.1125204359553
totle_step 198435,episode_num 2137, episode_steps 1000, episode_reward 2610.968516812707
totle_step 199435,episode_num 2138, episode_steps 1000, episode_reward 2837.3209814557763
totle_step 200435,episode_num 2139, episode_steps 1000, episode_reward 2749.590228800243
totle_step 200697,episode_num 2140, episode_steps 262, episode_reward 756.0979714638662
totle_step 200976,episode

totle_step 241193,episode_num 2223, episode_steps 552, episode_reward 1699.273358504269
totle_step 241498,episode_num 2224, episode_steps 305, episode_reward 912.7585231319531
totle_step 241700,episode_num 2225, episode_steps 202, episode_reward 502.9773316530705
totle_step 241946,episode_num 2226, episode_steps 246, episode_reward 649.9829217419405
totle_step 242293,episode_num 2227, episode_steps 347, episode_reward 1153.5674065156277
totle_step 242707,episode_num 2228, episode_steps 414, episode_reward 1326.7846296391413
totle_step 242956,episode_num 2229, episode_steps 249, episode_reward 721.7522965442834
totle_step 243220,episode_num 2230, episode_steps 264, episode_reward 789.5362713245379
totle_step 243458,episode_num 2231, episode_steps 238, episode_reward 663.2669803709748
totle_step 243830,episode_num 2232, episode_steps 372, episode_reward 1055.2211477048338
totle_step 244177,episode_num 2233, episode_steps 347, episode_reward 1158.9679818993175
totle_step 245089,episode_nu

totle_step 296277,episode_num 2316, episode_steps 599, episode_reward 2059.593329767755
totle_step 296784,episode_num 2317, episode_steps 507, episode_reward 1803.3689675834212
totle_step 297279,episode_num 2318, episode_steps 495, episode_reward 1841.949877016455
totle_step 298253,episode_num 2319, episode_steps 974, episode_reward 3500.9917995429823
totle_step 298653,episode_num 2320, episode_steps 400, episode_reward 1455.901932564338
totle_step 299147,episode_num 2321, episode_steps 494, episode_reward 1714.2692199653582
totle_step 299566,episode_num 2322, episode_steps 419, episode_reward 1512.8452307567645
totle_step 300168,episode_num 2323, episode_steps 602, episode_reward 2191.3474558419184
totle_step 301168,episode_num 2324, episode_steps 1000, episode_reward 3409.6879300677306
totle_step 301694,episode_num 2325, episode_steps 526, episode_reward 1895.3075001688655
totle_step 302123,episode_num 2326, episode_steps 429, episode_reward 1527.7027464433918
totle_step 302465,episo

totle_step 337194,episode_num 2409, episode_steps 339, episode_reward 1115.1771500645734
totle_step 337656,episode_num 2410, episode_steps 462, episode_reward 1609.4574253360909
totle_step 338140,episode_num 2411, episode_steps 484, episode_reward 1698.448816740749
totle_step 338873,episode_num 2412, episode_steps 733, episode_reward 2453.9658169795325
totle_step 339873,episode_num 2413, episode_steps 1000, episode_reward 3070.3867228921517
totle_step 340749,episode_num 2414, episode_steps 876, episode_reward 3021.765644108345
totle_step 341749,episode_num 2415, episode_steps 1000, episode_reward 3211.4641640627447
totle_step 342749,episode_num 2416, episode_steps 1000, episode_reward 3197.6569033715014
totle_step 343749,episode_num 2417, episode_steps 1000, episode_reward 3300.0229055981026
totle_step 344200,episode_num 2418, episode_steps 451, episode_reward 1494.7409098056726
totle_step 344716,episode_num 2419, episode_steps 516, episode_reward 1852.9679609658247
totle_step 345053,e

totle_step 394702,episode_num 2502, episode_steps 473, episode_reward 1678.270166177104
totle_step 395211,episode_num 2503, episode_steps 509, episode_reward 1728.7075206126706
totle_step 395817,episode_num 2504, episode_steps 606, episode_reward 2106.088497586586
totle_step 396317,episode_num 2505, episode_steps 500, episode_reward 1809.4252946020617
totle_step 396817,episode_num 2506, episode_steps 500, episode_reward 1834.444218681222
totle_step 397301,episode_num 2507, episode_steps 484, episode_reward 1749.3817007064429
totle_step 397671,episode_num 2508, episode_steps 370, episode_reward 1332.5364412665074
totle_step 398032,episode_num 2509, episode_steps 361, episode_reward 1298.667957884442
totle_step 398390,episode_num 2510, episode_steps 358, episode_reward 1274.9105286519225
totle_step 398803,episode_num 2511, episode_steps 413, episode_reward 1527.8645983831955
totle_step 399147,episode_num 2512, episode_steps 344, episode_reward 1222.651497433952
totle_step 399586,episode_

totle_step 438167,episode_num 2595, episode_steps 684, episode_reward 2434.744520947209
totle_step 438488,episode_num 2596, episode_steps 321, episode_reward 1129.6016417022481
totle_step 439013,episode_num 2597, episode_steps 525, episode_reward 1811.906675242438
totle_step 439503,episode_num 2598, episode_steps 490, episode_reward 1743.6251044054707
totle_step 439982,episode_num 2599, episode_steps 479, episode_reward 1671.6828939389766
totle_step 440430,episode_num 2600, episode_steps 448, episode_reward 1605.0001718269873
totle_step 440772,episode_num 2601, episode_steps 342, episode_reward 1186.8974452886566
totle_step 441114,episode_num 2602, episode_steps 342, episode_reward 1184.0226714994158
totle_step 441524,episode_num 2603, episode_steps 410, episode_reward 1469.4429909244097
totle_step 442049,episode_num 2604, episode_steps 525, episode_reward 1813.5442047387298
totle_step 442515,episode_num 2605, episode_steps 466, episode_reward 1618.6597165368235
totle_step 442888,episo

KeyboardInterrupt: 

In [12]:
with torch.no_grad():
    for i in range(100):
        state = env.reset()
        for j in range(1000):
            env.render()
            action = agent.get_action(np.array(state))
            print(action)
            next_state, reward, done, _ = env.step(action)
            if done:
                break
            state = next_state

Creating window glfw
[ 0.9899661  -0.14495094  0.9332513 ]
[0.99459136 0.3495038  0.20310341]
[0.9473573  0.08097157 0.18381123]
[0.9274553  0.03887747 0.27504626]
[0.9345606  0.09861897 0.34394315]
[0.9318877  0.15547793 0.5584015 ]
[0.9554483  0.2897263  0.62621194]
[0.9818423  0.58079964 0.5417278 ]
[ 0.9829051  0.325816  -0.1919474]
[ 0.64135474 -0.85198873 -0.87710196]
[ 0.6205839  -0.42334497 -0.16587439]
[0.8439528  0.05466815 0.5410438 ]
[0.5892363  0.05052556 0.41719785]
[0.35191306 0.13786589 0.62987715]
[-0.00234661  0.11559023  0.78804463]
[-0.31720573 -0.5477168   0.8707153 ]
[-0.9412783  -0.9638999   0.97850466]
[-0.8577042 -0.7939994  0.9282733]
[ 0.9935231   0.99781317 -0.79827744]
[0.9561082  0.31398773 0.6670681 ]
[ 0.7442811  -0.24355039  0.62405336]
[ 0.12883195  0.8534302  -0.89522   ]
[ 0.07959979 -0.28315622 -0.17102793]
[ 0.6535372   0.41315874 -0.7786634 ]
[-0.75203615  0.5457347  -0.9173735 ]
[0.45114046 0.48445803 0.04518583]
[-0.8440358  0.555085   0.693939 

[ 0.16243544 -0.02100173 -0.8942793 ]
[ 0.07914218 -0.06550665 -0.8881821 ]
[-0.28110108  0.7118585  -0.92766315]
[-0.2713953 -0.7005748 -0.977583 ]
[ 0.480889  -0.7068603 -0.9999679]
[ 0.42581332 -0.99172354 -1.        ]
[ 0.88353163 -0.9985934  -1.        ]
[ 0.30630586 -0.9821078  -1.        ]
[ 0.06760309 -0.86186534 -1.        ]
[ 0.27496612 -0.7823469  -1.        ]
[ 0.06049647 -0.10947889 -1.        ]
[ 0.2574385   0.42407382 -1.        ]
[ 0.380596  -0.0232765 -1.       ]
[ 0.462103   -0.18511006 -1.        ]
[ 0.22267018  0.07469459 -1.        ]
[-0.2979545  0.5986664 -0.9999952]
[-0.42756954  0.86397564 -0.9982948 ]
[-0.29791543  0.9245457  -0.54045856]
[0.6634567  0.9220532  0.99461836]
[0.99987435 0.11792442 0.9999726 ]
[0.99664557 0.9924521  0.85191596]
[ 0.9609363   0.99541444 -0.8919026 ]
[ 0.9570158  0.9941655 -0.8988815]
[ 0.8760384   0.9861432  -0.85877603]
[ 0.5184937  0.9712567 -0.8954794]
[-0.5142108   0.7409102  -0.49048525]
[-0.5631159   0.43610853  0.4243001 ]
[

[ 0.30969617 -0.12789705  0.05980837]
[ 0.41556048  0.60562956 -0.7784822 ]
[-0.16578707 -0.01162759 -0.74420124]
[ 0.23029912  0.46412048 -0.33932573]
[ 0.39526537  0.38281807 -0.14393802]
[0.8758961  0.39752716 0.30299476]
[ 0.897558    0.1840768  -0.24501486]
[0.7439791  0.27352166 0.8576778 ]
[0.1073231 0.6073583 0.9593493]
[-0.56949437  0.729177    0.9696718 ]
[-0.24811198  0.78439444  0.9082147 ]
[-0.00599113  0.41926283  0.9000216 ]
[-0.22856916  0.17861535  0.9604033 ]
[-0.057677   -0.04554025  0.94982976]
[-0.07592941 -0.2450145   0.93175733]
[-0.051658   -0.13411352  0.87489814]
[-0.05340079 -0.04135867  0.8226163 ]
[-0.06158979 -0.00973548  0.78719175]
[-0.01030842 -0.05166904  0.7752992 ]
[ 0.03649998 -0.0224153   0.8036744 ]
[0.06493551 0.07795768 0.7844123 ]
[0.09210487 0.08523491 0.79504395]
[0.1379483  0.07695375 0.8177431 ]
[0.11373544 0.02775656 0.8384055 ]
[0.11559176 0.00843816 0.86104566]
[ 0.09721688 -0.00542466  0.8847084 ]
[0.1334925  0.03137814 0.9108145 ]
[0.1

[ 0.3473112   0.9868327  -0.98171324]
[ 0.06759241  0.9602369  -0.9939208 ]
[-0.6472823   0.88153464  0.49636772]
[0.0782948  0.93290734 0.9506799 ]
[-0.35692966  0.7835199   0.9853169 ]
[0.11028793 0.7434854  0.9888699 ]
[0.04650659 0.66880065 0.9875318 ]
[0.09288672 0.662301   0.98397905]
[0.06523298 0.6143562  0.9807122 ]
[-0.00974149  0.5957327   0.97816294]
[0.01001678 0.60930705 0.9758923 ]
[0.00475713 0.6276864  0.9724997 ]
[0.00303074 0.6434715  0.97008264]
[-0.02060504  0.6742153   0.96705455]
[-0.00956051  0.6983206   0.9655654 ]
[0.0059951  0.69531715 0.9645973 ]
[-0.00584394  0.68281245  0.9626408 ]
[0.01103035 0.692967   0.9568874 ]
[0.0035007 0.7109414 0.951526 ]
[0.00974652 0.72375494 0.9460841 ]
[0.01243031 0.73647165 0.9402071 ]
[0.01425225 0.74943453 0.9326675 ]
[0.01035058 0.7627295  0.9162645 ]
[0.00513797 0.776598   0.8960478 ]
[0.01286803 0.80929834 0.8765523 ]
[0.01895602 0.84213823 0.85286385]
[0.03871215 0.86862123 0.82961977]
[0.06032045 0.88948536 0.8123047 ]

[ 0.02282993 -0.04620621  0.82484406]
[-0.00279692 -0.03765415  0.82279783]
[0.00997971 0.04140148 0.8044106 ]
[0.0231387  0.11028549 0.7879626 ]
[0.01027174 0.15093778 0.7941519 ]
[0.09783807 0.2526764  0.8224278 ]
[0.23381032 0.2749034  0.8460034 ]
[0.3060881  0.03764879 0.9281324 ]
[ 0.36201355 -0.26191458  0.9768137 ]
[ 0.2639721  -0.40066242  0.981676  ]
[ 0.03547498 -0.15867779  0.9822872 ]
[-0.05502898  0.06273274  0.97981054]
[0.05142549 0.1840374  0.980273  ]
[0.14724727 0.08923349 0.9871507 ]
[0.08673579 0.01838779 0.9923314 ]
[0.0387056  0.07717563 0.9910893 ]
[-0.03271155  0.15993002  0.98812073]
[-0.06291738  0.26786822  0.977479  ]
[-0.08648191  0.31925693  0.9227518 ]
[-0.23362067  0.29816267  0.60622096]
[-0.307845    0.55241704 -0.29336578]
[-0.16132769  0.2587617  -0.83917534]
[ 0.15383938  0.05414972 -0.7610927 ]
[-0.2028813   0.3909345  -0.25431776]
[-0.44818276  0.38846028 -0.6041432 ]
[-0.31014523  0.5341825  -0.66218877]
[-0.15199448  0.1366645  -0.31843624]
[-0.

[-0.01552948  0.53535223  0.9081813 ]
[-0.02534274  0.53473985  0.90611166]
[-0.04022458  0.5582676   0.9042373 ]
[-0.04650496  0.5844555   0.90132457]
[-0.02445014  0.60639757  0.8990315 ]
[-0.02612362  0.6187051   0.8871234 ]
[0.00265316 0.6270939  0.87231183]
[0.00126257 0.62710375 0.85893184]
[-0.02180758  0.6384815   0.8454613 ]
[-0.02381822  0.67367244  0.83294535]
[-0.01688787  0.69777286  0.82340723]
[-0.01505495  0.72108287  0.79359436]
[0.000947   0.77202535 0.7515735 ]
[-0.01074441  0.8238818   0.71053934]
[-0.00902209  0.86695844  0.65740484]
[-1.20174605e-04  9.03572083e-01  5.78564763e-01]
[0.02309828 0.9306844  0.4961246 ]
[0.0539863  0.94890386 0.43283555]
[0.12052159 0.9627612  0.1334044 ]
[ 0.11573666  0.97240406 -0.19757636]
[-9.7034517e-04  9.7695005e-01 -4.8735830e-01]
[ 0.02127811  0.98002183 -0.6607096 ]
[ 0.15223931  0.98698336 -0.8216002 ]
[ 0.44435358  0.9948245  -0.7563218 ]
[ 0.44051704  0.9941725  -0.3453561 ]
[-0.17551304  0.98658377 -0.63358134]
[-0.29554

[0.05451005 0.01327647 0.8366046 ]
[-0.01790044 -0.01532936  0.8389199 ]
[-0.00639029  0.04392721  0.8255492 ]
[0.02008623 0.10125437 0.81580985]
[0.08078721 0.2149494  0.83641154]
[0.16240524 0.29343048 0.85483426]
[0.30198273 0.169036   0.9193979 ]
[ 0.4664354  -0.23065172  0.97875345]
[ 0.3774409 -0.5337087  0.9886541]
[-0.01617027 -0.29342154  0.9849771 ]
[-0.1307839   0.1101684   0.97879034]
[0.04248726 0.33453727 0.97921854]
[0.15989989 0.1544788  0.99045396]
[0.10005668 0.00447098 0.9945415 ]
[-0.02541154  0.07373721  0.99215347]
[-0.06492379  0.27457738  0.9880125 ]
[-0.05391117  0.32763097  0.96285284]
[-0.16953434  0.2653057   0.7962147 ]
[-0.28874478  0.42631647  0.18383041]
[-0.42110354  0.37077636 -0.7417237 ]
[ 0.07102532  0.18724325 -0.8935524 ]
[ 0.0378612   0.29276443 -0.5232505 ]
[-0.32795608  0.42390507 -0.26471037]
[-0.45752275  0.44952312 -0.70407444]
[-0.09957737  0.29143804 -0.5955717 ]
[-0.12029427 -0.26923987 -0.18027115]
[-0.18698148 -0.578556   -0.7357601 ]
[

[0.04972748 0.8133277  0.9828953 ]
[0.06677118 0.73590994 0.9841303 ]
[0.07277429 0.68873066 0.9822019 ]
[0.05721903 0.635463   0.9788388 ]
[-0.01174698  0.60280365  0.9763815 ]
[0.00203174 0.61195624 0.9736625 ]
[0.00174628 0.63341993 0.97023904]
[-5.520996e-04  6.470662e-01  9.676694e-01]
[-0.00438189  0.6590049   0.96566737]
[-0.02490655  0.68434566  0.9634339 ]
[0.00434478 0.6894908  0.962629  ]
[-0.00255167  0.6781763   0.9615343 ]
[0.00434179 0.68196356 0.9574031 ]
[0.00612579 0.69931394 0.9511001 ]
[0.0095741  0.71256584 0.94552904]
[0.00908289 0.7255746  0.93970686]
[0.00688763 0.7383721  0.93356514]
[0.00691541 0.7525602  0.918297  ]
[0.00556046 0.7664915  0.89846253]
[0.00503398 0.7935335  0.87723225]
[0.01769928 0.8289203  0.85344666]
[0.0311717  0.8581392  0.82812977]
[0.06289367 0.88117146 0.80919707]
[0.03628444 0.90012753 0.79566866]
[0.02198802 0.91333586 0.7698434 ]
[0.01872859 0.9266232  0.7266834 ]
[0.0613374  0.94423395 0.58917284]
[0.11718199 0.96064705 0.35294548]

KeyboardInterrupt: 