In [87]:
#=======================================#
# Yes, this notebook is over-commented. #
#=======================================#

In [88]:
# Make notebook span entire screen, horizontally.
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [89]:
import gym
import numpy as np
import tensorflow as tf

In [123]:
#===================#
# Utility functions #
#===================#

def process_rewards(rewards, decay, norm=True):
        discounted = np.zeros_like(rewards)
        running_reward = 0
        
        for idx in reversed(range(len(rewards))):
            running_reward += rewards[idx]
            running_reward *= decay
            discounted[idx] = running_reward
            
        if norm:
            discounted -= np.mean(discounted)
            if np.std(discounted) != 0:
                discounted /= np.std(discounted)

        return discounted.tolist()

In [132]:
class PolicyAgent(object):
    def __init__(self, sess):
        self.num_actions = 4
        self._build()
        self.sess = sess
        
    def _build(self):
        self.actions      = tf.placeholder(tf.int32, (None, 1))
        self.columns      = tf.placeholder(tf.int32, (None, 1))
        self.e_encr       = tf.placeholder(tf.float32)
        self.e_dscr       = tf.placeholder(tf.float32)
        self.l_rate       = tf.placeholder(tf.float32)
        self.observations = tf.placeholder(tf.float32, (None, 8))
        self.target       = tf.placeholder(tf.float32, (None, 1))
        self.training     = tf.placeholder(tf.bool)
        
        with tf.variable_scope('actor-hidden'):
            h1    = tf.layers.dense(self.observations, 256, 
                                    activation=tf.nn.relu, 
                                    kernel_initializer=tf.contrib.layers.xavier_initializer(), 
                                    name='h1')
            
            drop1 = tf.layers.dropout(h1, training=self.training, name='drop1')
            
            h2    = tf.layers.dense(drop1, 128,
                                    activation=tf.nn.relu, 
                                    kernel_initializer=tf.contrib.layers.xavier_initializer(), 
                                    name='h2')
            
            drop2 = tf.layers.dropout(h2, training=self.training, name='drop2')
            
            h3    = tf.layers.dense(drop2, 64,
                                    activation=tf.nn.relu, 
                                    kernel_initializer=tf.contrib.layers.xavier_initializer(), 
                                    name='h3')
            
            drop3 = tf.layers.dropout(h3, training=self.training, name='dropout')
        
            self.out = tf.layers.dense(drop3, self.num_actions,
                                       kernel_initializer=tf.random_normal_initializer(), 
                                       kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.1),
                                       name='out')
        
        # Compute probabilities associated with each action.
        self.probabilities = tf.clip_by_value(tf.nn.softmax(self.out), 1e-10, 1.0)
        
        # Compute entropy based on action probabilities.
        self.entropy = -tf.reduce_sum(self.probabilities * tf.log(self.probabilities), 1, name="entropy")
        
        # Compute losses of action probabilities associated with each observation in a single batch.
        indices = tf.concat(values=[self.columns, self.actions], axis=1)
        self.picked_action_prob = tf.gather_nd(self.probabilities, indices)
        self.losses = -tf.log(self.picked_action_prob) * self.target - self.entropy * self.e_encr + tf.losses.get_regularization_loss()
        #self.losses = -tf.log(self.picked_action_prob) * self.target + tf.losses.get_regularization_loss()
        
        # Compute batch loss.
        self.loss = tf.reduce_mean(self.losses)
        
        # Set optimizer.
        self.train_op = tf.train.AdamOptimizer(self.l_rate).minimize(self.loss)
    
    # NOTE: computing `out` from `self.out` is not necessary -- just for debugging
    def choose_action(self, obs, verbose=False):
        # Compute probabilities associated with each action and output layer node values.
        out, probs, ent = self.sess.run([self.out, self.probabilities, self.entropy], feed_dict={
            self.observations: np.array(obs).reshape(-1, 8),
            self.training:     False
        })
        
        if verbose: print(probs, out, ent)
            
        # Choose action based on computed probabilities.
        return np.random.choice(range(probs.shape[1]), p=probs.ravel())
    
    def train(self, act, obs, target, l_rate, e_encr, e_dscr):
        length = np.array(act).reshape(-1, 1).shape[0]
        
        inp = (self.train_op, self.loss, self.probabilities, self.entropy, self.out)
        
        _, *results = self.sess.run(inp, feed_dict={
            self.actions:      np.array(act).reshape(-1, 1),
            self.columns:      np.arange(length).reshape(-1, 1),
            self.e_encr:       e_encr,
            self.e_dscr:       e_dscr,
            self.l_rate:       l_rate,
            self.observations: np.array(obs).reshape(-1, 8),
            self.target:       np.array(target).reshape(-1, 1),
            self.training:     True
        })
        
        # print('-' * 32)
        # print('\n'.join(results))
        # print('-' * 32)

        return results[0]


In [133]:
class Critic(object):
    def __init__(self, sess):
        self.sess = sess
        self._build()
        
    def _build(self):
        self.l_rate       = tf.placeholder(tf.float32)
        self.observations = tf.placeholder(tf.float32, (None, 8))
        self.target       = tf.placeholder(tf.float32, (None, 1))
        self.training     = tf.placeholder(tf.bool)
        
        with tf.variable_scope('critic-hidden'):
            h1    = tf.layers.dense(self.observations, 256,
                                    activation=tf.nn.relu,
                                    kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                    name='h1')
            
            drop1 = tf.layers.dropout(h1, name='drop1', training=self.training)
            
            h2    = tf.layers.dense(drop1, 128,
                                    activation=tf.nn.relu,
                                    kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                    name='h2')
            
            drop2 = tf.layers.dropout(h2, name='drop2', training=self.training)
            
            out   = tf.layers.dense(drop2, 1,
                                    kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                    name='out')
            
        self.value_estimate = tf.squeeze(out) # [[num]] -> num
        self.losses = tf.squared_difference(self.value_estimate, self.target)
        self.loss = tf.reduce_mean(self.losses)
        self.train_op = tf.train.AdamOptimizer(self.l_rate).minimize(self.loss)
        
    def predict(self, obs):
        return sess.run(self.value_estimate, feed_dict={
            self.observations: np.array(obs).reshape(-1, 8),
            self.training:     False
        })
    
    def update(self, obs, target, l_rate=0.01):
        inp = (self.train_op, self.loss)
        
        _, *results = self.sess.run(inp, feed_dict={
            self.l_rate:       l_rate,
            self.observations: np.array(obs).reshape(-1, 8),
            self.target:       np.array(target).reshape(-1, 1),
            self.training:     True
        })
        
        return results[0]
    

In [134]:
class ACHandler(object):
    def __init__(self, actor, critic, env, sess, path='./.model.ckpt'):
        self.actor = actor
        self.critic = critic
        self.env = env
        self.sess = sess
    
        self.saver = tf.train.Saver()
        self.path = path

    def init_vars(self):
        self.sess.run(tf.global_variables_initializer())

    def run(self, train_func, rollout=100, a_rate=0.001, c_rate=0.005, decay=0.99, render=False, e_encr=0.007, e_dscr=0.01, **kwargs):
        assert isinstance(train_func, str) and train_func.startswith('train_'), \
               'invalid train_func name specified'
        getattr(self, train_func)(self.rollout(rollout, render, decay), a_rate, c_rate, e_encr, e_dscr, **kwargs)
        
        # Close the display window
        if render: self.env.close()
            
    def run_constant_training(self, num_episodes, a_rate=0.001, c_rate=0.005, decay=0.99, e_encr=0.007, e_dscr=0.01, render=False, verbose=False):
        """
        Runs training and updates both networks during every time step
        """
        
        for _ in range(num_episodes):
            obs_curr = env.reset()
            done = False

            if verbose:
                rewards = 0
                a_episode_loss = []
                c_episode_loss = []
            
            while not done:

                if render: self.env.render()
                action = self.actor.choose_action(obs_curr)

                # Take action in environment.
                next_obs, reward, done, _ = self.env.step(action)

                next_estimate = self.critic.predict(next_obs)
                td_target = reward + decay * next_estimate
                td_error = td_target - self.critic.predict(obs_curr)
                c_loss = self.critic.update(obs_curr, td_target, c_rate)
                a_loss = self.actor.train(action, obs_curr, td_error, a_rate, e_encr, e_dscr)
                
                if verbose:
                    rewards += reward
                    a_episode_loss.append(a_loss)
                    c_episode_loss.append(c_loss)

                obs_curr = next_obs
                
            if verbose:
                print('Actor Loss: {0:5f}'.format(np.mean(a_episode_loss)), end='; ')
                print('Critic Loss: {0:5f}'.format(np.mean(c_episode_loss)), end='; ')
                print('Reward: {0:5f}'.format(rewards))
                
    def play(self, verbose=False):
        """
        Runs a single instance of the game without training or storing training information
        Always displays the game and closes the window afterward
        """
        obs_curr = self.env.reset()
        done = False
        
        while not done:
            self.env.render()

            # Agent chooses action based on difference frame.
            action = self.actor.choose_action(obs_curr, verbose=verbose)

            # Take action in environment.
            obs_curr, reward, done, _ = self.env.step(action)
            
        env.close()
        
    def train_rsample(self, batch, a_rate, c_rate, e_encr, e_dscr, num_epochs=50, mini_batch_size=100):
        """
        Performs random mini-batch training on both networks from a given
          set of batch information
        """
        for x in range(num_epochs):
            indices = np.random.randint(len(batch['obs']), size=mini_batch_size)
            loss = self.actor.train([batch['act'][i] for i in indices],
                             [batch['obs'][i] for i in indices],
                             [batch['advantage'][i] for i in indices],
                             a_rate,
                             e_encr,
                             e_dscr)
            self.critic.update([batch['obs'][i] for i in indices],
                               [batch['td_target'][i] for i in indices],
                               c_rate)
 
    def train_all(self, batch, a_rate, c_rate, e_encr, e_dscr, verbose=False):
        """
        Trains both networks on all peices of inromation in the batch
        """
        a_loss = self.actor.train(batch['act'],
                         batch['obs'],
                         batch['advantage'],
                         a_rate,
                         e_encr,
                         e_dscr)
        c_loss = self.critic.update(batch['obs'],
                           batch['td_target'],
                           c_rate)
        
        if verbose:
            print('Actor Loss: {}'.format(a_loss), end='; ')
            print('Critic Loss: {}'.format(c_loss), end='; ')
            print('Batch Reward: {}'.format(batch['avg_rew']))
    
    def compute_advantage(self, obs, rewards, decay):
        disc_rewards = process_rewards(rewards, decay, norm=False)

        policy_target = np.zeros_like(disc_rewards)
        value_target = np.zeros_like(disc_rewards)
        running_reward = 0

        for idx in range(len(disc_rewards)):
            estimate = self.critic.predict(obs[idx])
            td_target = disc_rewards[idx]
            td_error = td_target - estimate
            
            policy_target[idx] = td_error
            value_target[idx] = td_target
        
        return policy_target.tolist(), value_target.tolist()    

    def save(self):
        self.saver.save(self.sess, self.path)
        
    def load(self):
        self.saver.restore(self.sess, self.path)
            
    def rollout(self, count, render, decay):
        batch = {'act': [], 'obs': [], 'rew': [], 'advantage':[], 'td_target':[]}
        rewards = 0
        
        for episode in range(count):
            # Stores all the stuff
            history = {'act': [], 'obs': [], 'rew': [], 'advantage':[], 'td_target':[]}
            
            obs_curr = env.reset()
            done = False

            while not done:
                
                if render: self.env.render()
                # Agent chooses action based on difference frame.
                action = self.actor.choose_action(obs_curr, False)
        
                # Take action in environment.
                next_obs, reward, done, _ = self.env.step(action)
                
                history['act'].append(action)
                history['obs'].append(obs_curr)
                history['rew'].append(reward)
                
                rewards += reward
                
                obs_curr = next_obs

            # Process rewards per episode.
            history['advantage'], history['td_target'] = self.compute_advantage(history['obs'] + obs_curr, history['rew'], decay)
            
            # Add episode to batch.
            for key in batch:
                batch[key].extend(history[key])
                
        batch['avg_rew'] = rewards / count
        
        return batch

        

In [135]:
tf.reset_default_graph()
env = gym.make('LunarLander-v2') # RGB observation space

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [136]:
sess = tf.Session()
actor = PolicyAgent(sess)
critic = Critic(sess)

In [137]:
handler = ACHandler(actor, critic, env, sess, '.models/l1.cpt')

In [138]:
handler.init_vars()

In [None]:
actor.out.eval(session=actor.sess, feed_dict={actor.observations: np.array([0,0,0,0,0,0,0,0]).reshape(-1, 8), actor.training: False})

In [None]:
test = tf.trainable_variables()[6]
actor.sess.run(test)

In [None]:
while(True):
    for _ in range(100):
        handler.run('train_rsample', render=True)
        print('-',end='')
    print('\nCompleted 100 Training Iterations\n')
    handler.save()

In [139]:
while(True):
    for _ in range(100):
        handler.run('train_all', rollout=40, a_rate=0.001, c_rate=0.005, decay=0.99, render=False, verbose=True)
        handler.play()
    print('Completed 100 Training Iterations\n')
    handler.save()

Actor Loss: -192.99720764160156; Critic Loss: 17763.07421875; Batch Reward: -222.14001914435758
Actor Loss: -353.9658508300781; Critic Loss: 44000.82421875; Batch Reward: -365.3684631205898
Actor Loss: -451.3711853027344; Critic Loss: 70146.1015625; Batch Reward: -473.4783936917705
Actor Loss: -499.69598388671875; Critic Loss: 94425.90625; Batch Reward: -536.8569117678132
Actor Loss: -495.0347900390625; Critic Loss: 80561.5390625; Batch Reward: -502.1803172433084
Actor Loss: -543.6956787109375; Critic Loss: 72955.7421875; Batch Reward: -472.1208676535738
Actor Loss: -650.0604248046875; Critic Loss: 72310.515625; Batch Reward: -503.2593137761729
Actor Loss: -680.5064086914062; Critic Loss: 65563.484375; Batch Reward: -500.6445820728396
Actor Loss: -746.093017578125; Critic Loss: 64139.36328125; Batch Reward: -493.93513679631496
Actor Loss: -827.4739990234375; Critic Loss: 66718.75; Batch Reward: -529.0597672005358
Actor Loss: -946.7506103515625; Critic Loss: 67798.3203125; Batch Reward:

KeyboardInterrupt: 

In [45]:
handler.run_constant_training(1000, render=False, decay=0.99, a_rate=0.002, c_rate=0.01, e_encr=0.008, e_dscr=0.5, verbose=True)

Actor Loss: -1.102604; Critic Loss: 92.959969; Reward: -167.420054
Actor Loss: -4.083945; Critic Loss: 65.155907; Reward: -274.562861
Actor Loss: -5.609140; Critic Loss: 154.758850; Reward: -300.354072
Actor Loss: -1.852062; Critic Loss: 115.803299; Reward: -117.323924
Actor Loss: -3.397680; Critic Loss: 98.200096; Reward: -345.271298
Actor Loss: 0.144281; Critic Loss: 53.094658; Reward: -117.255758
Actor Loss: -3.379443; Critic Loss: 96.439491; Reward: -449.202999
Actor Loss: -1.541837; Critic Loss: 70.687477; Reward: -311.992834
Actor Loss: -4.872244; Critic Loss: 451.358154; Reward: -365.169829
Actor Loss: -73.236526; Critic Loss: 1090.031982; Reward: -547.764891
Actor Loss: -66.413559; Critic Loss: 1715.917969; Reward: -599.470707
Actor Loss: -68.745049; Critic Loss: 448.883423; Reward: -381.702373
Actor Loss: -40.764503; Critic Loss: 534.971985; Reward: -345.667928
Actor Loss: -22.065826; Critic Loss: 337.933228; Reward: -643.683849
Actor Loss: -101.293900; Critic Loss: 714.236450

KeyboardInterrupt: 

In [None]:
handler.load()

In [None]:
handler.save()

In [None]:
env.unwrapped.close()

In [140]:
while True: handler.play(verbose=True)

[[3.5235979e-09 1.0000000e-10 1.0000000e+00 2.8055249e-09]] [[-3.465424 -7.706469 15.998359 -3.693317]] [1.2613062e-07]
[[3.3537222e-09 1.0000000e-10 1.0000000e+00 2.6090632e-09]] [[-3.478635  -7.6648073 16.03456   -3.7297156]] [1.1931067e-07]
[[3.156166e-09 1.000000e-10 1.000000e+00 2.409935e-09]] [[-3.4932194 -7.638863  16.080688  -3.7629783]] [1.11903034e-07]
[[3.0153879e-09 1.0000000e-10 1.0000000e+00 2.3155704e-09]] [[-3.5110524 -7.6466517 16.108484  -3.7751257]] [1.07505e-07]
[[2.8899920e-09 1.0000000e-10 1.0000000e+00 2.2116367e-09]] [[-3.5353448 -7.6161404 16.126667  -3.8028655]] [1.03202524e-07]
[[2.6014009e-09 1.0000000e-10 1.0000000e+00 1.9917310e-09]] [[-3.5530565 -7.665306  16.214159  -3.8201034]] [9.3627904e-08]
[[2.2766899e-09 1.0000000e-10 1.0000000e+00 1.7210747e-09]] [[-3.5696766 -7.707111  16.330866  -3.8494518]] [8.2341785e-08]
[[2.070026e-09 1.000000e-10 1.000000e+00 1.568016e-09]] [[-3.5868878 -7.7515416 16.408817  -3.8646371]] [7.5483314e-08]
[[1.8248693e-09 1.00

[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -6.954695 -13.183489  30.87839   -7.346439]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -7.0256567 -13.299026   31.212837   -7.435204 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -7.113294 -13.439497  31.543415  -7.504333]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -7.200504  -13.581532   31.876928   -7.5753207]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -7.3068666 -13.731194   32.25843    -7.6620374]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -7.3904657 -13.882644   32.571888   -7.7305064]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -7.476049 -14.02269   32.91665   -7.815146]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -7.567901 -14.160492  33.288807  -7.91057 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -7.647845  -14.299104   33.63346    -7.9980674]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -7.7185063 -14.43843    33.93993    -8.071051 ]] [6.9077553e-09]
[[1.e-10 1.e-10 

[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-4.595944  -8.582352  19.718756  -4.5806904]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-4.623684  -8.618468  19.866913  -4.6370006]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-4.670689 -8.676723 20.081583 -4.709671]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-4.706766  -8.720913  20.268314  -4.7777634]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-4.74395  -8.77837  20.442799 -4.830565]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-4.8217373 -8.894908  20.724033  -4.8930464]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-4.902955 -9.014181 21.023071 -4.963469]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-4.9755273 -9.121807  21.297537  -5.0317907]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-5.0675435 -9.257581  21.635145  -5.1127434]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-5.17952   -9.423356  22.022928  -5.1984477]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-5.295654  -9.5936

[[1.000000e-10 1.000000e-10 1.000000e+00 2.002491e-10]] [[-4.3261256 -9.923803  18.771437  -3.5600233]] [9.0770245e-09]
[[1.0000000e-10 1.0000000e-10 1.0000000e+00 1.7941963e-10]] [[-4.3613873 -9.96665   18.866222  -3.575072 ]] [8.631579e-09]
[[1.000000e-10 1.000000e-10 1.000000e+00 1.607188e-10]] [[-4.405321  -9.998985  18.957586  -3.5937796]] [8.229598e-09]
[[1.0000000e-10 1.0000000e-10 1.0000000e+00 1.4127684e-10]] [[ -4.4471803 -10.049633   19.068266   -3.6120346]] [7.809371e-09]
[[1.000000e-10 1.000000e-10 1.000000e+00 1.201989e-10]] [[ -4.4857426 -10.121696   19.209606   -3.6322668]] [7.3507382e-09]
[[1.0000000e-10 1.0000000e-10 1.0000000e+00 1.0907412e-10]] [[ -4.515585  -10.168499   19.292921   -3.6460717]] [7.1072206e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -4.5543156 -10.219641   19.399212   -3.665866 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -4.594158  -10.275609   19.51487    -3.6873622]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -4.636077 -10.392642  19

[[1.5222044e-09 1.0000000e-10 1.0000000e+00 1.0451875e-09]] [[-3.6441758 -7.6945834 16.65893   -4.0201397]] [5.4821566e-08]
[[1.3673088e-09 1.0000000e-10 1.0000000e+00 9.2516744e-10]] [[-3.6576524 -7.721122  16.75277   -4.0482774]] [4.9454385e-08]
[[1.2383627e-09 1.0000000e-10 1.0000000e+00 8.3967483e-10]] [[-3.6854582 -7.734692  16.824018  -4.0739884]] [4.5248285e-08]
[[1.1075867e-09 1.0000000e-10 1.0000000e+00 7.5362916e-10]] [[-3.7186804 -7.744737  16.902403  -4.1037183]] [4.0973045e-08]
[[9.767672e-10 1.000000e-10 1.000000e+00 6.609617e-10]] [[-3.7416048 -7.7768345 17.005167  -4.132158 ]] [3.6538314e-08]
[[8.757505e-10 1.000000e-10 1.000000e+00 5.971154e-10]] [[-3.7692094 -7.8106766 17.086731  -4.152179 ]] [3.3249265e-08]
[[7.580594e-10 1.000000e-10 1.000000e+00 5.217042e-10]] [[-3.8119116 -7.8287168 17.188349  -4.185572 ]] [2.9372892e-08]
[[6.572004e-10 1.000000e-10 1.000000e+00 4.483915e-10]] [[-3.8366954 -7.8657675 17.306337  -4.2190175]] [2.584958e-08]
[[5.803425e-10 1.000000e-

[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -9.2118225 -17.643776   40.53438   -10.593026 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -9.289897 -17.827929  40.912594 -10.683527]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -9.349884 -17.996294  41.270947 -10.769176]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -9.42878  -18.187597  41.65005  -10.852162]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -9.52543  -18.371168  42.07102  -10.961983]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -9.612126 -18.553986  42.479538 -11.064652]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -9.727381 -18.748182  42.93342  -11.187228]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -9.814521 -18.923325  43.362392 -11.303536]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -9.924901 -19.123447  43.813526 -11.420456]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-10.040878 -19.321287  44.28523  -11.548931]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1

[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-21.69398  -45.352863  98.28629  -23.906872]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-21.8693   -45.77205   99.11328  -24.077038]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-22.053953 -46.213272  99.988556 -24.260595]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-22.229118 -46.654335 100.86005  -24.438951]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-22.429014 -47.1119   101.76794  -24.63144 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-22.609232 -47.54617  102.62657  -24.807184]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-22.78608  -47.993847 103.506744 -24.982704]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-22.97138  -48.46649  104.42808  -25.165447]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-23.153591 -48.937336 105.345856 -25.34584 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-23.320698 -49.38681  106.22346  -25.514086]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-23.51

KeyboardInterrupt: 