In [87]:
#=======================================#
# Yes, this notebook is over-commented. #
#=======================================#

In [88]:
# Make notebook span entire screen, horizontally.
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [21]:
import gym
import numpy as np
import tensorflow as tf

In [41]:
#===================#
# Utility functions #
#===================#

def process_rewards(rewards, decay, norm=True):
        discounted = np.zeros_like(rewards)
        running_reward = 0
        
        for idx in reversed(range(len(rewards))):
            running_reward += rewards[idx]
            running_reward *= decay
            discounted[idx] = running_reward
            
        if norm:
            discounted -= np.mean(discounted)
            if np.std(discounted) != 0:
                discounted /= np.std(discounted)

        return discounted.tolist()

In [42]:
class PolicyAgent(object):
    def __init__(self, sess):
        self.num_actions = 4
        self._build()
        self.sess = sess
        
    def _build(self):
        self.actions      = tf.placeholder(tf.int32, (None, 1))
        self.columns      = tf.placeholder(tf.int32, (None, 1))
        self.e_encr       = tf.placeholder(tf.float32)
        self.l_rate       = tf.placeholder(tf.float32)
        self.observations = tf.placeholder(tf.float32, (None, 8))
        self.target       = tf.placeholder(tf.float32, (None, 1))
        self.training     = tf.placeholder(tf.bool)
        
        with tf.variable_scope('actor-hidden'):
            h1    = tf.layers.dense(self.observations, 256, 
                                    activation=tf.nn.relu, 
                                    kernel_initializer=tf.contrib.layers.xavier_initializer(), 
                                    name='h1')
            
            drop1 = tf.layers.dropout(h1, training=self.training, name='drop1')
            
            h2    = tf.layers.dense(drop1, 128,
                                    activation=tf.nn.relu, 
                                    kernel_initializer=tf.contrib.layers.xavier_initializer(), 
                                    name='h2')
            
            drop2 = tf.layers.dropout(h2, training=self.training, name='drop2')
            
            h3    = tf.layers.dense(drop2, 64,
                                    activation=tf.nn.relu, 
                                    kernel_initializer=tf.contrib.layers.xavier_initializer(), 
                                    name='h3')
            
            drop3 = tf.layers.dropout(h3, training=self.training, name='dropout')
        
            self.out = tf.layers.dense(drop3, self.num_actions,
                                       kernel_initializer=tf.contrib.layers.xavier_initializer(), 
                                       #kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=0.1),
                                       name='out')
        
        # Compute probabilities associated with each action.
        self.probabilities = tf.clip_by_value(tf.nn.softmax(self.out), 1e-10, 1.0)
        
        # Compute entropy based on action probabilities.
        self.entropy = -tf.reduce_sum(self.probabilities * tf.log(self.probabilities), 1, name="entropy")
        
        # Compute losses of action probabilities associated with each observation in a single batch.
        indices = tf.concat(values=[self.columns, self.actions], axis=1)
        self.picked_action_prob = tf.gather_nd(self.probabilities, indices)
        self.losses = -tf.log(self.picked_action_prob) * self.target - self.entropy * self.e_encr + tf.losses.get_regularization_loss()
        #self.losses = -tf.log(self.picked_action_prob) * self.target + tf.losses.get_regularization_loss()
        
        # Compute batch loss.
        self.loss = tf.reduce_mean(self.losses)
        
        # Set optimizer.
        self.train_op = tf.train.AdamOptimizer(self.l_rate).minimize(self.loss)
    
    def choose_action(self, obs, verbose=False):
        debug = (self.out, self.entropy)
        
        # Compute probabilities associated with each action and output layer node values.
        probs, *results = self.sess.run((self.probabilities,) + (debug if verbose else ()), feed_dict={
            self.observations: np.array(obs).reshape(-1, 8),
            self.training:     False
        })
        
        if verbose: print(probs, *results)
            
        # Choose action based on computed probabilities.
        return np.random.choice(range(probs.shape[1]), p=probs.ravel())
    
    def train(self, act, obs, target, l_rate, e_encr, verbose=False):
        length = np.array(act).reshape(-1, 1).shape[0]
        
        # Values to out when debugging (i.e. verbose=True).
        debug = (self.entropy, self.loss, self.probabilities)
        
        # Results only stores debugging info.
        _, *results = self.sess.run((self.train_op,) + (debug if verbose else ()), feed_dict={
            self.actions:      np.array(act).reshape(-1, 1),
            self.columns:      np.arange(length).reshape(-1, 1),
            self.e_encr:       e_encr,
            self.l_rate:       l_rate,
            self.observations: np.array(obs).reshape(-1, 8),
            self.target:       np.array(target).reshape(-1, 1),
            self.training:     True
        })

        return results

In [43]:
class Critic(object):
    def __init__(self, sess):
        self.sess = sess
        self._build()
        
    def _build(self):
        self.l_rate       = tf.placeholder(tf.float32)
        self.observations = tf.placeholder(tf.float32, (None, 8))
        self.target       = tf.placeholder(tf.float32, (None, 1))
        self.training     = tf.placeholder(tf.bool)
        
        with tf.variable_scope('critic-hidden'):
            h1    = tf.layers.dense(self.observations, 256,
                                    activation=tf.nn.relu,
                                    kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                    name='h1')
            
            drop1 = tf.layers.dropout(h1, name='drop1', training=self.training)
            
            h2    = tf.layers.dense(drop1, 128,
                                    activation=tf.nn.relu,
                                    kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                    name='h2')
            
            drop2 = tf.layers.dropout(h2, name='drop2', training=self.training)
            
            out   = tf.layers.dense(drop2, 1,
                                    kernel_initializer=tf.contrib.layers.xavier_initializer(),
                                    name='out')
            
        self.value_estimate = tf.squeeze(out) # [[num]] -> num
        self.losses = tf.squared_difference(self.value_estimate, self.target)
        self.loss = tf.reduce_mean(self.losses)
        self.train_op = tf.train.AdamOptimizer(self.l_rate).minimize(self.loss)
        
    def predict(self, obs):
        return sess.run(self.value_estimate, feed_dict={
            self.observations: np.array(obs).reshape(-1, 8),
            self.training:     False
        })
    
    def update(self, obs, target, l_rate=0.01):
        inp = (self.train_op, self.loss)
        
        _, *results = self.sess.run(inp, feed_dict={
            self.l_rate:       l_rate,
            self.observations: np.array(obs).reshape(-1, 8),
            self.target:       np.array(target).reshape(-1, 1),
            self.training:     True
        })
        
        return results[0]
    

In [50]:
class ACHandler(object):
    def __init__(self, actor, critic, env, sess, path='./.model.ckpt'):
        self.actor = actor
        self.critic = critic
        self.env = env
        self.sess = sess
    
        self.saver = tf.train.Saver()
        self.path = path

    def init_vars(self):
        self.sess.run(tf.global_variables_initializer())

    def run(self, train_func, rollout=100, a_rate=0.001, c_rate=0.005, decay=0.99, render=False, e_encr=0.007, **kwargs):
        assert isinstance(train_func, str) and train_func.startswith('train_'), \
               'invalid train_func name specified'
        getattr(self, train_func)(self.rollout(rollout, render, decay), a_rate, c_rate, e_encr, **kwargs)
        
        # Close the display window
        if render: self.env.close()
            
    def run_constant_training(self, num_episodes, a_rate=0.001, c_rate=0.005, decay=0.99, e_encr=0.007, render=False, verbose=False):
        """
        Runs training and updates both networks during every time step
        """

        for ep in range(num_episodes):
            obs_curr = env.reset()
            done = False

            if verbose:
                rewards = 0
                a_episode_loss = []
                c_episode_loss = []
            
            while not done:

                if render: self.env.render()
                action = self.actor.choose_action(obs_curr)

                # Take action in environment.
                next_obs, reward, done, _ = self.env.step(action)

                next_estimate = self.critic.predict(next_obs)
                td_target = reward + decay * next_estimate
                td_error = td_target - self.critic.predict(obs_curr)
                c_loss = self.critic.update(obs_curr, td_target, c_rate)
                a_loss = self.actor.train(action, obs_curr, td_error, a_rate, e_encr)
                
                if verbose:
                    rewards += reward
                    a_episode_loss.append(a_loss)
                    c_episode_loss.append(c_loss)

                obs_curr = next_obs
                
            if verbose:
                print('Actor Loss: {0:5f}'.format(np.mean(a_episode_loss)), end='; ')
                print('Critic Loss: {0:5f}'.format(np.mean(c_episode_loss)), end='; ')
                print('Reward: {0:5f}'.format(rewards))
                
            #if ep % 10 == 0: print('{}'.format(ep))
                
    def play(self, verbose=False):
        """
        Runs a single instance of the game without training or storing training information
        Always displays the game and closes the window afterward
        """
        obs_curr = self.env.reset()
        done = False
        
        while not done:
            self.env.render()

            # Agent chooses action based on difference frame.
            action = self.actor.choose_action(obs_curr, verbose=verbose)

            # Take action in environment.
            obs_curr, reward, done, _ = self.env.step(action)
            
        env.close()
        
    def train_rsample(self, batch, a_rate, c_rate, e_encr, num_epochs=50, mini_batch_size=100):
        """
        Performs random mini-batch training on both networks from a given
          set of batch information
        """
        for x in range(num_epochs):
            indices = np.random.randint(len(batch['obs']), size=mini_batch_size)
            loss = self.actor.train([batch['act'][i] for i in indices],
                             [batch['obs'][i] for i in indices],
                             [batch['advantage'][i] for i in indices],
                             a_rate,
                             e_encr)
            self.critic.update([batch['obs'][i] for i in indices],
                               [batch['td_target'][i] for i in indices],
                               c_rate)
 
    def train_all(self, batch, a_rate, c_rate, e_encr, verbose=False):
        """
        Trains both networks on all peices of inromation in the batch
        """
        a_loss = self.actor.train(batch['act'],
                         batch['obs'],
                         batch['advantage'],
                         a_rate,
                         e_encr)
        c_loss = self.critic.update(batch['obs'],
                           batch['td_target'],
                           c_rate)
        
        if verbose:
            print('Actor Loss: {}'.format(a_loss), end='; ')
            print('Critic Loss: {}'.format(c_loss), end='; ')
            print('Batch Reward: {}'.format(batch['avg_rew']))
    
    def compute_advantage(self, obs, rewards, decay):
        disc_rewards = process_rewards(rewards, decay, norm=False)

        policy_target = np.zeros_like(disc_rewards)
        value_target = np.zeros_like(disc_rewards)
        running_reward = 0

        for idx in range(len(disc_rewards)):
            estimate = self.critic.predict(obs[idx])
            td_target = disc_rewards[idx]
            td_error = td_target - estimate
            
            policy_target[idx] = td_error
            value_target[idx] = td_target
        
        return policy_target.tolist(), value_target.tolist()    

    def save(self):
        self.saver.save(self.sess, self.path)
        
    def load(self):
        self.saver.restore(self.sess, self.path)
            
    def rollout(self, count, render, decay):
        batch = {'act': [], 'obs': [], 'rew': [], 'advantage':[], 'td_target':[]}
        rewards = 0
        
        for episode in range(count):
            # Stores all the stuff
            history = {'act': [], 'obs': [], 'rew': [], 'advantage':[], 'td_target':[]}
            
            obs_curr = env.reset()
            done = False

            while not done:
                
                if render: self.env.render()
                # Agent chooses action based on difference frame.
                action = self.actor.choose_action(obs_curr, False)
        
                # Take action in environment.
                next_obs, reward, done, _ = self.env.step(action)
                
                history['act'].append(action)
                history['obs'].append(obs_curr)
                history['rew'].append(reward)
                
                rewards += reward
                
                obs_curr = next_obs

            # Process rewards per episode.
            history['advantage'], history['td_target'] = self.compute_advantage(history['obs'] + obs_curr, history['rew'], decay)
            
            # Add episode to batch.
            for key in batch:
                batch[key].extend(history[key])
                
        batch['avg_rew'] = rewards / count
        
        return batch

In [45]:
tf.reset_default_graph()
env = gym.make('LunarLander-v2') # RGB observation space

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m


In [46]:
sess = tf.Session()
actor = PolicyAgent(sess)
critic = Critic(sess)

In [52]:
handler = ACHandler(actor, critic, env, sess, '.models/l1.cpt')

In [53]:
handler.init_vars()

In [None]:
actor.out.eval(session=actor.sess, feed_dict={actor.observations: np.array([0,0,0,0,0,0,0,0]).reshape(-1, 8), actor.training: False})

In [None]:
test = tf.trainable_variables()[6]
actor.sess.run(test)

In [None]:
while(True):
    for _ in range(100):
        handler.run('train_rsample', render=True)
        print('-',end='')
    print('\nCompleted 100 Training Iterations\n')
    handler.save()

In [139]:
while(True):
    for _ in range(100):
        handler.run('train_all', rollout=40, a_rate=0.001, c_rate=0.005, decay=0.99, render=False, verbose=True)
        handler.play()
    print('Completed 100 Training Iterations\n')
    handler.save()

Actor Loss: -192.99720764160156; Critic Loss: 17763.07421875; Batch Reward: -222.14001914435758
Actor Loss: -353.9658508300781; Critic Loss: 44000.82421875; Batch Reward: -365.3684631205898
Actor Loss: -451.3711853027344; Critic Loss: 70146.1015625; Batch Reward: -473.4783936917705
Actor Loss: -499.69598388671875; Critic Loss: 94425.90625; Batch Reward: -536.8569117678132
Actor Loss: -495.0347900390625; Critic Loss: 80561.5390625; Batch Reward: -502.1803172433084
Actor Loss: -543.6956787109375; Critic Loss: 72955.7421875; Batch Reward: -472.1208676535738
Actor Loss: -650.0604248046875; Critic Loss: 72310.515625; Batch Reward: -503.2593137761729
Actor Loss: -680.5064086914062; Critic Loss: 65563.484375; Batch Reward: -500.6445820728396
Actor Loss: -746.093017578125; Critic Loss: 64139.36328125; Batch Reward: -493.93513679631496
Actor Loss: -827.4739990234375; Critic Loss: 66718.75; Batch Reward: -529.0597672005358
Actor Loss: -946.7506103515625; Critic Loss: 67798.3203125; Batch Reward:

KeyboardInterrupt: 

In [36]:
handler.run_constant_training(200, render=True, decay=0.99, a_rate=0.002, c_rate=0.01, e_encr=0.008, verbose=False)

0
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190


In [48]:
handler.load()

INFO:tensorflow:Restoring parameters from .models/l1.cpt


InvalidArgumentError: Assign requires shapes of both tensors to match. lhs shape= [128,1] rhs shape= [64,1]
	 [[Node: save/Assign_45 = Assign[T=DT_FLOAT, _class=["loc:@critic-hidden/out/kernel"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](critic-hidden/out/kernel/Adam_1, save/RestoreV2:45)]]

Caused by op 'save/Assign_45', defined at:
  File "/usr/local/lib/python3.5/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/local/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tornado/platform/asyncio.py", line 112, in start
    self.asyncio_loop.run_forever()
  File "/usr/local/lib/python3.5/asyncio/base_events.py", line 421, in run_forever
    self._run_once()
  File "/usr/local/lib/python3.5/asyncio/base_events.py", line 1425, in _run_once
    handle._run()
  File "/usr/local/lib/python3.5/asyncio/events.py", line 127, in _run
    self._callback(*self._args)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tornado/ioloop.py", line 760, in _run_callback
    ret = callback()
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 536, in <lambda>
    self.io_loop.add_callback(lambda : self._handle_events(self.socket, 0))
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tornado/stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-47-3ec99c8de25c>", line 1, in <module>
    handler = ACHandler(actor, critic, env, sess, '.models/l1.cpt')
  File "<ipython-input-44-ddf3c0e04e55>", line 8, in __init__
    self.saver = tf.train.Saver()
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1311, in __init__
    self.build()
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1320, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 1357, in _build
    build_save=build_save, build_restore=build_restore)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 809, in _build_internal
    restore_sequentially, reshape)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 470, in _AddRestoreOps
    assign_ops.append(saveable.restore(saveable_tensors, shapes))
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tensorflow/python/training/saver.py", line 162, in restore
    self.op.get_shape().is_fully_defined())
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tensorflow/python/ops/state_ops.py", line 281, in assign
    validate_shape=validate_shape)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tensorflow/python/ops/gen_state_ops.py", line 61, in assign
    use_locking=use_locking, name=name)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 3290, in create_op
    op_def=op_def)
  File "/home/ian/Projects/github/notsciibot/.env/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1654, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Assign requires shapes of both tensors to match. lhs shape= [128,1] rhs shape= [64,1]
	 [[Node: save/Assign_45 = Assign[T=DT_FLOAT, _class=["loc:@critic-hidden/out/kernel"], use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:CPU:0"](critic-hidden/out/kernel/Adam_1, save/RestoreV2:45)]]


In [None]:
handler.save()

In [None]:
env.unwrapped.close()

In [39]:
while True: handler.play(verbose=True)

[[1.e-10 1.e-10 1.e-10 1.e+00]] [[  297.6108  1755.5369 -6798.5303  9113.424 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[  234.37302  1329.7737  -6256.008    8863.769  ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[  167.72412   912.84436 -5714.8384   8609.787  ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[   94.69423   457.91653 -5123.977    8335.46   ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[   27.899088    71.21211  -4583.2627    8046.518   ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[  -42.170586  -320.42508  -4026.5054    7735.3096  ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[ -110.61337  -695.7963  -3484.7678   7424.968  ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[ -185.30452 -1087.7559  -2915.9753   7103.2407 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[ -256.94238 -1428.6646  -2386.253    6809.894  ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[ -333.1385 -1819.2556 -1750.0089  6513.786 ]] [6.9077553e-09]


[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-2282.7712  4246.2173  4607.9785 -3670.193 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-2296.657   4081.8386  4769.955  -3666.3867]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-2239.123   3929.1091  4680.622  -3554.4321]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-2249.125   3704.4268  4868.214  -3535.141 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-2210.8352  3508.0308  4871.3525 -3443.6511]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-2150.5876  3008.485   5005.804  -3282.685 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-2105.0835  2498.5977  5199.966  -3142.6218]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-2127.787   2024.4642  5609.215  -3111.9978]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-2106.4731  1528.7599  5874.3613 -3004.4868]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-2040.6537   973.6713  6017.709  -2812.2483]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1960.

[[1.e-10 1.e-10 1.e-10 1.e+00]] [[ -958.91425 -4502.332    1891.9116   6086.825  ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[-1111.6033 -4806.3965  2643.2236  5708.879 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[-1318.3029 -5095.764   3433.3215  5335.6016]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[-1545.547  -5422.1616  4295.3267  4954.513 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1756.8058 -5715.1465  5120.2275  4562.1504]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1690.8503 -5664.5947  4850.484   4763.307 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[-1585.8794 -5524.9517  4460.0396  4938.853 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1805.9075 -5808.7246  5306.482   4522.776 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1820.193  -5810.134   5366.0615  4466.757 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1683.584  -5700.6597  4843.496   4819.6177]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[-1

[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-2095.524   3566.619   4402.5874 -3297.839 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-2055.9512  3359.3452  4409.284  -3206.5295]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1955.5432  3022.7258  4294.0796 -3005.3318]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1831.253   2960.654   3894.861  -2783.1548]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1758.5935  2712.1082  3805.1047 -2624.9136]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1734.4541  2356.0356  3958.5308 -2529.6643]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1632.4597  2061.274   3798.7825 -2310.761 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1581.8273  1934.052   3688.928  -2186.7607]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1480.0979  1710.6565  3456.0496 -1959.8673]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1340.1056  1503.7592  3066.3264 -1660.9653]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1275.

[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3457.1084 -9485.13   10782.343   5397.557 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3474.1196 -9262.936  10953.647   4852.891 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3457.671  -8985.752  11016.94    4323.3877]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3455.397  -8768.965  11107.248   3880.4587]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3407.6255 -8462.497  11037.232   3454.5989]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3467.9238 -8440.704  11296.882   3177.9514]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3482.6067 -8285.712  11428.697   2798.9934]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3554.5544 -8186.224  11778.729   2303.3093]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3584.9043 -8035.157  11966.287   1901.4911]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3651.3757 -7998.9697 12259.08    1584.7021]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3694.

[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1172.6725 -3288.5037  3566.6018  2286.3157]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[ -924.53784 -2990.7615   2616.856    2773.9832 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1166.2611 -3287.6553  3551.3003  2311.9827]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1174.1428 -3269.1038  3577.818   2267.4287]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1027.3458 -3033.948   3000.248   2503.7317]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1063.2404 -2822.4182  3136.0608  2097.2195]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -889.8374 -2233.3389  2466.035   1979.9601]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -850.7429 -2024.9926  2321.8357  1864.5948]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -787.65546 -1642.5214   2050.5554   1650.8827 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[ -683.00604 -1063.608    1520.9431   1458.6726 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+0

[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1466.7606 -3964.6365  3723.514   3628.6665]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[-1464.2667 -4206.4766  3592.6506  4144.73  ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[-1521.44  -4424.883  3893.336  4148.201]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1585.9147 -4639.326   4227.5254  4115.8965]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[-1468.6967 -4788.391   3452.4258  5229.324 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[-1531.0203 -5038.717   3804.5742  5214.0806]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[-1594.1395 -5290.398   4159.1807  5198.291 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[-1657.8911 -5563.278   4533.85    5189.6577]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[-1758.9019 -5794.553   4999.485   5050.2773]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1864.443  -6036.8315  5403.318   5013.114 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e-10 1.e+00]] [[-1861.331 

[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3724.3455     88.96367  9793.054   -3564.7427 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3832.2188   411.6116  9969.884  -3788.9377]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3943.0764   619.4115 10173.966  -3957.732 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-4072.8      816.3077 10447.928  -4155.0894]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-4244.9033  1068.0574 10826.336  -4423.593 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-4361.2607  1225.4897 11078.033  -4591.2573]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-4509.1743  1430.7064 11399.75   -4809.35  ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-4706.0854  1702.9408 11833.004  -5103.4395]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-4881.718   1769.7887 12351.783  -5363.155 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-5023.1484  1840.5724 12746.511  -5562.989 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-5

[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1827.2532 -5752.008   5685.1445  4126.1226]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1816.7314 -5527.768   5724.857   3736.6516]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1774.1045 -5363.8887  5609.944   3597.6782]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1847.3435 -5308.868   5920.9697  3217.5984]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1848.2289 -5178.225   5943.855   3011.8918]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1837.7749 -4778.3096  5950.6177  2422.4778]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1759.5103 -4288.5874  5647.1914  2044.2643]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1635.4385 -3586.595   5123.643   1638.9441]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1643.033  -3136.055   5089.6025  1096.2009]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1541.0388 -2618.4033  4628.134    916.5019]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-1439.

[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3270.7227 -2591.2715 11400.977  -3324.4297]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3262.667  -2992.681  11298.42   -2972.9983]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3318.438  -3388.8657 11444.267  -2760.8484]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3344.814  -3843.4792 11459.605  -2407.7493]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3369.754  -4374.9824 11255.571  -1744.2623]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3409.2988 -4839.374  11023.343  -1055.7306]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3453.1526 -5168.5576 10919.514   -554.3468]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3318.5066  -5532.9736  10309.626     467.76624]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3176.6753 -5882.0254  9540.087   1673.5125]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3099.2034 -6200.233   8906.394   2771.159 ]] [6.9077553e-09]
[[1.e-10 1.e-10 1.e+00 1.e-10]] [[-3

KeyboardInterrupt: 