In [1]:
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import time
import unittest
import math
import gym
import numpy as np

#env = gym.make('CartPole-v0')


env = gym.make('CartPole-v0')
env.reset()


# Qlearner for CartPole
class QLearner():
    def __init__(self, env):
        self.env = env
        self.epsilon_start = 1. # 1-epsilon probability of selecting best action. The epsilon is the greedy
        self.epsilon_end = 0.01 # final epsilon value
        self.epsilon_decay = 500 # steps before epsilon reaches epsilon_end
        self.epsilon_step = 0 #step tracker
        self.alpha = 0.1
        self.gamma = 0.99
        
        # get initial state, divide continuous states into discrete bins
#         self.bins = [np.linspace(env.observation_space.low[i], env.observation_space.high[i], 7) for i in range(4)]
        self.bins = []
        #self.bins.append(np.linspace(-2.4, 2.4, 5))
        #self.bins.append(np.linspace(-0.5, 0.5, 5))
        self.bins.append(np.linspace(-2.4, 2.4, 1))
        self.bins.append(np.linspace(-0.5, 0.5, 1))
        self.bins.append(np.linspace(-41.8, 41.8, 5))
        self.bins.append(np.linspace(-math.radians(50), math.radians(50), 5))
        self.reset()
        
        #initialize Q
#         self.Q = np.zeros((20, 20, 20, 20, 2))
        self.Q = {}
#         print(self.bins)

        self.episode_durations = []
        
    
    def getQ(self, s, action=None):
#         return self.Q[s[0], s[1], s[2], s[3], action]
#         print('getQ:', s, action)
        if not s in self.Q:
            self.Q[s] = [0, 0]

        if action is not None:
            return self.Q[s][action]
        else:
            return self.Q[s]
    
    
    def setQ(self, s, s_plus1, action, r):
#         print("Setting Q: ")
#         print("s: ", s, "s_plus1: ", s_plus1, " action: ", action, " r: ", r)
#         print("s:", s[0], s[1], s[2], s[3], "action: ", action)

        q = self.getQ(s, action)
    
        print "old q is ", q 
        
        
        
        max_q_splus1 = np.max(self.getQ(s_plus1))
        
        print "max q is ", max_q_splus1 
        
        print "R is ", r 
        
#         print("q: ", q, " max_q_splus1: ", max_q_splus1)
        new_q = q + self.alpha * (r + self.gamma * max_q_splus1 - q)
    
        print "new_q is:", new_q

        self.Q[s][action] = new_q
        
    def printQ(self):
        for s in self.Q:
            print(s, self.Q[s])
    
    def get_best_action(self, s):
        return np.argmax(self.getQ(s))
    
    def get_epsilon(self):
        return max(self.epsilon_end, self.epsilon_start + 
                      self.epsilon_step / self.epsilon_decay * (self.epsilon_end - self.epsilon_start))
    
    def get_training_action(self):
        epsilon = self.get_epsilon()

        if np.random.rand() >= epsilon:
            action = self.get_best_action(self.s)
        else:
            action = self.env.action_space.sample()
        
        self.epsilon_step += 1
        return action
    
    def reset(self):
        s = env.reset()
        self.s = tuple([np.asscalar(np.digitize(s[i], self.bins[i])) for i in range(4)])
    
    def step(self, action, training=False, step=0):
        observation, r, done, _ = self.env.step(action)
#         print("s0:", self.s)
#         print("s1:", s_plus1)
#         print("velocity:", s_plus1[3])
        s_plus1 = tuple([np.asscalar(np.digitize(observation[i], self.bins[i])) for i in range(4)])
        
        if training and done and step < 200:
            r = -100
        
        self.setQ(self.s, s_plus1, action, r)
            
        self.s = s_plus1
        
        return r, done
    
    def train(self, nb_episodes=2):
        
        for episode in range(nb_episodes):
            self.reset()
            episode_reward = 0
            
            while True:
                action = self.get_training_action()
                r, done = self.step(action, training=True, step=episode_reward+1)
                                
                episode_reward += r
                
                if done:
                    break;
            self.episode_durations.append(episode_reward + 1)
#             self.plot_durations()            
#             print("Episode %d reward: %d epsilon: %.4f" % (episode, episode_reward, self.get_epsilon()))

    def run(self):
        self.reset()
        self.env.render()
        episode_reward = 0
        while True:
            self.env.render()
            action = self.get_best_action(self.s)
            r, done = self.step(action)
            episode_reward += r
#             time.sleep(0.1)
            if done:
                self.env.close()
                break;
        
        self.env.render(close=True)
        print("Total Reward: ", episode_reward)
        
class TestQlearner(unittest.TestCase):
    def setUp(self):
        env = gym.make('CartPole-v0')
        self.qlearner = QLearner(env)
        self.qlearner.alpha = 0.01
        self.qlearner.gamma = 0.99

    def test_setQ(self):
        self.qlearner.setQ((4, 4, 4, 4), (4, 4, 4, 5), 1, 1)
        print(self.qlearner.Q)
        self.assertEqual(self.qlearner.getQ((4, 4, 4, 4), 1), 0.01)
        
        print(' ')
        self.qlearner.setQ((4, 4, 4, 5), (4, 4, 4, 4), 0, 1)
        print(self.qlearner.Q)
        newQ = self.qlearner.getQ((4, 4, 4, 5), 0)
        self.assertEqual(newQ, 0.010099)
        
        
    def runTest(self):
        self.test_setQ()
        

tester = TestQlearner()
tester.debug()

[2018-02-03 13:43:39,509] Making new env: CartPole-v0
[2018-02-03 13:43:39,539] Making new env: CartPole-v0


old q is  0
max q is  0
R is  1
new_q is: 0.01
{(4, 4, 4, 5): [0, 0], (4, 4, 4, 4): [0, 0.01]}
 
old q is  0
max q is  0.01
R is  1
new_q is: 0.010099
{(4, 4, 4, 5): [0.010099, 0], (4, 4, 4, 4): [0, 0.01]}


In [None]:
env = gym.make('CartPole-v0')
qlearner = QLearner(env)

qlearner.train(nb_episodes=200)
qlearner.printQ()
plt.figure(2)
plt.clf()
plt.title('Training...')
plt.xlabel('Episode')
plt.ylabel('Duration')
plt.plot(qlearner.episode_durations)

[2018-02-03 13:43:44,421] Making new env: CartPole-v0


old q is  0
max q is  0
R is  1.0
new_q is: 0.1
old q is  0
max q is  0.1
R is  1.0
new_q is: 0.1099
old q is  0
max q is  0.1
R is  1.0
new_q is: 0.1099
old q is  0.1
max q is  0.1099
R is  1.0
new_q is: 0.2008801
old q is  0.1099
max q is  0.2008801
R is  1.0
new_q is: 0.2187971299
old q is  0.2187971299
max q is  0
R is  1.0
new_q is: 0.29691741691
old q is  0
max q is  0
R is  1.0
new_q is: 0.1
old q is  0
max q is  0
R is  1.0
new_q is: 0.1
old q is  0.1
max q is  0.1
R is  1.0
new_q is: 0.1999
old q is  0.1999
max q is  0.1999
R is  1.0
new_q is: 0.2997001
old q is  0.2997001
max q is  0.2997001
R is  1.0
new_q is: 0.3994003999
old q is  0
max q is  0.3994003999
R is  1.0
new_q is: 0.13954063959
old q is  0.13954063959
max q is  0.3994003999
R is  1.0
new_q is: 0.265127215221
old q is  0.3994003999
max q is  0.3994003999
R is  -100
new_q is: -9.6009990005
old q is  0
max q is  0
R is  1.0
new_q is: 0.1
old q is  0
max q is  0
R is  1.0
new_q is: 0.1
old q is  0
max q is  0
R is  

new_q is: 1.33142966034
old q is  1.42125390222
max q is  1.60164376903
R is  1.0
new_q is: 1.53769124513
old q is  1.60164376903
max q is  1.60164376903
R is  1.0
new_q is: 1.70004212526
old q is  1.70004212526
max q is  1.61985141754
R is  1.0
new_q is: 1.79040320307
old q is  0.864033801686
max q is  0.334053302355
R is  1.0
new_q is: 0.910701698451
old q is  0.19
max q is  1.14990945008
R is  1.0
new_q is: 0.384841035558
old q is  1.14990945008
max q is  1.14990945008
R is  1.0
new_q is: 1.24875954063
old q is  1.24875954063
max q is  1.19641830557
R is  1.0
new_q is: 1.34232899882
old q is  1.13837654176
max q is  1.19641830557
R is  1.0
new_q is: 1.24298429984
old q is  1.19641830557
max q is  1.24298429984
R is  1.0
new_q is: 1.2998319207
old q is  1.2998319207
max q is  1.34232899882
R is  1.0
new_q is: 1.40273929951
old q is  -3.68801470664
max q is  -17.4609864557
R is  1.0
new_q is: -4.94785089509
old q is  -24.7074696178
max q is  1.34232899882
R is  1.0
new_q is: -22.00383

old q is  3.31650388579
max q is  3.31650388579
R is  1.0
new_q is: 3.41318738191
old q is  2.54217004691
max q is  3.41318738191
R is  1.0
new_q is: 2.72585859303
old q is  2.72585859303
max q is  2.48373159457
R is  1.0
new_q is: 2.79916216159
old q is  2.48373159457
max q is  3.41318738191
R is  1.0
new_q is: 2.67326398592
old q is  3.41318738191
max q is  3.41318738191
R is  1.0
new_q is: 3.50977419453
old q is  3.50977419453
max q is  3.01478916433
R is  1.0
new_q is: 3.55726090234
old q is  3.01478916433
max q is  3.55726090234
R is  1.0
new_q is: 3.16547907723
old q is  3.55726090234
max q is  3.16547907723
R is  1.0
new_q is: 3.61491724075
old q is  3.16547907723
max q is  3.61491724075
R is  1.0
new_q is: 3.30680797634
old q is  3.61491724075
max q is  3.30680797634
R is  1.0
new_q is: 3.68079950634
old q is  3.30680797634
max q is  3.68079950634
R is  1.0
new_q is: 3.44052632983
old q is  3.68079950634
max q is  3.44052632983
R is  1.0
new_q is: 3.75333166236
old q is  2.1239

max q is  5.58771786774
R is  1.0
new_q is: 5.44844191998
old q is  5.58771786774
max q is  5.58771786774
R is  1.0
new_q is: 5.68213014987
old q is  5.68213014987
max q is  5.44844191998
R is  1.0
new_q is: 5.75331288497
old q is  5.44844191998
max q is  5.75331288497
R is  1.0
new_q is: 5.5731757036
old q is  5.75331288497
max q is  5.5731757036
R is  1.0
new_q is: 5.82972599113
old q is  5.5731757036
max q is  5.82972599113
R is  1.0
new_q is: 5.69300100636
old q is  5.82972599113
max q is  5.82972599113
R is  1.0
new_q is: 5.92389626513
old q is  5.92389626513
max q is  5.69300100636
R is  1.0
new_q is: 5.99511373825
old q is  5.69300100636
max q is  5.99511373825
R is  1.0
new_q is: 5.81721716581
old q is  5.99511373825
max q is  5.81721716581
R is  1.0
new_q is: 6.07150686384
old q is  5.81721716581
max q is  6.07150686384
R is  1.0
new_q is: 5.93657462875
old q is  6.07150686384
max q is  6.07150686384
R is  1.0
new_q is: 6.16543535698
old q is  6.16543535698
max q is  5.9365746

new_q is: -11.7227658718
old q is  5.92198424837
max q is  6.31781456312
R is  1.0
new_q is: 6.05524946528
old q is  6.31781456312
max q is  6.05524946528
R is  1.0
new_q is: 6.38550280387
old q is  6.05524946528
max q is  6.38550280387
R is  1.0
new_q is: 6.18188929633
old q is  6.38550280387
max q is  6.18188929633
R is  1.0
new_q is: 6.45895956382
old q is  6.18188929633
max q is  6.45895956382
R is  1.0
new_q is: 6.30313736352
old q is  6.45895956382
max q is  6.30313736352
R is  1.0
new_q is: 6.53707420642
old q is  6.30313736352
max q is  6.53707420642
R is  1.0
new_q is: 6.4199939736
old q is  6.53707420642
max q is  6.4199939736
R is  1.0
new_q is: 6.61894618917
old q is  6.4199939736
max q is  6.61894618917
R is  1.0
new_q is: 6.53327024897
old q is  6.61894618917
max q is  6.53327024897
R is  1.0
new_q is: 6.7038453249
old q is  6.53327024897
max q is  6.7038453249
R is  1.0
new_q is: 6.64362391124
old q is  6.7038453249
max q is  6.64362391124
R is  1.0
new_q is: 6.791179559

old q is  9.99348327461
max q is  9.94871180207
R is  1.0
new_q is: 10.0790574156
old q is  9.94871180207
max q is  10.0790574156
R is  1.0
new_q is: 10.051667306
old q is  10.0790574156
max q is  10.051667306
R is  1.0
new_q is: 10.1662667373
old q is  10.051667306
max q is  10.1662667373
R is  1.0
new_q is: 10.1529609824
old q is  -0.00515217777756
max q is  -2.82823965475
R is  1.0
new_q is: -0.18463268582
old q is  -2.82823965475
max q is  10.1662667373
R is  1.0
new_q is: -1.43895528228
old q is  10.1662667373
max q is  10.1662667373
R is  1.0
new_q is: 10.2561004706
old q is  10.2561004706
max q is  1.36122348186
R is  1.0
new_q is: 9.4652515482
old q is  1.36122348186
max q is  1.44088201835
R is  1.0
new_q is: 1.46774845349
old q is  1.44088201835
max q is  1.46774845349
R is  1.0
new_q is: 1.54210091341
old q is  1.46774845349
max q is  1.46774845349
R is  1.0
new_q is: 1.56628070503
old q is  1.56628070503
max q is  1.54210091341
R is  1.0
new_q is: 1.66232062496
old q is  1.

old q is  11.4857850011
max q is  11.6803371205
R is  1.0
new_q is: 11.5935598759
old q is  11.6803371205
max q is  11.5935598759
R is  1.0
new_q is: 11.7600658361
old q is  11.5935598759
max q is  11.7600658361
R is  1.0
new_q is: 11.6984504061
old q is  11.7600658361
max q is  11.6984504061
R is  1.0
new_q is: 11.8422058427
old q is  11.6984504061
max q is  11.8422058427
R is  1.0
new_q is: 11.8009837439
old q is  11.8422058427
max q is  11.8009837439
R is  1.0
new_q is: 11.9262826491
old q is  11.8009837439
max q is  11.9262826491
R is  1.0
new_q is: 11.9015873518
old q is  11.9262826491
max q is  11.9015873518
R is  1.0
new_q is: 12.011911532
old q is  11.9015873518
max q is  12.011911532
R is  1.0
new_q is: 12.0006078583
old q is  12.011911532
max q is  12.0006078583
R is  1.0
new_q is: 12.0987805568
old q is  12.0006078583
max q is  12.0987805568
R is  1.0
new_q is: 12.0983263476
old q is  12.0987805568
max q is  12.0983263476
R is  1.0
new_q is: 12.1866368095
old q is  12.098326

new_q is: 11.0868561554
old q is  11.9965893224
max q is  11.0868561554
R is  1.0
new_q is: 11.9945291496
old q is  11.0868561554
max q is  11.9945291496
R is  1.0
new_q is: 11.2656289257
old q is  11.9945291496
max q is  11.2656289257
R is  1.0
new_q is: 12.0103734982
old q is  11.2656289257
max q is  12.0103734982
R is  1.0
new_q is: 11.4280930094
old q is  12.0103734982
max q is  11.4280930094
R is  1.0
new_q is: 12.0407173564
old q is  11.4280930094
max q is  14.0913392927
R is  1.0
new_q is: 11.7803262985
old q is  14.0913392927
max q is  12.0407173564
R is  1.0
new_q is: 13.9742363817
old q is  12.0407173564
max q is  13.9742363817
R is  1.0
new_q is: 12.3200950225
old q is  13.9742363817
max q is  12.3200950225
R is  1.0
new_q is: 13.8965021507
old q is  12.3200950225
max q is  13.8965021507
R is  1.0
new_q is: 12.5638392332
old q is  13.8965021507
max q is  12.5638392332
R is  1.0
new_q is: 13.8506720198
old q is  12.5638392332
max q is  13.8506720198
R is  1.0
new_q is: 12.778

max q is  18.5215277638
R is  1.0
new_q is: 18.5209210302
old q is  18.5215277638
max q is  18.5209210302
R is  1.0
new_q is: 18.6029461694
old q is  18.5209210302
max q is  18.6029461694
R is  1.0
new_q is: 18.6105205979
old q is  18.6029461694
max q is  18.6029461694
R is  1.0
new_q is: 18.6843432233
old q is  18.6843432233
max q is  18.6105205979
R is  1.0
new_q is: 18.7583504401
old q is  18.6105205979
max q is  18.7583504401
R is  1.0
new_q is: 18.7065452317
old q is  18.7583504401
max q is  18.7065452317
R is  1.0
new_q is: 18.8344633741
old q is  18.7065452317
max q is  18.8344633741
R is  1.0
new_q is: 18.8005025826
old q is  18.8344633741
max q is  18.8005025826
R is  1.0
new_q is: 18.9122667923
old q is  18.8005025826
max q is  18.9122667923
R is  1.0
new_q is: 18.8927667368
old q is  18.9122667923
max q is  18.8927667368
R is  1.0
new_q is: 18.99142402
old q is  18.8927667368
max q is  18.99142402
R is  1.0
new_q is: 18.9836410411
old q is  18.99142402
max q is  18.983641041

new_q is: 19.1333717525
old q is  19.1102180527
max q is  19.1333717525
R is  1.0
new_q is: 19.1934000509
old q is  19.1333717525
max q is  19.1934000509
R is  1.0
new_q is: 19.2201811823
old q is  19.1934000509
max q is  19.2201811823
R is  1.0
new_q is: 19.2768579829
old q is  19.2201811823
max q is  19.2768579829
R is  1.0
new_q is: 19.3065720044
old q is  19.2768579829
max q is  19.3065720044
R is  1.0
new_q is: 19.360522813
old q is  19.3065720044
max q is  19.360522813
R is  1.0
new_q is: 19.3926065624
old q is  19.360522813
max q is  19.3926065624
R is  1.0
new_q is: 19.4443385814
old q is  19.3926065624
max q is  19.4443385814
R is  1.0
new_q is: 19.4783354257
old q is  19.4443385814
max q is  19.4783354257
R is  1.0
new_q is: 19.5282599304
old q is  19.4783354257
max q is  19.5282599304
R is  1.0
new_q is: 19.5637996163
old q is  19.5282599304
max q is  19.5637996163
R is  1.0
new_q is: 19.6122500994
old q is  19.5637996163
max q is  19.6122500994
R is  1.0
new_q is: 19.649032

max q is  20.1946954277
R is  1.0
new_q is: 20.316148226
old q is  20.1946954277
max q is  20.316148226
R is  1.0
new_q is: 20.2865245593
old q is  20.316148226
max q is  20.2865245593
R is  1.0
new_q is: 20.3928993347
old q is  20.2865245593
max q is  20.3928993347
R is  1.0
new_q is: 20.3767691375
old q is  20.3928993347
max q is  20.3767691375
R is  1.0
new_q is: 20.4709095459
old q is  20.3767691375
max q is  20.4709095459
R is  1.0
new_q is: 20.4657122688
old q is  20.4709095459
max q is  20.4657122688
R is  1.0
new_q is: 20.5499241059
old q is  20.4657122688
max q is  20.5499241059
R is  1.0
new_q is: 20.5535835284
old q is  20.5499241059
max q is  20.5535835284
R is  1.0
new_q is: 20.6297364646
old q is  20.5535835284
max q is  20.6297364646
R is  1.0
new_q is: 20.6405690856
old q is  20.6297364646
max q is  20.6405690856
R is  1.0
new_q is: 20.7101791576
old q is  20.6405690856
max q is  20.7101791576
R is  1.0
new_q is: 20.7268199136
old q is  20.7101791576
max q is  20.726819

max q is  23.178562363
R is  1.0
new_q is: 20.3130592007
old q is  23.178562363
max q is  20.3130592007
R is  1.0
new_q is: 22.9716989876
old q is  20.3130592007
max q is  22.9716989876
R is  1.0
new_q is: 20.6559514804
old q is  22.9716989876
max q is  20.6559514804
R is  1.0
new_q is: 22.8194682854
old q is  20.6559514804
max q is  22.8194682854
R is  1.0
new_q is: 20.9494836926
old q is  22.8194682854
max q is  20.9494836926
R is  1.0
new_q is: 22.7115203424
old q is  20.9494836926
max q is  22.7115203424
R is  1.0
new_q is: 21.2029758373
old q is  22.7115203424
max q is  21.2029758373
R is  1.0
new_q is: 22.6394629161
old q is  21.2029758373
max q is  22.6394629161
R is  1.0
new_q is: 21.4239850822
old q is  22.6394629161
max q is  21.4239850822
R is  1.0
new_q is: 22.5964911476
old q is  21.4239850822
max q is  22.5964911476
R is  1.0
new_q is: 21.6186391976
old q is  22.5964911476
max q is  21.6186391976
R is  1.0
new_q is: 22.5770873134
old q is  21.6186391976
max q is  22.57708

R is  1.0
new_q is: 24.4672063794
old q is  25.0223852821
max q is  24.4672063794
R is  1.0
new_q is: 25.0424001854
old q is  24.4672063794
max q is  25.0424001854
R is  1.0
new_q is: 24.5996833598
old q is  25.0424001854
max q is  24.5996833598
R is  1.0
new_q is: 25.0735288195
old q is  24.5996833598
max q is  25.0735288195
R is  1.0
new_q is: 24.721994377
old q is  25.0735288195
max q is  24.721994377
R is  1.0
new_q is: 25.1136533809
old q is  24.721994377
max q is  25.1136533809
R is  1.0
new_q is: 24.836046624
old q is  25.1136533809
max q is  24.836046624
R is  1.0
new_q is: 25.1610566586
old q is  24.836046624
max q is  25.1610566586
R is  1.0
new_q is: 24.9433865708
old q is  25.1610566586
max q is  24.9433865708
R is  1.0
new_q is: 25.2143462632
old q is  24.9433865708
max q is  24.9433865708
R is  1.0
new_q is: 25.0184431842
old q is  25.0184431842
max q is  25.2143462632
R is  1.0
new_q is: 25.1128191458
old q is  25.2143462632
max q is  25.1128191458
R is  1.0
new_q is: 25

new_q is: 27.0973597149
old q is  27.1750465129
max q is  27.0973597149
R is  1.0
new_q is: 27.2401804734
old q is  27.0973597149
max q is  27.2401804734
R is  1.0
new_q is: 27.1844016103
old q is  27.2401804734
max q is  27.1844016103
R is  1.0
new_q is: 27.3074181855
old q is  27.1844016103
max q is  27.3074181855
R is  1.0
new_q is: 27.2693958496
old q is  27.3074181855
max q is  27.2693958496
R is  1.0
new_q is: 27.376346556
old q is  27.2693958496
max q is  27.376346556
R is  1.0
new_q is: 27.3527145737
old q is  27.376346556
max q is  27.3527145737
R is  1.0
new_q is: 27.4466306432
old q is  27.3527145737
max q is  27.4466306432
R is  1.0
new_q is: 27.43465955
old q is  27.4466306432
max q is  27.43465955
R is  1.0
new_q is: 27.5179988744
old q is  27.43465955
max q is  27.5179988744
R is  1.0
new_q is: 27.5154754836
old q is  27.5179988744
max q is  27.5154754836
R is  1.0
new_q is: 27.5902310598
old q is  27.5154754836
max q is  27.5902310598
R is  1.0
new_q is: 27.5953608101
o

max q is  28.1460371236
R is  1.0
new_q is: 28.6154970512
old q is  28.1460371236
max q is  28.6154970512
R is  1.0
new_q is: 28.2643676193
old q is  28.6154970512
max q is  28.2643676193
R is  1.0
new_q is: 28.6521197404
old q is  28.2643676193
max q is  28.6521197404
R is  1.0
new_q is: 28.3744907116
old q is  28.6521197404
max q is  28.3744907116
R is  1.0
new_q is: 28.6959823468
old q is  28.3744907116
max q is  28.6959823468
R is  1.0
new_q is: 28.4779438928
old q is  28.6959823468
max q is  28.4779438928
R is  1.0
new_q is: 28.7457005575
old q is  28.4779438928
max q is  28.7457005575
R is  1.0
new_q is: 28.5759738587
old q is  28.7457005575
max q is  34.575414543
R is  1.0
new_q is: 29.3940965415
old q is  34.575414543
max q is  29.3940965415
R is  1.0
new_q is: 34.1278886463
old q is  29.3940965415
max q is  34.1278886463
R is  1.0
new_q is: 29.9333478634
old q is  34.1278886463
max q is  35.2143945153
R is  1.0
new_q is: 34.3013248387
old q is  35.2143945153
max q is  34.30132

old q is  29.8322780673
max q is  30.0691785271
R is  1.0
new_q is: 29.9258989347
old q is  30.0691785271
max q is  29.9258989347
R is  1.0
new_q is: 30.1249246689
old q is  29.9258989347
max q is  30.1249246689
R is  1.0
new_q is: 30.0156765835
old q is  30.1249246689
max q is  30.0156765835
R is  1.0
new_q is: 30.1839841838
old q is  30.0156765835
max q is  30.0156765835
R is  1.0
new_q is: 30.0856609069
old q is  30.0856609069
max q is  30.1839841838
R is  1.0
new_q is: 30.1653092504
old q is  30.1839841838
max q is  30.1653092504
R is  1.0
new_q is: 30.2519513812
old q is  30.1653092504
max q is  30.2519513812
R is  1.0
new_q is: 30.2437215121
old q is  30.2519513812
max q is  30.2437215121
R is  1.0
new_q is: 30.3208846728
old q is  30.2437215121
max q is  30.3208846728
R is  1.0
new_q is: 30.3211169435
old q is  30.3208846728
max q is  30.3211169435
R is  1.0
new_q is: 30.3905867829
old q is  30.3211169435
max q is  30.3905867829
R is  1.0
new_q is: 30.3976733407
old q is  30.390

max q is  42.3809750127
R is  1.0
new_q is: 42.4385940377
old q is  42.4385940377
max q is  42.3834617497
R is  1.0
new_q is: 42.4906973472
old q is  42.3834617497
max q is  42.4906973472
R is  1.0
new_q is: 42.4516946121
old q is  42.4906973472
max q is  42.4516946121
R is  1.0
new_q is: 42.5443453791
old q is  42.4516946121
max q is  42.5443453791
R is  1.0
new_q is: 42.5184153434
old q is  42.5443453791
max q is  42.5184153434
R is  1.0
new_q is: 42.5992339601
old q is  42.5992339601
max q is  42.5184153434
R is  1.0
new_q is: 42.6486336831
old q is  42.5184153434
max q is  42.6486336831
R is  1.0
new_q is: 42.5887885437
old q is  42.6486336831
max q is  42.5887885437
R is  1.0
new_q is: 42.7000603806
old q is  42.5887885437
max q is  30.7459780407
R is  1.0
new_q is: 41.4737615154
old q is  30.7459780407
max q is  41.4737615154
R is  1.0
new_q is: 31.8772826266
old q is  41.4737615154
max q is  31.8772826266
R is  1.0
new_q is: 40.5822363439
old q is  31.8772826266
max q is  28.370

new_q is: 41.780223004
old q is  41.8974503029
max q is  41.8974503029
R is  1.0
new_q is: 41.9555528526
old q is  41.9555528526
max q is  41.780223004
R is  1.0
new_q is: 41.9962396448
old q is  41.780223004
max q is  41.9962396448
R is  1.0
new_q is: 41.8598284284
old q is  41.9962396448
max q is  41.8598284284
R is  1.0
new_q is: 42.0407386947
old q is  41.8598284284
max q is  42.0407386947
R is  1.0
new_q is: 41.9358787163
old q is  42.0407386947
max q is  41.9358787163
R is  1.0
new_q is: 42.0883168181
old q is  41.9358787163
max q is  42.0883168181
R is  1.0
new_q is: 42.0090342097
old q is  42.0883168181
max q is  42.0090342097
R is  1.0
new_q is: 42.1383795231
old q is  42.0090342097
max q is  42.1383795231
R is  1.0
new_q is: 42.0798303615
old q is  42.1383795231
max q is  42.0798303615
R is  1.0
new_q is: 42.1904447766
old q is  42.0798303615
max q is  42.1904447766
R is  1.0
new_q is: 42.1487013582
old q is  42.1904447766
max q is  42.1487013582
R is  1.0
new_q is: 42.244121

R is  1.0
new_q is: 45.20670044
old q is  45.1634304462
max q is  45.20670044
R is  1.0
new_q is: 45.2225507451
old q is  45.20670044
max q is  45.20670044
R is  1.0
new_q is: 45.2614937396
old q is  45.2614937396
max q is  45.2225507451
R is  1.0
new_q is: 45.3123768894
old q is  45.2225507451
max q is  45.3123768894
R is  1.0
new_q is: 45.2862209826
old q is  45.3123768894
max q is  45.2862209826
R is  1.0
new_q is: 45.3644750777
old q is  45.2862209826
max q is  45.3644750777
R is  1.0
new_q is: 45.3486819171
old q is  45.3644750777
max q is  45.3486819171
R is  1.0
new_q is: 45.4175470798
old q is  45.3486819171
max q is  45.4175470798
R is  1.0
new_q is: 45.4101508863
old q is  45.4175470798
max q is  45.4101508863
R is  1.0
new_q is: 45.4713973095
old q is  45.4101508863
max q is  45.4713973095
R is  1.0
new_q is: 45.4708041313
old q is  45.4713973095
max q is  45.4708041313
R is  1.0
new_q is: 45.5258671876
old q is  45.4708041313
max q is  45.5258671876
R is  1.0
new_q is: 45.5

R is  1.0
new_q is: 46.5564541908
old q is  42.4671304472
max q is  46.5564541908
R is  1.0
new_q is: 42.9295063673
old q is  46.5564541908
max q is  42.9295063673
R is  1.0
new_q is: 46.2508299021
old q is  42.9295063673
max q is  46.2508299021
R is  1.0
new_q is: 43.3153878909
old q is  46.2508299021
max q is  43.3153878909
R is  1.0
new_q is: 46.013970313
old q is  43.3153878909
max q is  46.013970313
R is  1.0
new_q is: 43.6392321628
old q is  46.013970313
max q is  43.6392321628
R is  1.0
new_q is: 45.8328572659
old q is  43.6392321628
max q is  45.8328572659
R is  1.0
new_q is: 43.9127618158
old q is  45.8328572659
max q is  43.9127618158
R is  1.0
new_q is: 45.696934959
old q is  43.9127618158
max q is  45.696934959
R is  1.0
new_q is: 44.1454821952
old q is  45.696934959
max q is  44.1454821952
R is  1.0
new_q is: 45.5976442005
old q is  0.504779625704
max q is  13.9865851926
R is  1.0
new_q is: 1.93897359721
old q is  13.9865851926
max q is  44.1454821952
R is  1.0
new_q is: 1

R is  1.0
new_q is: 48.9799930124
old q is  49.0010464929
max q is  48.9799930124
R is  1.0
new_q is: 49.0499611519
old q is  48.9799930124
max q is  49.0499611519
R is  1.0
new_q is: 49.0379398652
old q is  49.0499611519
max q is  49.0379398652
R is  1.0
new_q is: 49.0997210833
old q is  49.0379398652
max q is  49.0997210833
R is  1.0
new_q is: 49.095018266
old q is  49.0997210833
max q is  49.095018266
R is  1.0
new_q is: 49.1501557833
old q is  49.095018266
max q is  49.1501557833
R is  1.0
new_q is: 49.1513818619
old q is  49.1501557833
max q is  49.1513818619
R is  1.0
new_q is: 49.2011270093
old q is  49.1513818619
max q is  49.2011270093
R is  1.0
new_q is: 49.2071552496
old q is  49.2011270093
max q is  49.2071552496
R is  1.0
new_q is: 49.2525226781
old q is  49.2071552496
max q is  49.2525226781
R is  1.0
new_q is: 49.2624394698
old q is  49.2525226781
max q is  49.2624394698
R is  1.0
new_q is: 49.3042519178
old q is  49.2624394698
max q is  49.3042519178
R is  1.0
new_q is:

max q is  53.3968150303
R is  1.0
new_q is: 53.3698018747
old q is  53.3968150303
max q is  53.3698018747
R is  1.0
new_q is: 53.4407439129
old q is  53.3698018747
max q is  53.4407439129
R is  1.0
new_q is: 53.4234553346
old q is  53.4407439129
max q is  53.4234553346
R is  1.0
new_q is: 53.4855915997
old q is  53.4234553346
max q is  53.4855915997
R is  1.0
new_q is: 53.4761833695
old q is  53.4855915997
max q is  53.4761833695
R is  1.0
new_q is: 53.5311745933
old q is  53.4761833695
max q is  53.5311745933
R is  1.0
new_q is: 53.5281513173
old q is  53.5311745933
max q is  53.5281513173
R is  1.0
new_q is: 53.5773441144
old q is  53.5281513173
max q is  53.5773441144
R is  1.0
new_q is: 53.5794932529
old q is  53.5773441144
max q is  53.5794932529
R is  1.0
new_q is: 53.623979535
old q is  53.5794932529
max q is  53.623979535
R is  1.0
new_q is: 53.6303179016
old q is  53.623979535
max q is  53.623979535
R is  1.0
new_q is: 53.6703555555
old q is  53.6703555555
max q is  53.6303179

new_q is: 54.9887333486
old q is  53.6767467225
max q is  54.9887333486
R is  1.0
new_q is: 53.8529566518
old q is  54.9887333486
max q is  53.8529566518
R is  1.0
new_q is: 54.9213027223
old q is  53.8529566518
max q is  54.9213027223
R is  1.0
new_q is: 54.0048699561
old q is  54.9213027223
max q is  54.0048699561
R is  1.0
new_q is: 54.8756545757
old q is  54.0048699561
max q is  54.8756545757
R is  1.0
new_q is: 54.1370727635
old q is  54.8756545757
max q is  54.1370727635
R is  1.0
new_q is: 54.8476593217
old q is  54.1370727635
max q is  54.8476593217
R is  1.0
new_q is: 54.25328376
old q is  54.8476593217
max q is  54.25328376
R is  1.0
new_q is: 54.8339684818
old q is  54.25328376
max q is  54.8339684818
R is  1.0
new_q is: 54.3565182637
old q is  54.8339684818
max q is  54.3565182637
R is  1.0
new_q is: 54.8318669417
old q is  54.3565182637
max q is  54.8318669417
R is  1.0
new_q is: 54.4492212646
old q is  54.8318669417
max q is  54.4492212646
R is  1.0
new_q is: 54.839153152

R is  1.0
new_q is: 55.0185154627
old q is  54.6897319228
max q is  55.0185154627
R is  1.0
new_q is: 54.7675917614
old q is  55.0185154627
max q is  54.7675917614
R is  1.0
new_q is: 55.0386555008
old q is  54.7675917614
max q is  55.0386555008
R is  1.0
new_q is: 54.8396594798
old q is  55.0386555008
max q is  54.8396594798
R is  1.0
new_q is: 55.0639162392
old q is  54.8396594798
max q is  55.0639162392
R is  1.0
new_q is: 54.9070212395
old q is  55.0639162392
max q is  54.9070212395
R is  1.0
new_q is: 55.093319718
old q is  54.9070212395
max q is  55.093319718
R is  1.0
new_q is: 54.9705577676
old q is  55.093319718
max q is  54.9705577676
R is  1.0
new_q is: 55.1260729652
old q is  54.9705577676
max q is  55.1260729652
R is  1.0
new_q is: 55.0309832144
old q is  55.1260729652
max q is  55.0309832144
R is  1.0
new_q is: 55.1615330069
old q is  55.0309832144
max q is  55.1615330069
R is  1.0
new_q is: 55.0888766607
old q is  55.1615330069
max q is  55.0888766607
R is  1.0
new_q is:

R is  1.0
new_q is: 58.7843819373
old q is  58.7546901765
max q is  58.7843819373
R is  1.0
new_q is: 58.7988749707
old q is  58.7843819373
max q is  58.7843819373
R is  1.0
new_q is: 58.8255975553
old q is  58.8255975553
max q is  58.7988749707
R is  1.0
new_q is: 58.8641264219
old q is  58.7988749707
max q is  58.8641264219
R is  1.0
new_q is: 58.8465359894
old q is  58.8641264219
max q is  58.8465359894
R is  1.0
new_q is: 58.9035208427
old q is  58.8465359894
max q is  58.9035208427
R is  1.0
new_q is: 58.8933309538
old q is  58.9035208427
max q is  58.8933309538
R is  1.0
new_q is: 58.9436085228
old q is  58.8933309538
max q is  58.9436085228
R is  1.0
new_q is: 58.9394151022
old q is  58.9436085228
max q is  58.9394151022
R is  1.0
new_q is: 58.9842497657
old q is  58.9394151022
max q is  58.9842497657
R is  1.0
new_q is: 58.9849143188
old q is  58.9842497657
max q is  58.9849143188
R is  1.0
new_q is: 59.0253313067
old q is  58.9849143188
max q is  59.0253313067
R is  1.0
new_q 

old q is  58.8422298405
max q is  58.8703930385
R is  1.0
new_q is: 58.8861757673
old q is  58.8703930385
max q is  58.8861757673
R is  1.0
new_q is: 58.9130851356
old q is  58.8861757673
max q is  58.9130851356
R is  1.0
new_q is: 58.929953619
old q is  58.9130851356
max q is  58.9130851356
R is  1.0
new_q is: 58.9541720505
old q is  58.9541720505
max q is  58.929953619
R is  1.0
new_q is: 58.9928202537
old q is  58.929953619
max q is  58.9928202537
R is  1.0
new_q is: 58.9772474622
old q is  58.9928202537
max q is  58.9772474622
R is  1.0
new_q is: 59.0322857271
old q is  58.9772474622
max q is  59.0322857271
R is  1.0
new_q is: 59.023719003
old q is  59.0322857271
max q is  59.023719003
R is  1.0
new_q is: 59.0724053357
old q is  59.023719003
max q is  59.0724053357
R is  1.0
new_q is: 59.0695152309
old q is  59.0724053357
max q is  59.0695152309
R is  1.0
new_q is: 59.11304681
old q is  59.0695152309
max q is  59.11304681
R is  1.0
new_q is: 59.114755342
old q is  59.11304681
max q

max q is  60.0187934423
R is  1.0
new_q is: 60.0123211783
old q is  60.0187934423
max q is  60.0123211783
R is  1.0
new_q is: 60.0581338947
old q is  60.0123211783
max q is  60.0581338947
R is  1.0
new_q is: 60.0568443161
old q is  60.0581338947
max q is  60.0568443161
R is  1.0
new_q is: 60.0979480925
old q is  60.0568443161
max q is  60.0979480925
R is  1.0
new_q is: 60.1008567456
old q is  60.0979480925
max q is  60.1008567456
R is  1.0
new_q is: 60.1381381011
old q is  60.1008567456
max q is  60.1381381011
R is  1.0
new_q is: 60.1444467431
old q is  60.1381381011
max q is  60.1444467431
R is  1.0
new_q is: 60.1786245186
old q is  60.1444467431
max q is  60.1786245186
R is  1.0
new_q is: 60.1876858961
old q is  60.1786245186
max q is  60.1876858961
R is  1.0
new_q is: 60.2193429704
old q is  60.1876858961
max q is  60.2193429704
R is  1.0
new_q is: 60.2306322606
old q is  60.2193429704
max q is  60.2306322606
R is  1.0
new_q is: 60.2602412672
old q is  60.2306322606
max q is  60.260

R is  1.0
new_q is: 63.1774552394
old q is  63.1172667928
max q is  63.1774552394
R is  1.0
new_q is: 63.1601081822
old q is  63.1774552394
max q is  63.1601081822
R is  1.0
new_q is: 63.2125604255
old q is  63.1601081822
max q is  63.2125604255
R is  1.0
new_q is: 63.2021408461
old q is  63.2125604255
max q is  63.2021408461
R is  1.0
new_q is: 63.2483163267
old q is  63.2021408461
max q is  63.2483163267
R is  1.0
new_q is: 63.2435100779
old q is  63.2483163267
max q is  63.2435100779
R is  1.0
new_q is: 63.2845921917
old q is  63.2435100779
max q is  63.2845921917
R is  1.0
new_q is: 63.2843336971
old q is  63.2845921917
max q is  63.2843336971
R is  1.0
new_q is: 63.3212820086
old q is  63.2843336971
max q is  63.3212820086
R is  1.0
new_q is: 63.3247072462
old q is  63.3212820086
max q is  63.3247072462
R is  1.0
new_q is: 63.3582998251
old q is  63.3247072462
max q is  63.3582998251
R is  1.0
new_q is: 63.3647082043
old q is  63.3582998251
max q is  63.3647082043
R is  1.0
new_q 

new_q is: 66.7985434049
old q is  66.7416265283
max q is  66.7985434049
R is  1.0
new_q is: 66.7805196726
old q is  66.7985434049
max q is  66.7805196726
R is  1.0
new_q is: 66.829960512
old q is  66.7805196726
max q is  66.829960512
R is  1.0
new_q is: 66.818633796
old q is  66.829960512
max q is  66.818633796
R is  1.0
new_q is: 66.8620092066
old q is  66.818633796
max q is  66.8620092066
R is  1.0
new_q is: 66.8561093278
old q is  66.8620092066
max q is  66.8561093278
R is  1.0
new_q is: 66.8945631094
old q is  66.8561093278
max q is  66.8945631094
R is  1.0
new_q is: 66.8930601429
old q is  66.8945631094
max q is  66.8945631094
R is  1.0
new_q is: 66.9276685463
old q is  66.9276685463
max q is  66.8930601429
R is  1.0
new_q is: 66.9573146458
old q is  66.8930601429
max q is  66.9573146458
R is  1.0
new_q is: 66.9325282785
old q is  66.9573146458
max q is  66.9325282785
R is  1.0
new_q is: 66.9879034808
old q is  66.9325282785
max q is  66.9879034808
R is  1.0
new_q is: 66.971077895

max q is  65.27938654
R is  1.0
new_q is: 65.3127806805
old q is  65.27938654
max q is  65.3127806805
R is  1.0
new_q is: 65.3174131733
old q is  65.3127806805
max q is  65.3127806805
R is  1.0
new_q is: 65.3474678999
old q is  65.3474678999
max q is  65.3174131733
R is  1.0
new_q is: 65.379145014
old q is  65.3174131733
max q is  65.379145014
R is  1.0
new_q is: 65.3582072124
old q is  65.379145014
max q is  65.3582072124
R is  1.0
new_q is: 65.4116930267
old q is  65.3582072124
max q is  65.4116930267
R is  1.0
new_q is: 65.3981441008
old q is  65.4116930267
max q is  65.3981441008
R is  1.0
new_q is: 65.44493999
old q is  65.3981441008
max q is  65.44493999
R is  1.0
new_q is: 65.4373787497
old q is  65.44493999
max q is  65.4373787497
R is  1.0
new_q is: 65.4787464872
old q is  65.4373787497
max q is  65.4787464872
R is  1.0
new_q is: 65.476036777
old q is  65.4787464872
max q is  65.476036777
R is  1.0
new_q is: 65.5129994794
old q is  65.476036777
max q is  65.5129994794
R is  1.

R is  1.0
new_q is: 53.447363026
old q is  52.6287460403
max q is  53.447363026
R is  1.0
new_q is: 52.7571603758
old q is  53.447363026
max q is  52.7571603758
R is  1.0
new_q is: 53.4255856006
old q is  52.7571603758
max q is  53.4255856006
R is  1.0
new_q is: 52.8705773127
old q is  53.4255856006
max q is  52.8705773127
R is  1.0
new_q is: 53.4172141945
old q is  52.8705773127
max q is  53.4172141945
R is  1.0
new_q is: 52.9718237867
old q is  53.4172141945
max q is  52.9718237867
R is  1.0
new_q is: 53.4197033299
old q is  52.9718237867
max q is  53.4197033299
R is  1.0
new_q is: 53.0631920377
old q is  53.4197033299
max q is  53.0631920377
R is  1.0
new_q is: 53.4309890087
old q is  53.0631920377
max q is  53.4309890087
R is  1.0
new_q is: 53.1465407458
old q is  53.4309890087
max q is  53.1465407458
R is  1.0
new_q is: 53.4493976416
old q is  53.1465407458
max q is  53.4493976416
R is  1.0
new_q is: 53.2233770377
old q is  53.4493976416
max q is  53.2233770377
R is  1.0
new_q is:

R is  1.0
new_q is: 58.6449547205
old q is  58.6449547205
max q is  58.6159723222
R is  1.0
new_q is: 58.6834405083
old q is  58.6159723222
max q is  58.6834405083
R is  1.0
new_q is: 58.6640357003
old q is  58.6834405083
max q is  58.6640357003
R is  1.0
new_q is: 58.7228359918
old q is  50.3349297784
max q is  54.2638986777
R is  1.0
new_q is: 50.7735627696
old q is  54.2638986777
max q is  58.6640357003
R is  1.0
new_q is: 54.7452483443
old q is  58.6640357003
max q is  58.7228359918
R is  1.0
new_q is: 58.7111928935
old q is  58.7228359918
max q is  58.7111928935
R is  1.0
new_q is: 58.7629604891
old q is  58.7111928935
max q is  58.7629604891
R is  1.0
new_q is: 58.7576066925
old q is  58.7629604891
max q is  58.7576066925
R is  1.0
new_q is: 58.8036675027
old q is  58.7576066925
max q is  58.8036675027
R is  1.0
new_q is: 58.8034091061
old q is  58.8036675027
max q is  58.8034091061
R is  1.0
new_q is: 58.844838254
old q is  58.8034091061
max q is  58.844838254
R is  1.0
new_q is

max q is  60.0483484251
R is  1.0
new_q is: 60.0350587507
old q is  60.0483484251
max q is  60.0350587507
R is  1.0
new_q is: 60.0869843989
old q is  60.0350587507
max q is  60.0869843989
R is  1.0
new_q is: 60.0801643311
old q is  60.0869843989
max q is  60.0801643311
R is  1.0
new_q is: 60.1262222278
old q is  60.0801643311
max q is  60.1262222278
R is  1.0
new_q is: 60.1246438986
old q is  60.1262222278
max q is  60.1246438986
R is  1.0
new_q is: 60.165939751
old q is  60.1246438986
max q is  60.165939751
R is  1.0
new_q is: 60.168607544
old q is  60.165939751
max q is  60.168607544
R is  1.0
new_q is: 60.2060379227
old q is  54.6853196617
max q is  47.3179216227
R is  1.0
new_q is: 54.0012619361
old q is  47.3179216227
max q is  54.0012619361
R is  1.0
new_q is: 48.0322543921
old q is  54.0012619361
max q is  48.0322543921
R is  1.0
new_q is: 53.4563289273
old q is  48.0322543921
max q is  48.0322543921
R is  1.0
new_q is: 48.0842221377
old q is  48.0842221377
max q is  53.45632892

R is  1.0
new_q is: 63.2097969474
old q is  63.2196577124
max q is  63.2097969474
R is  1.0
new_q is: 63.255461839
old q is  63.2097969474
max q is  63.255461839
R is  1.0
new_q is: 63.2511079747
old q is  63.255461839
max q is  63.2511079747
R is  1.0
new_q is: 63.2917753446
old q is  63.2511079747
max q is  63.2917753446
R is  1.0
new_q is: 63.2918829363
old q is  63.2917753446
max q is  63.2918829363
R is  1.0
new_q is: 63.3284942208
old q is  63.2918829363
max q is  63.3284942208
R is  1.0
new_q is: 63.3322155705
old q is  63.3284942208
max q is  63.3284942208
R is  1.0
new_q is: 63.3651657266
old q is  63.3651657266
max q is  63.3322155705
R is  1.0
new_q is: 63.3985384954
old q is  63.3322155705
max q is  63.3985384954
R is  1.0
new_q is: 63.3754493245
old q is  63.3985384954
max q is  63.3754493245
R is  1.0
new_q is: 63.432854129
old q is  63.3754493245
max q is  63.432854129
R is  1.0
new_q is: 63.4177569509
old q is  63.432854129
max q is  63.4177569509
R is  1.0
new_q is: 63

max q is  66.7352192979
R is  1.0
new_q is: 66.7943741713
old q is  66.7352192979
max q is  66.7943741713
R is  1.0
new_q is: 66.774340411
old q is  66.7943741713
max q is  66.774340411
R is  1.0
new_q is: 66.8255964549
old q is  66.774340411
max q is  66.8255964549
R is  1.0
new_q is: 66.812640419
old q is  66.8255964549
max q is  66.812640419
R is  1.0
new_q is: 66.8574882109
old q is  66.812640419
max q is  66.8574882109
R is  1.0
new_q is: 66.8502677099
old q is  66.8574882109
max q is  66.8502677099
R is  1.0
new_q is: 66.8899158931
old q is  66.8502677099
max q is  66.8899158931
R is  1.0
new_q is: 66.8873426124
old q is  66.8899158931
max q is  66.8873426124
R is  1.0
new_q is: 66.9227712224
old q is  66.8873426124
max q is  66.9227712224
R is  1.0
new_q is: 66.9239627021
old q is  66.9227712224
max q is  66.9239627021
R is  1.0
new_q is: 66.9559664077
old q is  66.9239627021
max q is  66.9559664077
R is  1.0
new_q is: 66.9602071063
old q is  66.9559664077
max q is  66.955966407

R is  1.0
new_q is: 64.8029623561
old q is  64.8200495864
max q is  64.8200495864
R is  1.0
new_q is: 64.8552295368
old q is  64.8552295368
max q is  64.8029623561
R is  1.0
new_q is: 64.8851998564
old q is  64.8029623561
max q is  64.8851998564
R is  1.0
new_q is: 64.8463009062
old q is  64.8851998564
max q is  64.8463009062
R is  1.0
new_q is: 64.9164636605
old q is  64.8463009062
max q is  64.9164636605
R is  1.0
new_q is: 64.888400718
old q is  64.9164636605
max q is  64.888400718
R is  -100
new_q is: 54.8487689655
old q is  50.9340855178
max q is  52.6839813966
R is  1.0
new_q is: 51.1563911243
old q is  52.6839813966
max q is  51.1563911243
R is  1.0
new_q is: 52.5800659782
old q is  51.1563911243
max q is  52.5800659782
R is  1.0
new_q is: 51.3461785437
old q is  52.5800659782
max q is  51.3461785437
R is  1.0
new_q is: 52.5053310562
old q is  51.3461785437
max q is  52.5053310562
R is  1.0
new_q is: 51.5095884639
old q is  52.5053310562
max q is  51.5095884639
R is  1.0
new_q i

new_q is: 63.1689049427
old q is  63.1637648056
max q is  63.1637648056
R is  1.0
new_q is: 63.2006010408
old q is  63.2006010408
max q is  63.1689049427
R is  1.0
new_q is: 63.2342625261
old q is  63.1689049427
max q is  63.2342625261
R is  1.0
new_q is: 63.2122064385
old q is  63.2342625261
max q is  63.2122064385
R is  1.0
new_q is: 63.2688447109
old q is  63.2122064385
max q is  63.2688447109
R is  1.0
new_q is: 63.254601421
old q is  63.2688447109
max q is  63.254601421
R is  1.0
new_q is: 63.3041657805
old q is  63.254601421
max q is  63.3041657805
R is  1.0
new_q is: 63.2962536912
old q is  63.3041657805
max q is  63.2962536912
R is  1.0
new_q is: 63.3400783179
old q is  63.2962536912
max q is  63.3400783179
R is  1.0
new_q is: 63.3372960756
old q is  63.3400783179
max q is  63.3372960756
R is  1.0
new_q is: 63.3764627976
old q is  63.3372960756
max q is  63.3764627976
R is  1.0
new_q is: 63.377836285
old q is  53.7040494781
max q is  51.7762508408
R is  1.0
new_q is: 53.5594933

new_q is: 64.7480415244
old q is  64.691170778
max q is  64.7480415244
R is  1.0
new_q is: 64.7321098111
old q is  64.7480415244
max q is  64.7321098111
R is  1.0
new_q is: 64.7817162432
old q is  64.7321098111
max q is  64.7817162432
R is  1.0
new_q is: 64.772288738
old q is  64.7817162432
max q is  64.772288738
R is  1.0
new_q is: 64.816001204
old q is  64.772288738
max q is  64.816001204
R is  1.0
new_q is: 64.8118439834
old q is  64.816001204
max q is  64.816001204
R is  1.0
new_q is: 64.8511852028
old q is  64.8511852028
max q is  64.8118439834
R is  1.0
new_q is: 64.8824392369
old q is  64.8118439834
max q is  64.8824392369
R is  1.0
new_q is: 64.8540210695
old q is  64.8824392369
max q is  64.8540210695
R is  1.0
new_q is: 64.9147433991
old q is  64.8540210695
max q is  64.9147433991
R is  1.0
new_q is: 64.8951785591
old q is  64.9147433991
max q is  64.8951785591
R is  1.0
new_q is: 64.9478917365
old q is  64.8951785591
max q is  64.9478917365
R is  1.0
new_q is: 64.9355019851


new_q is: 66.7469938366
old q is  66.766510093
max q is  66.7469938366
R is  1.0
new_q is: 66.7978114735
old q is  66.7469938366
max q is  66.7978114735
R is  1.0
new_q is: 66.7852777888
old q is  66.7978114735
max q is  66.7852777888
R is  1.0
new_q is: 66.8297728272
old q is  66.7852777888
max q is  66.8297728272
R is  1.0
new_q is: 66.8228975198
old q is  66.8297728272
max q is  66.8228975198
R is  1.0
new_q is: 66.862262399
old q is  66.8228975198
max q is  66.862262399
R is  1.0
new_q is: 66.8599717454
old q is  66.862262399
max q is  66.8599717454
R is  1.0
new_q is: 66.8951733619
old q is  66.8599717454
max q is  66.8951733619
R is  1.0
new_q is: 66.8965967337
old q is  66.8951733619
max q is  66.8951733619
R is  1.0
new_q is: 66.9282781885
old q is  66.9282781885
max q is  66.8965967337
R is  1.0
new_q is: 66.9582134463
old q is  66.8965967337
max q is  66.9582134463
R is  1.0
new_q is: 66.9358001915
old q is  66.9582134463
max q is  66.9358001915
R is  1.0
new_q is: 66.9890363

R is  1.0
new_q is: 69.3667085292
old q is  69.3117700687
max q is  69.3667085292
R is  1.0
new_q is: 69.3478972063
old q is  69.3667085292
max q is  69.3478972063
R is  1.0
new_q is: 69.3954794997
old q is  69.3478972063
max q is  69.3954794997
R is  1.0
new_q is: 69.3832599561
old q is  69.3954794997
max q is  69.3832599561
R is  1.0
new_q is: 69.4248742854
old q is  69.3832599561
max q is  69.4248742854
R is  1.0
new_q is: 69.4179965147
old q is  69.4248742854
max q is  69.4248742854
R is  1.0
new_q is: 69.4554494111
old q is  69.4554494111
max q is  69.4179965147
R is  1.0
new_q is: 69.482286125
old q is  69.4179965147
max q is  69.482286125
R is  1.0
new_q is: 69.4549431896
old q is  69.482286125
max q is  69.4549431896
R is  1.0
new_q is: 69.5100968883
old q is  69.4549431896
max q is  69.5100968883
R is  -100
new_q is: 59.3909484626
old q is  55.7801490246
max q is  52.5059570237
R is  1.0
new_q is: 55.5002238675
old q is  52.5059570237
max q is  55.5002238675
R is  1.0
new_q is

new_q is: 67.4269185851
old q is  67.4432272763
max q is  67.4269185851
R is  1.0
new_q is: 67.4741694886
old q is  67.4269185851
max q is  67.4741694886
R is  1.0
new_q is: 67.4641695059
old q is  67.4741694886
max q is  67.4641695059
R is  1.0
new_q is: 67.5057053208
old q is  67.4641695059
max q is  67.5057053208
R is  1.0
new_q is: 67.5008173821
old q is  67.5057053208
max q is  67.5008173821
R is  1.0
new_q is: 67.5377157095
old q is  67.5008173821
max q is  67.5377157095
R is  1.0
new_q is: 67.5369694991
old q is  67.5377157095
max q is  67.5369694991
R is  1.0
new_q is: 67.570104119
old q is  67.5369694991
max q is  67.570104119
R is  1.0
new_q is: 67.572712857
old q is  67.570104119
max q is  67.572712857
R is  1.0
new_q is: 67.6027922799
old q is  67.572712857
max q is  67.6027922799
R is  1.0
new_q is: 67.608118007
old q is  67.6027922799
max q is  67.608118007
R is  1.0
new_q is: 67.6357167346
old q is  67.608118007
max q is  67.6357167346
R is  1.0
new_q is: 67.643242163
ol

old q is  68.019796511
max q is  68.0555810146
R is  1.0
new_q is: 68.0553193803
old q is  68.0555810146
max q is  68.0553193803
R is  1.0
new_q is: 68.0874995318
old q is  68.0553193803
max q is  68.0874995318
R is  1.0
new_q is: 68.090449896
old q is  68.0874995318
max q is  68.090449896
R is  1.0
new_q is: 68.1197041183
old q is  68.090449896
max q is  68.1197041183
R is  1.0
new_q is: 68.1252556141
old q is  68.1197041183
max q is  68.1252556141
R is  1.0
new_q is: 68.1521340123
old q is  68.1252556141
max q is  68.1521340123
R is  1.0
new_q is: 68.1597913199
old q is  68.1521340123
max q is  68.1521340123
R is  1.0
new_q is: 68.1839818783
old q is  68.1839818783
max q is  68.1597913199
R is  1.0
new_q is: 68.2134030311
old q is  68.1597913199
max q is  68.2134030311
R is  1.0
new_q is: 68.196939088
old q is  68.2134030311
max q is  68.196939088
R is  1.0
new_q is: 68.2435596977
old q is  68.196939088
max q is  68.2435596977
R is  1.0
new_q is: 68.2333575893
old q is  68.2435596977

new_q is: 54.998998456
old q is  54.9736192992
max q is  54.998998456
R is  1.0
new_q is: 55.0211582164
old q is  54.998998456
max q is  55.0211582164
R is  1.0
new_q is: 55.0461932738
old q is  55.0211582164
max q is  55.0461932738
R is  1.0
new_q is: 55.0686155289
old q is  55.0461932738
max q is  55.0686155289
R is  1.0
new_q is: 55.0933668838
old q is  55.0686155289
max q is  55.0933668838
R is  1.0
new_q is: 55.1159972975
old q is  55.0933668838
max q is  55.1159972975
R is  1.0
new_q is: 55.1405139279
old q is  55.1159972975
max q is  55.1405139279
R is  1.0
new_q is: 55.1633084466
old q is  55.1405139279
max q is  55.1405139279
R is  1.0
new_q is: 55.1853734139
old q is  55.1853734139
max q is  20.1995026946
R is  1.0
new_q is: 51.7665868393
old q is  20.1995026946
max q is  24.9989928593
R is  1.0
new_q is: 20.7544527182
old q is  24.9989928593
max q is  20.7544527182
R is  1.0
new_q is: 24.6537843924
old q is  20.7544527182
max q is  24.6537843924
R is  1.0
new_q is: 21.219732

max q is  69.1324103372
R is  1.0
new_q is: 69.1788916476
old q is  69.1324103372
max q is  69.1788916476
R is  1.0
new_q is: 69.1678795766
old q is  69.1788916476
max q is  69.1678795766
R is  1.0
new_q is: 69.2086225609
old q is  69.1678795766
max q is  69.2086225609
R is  1.0
new_q is: 69.2027452524
old q is  69.2086225609
max q is  69.2027452524
R is  1.0
new_q is: 69.2388320848
old q is  69.2027452524
max q is  69.2388320848
R is  1.0
new_q is: 69.2371151036
old q is  69.2388320848
max q is  69.2371151036
R is  1.0
new_q is: 69.2694232716
old q is  69.2371151036
max q is  69.2694232716
R is  1.0
new_q is: 69.2710764971
old q is  69.2694232716
max q is  69.2710764971
R is  1.0
new_q is: 69.3003175177
old q is  69.2710764971
max q is  69.3003175177
R is  1.0
new_q is: 69.3047002817
old q is  69.3003175177
max q is  69.3047002817
R is  1.0
new_q is: 69.3314510938
old q is  69.3047002817
max q is  69.3314510938
R is  1.0
new_q is: 69.3380439118
old q is  69.3314510938
max q is  69.331

R is  1.0
new_q is: 71.1120265602
old q is  71.0727862492
max q is  71.1120265602
R is  1.0
new_q is: 71.1055982538
old q is  71.1120265602
max q is  71.1055982538
R is  1.0
new_q is: 71.1402781313
old q is  71.1055982538
max q is  71.1402781313
R is  1.0
new_q is: 71.1379259634
old q is  71.1402781313
max q is  71.1379259634
R is  1.0
new_q is: 71.1689049885
old q is  71.1379259634
max q is  71.1689049885
R is  1.0
new_q is: 71.1698549609
old q is  71.1689049885
max q is  71.1698549609
R is  1.0
new_q is: 71.1978301308
old q is  71.1698549609
max q is  71.1978301308
R is  1.0
new_q is: 71.2014546478
old q is  71.1978301308
max q is  71.1978301308
R is  1.0
new_q is: 71.2266323007
old q is  71.2266323007
max q is  71.2014546478
R is  1.0
new_q is: 71.2529130808
old q is  71.2014546478
max q is  71.2529130808
R is  1.0
new_q is: 71.235347578
old q is  71.2529130808
max q is  71.235347578
R is  1.0
new_q is: 71.2799211829
old q is  71.235347578
max q is  71.2799211829
R is  1.0
new_q is:

R is  1.0
new_q is: 67.3996903975
old q is  67.3823588355
max q is  67.3996903975
R is  1.0
new_q is: 67.4166923013
old q is  67.3996903975
max q is  67.4166923013
R is  1.0
new_q is: 67.4339738956
old q is  67.4166923013
max q is  67.4166923013
R is  1.0
new_q is: 67.449275609
old q is  67.449275609
max q is  67.4339738956
R is  1.0
new_q is: 67.4803114637
old q is  67.4339738956
max q is  67.4803114637
R is  1.0
new_q is: 67.4711273409
old q is  67.4803114637
max q is  67.4711273409
R is  1.0
new_q is: 67.5119219241
old q is  67.4711273409
max q is  67.5119219241
R is  1.0
new_q is: 67.5076948773
old q is  67.5119219241
max q is  67.5076948773
R is  1.0
new_q is: 67.5439915245
old q is  67.5076948773
max q is  67.5439915245
R is  1.0
new_q is: 67.5437805505
old q is  67.5439915245
max q is  67.5437805505
R is  1.0
new_q is: 67.5764266466
old q is  67.5437805505
max q is  67.5764266466
R is  1.0
new_q is: 67.5794687335
old q is  67.5764266466
max q is  67.5794687335
R is  1.0
new_q is

In [7]:
qlearner.run()

old q is  83.80023171
max q is  82.6958560084
R is  1.0
new_q is: 83.7070982838
old q is  82.6958560084
max q is  83.7070982838
R is  1.0
new_q is: 82.8132731377
old q is  83.7070982838
max q is  82.8132731377
R is  1.0
new_q is: 83.6349024961
old q is  82.8132731377
max q is  83.6349024961
R is  1.0
new_q is: 82.911801171
old q is  83.6349024961
max q is  82.911801171
R is  1.0
new_q is: 83.5796805624
old q is  82.911801171
max q is  83.5796805624
R is  1.0
new_q is: 82.9950094296
old q is  83.5796805624
max q is  82.9950094296
R is  1.0
new_q is: 83.5382184397
old q is  82.9950094296
max q is  83.5382184397
R is  1.0
new_q is: 83.0657921122
old q is  83.5382184397
max q is  83.0657921122
R is  1.0
new_q is: 83.5079100148
old q is  83.0657921122
max q is  83.5079100148
R is  1.0
new_q is: 83.1264959924
old q is  83.5079100148
max q is  83.1264959924
R is  1.0
new_q is: 83.4866421166
old q is  83.1264959924
max q is  83.4866421166
R is  1.0
new_q is: 83.1790239627
old q is  83.48664211

In [2]:
learner = QLearner(env)
learner.train()
learner.run()

('Total Reward: ', 14.0)
