In [1]:
import tensorflow as tf
import numpy as np
from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
import matplotlib.pyplot as plt
import time
import os

In [28]:
# TODO: make env parent class
class env2048:  
    def __init__(self):
        self.driver = webdriver.Firefox()
        self.driver.implicitly_wait(3)
        self.driver.get('https://play2048.co')
        self.action_map = {0:'up', 1:'right', 2:'down', 3:'left'}
        
        # setup score memory for finding rewards
        self.score = 0
    
    # for this project, input will never be of magnitude 0 so I don't do a divide by 0 check
    def normalize(self, data):
        norm = np.linalg.norm(data)
        return data/norm
    
    def get_state(self, normalized = True):
        successful = False
        while not successful:
            successful = True
            tiles = self.driver.find_element_by_class_name('tile-container')
            values = [0]*16
            for tile in tiles.find_elements_by_tag_name('div'):
                className = tile.get_attribute('class')
                # lazy method of avoiding the tile-inner class
                if len(className) > 20:
                    # must do this to isolate the string w/ pos information
                    tile_pos = className.split(' ')[2]
                    # use indexes since size of string won't change so this is fast
                    x = int(tile_pos[14]) - 1
                    y = int(tile_pos[16]) - 1
                    index = 4*y + x
                    if tile.text != '':
                        values[index] = int(tile.text)
                    else:
                        # if here that means page hadn't fully updated
                        successful = False
        if normalized:
            return self.normalize(np.array([values]))
        else:
            return np.array([values])
    
    # while there is a score-addition tag that could make this more efficient
    # there were some issues where at times the text wouldn't be read properly
    # therefore I just manually calculate score addition for more consistent data
    def get_reward(self):
        useHighestBlock = False
        self.board_sum = True
        if useHighestBlock:
            return np.max(self.get_state(normalized = False))
        elif self.board_sum:
            return np.sum(self.get_state(normalized = False))
        else:
            r = self.driver.find_element_by_class_name('score-container')
            # do the split in case the score addition text is in the element
            score = int(r.text.split('\n')[0])
            r = score - self.score
            self.score = score
            return r
    
    def get_done(self):
        status = self.driver.find_element_by_class_name('game-message')
        return status.text != ""
    
    def take_action(self, action):
        body = self.driver.find_element_by_tag_name('body')
        if action == 0:
            body.send_keys(u'\ue013')
        elif action == 1:
            body.send_keys(u'\ue014')
        elif action == 2:
            body.send_keys(u'\ue015')
        elif action == 3:
            body.send_keys(u'\ue012')
        
        time.sleep(.1)
        
        return self.get_state(), self.get_reward(), self.get_done()
    
    def __del__(self):
        self.driver.close()

In [21]:
class DqnAgent(tf.keras.Model):
    def __init__(self, input_space_n, action_space_n):
        # setup model
        super(DqnAgent, self).__init__()
        self.INPUT_N = input_space_n
        self.ACTION_N = action_space_n
        self.h1 = tf.keras.layers.Dense(64, input_shape = (input_space_n,))
        self.prediction = tf.keras.layers.Dense(self.ACTION_N,  activation = tf.keras.activations.linear)
        
        # setup loss function
        self.loss = tf.keras.losses.MeanSquaredError()
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=.01)
    
    def call(self, x):
        # forward propogation
        x = self.h1(x)
        return self.prediction(x)
    
    
# experiences is numpy list s.t.
# 0 = state
# 1 = action
# 2 = next_state
# 3 = reward
# 4 = done


def train_step(model, experiences, repeat = 3, debug = False):
    DISCOUNT = .9
    if debug:
        print('experiences shape: ', experiences.shape)
    # get shapes to right shape
    states = np.array([e[0].reshape(16,) for e in experiences])
    
    # get q_vals/rewards setup and correct shape
    rewards = np.zeros((len(states) , 4))
    for i in range(states.shape[0]):
        e = experiences[i]
        if e[3] > 0:
            rewards[i,e[1]] = e[3]
        else:
            rewards[i,e[1]] = 0
        if not e[4] == True:
            rewards[i] += DISCOUNT * model(np.array([states[i]]).reshape(-1, 16))
    
    # normalize the rewards
    rewards = np.array(rewards)
    rewards = (rewards - np.mean(rewards, axis=0))/np.std(rewards, axis=0)
    print('mean of rewards, ', np.mean(rewards))
    
    # get sample
    batch_ratio = .1
    for r in range(repeat):
        if r % 10 == 0:
            print('iteration: ', r)
        batch_size = int(experiences.shape[0]*batch_ratio)
        select = np.random.randint(experiences.shape[0], size = batch_size)
        trainX = np.array(states[select]).reshape(-1, 16,)
        trainY = np.array(rewards[select]).reshape(-1, 4,)
        if debug:
            print('TrainX shape: ', trainX.shape)
            print('TrainY mean', np.mean(trainY))
            print('TrainY shape: ', trainY.shape)
            print(select[:10])
        with tf.GradientTape() as tape:
            predict = model(trainX)
            loss = model.loss(trainY, predict)
            if r % 10 == 0:
                print('Iteration', r, ', the loss is currently', loss.numpy())
        gradients = tape.gradient(loss, model.trainable_variables)
        model.optimizer.apply_gradients(zip(gradients, model.trainable_variables))

model = DqnAgent(16, 4)

In [22]:
e_greedy = False
def get_action(state):
    q_vals = model(state)[0]
    exp = np.exp(q_vals)
    softmax_vals = exp/np.sum(exp)
    #print(softmax_vals)
    EPSILON = .15
    if e_greedy:
        choice = np.random.choice([0,1], p=[EPSILON, 1- EPSILON])
        if choice == 1:
            try:
                return np.argmax(q_vals, axis = 0)
            except:
                print(softmax_vals)
                print(q_vals)
        else:
            return np.random.choice(4, p = [.25]*4)
    else:
        return np.random.choice(4, p= softmax_vals)

In [5]:
memo = []

In [6]:
scores = []

In [30]:
# train model
env = env2048()
num_episodes = 600
for episode in range(num_episodes):
    done = False
    state = env.get_state()
    experiences = []
    while not done:
        action = get_action(state)

        next_state, reward, done = env.take_action(action)

        experience = [state, action, next_state, reward, done]
        experiences.append(experience)
        state = next_state
     
    # cleanup experience
    scores.append(env.score)
    reward = env.get_reward()
    for experience in experiences:
        # punish uselss moves
        if (experience[0] == experience[2]).all():
            experience[3] = 0
        if env.board_sum: # give all experiences in this episode a rewards that is equal to sum of board
            experience[3] = reward
    
    # add experiences to memory
    memo.extend(experiences)
    data = np.array(memo)
    
    # occasionally train model on some of the collected data
    if (1+episode) % 5 == 0:
        train_step(model, data, repeat= 100)
        print(episode, 'th training episode')
    env.driver.find_element_by_class_name('restart-button').click()


mean of rewards,  2.5386270159162802e-15
iteration:  0
Iteration 0 , the loss is currently 1.0663928985595703
iteration:  10
Iteration 10 , the loss is currently 1.0371688604354858
iteration:  20
Iteration 20 , the loss is currently 1.032721757888794
iteration:  30
Iteration 30 , the loss is currently 1.0566681623458862
iteration:  40
Iteration 40 , the loss is currently 1.0347036123275757
iteration:  50
Iteration 50 , the loss is currently 1.035218358039856
iteration:  60
Iteration 60 , the loss is currently 1.0203419923782349
iteration:  70
Iteration 70 , the loss is currently 1.0382144451141357
iteration:  80
Iteration 80 , the loss is currently 1.0065908432006836
iteration:  90
Iteration 90 , the loss is currently 1.017730951309204
4 th training episode
mean of rewards,  -4.458467550436599e-15
iteration:  0
Iteration 0 , the loss is currently 1.0259482860565186
iteration:  10
Iteration 10 , the loss is currently 1.0323914289474487
iteration:  20
Iteration 20 , the loss is currently

iteration:  90
Iteration 90 , the loss is currently 1.019686222076416
54 th training episode
mean of rewards,  2.056034040826524e-15
iteration:  0
Iteration 0 , the loss is currently 1.0004160404205322
iteration:  10
Iteration 10 , the loss is currently 1.0028302669525146
iteration:  20
Iteration 20 , the loss is currently 1.0056594610214233
iteration:  30
Iteration 30 , the loss is currently 1.0061280727386475
iteration:  40
Iteration 40 , the loss is currently 1.0041742324829102
iteration:  50
Iteration 50 , the loss is currently 0.9975989460945129
iteration:  60
Iteration 60 , the loss is currently 1.0265319347381592
iteration:  70
Iteration 70 , the loss is currently 0.9889385104179382
iteration:  80
Iteration 80 , the loss is currently 1.0115809440612793
iteration:  90
Iteration 90 , the loss is currently 1.02306067943573
59 th training episode


W0817 01:08:29.523164 140228307916608 connectionpool.py:662] Retrying (Retry(total=2, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f89323e6cf8>: Failed to establish a new connection: [Errno 111] Connection refused')': /session/73c3bbb3-59df-4edc-b48c-9fd1d407c9c0/window
W0817 01:08:29.524304 140228307916608 connectionpool.py:662] Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f893033d9b0>: Failed to establish a new connection: [Errno 111] Connection refused')': /session/73c3bbb3-59df-4edc-b48c-9fd1d407c9c0/window
W0817 01:08:29.525345 140228307916608 connectionpool.py:662] Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f89281019e8>

W0817 01:08:29.554677 140228307916608 connectionpool.py:662] Retrying (Retry(total=1, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f8928110c50>: Failed to establish a new connection: [Errno 111] Connection refused')': /session/20a3da34-5837-4d9c-a04a-006b6d0ef6d5/window
W0817 01:08:29.555813 140228307916608 connectionpool.py:662] Retrying (Retry(total=0, connect=None, read=None, redirect=None, status=None)) after connection broken by 'NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f8928110a58>: Failed to establish a new connection: [Errno 111] Connection refused')': /session/20a3da34-5837-4d9c-a04a-006b6d0ef6d5/window
Exception ignored in: <function env2048.__del__ at 0x7f89284aeea0>
Traceback (most recent call last):
  File "<ipython-input-20-8a21c0e81e54>", line 81, in __del__
  File "/home/brett/anaconda3/lib/python3.7/site-packages/selenium/webdriver/remote/

mean of rewards,  1.7115428196039562e-15
iteration:  0
Iteration 0 , the loss is currently 1.0146466493606567
iteration:  10
Iteration 10 , the loss is currently 0.9840714931488037
iteration:  20
Iteration 20 , the loss is currently 0.9886392951011658
iteration:  30
Iteration 30 , the loss is currently 1.003871202468872
iteration:  40
Iteration 40 , the loss is currently 0.9963464140892029
iteration:  50
Iteration 50 , the loss is currently 1.009663701057434
iteration:  60
Iteration 60 , the loss is currently 0.9877459406852722
iteration:  70
Iteration 70 , the loss is currently 0.9986816644668579
iteration:  80
Iteration 80 , the loss is currently 0.9916666746139526
iteration:  90
Iteration 90 , the loss is currently 1.00174081325531
64 th training episode
mean of rewards,  3.111017559380731e-16
iteration:  0
Iteration 0 , the loss is currently 0.9961923360824585
iteration:  10
Iteration 10 , the loss is currently 0.9882507920265198
iteration:  20
Iteration 20 , the loss is currently 

iteration:  90
Iteration 90 , the loss is currently 0.9874922037124634
114 th training episode
mean of rewards,  1.6151352635817836e-15
iteration:  0
Iteration 0 , the loss is currently 0.9853821992874146
iteration:  10
Iteration 10 , the loss is currently 1.0109453201293945
iteration:  20
Iteration 20 , the loss is currently 0.9852930307388306
iteration:  30
Iteration 30 , the loss is currently 0.9902034401893616
iteration:  40
Iteration 40 , the loss is currently 0.983303427696228
iteration:  50
Iteration 50 , the loss is currently 1.0007520914077759
iteration:  60
Iteration 60 , the loss is currently 0.990890622138977
iteration:  70
Iteration 70 , the loss is currently 0.9988718628883362
iteration:  80
Iteration 80 , the loss is currently 0.999021053314209
iteration:  90
Iteration 90 , the loss is currently 0.9928194284439087
119 th training episode
mean of rewards,  -2.199882733779365e-15
iteration:  0
Iteration 0 , the loss is currently 0.9998800754547119
iteration:  10
Iteration 

StaleElementReferenceException: Message: The element reference of <div class="tile tile-8 tile-position-3-2"> is stale; either the element is no longer attached to the DOM, it is not in the current frame context, or the document has been refreshed


In [17]:
train_step(model, data, repeat= 300)
print('done training')

mean of rewards,  -3.917610456673311e-16
iteration:  0
Iteration 0 , the loss is currently 0.97614586353302
iteration:  10
Iteration 10 , the loss is currently 0.9879736304283142
iteration:  20
Iteration 20 , the loss is currently 0.9930416345596313
iteration:  30
Iteration 30 , the loss is currently 0.9863708019256592
iteration:  40
Iteration 40 , the loss is currently 0.977749228477478
iteration:  50
Iteration 50 , the loss is currently 0.9935055375099182
iteration:  60
Iteration 60 , the loss is currently 0.982075572013855
iteration:  70
Iteration 70 , the loss is currently 0.9909610748291016
iteration:  80
Iteration 80 , the loss is currently 0.9928528070449829
iteration:  90
Iteration 90 , the loss is currently 0.9759883880615234
iteration:  100
Iteration 100 , the loss is currently 1.0141304731369019
iteration:  110
Iteration 110 , the loss is currently 0.9921926856040955
iteration:  120
Iteration 120 , the loss is currently 0.9871148467063904
iteration:  130
Iteration 130 , the 

iteration:  1140
Iteration 1140 , the loss is currently 0.9995238780975342
iteration:  1150
Iteration 1150 , the loss is currently 1.0040082931518555
iteration:  1160
Iteration 1160 , the loss is currently 0.9946731328964233
iteration:  1170
Iteration 1170 , the loss is currently 0.9765210747718811
iteration:  1180
Iteration 1180 , the loss is currently 0.9822522401809692
iteration:  1190
Iteration 1190 , the loss is currently 0.9834169745445251
iteration:  1200
Iteration 1200 , the loss is currently 1.0011552572250366
iteration:  1210
Iteration 1210 , the loss is currently 0.9737172722816467
iteration:  1220
Iteration 1220 , the loss is currently 0.9755123853683472
iteration:  1230
Iteration 1230 , the loss is currently 0.98545902967453
iteration:  1240
Iteration 1240 , the loss is currently 0.9850686192512512
iteration:  1250
Iteration 1250 , the loss is currently 0.9848220944404602
iteration:  1260
Iteration 1260 , the loss is currently 1.0045324563980103
iteration:  1270
Iteration 

iteration:  2250
Iteration 2250 , the loss is currently 1.0005340576171875
iteration:  2260
Iteration 2260 , the loss is currently 0.9980844855308533
iteration:  2270
Iteration 2270 , the loss is currently 1.0030970573425293
iteration:  2280
Iteration 2280 , the loss is currently 0.9952865839004517
iteration:  2290
Iteration 2290 , the loss is currently 0.9883131384849548
iteration:  2300
Iteration 2300 , the loss is currently 0.9885216355323792
iteration:  2310
Iteration 2310 , the loss is currently 0.9821539521217346
iteration:  2320
Iteration 2320 , the loss is currently 0.9952273368835449
iteration:  2330
Iteration 2330 , the loss is currently 1.0068597793579102
iteration:  2340
Iteration 2340 , the loss is currently 1.0064204931259155
iteration:  2350
Iteration 2350 , the loss is currently 0.986929714679718
iteration:  2360
Iteration 2360 , the loss is currently 0.9917290210723877
iteration:  2370
Iteration 2370 , the loss is currently 0.9792357087135315
iteration:  2380
Iteration

iteration:  3360
Iteration 3360 , the loss is currently 0.9928527474403381
iteration:  3370
Iteration 3370 , the loss is currently 0.9926616549491882
iteration:  3380
Iteration 3380 , the loss is currently 0.9823941588401794
iteration:  3390
Iteration 3390 , the loss is currently 0.9912351965904236
iteration:  3400
Iteration 3400 , the loss is currently 1.0117264986038208
iteration:  3410
Iteration 3410 , the loss is currently 0.9964115023612976
iteration:  3420
Iteration 3420 , the loss is currently 0.9906454682350159
iteration:  3430
Iteration 3430 , the loss is currently 1.0032330751419067
iteration:  3440
Iteration 3440 , the loss is currently 0.9999409317970276
iteration:  3450
Iteration 3450 , the loss is currently 0.993975043296814
iteration:  3460
Iteration 3460 , the loss is currently 0.9714041352272034
iteration:  3470
Iteration 3470 , the loss is currently 0.9740735292434692
iteration:  3480
Iteration 3480 , the loss is currently 1.0092452764511108
iteration:  3490
Iteration

iteration:  4470
Iteration 4470 , the loss is currently 1.0195415019989014
iteration:  4480
Iteration 4480 , the loss is currently 0.9794303178787231
iteration:  4490
Iteration 4490 , the loss is currently 0.9861177206039429
iteration:  4500
Iteration 4500 , the loss is currently 0.9904816746711731
iteration:  4510
Iteration 4510 , the loss is currently 0.990121066570282
iteration:  4520
Iteration 4520 , the loss is currently 0.9908421635627747
iteration:  4530
Iteration 4530 , the loss is currently 0.9721967577934265
iteration:  4540
Iteration 4540 , the loss is currently 1.0068144798278809
iteration:  4550
Iteration 4550 , the loss is currently 1.0056573152542114
iteration:  4560
Iteration 4560 , the loss is currently 0.992931067943573
iteration:  4570
Iteration 4570 , the loss is currently 0.9811499118804932
iteration:  4580
Iteration 4580 , the loss is currently 0.9841657280921936
iteration:  4590
Iteration 4590 , the loss is currently 1.0058313608169556
iteration:  4600
Iteration 

iteration:  5580
Iteration 5580 , the loss is currently 0.9906922578811646
iteration:  5590
Iteration 5590 , the loss is currently 1.0025815963745117
iteration:  5600
Iteration 5600 , the loss is currently 0.9916584491729736
iteration:  5610
Iteration 5610 , the loss is currently 0.9889116287231445
iteration:  5620
Iteration 5620 , the loss is currently 0.982477605342865
iteration:  5630
Iteration 5630 , the loss is currently 0.9914987087249756
iteration:  5640
Iteration 5640 , the loss is currently 0.9838247895240784
iteration:  5650
Iteration 5650 , the loss is currently 0.9825847744941711
iteration:  5660
Iteration 5660 , the loss is currently 0.9846867322921753
iteration:  5670
Iteration 5670 , the loss is currently 0.9918268918991089
iteration:  5680
Iteration 5680 , the loss is currently 0.985223650932312
iteration:  5690
Iteration 5690 , the loss is currently 0.9902732968330383
iteration:  5700
Iteration 5700 , the loss is currently 0.9735593199729919
iteration:  5710
Iteration 

iteration:  6690
Iteration 6690 , the loss is currently 1.0074599981307983
iteration:  6700
Iteration 6700 , the loss is currently 1.0046104192733765
iteration:  6710
Iteration 6710 , the loss is currently 0.9921020269393921
iteration:  6720
Iteration 6720 , the loss is currently 0.9868764281272888
iteration:  6730
Iteration 6730 , the loss is currently 0.9858216047286987
iteration:  6740
Iteration 6740 , the loss is currently 0.9833104014396667
iteration:  6750
Iteration 6750 , the loss is currently 0.9979032874107361
iteration:  6760
Iteration 6760 , the loss is currently 1.0127909183502197
iteration:  6770
Iteration 6770 , the loss is currently 1.0001381635665894
iteration:  6780
Iteration 6780 , the loss is currently 0.9947106838226318
iteration:  6790
Iteration 6790 , the loss is currently 0.9831426739692688
iteration:  6800
Iteration 6800 , the loss is currently 0.9838276505470276
iteration:  6810
Iteration 6810 , the loss is currently 1.0028942823410034
iteration:  6820
Iteratio

iteration:  7800
Iteration 7800 , the loss is currently 0.9724952578544617
iteration:  7810
Iteration 7810 , the loss is currently 0.9944964051246643
iteration:  7820
Iteration 7820 , the loss is currently 0.9892306327819824
iteration:  7830
Iteration 7830 , the loss is currently 0.9857139587402344
iteration:  7840
Iteration 7840 , the loss is currently 1.0018149614334106
iteration:  7850
Iteration 7850 , the loss is currently 0.997289776802063
iteration:  7860
Iteration 7860 , the loss is currently 0.994086503982544
iteration:  7870
Iteration 7870 , the loss is currently 1.0099509954452515
iteration:  7880
Iteration 7880 , the loss is currently 0.9739608764648438
iteration:  7890
Iteration 7890 , the loss is currently 0.9926446676254272
iteration:  7900
Iteration 7900 , the loss is currently 0.9687709212303162
iteration:  7910
Iteration 7910 , the loss is currently 0.9847828149795532
iteration:  7920
Iteration 7920 , the loss is currently 0.9779659509658813
iteration:  7930
Iteration 

iteration:  8910
Iteration 8910 , the loss is currently 0.997951090335846
iteration:  8920
Iteration 8920 , the loss is currently 1.0111066102981567
iteration:  8930
Iteration 8930 , the loss is currently 0.9950307011604309
iteration:  8940
Iteration 8940 , the loss is currently 0.9838929176330566
iteration:  8950
Iteration 8950 , the loss is currently 0.9857507944107056
iteration:  8960
Iteration 8960 , the loss is currently 1.0230776071548462
iteration:  8970
Iteration 8970 , the loss is currently 0.986275851726532
iteration:  8980
Iteration 8980 , the loss is currently 0.9959891438484192
iteration:  8990
Iteration 8990 , the loss is currently 0.98146653175354
iteration:  9000
Iteration 9000 , the loss is currently 1.0016330480575562
iteration:  9010
Iteration 9010 , the loss is currently 0.985580563545227
iteration:  9020
Iteration 9020 , the loss is currently 0.9920894503593445
iteration:  9030
Iteration 9030 , the loss is currently 0.9892982840538025
iteration:  9040
Iteration 904

iteration:  10020
Iteration 10020 , the loss is currently 0.9777771234512329
iteration:  10030
Iteration 10030 , the loss is currently 0.9861372709274292
iteration:  10040
Iteration 10040 , the loss is currently 0.9717156887054443
iteration:  10050
Iteration 10050 , the loss is currently 1.0182220935821533
iteration:  10060
Iteration 10060 , the loss is currently 0.998673677444458
iteration:  10070
Iteration 10070 , the loss is currently 1.0169265270233154
iteration:  10080
Iteration 10080 , the loss is currently 0.9837365746498108
iteration:  10090
Iteration 10090 , the loss is currently 0.9962963461875916
iteration:  10100
Iteration 10100 , the loss is currently 0.9900927543640137
iteration:  10110
Iteration 10110 , the loss is currently 0.9956473708152771
iteration:  10120
Iteration 10120 , the loss is currently 0.9796669483184814
iteration:  10130
Iteration 10130 , the loss is currently 0.9772153496742249
iteration:  10140
Iteration 10140 , the loss is currently 0.9668422341346741


iteration:  11100
Iteration 11100 , the loss is currently 0.9866983890533447
iteration:  11110
Iteration 11110 , the loss is currently 0.9720643758773804
iteration:  11120
Iteration 11120 , the loss is currently 0.9827709197998047
iteration:  11130
Iteration 11130 , the loss is currently 0.9870912432670593
iteration:  11140
Iteration 11140 , the loss is currently 0.9874051213264465
iteration:  11150
Iteration 11150 , the loss is currently 0.9844852685928345
iteration:  11160
Iteration 11160 , the loss is currently 0.9901267886161804
iteration:  11170
Iteration 11170 , the loss is currently 0.9731242656707764
iteration:  11180
Iteration 11180 , the loss is currently 0.9964451193809509
iteration:  11190
Iteration 11190 , the loss is currently 0.9895825982093811
iteration:  11200
Iteration 11200 , the loss is currently 1.0048234462738037
iteration:  11210
Iteration 11210 , the loss is currently 0.9921010136604309
iteration:  11220
Iteration 11220 , the loss is currently 0.9872345924377441

iteration:  12180
Iteration 12180 , the loss is currently 0.9854208827018738
iteration:  12190
Iteration 12190 , the loss is currently 0.9881625771522522
iteration:  12200
Iteration 12200 , the loss is currently 1.0098131895065308
iteration:  12210
Iteration 12210 , the loss is currently 0.9974445700645447
iteration:  12220
Iteration 12220 , the loss is currently 1.0076849460601807
iteration:  12230
Iteration 12230 , the loss is currently 0.9905790686607361
iteration:  12240
Iteration 12240 , the loss is currently 0.9801363945007324
iteration:  12250
Iteration 12250 , the loss is currently 0.9818775057792664
iteration:  12260
Iteration 12260 , the loss is currently 0.9927117824554443
iteration:  12270
Iteration 12270 , the loss is currently 0.9812253713607788
iteration:  12280
Iteration 12280 , the loss is currently 0.988093376159668
iteration:  12290
Iteration 12290 , the loss is currently 0.9739258885383606
iteration:  12300
Iteration 12300 , the loss is currently 0.9817134141921997


iteration:  13260
Iteration 13260 , the loss is currently 0.9883837699890137
iteration:  13270
Iteration 13270 , the loss is currently 1.004470944404602
iteration:  13280
Iteration 13280 , the loss is currently 0.9823307991027832
iteration:  13290
Iteration 13290 , the loss is currently 0.9938676953315735
iteration:  13300
Iteration 13300 , the loss is currently 0.9961507320404053
iteration:  13310
Iteration 13310 , the loss is currently 1.0095739364624023
iteration:  13320
Iteration 13320 , the loss is currently 0.980341374874115
iteration:  13330
Iteration 13330 , the loss is currently 0.9846641421318054
iteration:  13340
Iteration 13340 , the loss is currently 0.9807814955711365
iteration:  13350
Iteration 13350 , the loss is currently 0.9867941737174988
iteration:  13360
Iteration 13360 , the loss is currently 0.9836260676383972
iteration:  13370
Iteration 13370 , the loss is currently 0.9863780736923218
iteration:  13380
Iteration 13380 , the loss is currently 1.0041435956954956
i

iteration:  14340
Iteration 14340 , the loss is currently 1.0035208463668823
iteration:  14350
Iteration 14350 , the loss is currently 0.9758137464523315
iteration:  14360
Iteration 14360 , the loss is currently 0.9985358715057373
iteration:  14370
Iteration 14370 , the loss is currently 0.9768197536468506
iteration:  14380
Iteration 14380 , the loss is currently 0.9839121103286743
iteration:  14390
Iteration 14390 , the loss is currently 0.9799050092697144
iteration:  14400
Iteration 14400 , the loss is currently 0.9947698712348938
iteration:  14410
Iteration 14410 , the loss is currently 0.9813967347145081
iteration:  14420
Iteration 14420 , the loss is currently 0.9595072269439697
iteration:  14430
Iteration 14430 , the loss is currently 0.990152895450592
iteration:  14440
Iteration 14440 , the loss is currently 0.9832794070243835
iteration:  14450
Iteration 14450 , the loss is currently 0.9885333776473999
iteration:  14460
Iteration 14460 , the loss is currently 0.993457555770874
i

iteration:  15420
Iteration 15420 , the loss is currently 0.9711156487464905
iteration:  15430
Iteration 15430 , the loss is currently 0.9842302799224854
iteration:  15440
Iteration 15440 , the loss is currently 1.0150657892227173
iteration:  15450
Iteration 15450 , the loss is currently 0.987668514251709
iteration:  15460
Iteration 15460 , the loss is currently 0.9742649793624878
iteration:  15470
Iteration 15470 , the loss is currently 0.999855637550354
iteration:  15480
Iteration 15480 , the loss is currently 0.9847425222396851
iteration:  15490
Iteration 15490 , the loss is currently 0.976605236530304
iteration:  15500
Iteration 15500 , the loss is currently 0.9899902939796448
iteration:  15510
Iteration 15510 , the loss is currently 0.9950124621391296
iteration:  15520
Iteration 15520 , the loss is currently 0.996405839920044
iteration:  15530
Iteration 15530 , the loss is currently 0.9855204224586487
iteration:  15540
Iteration 15540 , the loss is currently 0.9989834427833557
ite

iteration:  16500
Iteration 16500 , the loss is currently 0.9915052652359009
iteration:  16510
Iteration 16510 , the loss is currently 0.9886942505836487
iteration:  16520
Iteration 16520 , the loss is currently 1.0021309852600098
iteration:  16530
Iteration 16530 , the loss is currently 0.990723729133606
iteration:  16540
Iteration 16540 , the loss is currently 0.994973361492157
iteration:  16550
Iteration 16550 , the loss is currently 0.9829904437065125
iteration:  16560
Iteration 16560 , the loss is currently 0.9905364513397217
iteration:  16570
Iteration 16570 , the loss is currently 0.9805038571357727
iteration:  16580
Iteration 16580 , the loss is currently 0.9977240562438965
iteration:  16590
Iteration 16590 , the loss is currently 0.9895977973937988
iteration:  16600
Iteration 16600 , the loss is currently 0.9938777685165405
iteration:  16610
Iteration 16610 , the loss is currently 1.0064867734909058
iteration:  16620
Iteration 16620 , the loss is currently 0.9919673204421997
i

iteration:  17580
Iteration 17580 , the loss is currently 1.0013272762298584
iteration:  17590
Iteration 17590 , the loss is currently 1.0137308835983276
iteration:  17600
Iteration 17600 , the loss is currently 0.9986933469772339
iteration:  17610
Iteration 17610 , the loss is currently 0.989320695400238
iteration:  17620
Iteration 17620 , the loss is currently 0.9832044243812561
iteration:  17630
Iteration 17630 , the loss is currently 0.9716085195541382
iteration:  17640
Iteration 17640 , the loss is currently 0.9749566316604614
iteration:  17650
Iteration 17650 , the loss is currently 0.9925912022590637
iteration:  17660
Iteration 17660 , the loss is currently 0.9887502789497375
iteration:  17670
Iteration 17670 , the loss is currently 0.9663140773773193
iteration:  17680
Iteration 17680 , the loss is currently 1.005567193031311
iteration:  17690
Iteration 17690 , the loss is currently 0.9854892492294312
iteration:  17700
Iteration 17700 , the loss is currently 0.9780148267745972
i

iteration:  18660
Iteration 18660 , the loss is currently 1.027497410774231
iteration:  18670
Iteration 18670 , the loss is currently 0.9680482149124146
iteration:  18680
Iteration 18680 , the loss is currently 0.9893553256988525
iteration:  18690
Iteration 18690 , the loss is currently 0.9779942035675049
iteration:  18700
Iteration 18700 , the loss is currently 0.9983576536178589
iteration:  18710
Iteration 18710 , the loss is currently 0.9840375781059265
iteration:  18720
Iteration 18720 , the loss is currently 0.9965671896934509
iteration:  18730
Iteration 18730 , the loss is currently 0.996973991394043
iteration:  18740
Iteration 18740 , the loss is currently 0.9904482364654541
iteration:  18750
Iteration 18750 , the loss is currently 0.9989230036735535
iteration:  18760
Iteration 18760 , the loss is currently 0.9830607175827026
iteration:  18770
Iteration 18770 , the loss is currently 0.9883025288581848
iteration:  18780
Iteration 18780 , the loss is currently 0.9943391680717468
i

iteration:  19740
Iteration 19740 , the loss is currently 0.9920167326927185
iteration:  19750
Iteration 19750 , the loss is currently 1.0000993013381958
iteration:  19760
Iteration 19760 , the loss is currently 1.0095094442367554
iteration:  19770
Iteration 19770 , the loss is currently 0.9978790283203125
iteration:  19780
Iteration 19780 , the loss is currently 1.0032709836959839
iteration:  19790
Iteration 19790 , the loss is currently 1.0015290975570679
iteration:  19800
Iteration 19800 , the loss is currently 0.9785874485969543
iteration:  19810
Iteration 19810 , the loss is currently 0.9890326261520386
iteration:  19820
Iteration 19820 , the loss is currently 1.0049214363098145
iteration:  19830
Iteration 19830 , the loss is currently 0.9956891536712646
iteration:  19840
Iteration 19840 , the loss is currently 0.9907717108726501
iteration:  19850
Iteration 19850 , the loss is currently 0.9978715181350708
iteration:  19860
Iteration 19860 , the loss is currently 0.9768799543380737

iteration:  20820
Iteration 20820 , the loss is currently 0.9747292995452881
iteration:  20830
Iteration 20830 , the loss is currently 0.9927077889442444
iteration:  20840
Iteration 20840 , the loss is currently 0.9994415640830994
iteration:  20850
Iteration 20850 , the loss is currently 0.9967254400253296
iteration:  20860
Iteration 20860 , the loss is currently 1.0054774284362793
iteration:  20870
Iteration 20870 , the loss is currently 0.9871771335601807
iteration:  20880
Iteration 20880 , the loss is currently 0.9890139102935791
iteration:  20890
Iteration 20890 , the loss is currently 0.9846139550209045
iteration:  20900
Iteration 20900 , the loss is currently 0.9970303177833557
iteration:  20910
Iteration 20910 , the loss is currently 0.9765793681144714
iteration:  20920
Iteration 20920 , the loss is currently 1.017295479774475
iteration:  20930
Iteration 20930 , the loss is currently 0.9728692770004272
iteration:  20940
Iteration 20940 , the loss is currently 0.9865672588348389


iteration:  21900
Iteration 21900 , the loss is currently 0.9812270402908325
iteration:  21910
Iteration 21910 , the loss is currently 0.9998965263366699
iteration:  21920
Iteration 21920 , the loss is currently 0.998857855796814
iteration:  21930
Iteration 21930 , the loss is currently 0.9837761521339417
iteration:  21940
Iteration 21940 , the loss is currently 1.0084295272827148
iteration:  21950
Iteration 21950 , the loss is currently 0.9769117832183838
iteration:  21960
Iteration 21960 , the loss is currently 0.9796057939529419
iteration:  21970
Iteration 21970 , the loss is currently 0.9872303605079651
iteration:  21980
Iteration 21980 , the loss is currently 0.9917846918106079
iteration:  21990
Iteration 21990 , the loss is currently 0.9971393942832947
iteration:  22000
Iteration 22000 , the loss is currently 1.0098016262054443
iteration:  22010
Iteration 22010 , the loss is currently 0.9882063865661621
iteration:  22020
Iteration 22020 , the loss is currently 0.9894012808799744


iteration:  22980
Iteration 22980 , the loss is currently 1.0136096477508545
iteration:  22990
Iteration 22990 , the loss is currently 1.00276780128479
iteration:  23000
Iteration 23000 , the loss is currently 0.9824845194816589
iteration:  23010
Iteration 23010 , the loss is currently 0.9836524128913879
iteration:  23020
Iteration 23020 , the loss is currently 0.9988936185836792
iteration:  23030
Iteration 23030 , the loss is currently 0.9970113635063171
iteration:  23040
Iteration 23040 , the loss is currently 0.9791132807731628
iteration:  23050
Iteration 23050 , the loss is currently 0.9930678606033325
iteration:  23060
Iteration 23060 , the loss is currently 1.0050262212753296
iteration:  23070
Iteration 23070 , the loss is currently 0.992952823638916
iteration:  23080
Iteration 23080 , the loss is currently 0.9903762936592102
iteration:  23090
Iteration 23090 , the loss is currently 1.0131757259368896
iteration:  23100
Iteration 23100 , the loss is currently 0.9815166592597961
it

iteration:  24060
Iteration 24060 , the loss is currently 0.9902696013450623
iteration:  24070
Iteration 24070 , the loss is currently 0.9778307676315308
iteration:  24080
Iteration 24080 , the loss is currently 0.9932267665863037
iteration:  24090
Iteration 24090 , the loss is currently 1.0046112537384033
iteration:  24100
Iteration 24100 , the loss is currently 0.9765614867210388
iteration:  24110
Iteration 24110 , the loss is currently 1.0101029872894287
iteration:  24120
Iteration 24120 , the loss is currently 0.9828639626502991
iteration:  24130
Iteration 24130 , the loss is currently 0.9923657774925232
iteration:  24140
Iteration 24140 , the loss is currently 0.985520601272583
iteration:  24150
Iteration 24150 , the loss is currently 0.9741178154945374
iteration:  24160
Iteration 24160 , the loss is currently 0.983437716960907
iteration:  24170
Iteration 24170 , the loss is currently 0.9864318370819092
iteration:  24180
Iteration 24180 , the loss is currently 0.9896134734153748
i

iteration:  25140
Iteration 25140 , the loss is currently 0.9725174307823181
iteration:  25150
Iteration 25150 , the loss is currently 1.009662389755249
iteration:  25160
Iteration 25160 , the loss is currently 1.0090049505233765
iteration:  25170
Iteration 25170 , the loss is currently 0.9961330890655518
iteration:  25180
Iteration 25180 , the loss is currently 0.9922797679901123
iteration:  25190
Iteration 25190 , the loss is currently 1.0038747787475586
iteration:  25200
Iteration 25200 , the loss is currently 0.9941167235374451
iteration:  25210
Iteration 25210 , the loss is currently 0.9965454936027527
iteration:  25220
Iteration 25220 , the loss is currently 1.005096435546875
iteration:  25230
Iteration 25230 , the loss is currently 0.9851616621017456
iteration:  25240
Iteration 25240 , the loss is currently 0.986116886138916
iteration:  25250
Iteration 25250 , the loss is currently 0.9888976812362671
iteration:  25260
Iteration 25260 , the loss is currently 0.9836450815200806
it

iteration:  26220
Iteration 26220 , the loss is currently 0.9644482135772705
iteration:  26230
Iteration 26230 , the loss is currently 0.978066086769104
iteration:  26240
Iteration 26240 , the loss is currently 1.0037972927093506
iteration:  26250
Iteration 26250 , the loss is currently 0.98975670337677
iteration:  26260
Iteration 26260 , the loss is currently 1.0033060312271118
iteration:  26270
Iteration 26270 , the loss is currently 0.9817700982093811
iteration:  26280
Iteration 26280 , the loss is currently 0.9944252967834473
iteration:  26290
Iteration 26290 , the loss is currently 0.9970530271530151
iteration:  26300
Iteration 26300 , the loss is currently 0.9894599914550781
iteration:  26310
Iteration 26310 , the loss is currently 0.9933525919914246
iteration:  26320
Iteration 26320 , the loss is currently 0.9817519187927246
iteration:  26330
Iteration 26330 , the loss is currently 0.9889774322509766
iteration:  26340
Iteration 26340 , the loss is currently 0.987904965877533
ite

iteration:  27300
Iteration 27300 , the loss is currently 0.9924706220626831
iteration:  27310
Iteration 27310 , the loss is currently 0.9809175133705139
iteration:  27320
Iteration 27320 , the loss is currently 0.9852770566940308
iteration:  27330
Iteration 27330 , the loss is currently 0.997734546661377
iteration:  27340
Iteration 27340 , the loss is currently 0.993989109992981
iteration:  27350
Iteration 27350 , the loss is currently 0.9727087616920471
iteration:  27360
Iteration 27360 , the loss is currently 0.9816675186157227
iteration:  27370
Iteration 27370 , the loss is currently 0.9995225667953491
iteration:  27380
Iteration 27380 , the loss is currently 0.9930263161659241
iteration:  27390
Iteration 27390 , the loss is currently 0.9899560809135437
iteration:  27400
Iteration 27400 , the loss is currently 0.9927341938018799
iteration:  27410
Iteration 27410 , the loss is currently 0.987888753414154
iteration:  27420
Iteration 27420 , the loss is currently 0.9980634450912476
it

iteration:  28380
Iteration 28380 , the loss is currently 0.9775968790054321
iteration:  28390
Iteration 28390 , the loss is currently 0.9797887206077576
iteration:  28400
Iteration 28400 , the loss is currently 1.012380599975586
iteration:  28410
Iteration 28410 , the loss is currently 0.9885318279266357
iteration:  28420
Iteration 28420 , the loss is currently 1.0035251379013062
iteration:  28430
Iteration 28430 , the loss is currently 0.9961012005805969
iteration:  28440
Iteration 28440 , the loss is currently 0.9831315875053406
iteration:  28450
Iteration 28450 , the loss is currently 1.0010151863098145
iteration:  28460
Iteration 28460 , the loss is currently 0.9960935115814209
iteration:  28470
Iteration 28470 , the loss is currently 0.9845823645591736
iteration:  28480
Iteration 28480 , the loss is currently 1.0015654563903809
iteration:  28490
Iteration 28490 , the loss is currently 0.9703657627105713
iteration:  28500
Iteration 28500 , the loss is currently 0.9892054796218872


iteration:  29460
Iteration 29460 , the loss is currently 0.9806605577468872
iteration:  29470
Iteration 29470 , the loss is currently 0.9830146431922913
iteration:  29480
Iteration 29480 , the loss is currently 0.9805755615234375
iteration:  29490
Iteration 29490 , the loss is currently 0.9895049333572388
iteration:  29500
Iteration 29500 , the loss is currently 0.9602532386779785
iteration:  29510
Iteration 29510 , the loss is currently 1.019428014755249
iteration:  29520
Iteration 29520 , the loss is currently 0.9868649840354919
iteration:  29530
Iteration 29530 , the loss is currently 0.9815470576286316
iteration:  29540
Iteration 29540 , the loss is currently 1.0023115873336792
iteration:  29550
Iteration 29550 , the loss is currently 1.023728370666504
iteration:  29560
Iteration 29560 , the loss is currently 1.004820704460144
iteration:  29570
Iteration 29570 , the loss is currently 1.0099045038223267
iteration:  29580
Iteration 29580 , the loss is currently 1.0027841329574585
it

In [14]:
def visualize(state):
    for x in range(16):
        if x % 4 == 0 and not x == 0:
            print()
        print(state[x], end=' ')


In [61]:
e = env2048()

In [62]:
s = e.get_state(normalized=False)

In [31]:
s = memo[3000][0]
visualize(s.reshape(16,))
print()
print(memo[3000][1])
print(model(s))
get_action(s)

NameError: name 'visualize' is not defined

In [39]:
np.save('modelWeights150E', np.array(model.weights))