In [None]:
import keras
from keras import models
from keras.layers import core, convolutional, LSTM, Embedding, Dense, Dropout
from keras.optimizers import SGD, Adam
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
#import cv2
import os
from pprint import pprint
import random
import time
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [20]:
class Game:
    """Selenium-driven handle on the 2048 game served at https://play2048.co/.

    On construction a Chrome window is opened, the page is loaded and a
    page-side ``GameManager`` instance named ``myGM`` is created through
    injected JavaScript. Every other method talks to that ``myGM`` object
    (or sends key presses to the page body).

    NOTE(review): ``executable_path=`` and ``find_element_by_tag_name`` are
    legacy Selenium APIs — confirm the installed Selenium version still
    provides them.
    """

    # String aliases accepted by ``move`` for the integer direction codes.
    _DIRECTION_CODES = {"up": 0, "down": 1, "left": 2, "right": 3}

    def __init__(self, custom_config=False):
        """Open the browser, load the game page and create ``myGM``.

        Args:
            custom_config: Accepted for backward compatibility; not used.
        """
        chrome_options = webdriver.chrome.options.Options()
        chrome_options.add_argument("disable_infobars")
        self.driver = webdriver.Chrome(executable_path="chromedriver", options=chrome_options)
        self.driver.set_window_position(x=-10,y=0)
        self.driver.set_window_size(200, 700)
        self.driver.get("https://play2048.co/")
        self.driver.execute_script("myGM = new GameManager(4, KeyboardInputManager, HTMLActuator, LocalStorageManager);")

    def get_crashed(self):
        """Return whatever ``myGM.isGameTerminated()`` reports in the page."""
        return self.driver.execute_script("return myGM.isGameTerminated();")

    def get_board(self):
        """Return the current board as a ``(1, 16)`` numpy array.

        Empty cells (reported as ``None`` by the page) become 0; occupied
        cells contribute their ``'value'`` entry. Cells are flattened in the
        order the page returns ``myGM.grid.cells``.
        """
        grid = self.driver.execute_script("return myGM.grid.cells;")
        rows = [
            [cell['value'] if cell is not None else 0 for cell in line]
            for line in grid
        ]
        return np.array(rows).reshape(1,16)

    def get_score(self):
        """Return ``myGM.score`` as reported by the page."""
        return self.driver.execute_script("return myGM.score;")

    def restart(self):
        """Ask the page-side game manager to start a new game."""
        self.driver.execute_script("myGM.restart();")

    def move(self, direction="up"):
        """Send one arrow-key press to the page.

        Args:
            direction: 0/1/2/3 for up/down/left/right, or one of the strings
                ``"up"``, ``"down"``, ``"left"``, ``"right"`` (case-insensitive).

        Raises:
            ValueError: if ``direction`` is not one of the accepted values.
                Previously an unknown value (including the default ``"up"``)
                failed with an UnboundLocalError.
        """
        if isinstance(direction, str):
            code = self._DIRECTION_CODES.get(direction.lower())
        else:
            code = direction
        # Validate before touching the browser so a bad call has no side effects.
        if code is None or code not in (0, 1, 2, 3):
            raise ValueError(
                "direction must be 0-3 or one of 'up', 'down', 'left', 'right'; got %r"
                % (direction,)
            )
        arrow_keys = (Keys.ARROW_UP, Keys.ARROW_DOWN, Keys.ARROW_LEFT, Keys.ARROW_RIGHT)
        key = arrow_keys[int(code)]
        self.driver.find_element_by_tag_name("body").send_keys(key)
        

In [17]:
def play(game, n=1, slow=False, once=False, restartAfterXMoves=None, with_replay=True, verbose=False, dqn_agent=None):
    """Play ``n`` games with an epsilon-greedy policy and train the agent online.

    Each step either picks a random move (when ``random.randint(0, 200) < 75``)
    or the arg-max of the agent model's prediction, applies it, and feeds the
    transition to ``train_short_memory`` and ``remember_long_term``. The step
    reward is the score gained minus 1, or -10 when the move ended the game.

    Args:
        game: object exposing ``get_crashed``, ``get_score``, ``get_board``,
            ``move`` and ``restart``. All game calls go through this argument
            (the previous version ignored it and used the global ``g``).
        n: number of games to play.
        slow: sleep 0.75 s before every move.
        once: return right after the first move (returns ``None``).
        restartAfterXMoves: if truthy, end each game after that many moves.
        with_replay: call ``replay_new`` on the agent's memory after each game.
        verbose: print per-step diagnostics (state shape, move, reward) and
            forward ``verbose`` to ``train_short_memory``.
        dqn_agent: agent to train; when ``None`` the notebook-level ``agent``
            is used, as before.

    Returns:
        List with the final score of every game played, or ``None`` when
        ``once`` is true and a move was made.
    """
    # Only look up the global when no agent was supplied explicitly.
    learner = agent if dqn_agent is None else dqn_agent
    scores = []
    game_counter = 0
    epsilon = 75
    while game_counter < n:
        x_moves = 0
        while not game.get_crashed():
            if slow:
                time.sleep(0.75)
            prev_score = game.get_score()
            old_state = game.get_board()

            # Explore a random branch, otherwise exploit the model's best guess.
            if random.randint(0, 200) < epsilon:
                final_move = random.randint(0, 3)
            else:
                if verbose:
                    print(np.shape(old_state))
                prediction = learner.model.predict(old_state)
                final_move = np.argmax(prediction)
            game.move(final_move)
            new_state = game.get_board()

            # The -1 makes a move that scores nothing yield a negative reward.
            reward = game.get_score() - prev_score - 1
            done = game.get_crashed()
            if done:
                reward = -10
            learner.train_short_memory(old_state, final_move, reward, new_state, done, verbose=verbose)
            learner.remember_long_term(old_state, final_move, reward, new_state, done)
            if verbose:
                print(final_move)
                print(reward)
            if once:
                return
            x_moves += 1
            if restartAfterXMoves and x_moves >= restartAfterXMoves:
                break
        if with_replay:
            learner.replay_new(learner.memory)
        game_counter += 1
        final_score = game.get_score()
        print("Game", game_counter, "\tScore: ", final_score)
        scores.append(final_score)
        game.restart()
    return scores

In [15]:
class myDQNAgent():
    """Small Q-learning agent backed by a dense Keras network.

    The network takes a 16-value input and emits 4 values, one per move.
    Transitions are trained on immediately (``train_short_memory``) and also
    stored in ``memory`` for later replay (``replay_new``).
    """

    def __init__(self, game):
        """Set hyper-parameters, keep a reference to ``game`` and build the model."""
        self.game = game
        self.gamma = 0.5            # discount applied to the best next-state value
        self.learning_rate = 0.001  # passed to the Adam optimizer
        self.epsilon = 0
        self.short_memory = np.array([])
        self.memory = []            # list of (state, action, reward, next_state, terminated)
        self.model = self.buildmodel()

    def buildmodel(self):
        """Create and compile the 16 -> 16 -> 20 -> 4 dense network (MSE loss, Adam)."""
        print("building a model")
        net = models.Sequential()
        net.add(Dense(16, activation='relu', input_dim=16))
        net.add(Dense(20, activation='relu'))
        net.add(Dense(4, activation='linear'))
        net.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        print("model built")
        return net

    def remember_long_term(self, state, action, reward, next_state, terminated):
        """Append one transition tuple to ``self.memory``."""
        transition = (state, action, reward, next_state, terminated)
        self.memory.append(transition)

    def _fit_transition(self, state, action, reward, next_state, terminated, verbose=False):
        """Run one fit step that moves Q(state, action) toward its target.

        The target is ``reward`` for a terminal transition, otherwise
        ``reward + gamma * max(Q(next_state))``. When ``verbose`` is true the
        predicted Q-values are printed before and after the target is written.
        """
        q_values = self.model.predict(state)
        next_q_values = self.model.predict(next_state)
        if terminated:
            target = reward
        else:
            target = reward + self.gamma * np.max(next_q_values[0])
        if verbose:
            print(q_values)
        q_values[0][action] = target
        if verbose:
            print(q_values)
        self.model.fit(state, q_values, epochs=1, verbose=0)

    def train_short_memory(self, state, action, reward, next_state, terminated, verbose=False):
        """Train on a single, just-observed transition."""
        self._fit_transition(state, action, reward, next_state, terminated, verbose=verbose)

    def replay_new(self, memory):
        """Re-train on stored transitions.

        With more than 500 stored transitions a random sample of 200 is used;
        otherwise every stored transition is replayed in order.
        """
        batch = random.sample(memory, 200) if len(memory) > 500 else memory
        for state, action, reward, next_state, terminated in batch:
            self._fit_transition(state, action, reward, next_state, terminated)
            

In [21]:
# Start the browser-backed game session (the constructor opens Chrome and loads the game page).
g = Game()

In [22]:
# Create the learning agent; its constructor builds and compiles the Keras model.
agent = myDQNAgent(g)

building a model
model built


In [24]:
# Train for 100 games, ending each game after at most 10 moves and replaying stored
# transitions after every game. verbose=True prints per-step diagnostics, so the
# output of this cell is long.
s = play(g, n=100, once=False,slow=False, restartAfterXMoves=10, with_replay=True, verbose=True)

[[0.21946144 0.7323235  0.97219634 1.313447  ]]
[[ 0.21946144  0.7323235   0.97219634 -0.63056266]]
3
-1
[[0.27353138 0.33071965 0.5013925  0.73709583]]
[[ 0.27353138  0.33071965  0.5013925  -0.6314521 ]]
3
-1
[[0.27182907 0.33081287 0.50052696 0.7326331 ]]
[[3.8603675  0.33081287 0.50052696 0.7326331 ]]
0
3
(1, 16)
[[0.0290044  0.08876414 1.4750804  1.7184734 ]]
[[ 0.0290044   0.08876414  1.4750804  -0.2362678 ]]
3
-1
(1, 16)
[[0.59944916 0.6712584  1.1961762  1.5209591 ]]
[[0.59944916 0.6712584  1.1961762  0.24796176]]
3
-1
[[0.95610523 0.9286252  1.9194146  2.4885986 ]]
[[0.95610523 4.022174   1.9194146  2.4885986 ]]
1
3
[[2.0436428  1.7933857  0.41941538 1.3034611 ]]
[[2.0436428 1.7933857 0.4879657 1.3034611]]
2
-1
[[0.80688953 0.8790352  2.9725761  2.7714043 ]]
[[0.80688953 0.8790352  0.14749515 2.7714043 ]]
2
-1
(1, 16)
[[1.4760984 2.2959063 2.1392767 1.7837287]]
[[ 1.4760984 12.800332   2.1392767  1.7837287]]
1
11
[[1.0819104 1.2880995 3.5742161 3.5854092]]
[[1.0819104 0.7927046

Game 8 	Score:  28
(1, 16)
[[0.82294196 2.7949898  0.59454525 2.524902  ]]
[[0.82294196 0.28554738 0.59454525 2.524902  ]]
1
-1
(1, 16)
[[2.5070264 2.1280475 2.5949664 2.2731473]]
[[2.5070264 2.1280475 5.5694704 2.2731473]]
2
3
(1, 16)
[[1.5176874 2.701476  4.5399723 5.1736937]]
[[1.5176874 2.701476  4.5399723 9.06554  ]]
3
7
[[3.3289938 4.098054  1.0106015 2.0120761]]
[[0.74189997 4.098054   1.0106015  2.0120761 ]]
0
-1
(1, 16)
[[ 3.4615004  1.6417698 -1.0453583 -1.8279567]]
[[ 0.623312   1.6417698 -1.0453583 -1.8279567]]
0
-1
(1, 16)
[[ 3.1918383  1.7743449  1.3560121 -0.2335113]]
[[ 1.0313833  1.7743449  1.3560121 -0.2335113]]
0
-1
[[ 3.9826496   2.2022245  -0.32724714 -1.4860231 ]]
[[ 0.7927636   2.2022245  -0.32724714 -1.4860231 ]]
0
-1
(1, 16)
[[3.4963796  2.1176262  2.76731    0.80804956]]
[[1.3877437  2.1176262  2.76731    0.80804956]]
0
-1
[[ 4.6611547   3.3834517   0.6909099  -0.28630364]]
[[ 4.6611547   3.2393928   0.6909099  -0.28630364]]
1
-1
[[5.481785  8.107567  7.081524

Game 16 	Score:  20
[[1.2440703 1.0907204 2.3170342 2.2277894]]
[[1.2440703 1.0907204 2.3170342 0.540804 ]]
3
-1
(1, 16)
[[1.236081  1.5759574 2.5381005 3.0706978]]
[[1.236081  1.5759574 2.5381005 4.9607973]]
3
3
(1, 16)
[[2.204401  1.854179  3.9129615 2.169638 ]]
[[2.204401  1.854179  1.6300817 2.169638 ]]
2
-1
(1, 16)
[[4.377715  5.200998  2.8352623 3.2565608]]
[[4.377715  5.870229  2.8352623 3.2565608]]
1
3
(1, 16)
[[5.4191723 5.6931    5.3690963 4.7473383]]
[[ 5.4191723 10.172908   5.3690963  4.7473383]]
1
7
[[3.1946952 1.6531974 6.3088737 3.0122929]]
[[3.1946952 1.6531974 6.3088737 4.9747443]]
3
3
(1, 16)
[[ 3.7573068   0.08374771  3.9088798  -1.3661268 ]]
[[ 3.7573068   0.08374771  1.9646752  -1.3661268 ]]
2
-1
(1, 16)
[[3.5486617 3.1285305 5.903232  3.522627 ]]
[[3.5486617 3.1285305 1.359685  3.522627 ]]
2
-1
(1, 16)
[[4.7500896 4.2248964 4.4282703 2.570945 ]]
[[8.073708  4.2248964 4.4282703 2.570945 ]]
0
3
(1, 16)
[[ 9.030967  10.16152    0.9085814  3.2806969]]
[[9.030967  7.66

Game 24 	Score:  16
(1, 16)
[[0.43313143 0.9422017  0.9814303  0.17723598]]
[[0.43313143 0.9422017  3.7348695  0.17723598]]
2
3
(1, 16)
[[0.2572733 1.4653995 0.7300062 1.1321713]]
[[0.2572733 1.4131672 0.7300062 1.1321713]]
1
-1
[[1.248525   0.88166034 4.8213034  2.9130125 ]]
[[1.248525  1.4106517 4.8213034 2.9130125]]
1
-1
(1, 16)
[[1.2587708 0.8816005 4.8153543 2.879007 ]]
[[1.2587708 0.8816005 6.0537634 2.879007 ]]
2
3
(1, 16)
[[3.2276726 3.6511505 4.687819  6.0913296]]
[[3.2276726 3.6511505 4.687819  8.447782 ]]
3
7
[[ 2.90426     2.7383063   2.5289571  -0.11453516]]
[[ 4.0977745   2.7383063   2.5289571  -0.11453516]]
0
3
[[ 1.2118186  2.2010107  1.2937512 -1.7674347]]
[[ 1.2118186  0.786996   1.2937512 -1.7674347]]
1
-1
[[2.299838  0.8138623 3.5734477 2.142207 ]]
[[2.299838  0.8138623 4.6759014 2.142207 ]]
2
-1
[[ 6.8843527  8.203454   8.207175  11.338126 ]]
[[6.8843527 8.203454  8.207175  0.8489531]]
3
-1
(1, 16)
[[3.7097669 3.5147102 1.4146738 1.305513 ]]
[[11.172295   3.5147102

Game 32 	Score:  12
(1, 16)
[[1.8797154  2.2534738  2.5367405  0.92136747]]
[[1.8797154  2.2534738  1.7698038  0.92136747]]
2
-1
[[1.860657  2.2263484 5.536096  1.6600935]]
[[1.860657  2.0885942 5.536096  1.6600935]]
1
-1
[[2.8995728 3.6075683 6.15392   1.2753944]]
[[2.8995728 3.6075683 6.15392   4.6829495]]
3
3
(1, 16)
[[2.076246  3.3651094 1.7064687 1.8465503]]
[[2.076246  9.081629  1.7064687 1.8465503]]
1
7
[[3.1073024 4.245406  1.7448683 2.4295118]]
[[3.1073024 4.245406  2.8138819 2.4295118]]
2
-1
[[5.144352  6.092658  7.6108246 3.656594 ]]
[[3.5319352 6.092658  7.6108246 3.656594 ]]
0
-1
(1, 16)
[[6.8449564 8.914054  8.9573555 9.076879 ]]
[[6.8449564 8.914054  8.9573555 8.35788  ]]
3
3
[[ 8.722836    2.3980997  10.701961   -0.20198695]]
[[11.280179    2.3980997  10.701961   -0.20198695]]
0
7
(1, 16)
[[8.627762  7.323937  7.0962405 1.5893363]]
[[8.4662    7.323937  7.0962405 1.5893363]]
0
3
(1, 16)
[[10.30421    7.2039742 10.888877   3.7878463]]
[[10.30421    7.2039742 24.254879   

Game 40 	Score:  32
(1, 16)
[[1.0645506  0.54029524 2.1237655  1.2327601 ]]
[[1.0645506  0.54029524 0.06188273 1.2327601 ]]
2
-1
(1, 16)
[[1.0522224 0.5319841 2.1251283 1.2689219]]
[[1.0522224  0.5319841  0.06256413 1.2689219 ]]
2
-1
(1, 16)
[[1.0438206  0.52774584 2.1193335  1.2982228 ]]
[[1.0438206  0.52774584 0.05966675 1.2982228 ]]
2
-1
[[1.0389467  0.52716494 2.107117   1.3213402 ]]
[[1.0389467  0.52716494 2.107117   0.83451104]]
3
-1
(1, 16)
[[ 1.9805927 -1.1830556  3.6645339 -0.0861287]]
[[ 1.9805927 -1.1830556  1.1845045 -0.0861287]]
2
-1
[[2.2425535 1.6161689 4.3552437 3.7496243]]
[[2.2425535 1.6161689 4.3552437 5.3328695]]
3
3
(1, 16)
[[ 3.4548614 -1.4435807  4.651646   1.2518142]]
[[ 3.4548614 -1.4435807  5.5147657  1.2518142]]
2
3
(1, 16)
[[3.9069064 4.6106224 5.020813  3.8745046]]
[[3.9069064 4.6106224 9.872045  3.8745046]]
2
7
(1, 16)
[[3.4667373 3.379799  5.8157225 2.7469146]]
[[3.4667373 3.379799  5.8774447 2.7469146]]
2
3
(1, 16)
[[5.7090235 5.459819  5.407247  3.96470

Game 48 	Score:  4
(1, 16)
[[2.654528  3.9693491 2.784554  3.6203933]]
[[2.654528   0.27379775 2.784554   3.6203933 ]]
1
-1
(1, 16)
[[2.5451262 1.576178  1.6869221 1.0051694]]
[[4.259034  1.576178  1.6869221 1.0051694]]
0
3
[[ 2.3691614  1.8983213  2.5153606 -2.4803488]]
[[ 2.3691614  1.8983213  1.3626645 -2.4803488]]
2
-1
(1, 16)
[[3.1520107 3.7241337 4.108509  4.7163153]]
[[3.1520107 3.7241337 4.108509  1.378597 ]]
3
-1
(1, 16)
[[2.016809  2.1663527 4.7366495 2.1126347]]
[[2.016809  2.1663527 2.7201517 2.1126347]]
2
-1
(1, 16)
[[5.6515923 7.415271  7.254652  7.3807774]]
[[5.6515923 7.945846  7.254652  7.3807774]]
1
3
(1, 16)
[[7.1962843 9.887023  8.152769  8.690633 ]]
[[7.1962843 4.184729  8.152769  8.690633 ]]
1
-1
(1, 16)
[[ 5.747975   6.705996  10.3567095  9.028309 ]]
[[ 5.747975  6.705996 15.546987  9.028309]]
2
11
(1, 16)
[[9.072807  8.262555  2.5291414 4.091667 ]]
[[5.788252  8.262555  2.5291414 4.091667 ]]
0
-1
[[ 9.661143 13.469025 10.785714 11.585199]]
[[ 9.661143 13.469025 

[[6.0561852 6.666428  4.7188635 5.999184 ]]
[[ 6.0561852 10.541448   4.7188635  5.999184 ]]
1
7
(1, 16)
[[7.0617948  5.075062   1.9861734  0.86327654]]
[[2.6047673  5.075062   1.9861734  0.86327654]]
0
-1
(1, 16)
[[7.188922 6.941787 5.844734 6.226883]]
[[2.4612079 6.941787  5.844734  6.226883 ]]
0
-1
[[6.8321085 5.346128  6.73072   6.247195 ]]
[[6.8321085 5.346128  6.73072   8.02093  ]]
3
3
(1, 16)
[[ 9.6815405  4.2193866 10.077459   7.249275 ]]
[[9.6815405 4.2193866 4.5712957 7.249275 ]]
2
-1
(1, 16)
[[11.0538025 10.255495   4.5989685  4.675334 ]]
[[ 4.3518014 10.255495   4.5989685  4.675334 ]]
0
-1
(1, 16)
[[10.176965 10.555894  7.003635  6.72157 ]]
[[10.176965   3.8757849  7.003635   6.72157  ]]
1
-1
Game 57 	Score:  16
(1, 16)
[[1.6088281 2.0189834 1.1512862 1.6176255]]
[[1.6088281  0.95187306 1.1512862  1.6176255 ]]
1
-1
(1, 16)
[[3.147035 3.895381 1.340399 1.768255]]
[[3.147035  6.4873247 1.340399  1.768255 ]]
1
3
[[3.337037  3.2097077 6.2471523 6.921458 ]]
[[0.4503901 3.2097077 

[[3.2559035 3.681122  7.978099  4.9672885]]
[[3.2559035 3.681122  4.50423   4.9672885]]
2
-1
(1, 16)
[[ 6.044646   5.4626546 10.978791  10.137678 ]]
[[ 6.044646   5.4626546  4.60278   10.137678 ]]
2
-1
(1, 16)
[[ 8.037524  10.79201    7.8219194 11.1051   ]]
[[ 8.037524  10.79201    7.8219194  7.0287714]]
3
3
(1, 16)
[[5.805778  5.9647675 7.9947586 7.673398 ]]
[[5.805778  5.9647675 3.1797633 7.673398 ]]
2
-1
[[5.865945  7.049206  8.242601  7.1088266]]
[[ 5.865945  15.627002   8.242601   7.1088266]]
1
11
[[3.9615896 4.2962384 9.017068  6.389877 ]]
[[3.9615896 3.508534  9.017068  6.389877 ]]
1
-1
(1, 16)
[[3.9205532 4.307787  8.8028555 6.2488647]]
[[ 3.9205532  4.307787  11.03956    6.2488647]]
2
7
Game 65 	Score:  28
[[0.6440855 1.2403955 3.2047207 1.5170647]]
[[0.35572314 1.2403955  3.2047207  1.5170647 ]]
0
-1
[[0.59137905 1.1004064  2.7137256  1.0041738 ]]
[[0.59137905 0.8212004  2.7137256  1.0041738 ]]
1
-1
(1, 16)
[[0.8661498  0.21809399 3.6445723  2.7161698 ]]
[[0.8661498  0.218093

[[4.625373 4.188463 5.136124 5.62776 ]]
[[4.625373 4.188463 5.136124 9.316328]]
3
7
(1, 16)
[[2.3649468 4.643176  2.3091176 2.1233735]]
[[ 2.3649468 -0.4731838  2.3091176  2.1233735]]
1
-1
[[ 1.0504559  -0.21758136  0.9769493  -0.72782165]]
[[ 2.1849835  -0.21758136  0.9769493  -0.72782165]]
0
-1
(1, 16)
[[2.950207  3.5931306 6.3733463 2.5624042]]
[[2.950207  3.5931306 7.490321  2.5624042]]
2
3
[[7.541577  3.7917414 9.015509  5.1818995]]
[[7.541577  3.7917414 4.3760676 5.1818995]]
2
-1
(1, 16)
[[ 8.650038   4.953908  10.6854315  7.905928 ]]
[[8.650038  4.953908  2.3383152 7.905928 ]]
2
-1
(1, 16)
[[6.210896  5.0918164 6.4326606 5.9990892]]
[[6.210896  5.0918164 2.2163303 5.9990892]]
2
-1
Game 73 	Score:  20
[[3.1731    2.6859396 3.276334  3.5847251]]
[[1.1257532 2.6859396 3.276334  3.5847251]]
0
-1
(1, 16)
[[4.2503457 3.1954029 1.9671992 1.567604 ]]
[[4.2750673 3.1954029 1.9671992 1.567604 ]]
0
3
[[2.5519023 1.8196424 1.2593453 2.0269322]]
[[2.841533  1.8196424 1.2593453 2.0269322]]
0


[[3.2117043 4.0543833 3.4662933 3.1709042]]
[[3.2117043 2.4668188 3.4662933 3.1709042]]
1
-1
(1, 16)
[[4.125632  3.954638  6.9202423 5.3688397]]
[[4.125632  3.954638  6.2183695 5.3688397]]
2
3
[[5.526776  6.3805737 4.558293  3.7047293]]
[[ 5.526776   6.3805737 15.613484   3.7047293]]
2
11
Game 81 	Score:  20
[[ 1.199839   1.872967   1.258946  -0.5515885]]
[[1.199839 1.872967 1.258946 4.816271]]
3
3
(1, 16)
[[1.8582087 2.9315526 3.613942  3.551513 ]]
[[1.8582087 2.9315526 1.7167118 3.551513 ]]
2
-1
[[4.4642267 5.419512  1.7710502 1.2245704]]
[[4.4642267 1.9653957 1.7710502 1.2245704]]
1
-1
(1, 16)
[[5.400368  5.8940353 4.5615597 3.8577912]]
[[5.400368  5.769993  4.5615597 3.8577912]]
1
3
(1, 16)
[[4.2985015 5.499892  3.4190142 3.052688 ]]
[[4.2985015 1.7499461 3.4190142 3.052688 ]]
1
-1
(1, 16)
[[4.215321  5.386416  3.390935  3.0272214]]
[[4.215321  1.693208  3.390935  3.0272214]]
1
-1
[[4.0893564 5.2120314 3.3725665 3.0023923]]
[[4.0893564 1.6060157 3.3725665 3.0023923]]
1
-1
(1, 16)
[

Game 89 	Score:  24
(1, 16)
[[ 1.7474155   1.1081697   0.7531458  -0.03929557]]
[[ 1.0651112   1.1081697   0.7531458  -0.03929557]]
0
-1
(1, 16)
[[3.654787  3.1249049 3.3283193 4.1268487]]
[[3.654787  3.1249049 3.3283193 5.010116 ]]
3
3
[[3.4172864 4.0126295 1.3392578 1.3552481]]
[[3.4172864 1.1122386 1.3392578 1.3552481]]
1
-1
(1, 16)
[[3.2529707 4.211291  1.186348  0.8269039]]
[[3.2529707 4.3268867 1.186348  0.8269039]]
1
3
(1, 16)
[[1.4841669  1.4423625  2.660328   0.58683324]]
[[1.4841669  1.4423625  2.3379056  0.58683324]]
2
-1
[[6.657832  6.660453  4.88912   4.4293246]]
[[6.657832  6.660453  4.88912   2.8544035]]
3
-1
(1, 16)
[[7.700115  4.8164124 4.7935667 4.6423845]]
[[3.590898  4.8164124 4.7935667 4.6423845]]
0
-1
[[9.143609  5.6649103 8.5851755 7.84396  ]]
[[ 9.143609   5.6649103 18.936275   7.84396  ]]
2
15
(1, 16)
[[6.6208577 7.8433747 7.248171  7.4306016]]
[[6.6208577 4.539541  7.248171  7.4306016]]
1
-1
[[ 5.334136  7.420132 11.147255 10.011305]]
[[ 4.0957575  7.420132  1

[[3.502574 4.093525 6.968857 8.66076 ]]
[[3.502574 4.093525 9.539832 8.66076 ]]
2
7
[[0.88397   0.9552541 5.1008635 4.252591 ]]
[[0.88397   5.0078335 5.1008635 4.252591 ]]
1
3
(1, 16)
[[4.0134797 2.9677556 1.7438614 1.823922 ]]
[[2.6036108 2.9677556 1.7438614 1.823922 ]]
0
-1
(1, 16)
[[6.814146  4.961776  7.257649  6.9302216]]
[[6.814146  4.961776  9.882692  6.9302216]]
2
3
(1, 16)
[[11.086223  13.798939   5.2932763  5.2154555]]
[[11.086223   4.0772805  5.2932763  5.2154555]]
1
-1
[[ 2.46762   3.196968 10.193852  8.212077]]
[[ 3.3747435  3.196968  10.193852   8.212077 ]]
0
-1
(1, 16)
[[7.6819754 8.621705  8.438541  7.48669  ]]
[[7.6819754 9.872054  8.438541  7.48669  ]]
1
3
Game 98 	Score:  24
(1, 16)
[[0.4790601  0.2532988  0.5124939  0.06257642]]
[[0.4790601  0.2532988  0.6373595  0.06257642]]
2
-1
(1, 16)
[[2.4208963 2.229597  3.2808967 2.8193192]]
[[2.4208963 2.229597  5.268576  2.8193192]]
2
3
[[4.55358   4.5573206 4.3266997 3.6067238]]
[[1.0071476 4.5573206 4.3266997 3.6067238]]


In [None]:
# Top panel: per-game scores, their 5-game rolling mean and the overall average.
# Bottom panel: histogram of the scores.
a = np.average(s)
# Keep the rolling mean's own index so it lines up with the games it averages
# (the first complete window ends at game index 4, not 0).
rolling_mean = pd.Series(s).rolling(window=5).mean().iloc[5-1:]
plt.subplot(211)
plt.plot(s)
plt.plot(rolling_mean.index, rolling_mean.values)
# Span the average line over every game instead of a fixed 0..10 range.
plt.hlines(a, 0, len(s) - 1, colors='r', linestyles="dashed")
plt.subplot(212)
plt.hist(s, bins=10);

In [None]:
# Lowest final score among the recorded games.
min(s)

In [19]:
# Query the model for the live board. get_board() already returns shape (1, 16),
# so the reshape below is a no-op kept as a safeguard.
old_state = g.get_board()
prediction = agent.model.predict(old_state.reshape(1,16))
print(prediction)

[[-0.5217494   0.0365449   0.06543536 -0.30124956]]


In [None]:
# Print every stored transition whose starting state equals old_state.
for i in agent.memory:
    if np.array_equal(i[0], old_state):
        print(i)

In [None]:
# Hand-written target vector (one value per move) used by the manual fit cells.
q = np.array([[0, 0, 1, 1]], dtype="float32")

In [None]:
# `b` was previously only assigned in a later cell, so this cell failed on a
# fresh top-to-bottom run; capture the current board here before fitting.
b = g.get_board().reshape(1,16)
h = agent.model.fit(b, q, epochs=1, verbose=1)

In [None]:
# Fit the model 100 times on the current board against the hand-written target q.
b = g.get_board().reshape(1,16)
for i in range(100):
    agent.model.fit(b, q, epochs=1, verbose=0)

In [None]:
# Replace the agent with a freshly built one while carrying over the stored transitions.
# NOTE: this rebinds `a`, which the score-plotting cell uses for the average score.
a = agent.memory
agent = myDQNAgent(g)
agent.memory = a

In [None]:
# Run 100 replay passes over the transitions held in `a`.
for i in range(100):
    agent.replay_new(a)

In [None]:
# Model output for the current live board.
b = g.get_board().reshape(1,16)
agent.model.predict(b)

In [None]:
# Model output for a synthetic board where every cell holds 2.
b = np.array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]).reshape(1,16)
agent.model.predict(b)

In [None]:
# Model output for an all-zero (empty) board.
b = np.zeros(16).reshape(1,16)
agent.model.predict(b)

In [None]:
# Dump every stored transition; the output grows with the number of moves played.
pprint(agent.memory)

In [None]:
# Inspect the optimizer's current weights.
agent.model.optimizer.get_weights()

In [None]:
# NOTE(review): s_ver is not assigned in any cell visible in this notebook, so this
# raises NameError on a fresh kernel; define it before this cell or remove it.
s_ver

In [None]:
# NOTE(review): s_hor is not assigned in any cell visible in this notebook, so this
# raises NameError on a fresh kernel; define it before this cell or remove it.
s_hor

In [None]:
# Alternate 1000 fit steps on two hand-picked boards with opposite targets.
# NOTE(review): s_hor and s_ver are not assigned in any visible cell; define them first.
# The targets are loop-invariant, so build them once and with the same dtype.
target_hor = np.array([[-1, -1, 1, 1]], dtype="float32")
target_ver = np.array([[1, 1, -1, -1]], dtype="float32")
for i in range(1000):
    # verbose=0 keeps 2000 progress reports out of the cell output.
    agent.model.fit(s_hor.reshape(1, 16), target_hor, verbose=0)
    agent.model.fit(s_ver.reshape(1, 16), target_ver, verbose=0)

In [None]:
# Model output for s_hor (s_hor must already be defined; see the fit cell above).
agent.model.predict(s_hor.reshape(1,16))

In [None]:
# Model output for s_ver (s_ver must already be defined; see the fit cell above).
agent.model.predict(s_ver.reshape(1,16))

In [None]:
# Flatten old_state to a 16-element vector. This rebinds old_state, so code that
# expects the (1, 16) shape has to reshape it again.
old_state=old_state.reshape((16))

In [None]:
# Show the contents of old_state.
print(old_state)

In [None]:
# Show the shape of old_state.
np.shape(old_state)