In [1]:
import sys 
sys.path.append("/home/robert/Player-of-Games/src")


from g2048 import Game2048
import numpy as np

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.utils import to_categorical

In [2]:
class RL_Player():
    """Reinforcement-learning agent that plays 2048 with a small dense network.

    The agent one-hot encodes the board, asks the network for one score per
    move, remembers its most recent decisions, and re-fits the network on
    those decisions after scaling the chosen move's score by the reward.

    Moves are encoded as 2 = down, 4 = left, 6 = right, 8 = up.
    """

    # Move codes, in the order of the network's four outputs.
    _MOVES = (2, 4, 6, 8)
    # Move code -> name of the game method that performs the slide.
    _SLIDES = {2: "slide_down", 4: "slide_left", 6: "slide_right", 8: "slide_up"}
    # Length of each per-cell one-hot vector (index = log2 of the tile value).
    _TOKEN_WIDTH = 16
    # Number of recent decisions kept in ``memory``.
    _MEMORY_LENGTH = 5
    # Reward granted when the largest tile on the board grows to the given value.
    _BIG_TILE_REWARDS = {32: 2, 64: 2, 128: 2, 256: 4, 512: 6, 1024: 8, 2048: 10, 4096: 10}

    def __init__(self):
        self.reward = 0
        self.gamma = 0.9
        self.short_memory = np.array([])
        self.agent_target = []
        self.agent_predict = 0
        self.learning_rate = 0.0005
        # Pass a weights path to neural_net() to resume from saved weights.
        self.model = self.neural_net()
        self.epsilon = 0
        self.actual = []
        # Rolling window of decisions; each entry is the four network scores
        # followed by the move code chosen. Starts as all-zero placeholders
        # (move code 0 is not a real move, so give_reward skips them).
        self.memory = [[0, 0, 0, 0, 0] for _ in range(self._MEMORY_LENGTH)]

    def tokenize_board(self, board):
        """One-hot encode every cell of the board by the log2 of its tile value.

        Each cell becomes a 16-long vector with a 1 at index ``log2(tile)``
        (tile 2 -> index 1, tile 4 -> index 2, ...); empty cells stay all
        zero. The per-cell vectors are concatenated in ``ravel`` order.

        Attributes
        board (array-like): tile values; a 16-cell board yields 256 features,
            which is what the network's input layer expects.

        Returns
        1-D float array of length ``16 * number_of_cells``.

        Raises
        IndexError if a tile's log2 is 16 or more (tiles above 32768 do not
        fit in the 16 slots).
        """
        cells = np.asarray(board).ravel()
        tokens = np.zeros((cells.size, self._TOKEN_WIDTH))
        occupied = np.flatnonzero(cells)
        exponents = np.log2(cells[occupied]).astype(int)
        tokens[occupied, exponents] = 1
        return tokens.ravel()

    def _remember(self, output, move):
        """Append ``output + [move]`` to memory, keeping only the newest entries."""
        self.memory.append(list(output) + [move])
        del self.memory[:-self._MEMORY_LENGTH]

    def ai_suggest_move(self, game):
        '''
        Ask the network for a move on the game's current board.

        The highest-scoring move among ``game.valid_moves`` is chosen. In
        strict mode (``game.strict``) the game is ended instead if the
        network's overall favourite is not one of the valid moves.

        Attributes:
        game: the game being played. Reads ``board``, ``valid_moves`` (used to
            index the four network outputs, so indices or a boolean mask) and
            ``strict``; sets ``game_over`` to True when no move is returned.

        Returns:
        The chosen move code (2, 4, 6 or 8), or -1 when there is no valid
        move or strict mode rejected the suggestion. Successful suggestions
        are recorded in ``self.memory``.
        '''
        tokenized = self.tokenize_board(game.board)
        pred = self.model.predict(tokenized.reshape(1, -1))
        output = [float(score) for score in pred[0]]

        scores = np.asarray(output)
        valid_moves = game.valid_moves
        valid_scores = scores[valid_moves]
        valid_values = np.asarray(self._MOVES)[valid_moves]

        if valid_values.size == 0:
            game.game_over = True  # nothing left to play
            return -1

        best = int(np.argmax(valid_scores))
        if game.strict and valid_scores[best] < scores.max():
            # The network's favourite move is invalid and strict mode does
            # not allow falling back to the next-best move.
            game.game_over = True
            return -1

        move = int(valid_values[best])
        self._remember(output, move)
        return move

    @staticmethod
    def _empties_gained(old_board, next_board):
        """Return how many more empty (zero) cells ``next_board`` has than ``old_board``.

        NOTE(review): stands in for the ``empties_state`` helper the original
        code called, which is not defined or imported in this notebook. It
        counts merges only if the slide methods return the board before a new
        tile is spawned — TODO confirm against Game2048.
        """
        old_empty = np.count_nonzero(np.asarray(old_board) == 0)
        new_empty = np.count_nonzero(np.asarray(next_board) == 0)
        return int(new_empty) - int(old_empty)

    def calc_reward(self, game, move):
        '''
        Score the move returned by ai_suggest_move and store it in self.reward.

        Attributes
        game (Game2048 object): reads ``board`` and ``game_over`` and calls the
            slide method matching ``move``.
            NOTE(review): assumes the slide_* methods return the resulting
            board — confirm against Game2048.
        move (int): move code from ai_suggest_move(); -1 means no move was made.

        Reward rules
        * no move made (-1): -10 if the game is over, otherwise -2
        * game over after the move: -10
        * otherwise: bonus when the largest tile grew (table for 32 and up,
          else +1), plus +2 when the move freed up cells.
        '''
        self.reward = 0

        slide_name = self._SLIDES.get(move)
        if slide_name is None:
            # No slide to evaluate: the game ended or the move was rejected.
            self.reward = -10 if game.game_over else -2
            return

        # Snapshot first so an in-place slide cannot alter the "before" board.
        old_board = np.array(game.board, copy=True)
        next_board = getattr(game, slide_name)()

        old_max = np.amax(old_board)
        new_max = np.amax(next_board)
        if old_max < new_max:  # the move produced a new largest tile
            self.reward += self._BIG_TILE_REWARDS.get(int(new_max), 1)

        if game.game_over:
            self.reward = -10
        elif self._empties_gained(old_board, next_board) > 0:
            self.reward += 2

    def give_reward(self, game):
        '''Re-fit the network on the recent decisions when there is a non-zero reward.

        Pairs the boards in ``game.history[-6:-1]`` with the most recent real
        entries of ``self.memory`` (aligned from the newest backwards), scales
        the score of each chosen move and fits the model on the result.
        ``self.memory`` itself is left untouched.

        Scaling: the chosen move's score is multiplied by ``reward * discount``
        when that product exceeds 1, otherwise by 1.05. The discount starts at
        1 for the oldest decision and drops by 0.2 per step.
        NOTE(review): this keeps the original behaviour, in which the oldest
        decision gets the largest weight and a negative reward still nudges the
        chosen move upwards — confirm that is intended.
        '''
        if self.reward == 0:
            return

        boards = list(game.history[-6:-1])
        # Skip the zero-filled placeholders memory starts with (move code 0).
        decisions = [
            entry for entry in self.memory[-self._MEMORY_LENGTH:]
            if entry[-1] in self._MOVES
        ]
        count = min(len(boards), len(decisions))
        if count == 0:
            return
        boards = boards[-count:]
        decisions = decisions[-count:]

        self.agent_target = []
        discount = 1
        for entry in decisions:
            *scores, move = entry  # unpacking copies, so memory is not mutated
            k = self._MOVES.index(move)  # output slot of the move that was taken
            weight = self.reward * discount
            scores[k] = scores[k] * (weight if weight > 1 else 1.05)
            self.agent_target.append(scores)
            discount -= .2

        # One row per sample: (count, 256) inputs against (count, 4) targets.
        inputs = np.stack([self.tokenize_board(board) for board in boards])
        targets = np.asarray(self.agent_target, dtype=float)
        self.model.fit(inputs, targets)

    def neural_net(self, weights = None):
        """Build and compile the move-scoring network.

        Three 32-unit ReLU layers with 30% dropout feed a 4-way softmax (one
        output per move); the input is the 256-feature tokenized board.
        Compiled with mean-squared-error loss and Adam at ``self.learning_rate``.

        Attributes
        weights: optional path handed to the model's ``load_weights``.

        Returns
        The compiled model.
        """
        model = Sequential()
        model.add(Dense(32, activation='relu', input_shape=(256,)))
        model.add(Dropout(0.3))

        model.add(Dense(32, activation='relu'))
        model.add(Dropout(0.3))

        model.add(Dense(32, activation='relu'))
        model.add(Dropout(0.3))

        model.add(Dense(4, activation='softmax'))
        opt = Adam(self.learning_rate)

        model.compile(loss='mse', optimizer=opt)

        if weights:
            model.load_weights(weights)
        return model

In [3]:
# Fresh agent plus a strict-mode game (in strict mode an invalid suggestion ends the game).
agent = RL_Player()
game = Game2048(ai=True, strict=True)

In [4]:
agent.model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                8224      
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                1056      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 1

In [5]:
# NOTE(review): RL_Player as defined in this notebook has no `check_reward`
# method (the reward logic lives in `calc_reward(game, move)`), so this call
# raises AttributeError on a fresh kernel — confirm the intended call.
agent.check_reward(game)

In [6]:
agent.ai_suggest_move(game)

6

In [None]:
# Five remembered decisions: the same four network scores, each followed by the move code chosen.
mem = [
    [0.24795952, 0.24854049, 0.25363788, 0.24986216, chosen]
    for chosen in (6, 2, 6, 8, 4)
]


In [None]:
game

In [None]:
# Up to five boards preceding the latest entry in the game's history.
game_h = game.history[-6:-1]

In [None]:
# Scratch walk-through of the score scaling used when rewarding remembered moves.
values = [2, 4, 6, 8]
reward = 3  # set here so the cell does not depend on a later cell having been run
discount = 1
for remembered in mem:  # mem holds the most recent decisions, currently five
    # Unpack into a copy so `mem` is not consumed and the cell can be re-run.
    *scores, move = remembered

    # position of the score that corresponds to the move taken
    k = values.index(move)
    if reward*discount > 1:
        scores[k] = scores[k]*reward*discount
    else:
        scores[k] = scores[k]*1.05
    print(reward*discount)
    print(scores)
    discount -= .2


In [None]:
# Scratch values for experimenting with the reward scaling.
reward = 3
l = [10] * 4

In [None]:
discount = 1
for items in l:
    # NOTE(review): `items8` is not defined anywhere in this notebook, so this
    # line raises NameError — looks like an unfinished scratch cell; confirm or delete.
    items8

In [31]:
h_tokens

[array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.

In [None]:
game.history

In [None]:
game.

In [None]:
game.board

In [10]:
game = Game2048(ai = True)

In [None]:
np.amax(game.board)

In [None]:


h = game.history[-6:-1]
# tokenize_board requires a board argument: encode each sampled board as a (1, 256) row.
h_tokens = [agent.tokenize_board(board).reshape(1, -1) for board in h]

In [None]:
game.game_over

In [None]:
game.history[-6:-1]

In [11]:
game.show_board()

[[0 0 0 0]
 [0 0 0 0]
 [0 0 2 0]
 [0 0 0 0]]


In [24]:
move = 2
game.get_move(move)
game.game_step()

[[ 0  0  0  0]
 [ 0  0  0  0]
 [ 0  4  0  4]
 [ 0  0  8 16]]


In [23]:
game.show_board()

[[ 0  0  0  0]
 [ 0  0  0  2]
 [ 0  0  8  2]
 [ 0  0  0 16]]


In [25]:
game.history

[array([0, 0, 0, 0, 0, 4, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 2, 0, 0, 0]),
 array([0, 0, 0, 2, 0, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0]),
 array([0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 2]),
 array([0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 4]),
 array([2, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 8, 0, 0, 4]),
 array([0, 0, 0, 0, 0, 2, 0, 0, 4, 0, 0, 0, 8, 0, 0, 4]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 8, 2, 2, 4]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 2, 8, 4, 4, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 2, 0, 0, 8, 8]),
 array([ 0,  0,  0,  0,  0,  0,  0,  2,  0,  0,  8,  2,  0,  0,  0, 16]),
 array([ 0,  0,  0,  0,  0,  0,  0,  0,  0,  4,  0,  4,  0,  0,  8, 16])]

In [27]:
np. array([0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 2, 0, 0, 8, 8]).reshape(4,4)

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 4, 4, 2],
       [0, 0, 8, 8]])