In [1]:
import sys 
sys.path.append("/home/robert/Player-of-Games/src")


from g2048 import Game2048
import numpy as np

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.utils import to_categorical

In [2]:
class RL_Player():
    '''Agent that chooses 2048 moves from the output of a small dense network.

    The board is one-hot encoded (see ``tokenize_board``), fed to the Keras
    model built by ``neural_net``, and the highest-scoring valid move is
    returned by ``ai_suggest_move``.
    '''

    # Width of the one-hot code used for each board cell: index k marks a
    # tile of value 2**k (index 0 is never set because the smallest tile is 2).
    TOKENS_PER_CELL = 16

    # Move codes paired, in order, with the four network outputs.
    # NOTE(review): presumably the codes Game2048 expects for the four
    # directions -- confirm against g2048.
    MOVE_VALUES = (2, 4, 6, 8)

    def __init__(self):
        # The attributes below are not read by any method of this class yet;
        # they are kept as hyper-parameter / bookkeeping placeholders.
        self.reward = 0
        self.gamma = 0.9
        self.short_memory = np.array([])
        self.agent_target = 1
        self.agent_predict = 0
        self.epsilon = 0
        self.actual = []

        self.learning_rate = 0.0005
        # Pass a weights file to neural_net() to start from a saved model.
        self.model = self.neural_net()
        # Rolling record of the network outputs behind the accepted moves.
        # Its length is preserved on every update, so pre-fill it to get a
        # longer window; starting empty it holds only the latest output.
        self.memory = []

    def tokenize_board(self, board):
        '''One-hot encode a board so every cell becomes a 16-slot vector.

        For a cell holding tile value v (v != 0), slot int(log2(v)) is set to
        1; an empty cell stays all zeros. Slots cover tiles up to 32768
        (2**15); a larger tile raises IndexError.

        Attributes
        board (np.array): the game board, 4x4 or already flattened.
        Returns
        tokenized (np.array): flat float array of length
        ``board.size * 16`` (256 for a 4x4 board).
        '''
        cells = np.asarray(board).ravel()
        tokenized = np.zeros((cells.size, self.TOKENS_PER_CELL))

        occupied = np.flatnonzero(cells)
        # Truncation matches int(np.log2(x)); tiles are exact powers of two.
        slots = np.log2(cells[occupied]).astype(int)
        tokenized[occupied, slots] = 1

        return tokenized.ravel()

    def ai_suggest_move(self, game):
        '''Ask the model for a move on the game's current board.

        Attributes:
        game (g2048 object): the game the AI is currently playing. Reads
        ``game.board``, ``game.valid_moves`` and ``game.strict``.

        Returns:
        The move code (one of 2, 4, 6, 8) with the highest predicted score
        among the valid moves, or -1 when no move is returned. In the -1
        case ``game.game_over`` is set to True. That happens when there are
        no valid moves, or when ``game.strict`` is set and the model's
        overall top-scoring move is not valid.

        Side effects:
        On success the raw network output is stored in ``self.memory``.
        '''
        tokenized = self.tokenize_board(game.board)
        pred = self.model.predict(tokenized.reshape(1, -1))
        output = list(pred[0])

        # valid_moves is used purely as a NumPy index (positions or mask)
        # into the four outputs, exactly as game supplies it.
        valid_moves = game.valid_moves
        valid_scores = np.asarray(output)[valid_moves]
        valid_values = np.array(self.MOVE_VALUES)[valid_moves]

        if valid_scores.size == 0:
            # Out of possible moves: the game is over.
            game.game_over = True
            return -1

        if game.strict and max(output) > valid_scores.max():
            # Strict games do not allow falling back to a lower-ranked move,
            # so an invalid top choice ends the game.
            game.game_over = True
            return -1

        move = valid_values[int(np.argmax(valid_scores))]

        # Keep memory at a constant length; the guard avoids popping from
        # the empty list the agent starts with.
        if self.memory:
            self.memory.pop(0)
        self.memory.append(output)
        return move

    def give_reward(self, game):
        '''Placeholder for the reinforcement step.

        No reward is computed and the model is not updated yet. For now the
        method only returns ``game.history``, the data a reward computation
        would start from. ``history`` is an attribute, not a method, so it
        must not be called.
        '''
        # TODO: derive a reward from the history and fit self.model on it.
        return game.history

    def neural_net(self, weights = None):
        '''Build and compile the move-scoring network.

        Three Dense(32, relu) layers, each followed by Dropout(0.3), then a
        4-way softmax output; compiled with MSE loss and Adam at
        ``self.learning_rate``.

        Attributes:
        weights (str, optional): path of a saved weights file to load into
        the freshly built model.

        Returns:
        The compiled Keras model.
        '''
        model = Sequential()
        # Input is the flattened one-hot board: 16 cells x 16 slots = 256.
        model.add(Dense(32, activation='relu',
                        input_shape=(16 * self.TOKENS_PER_CELL,)))
        model.add(Dropout(0.3))

        model.add(Dense(32, activation='relu'))
        model.add(Dropout(0.3))

        model.add(Dense(32, activation='relu'))
        model.add(Dropout(0.3))

        model.add(Dense(4, activation='softmax'))

        model.compile(loss='mse', optimizer=Adam(self.learning_rate))

        if weights:
            model.load_weights(weights)
        return model

In [3]:
# Create the agent (RL_Player.__init__ builds the Keras model via neural_net())
# and a game instance constructed with ai=True.
agent = RL_Player()
game = Game2048(ai = True)

In [4]:
# Print the layer-by-layer summary of the agent's network (output shapes and parameter counts).
agent.model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                8224      
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                1056      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
dropout_2 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 1

In [6]:
# Display the current board; the output below shows it as a 4x4 integer array.
game.board

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 2]])

In [16]:
# presumably get_move() picks the next move and game_step() applies it -- TODO confirm against g2048.
# The board printed under this cell is emitted by one of these two calls.
game.get_move()
game.game_step()

[[0 0 0 0]
 [0 4 0 2]
 [0 0 2 8]
 [0 0 2 4]]


In [27]:
# game.history is a plain attribute; per the output below it is a list of
# flattened 16-element board arrays (presumably one per recorded state).
h = game.history
h

[array([0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 2, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 2]),
 array([0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 2, 0, 0, 0, 2]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 2, 0, 4]),
 array([0, 0, 0, 0, 2, 0, 0, 0, 8, 0, 0, 0, 2, 4, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 8, 0, 0, 2, 4]),
 array([0, 0, 0, 0, 0, 4, 0, 2, 0, 0, 2, 8, 0, 0, 2, 4])]

In [26]:
# Window of recent boards: the five entries before the latest one.
# (The attribute is `history`; the previous spelling raised AttributeError.)
game_h = game.history[-6:-1]

[array([0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 0, 0, 0, 0, 0, 2]),
 array([0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 2, 0, 0, 0, 2]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 2, 0, 4]),
 array([0, 0, 0, 0, 2, 0, 0, 0, 8, 0, 0, 0, 2, 4, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 2, 0, 2, 0, 8, 0, 0, 2, 4])]

In [38]:

def update_lmem(x):
    '''Placeholder: the body of this function has not been written yet.

    A ``def`` with no body is a SyntaxError, so the cell could not run at
    all. Until the logic exists, the stub fails loudly if it is called.

    Raises:
    NotImplementedError: always.
    '''
    raise NotImplementedError("update_lmem is not implemented yet")


[[11, 3], [11, 3], [11, 3], [11, 3]]

In [58]:
def give_reward(self, game):
    '''
    Prototype of the reinforcement step; takes ``self`` so it can be attached
    to the agent class as a method.

    Intended behaviour: check whether reinforcement is merited after the last
    move and, if so, update the fit of the model. None of that is implemented
    yet. For now the function only returns ``game.history``, the data a
    reward computation would start from. ``history`` is an attribute, not a
    method, so calling it raised TypeError.
    '''
    # TODO: derive a reward from the history and fit the model on it.
    return game.history

[[11, 3], [11, 3], [11, 3], [11, 3]]