In [1]:
import sys 
sys.path.append("/home/robert/Player-of-Games/src")


from g2048 import Game2048
from train_2048 import empties_state
import numpy as np

from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import random
import matplotlib.pyplot as plt
from tools import update_progress
import matplotlib.pyplot as plt

In [2]:
class RL_Player():
    def __init__(self, reward_depth = 5, model = None, demo = False, headless = False):
        self.reward = 0
        self.gamma = 0.9
        self.long_memory =[]
        # self.dataframe = pd.DataFrame()
        self.oldest_mem = 0
        self.target = []
        self.X = []
        self.learning_rate = .005
        self.reward_depth = reward_depth
        if model == None:
            self.model = self.neural_net()
        else:
            self.model = keras.models.load_model(model)
        self.headless = headless 
        self.demo = demo
        self.bad_move = 0
        self.memory = [[] for _ in range(self.reward_depth)]
        self.debug = 0
        self.debug1 = 0
        self.debug2 = -1
    
    def tokenize_board(self, board):
        '''method takes the game board and tokenizes all the arrays into 16 other arrays for if there's a 2, a 4, an 8, etc in a location to just a 1. hopefully...makes matching better
        Attributes 
        game( game2084 object): the game. 
        Returns
        tokenized 256x1 tokenization of all* possible game states. 
        
        * does not account for the 136k tile. I just..don't expect to need that. Frankly, 16k and 32k seem a reach
        '''
        tokenized = np.array([])
        
        for x in board.ravel():
            blank = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
            if x != 0:
                blank[int(np.log2(x))] = 1
            tokenized = np.append(tokenized, blank)
        return tokenized
    
    def clear_memory(self, game):
        if game.game_over == True:
            self.memory = [[] for _ in range(self.reward_depth)]
            self.long_memory = []
            self.oldest_mem = 0
    
    def ai_suggest_move(self, game):
        '''
        method which takes the game and the model (the ai we are training) and suggests a move.
        
        Attributes:
        Game(g2048 object): the game the AI is currently playing
        
        '''
        self.bad_move = 0
        board = game.board
        
        # tokenize_board block takes the game board and tokenizes all the arrays into 16 other arrays for if there's a 2, a 4, an 8, etc in a location to just a 1. hopefully...makes matching better

        tokenized = self.tokenize_board(board) 
        
        pred= self.model.predict(tokenized.reshape(1,-1))
        output = [i for i in pred[0]]
        
        values = [2, 4, 6, 8]
        valid_moves = game.valid_moves
        
        

        d = dict(zip(np.array(output)[valid_moves], np.array(values)[valid_moves])) # all valid moves
        
        
        
        if self.demo == False: # only have random mvoe interference and gambling in learning rounds. 
            if np.random.randint(50+len(game.history)) < 3 and len(list(d.values())) > 0: # 5% of the time in the early game and decreasing as game length goes on, take random gamble moves to hopefully learn new tactics
                rand_move = random.choice(list(d.values()))
                self.memory.pop(0)
                output.append(rand_move)
                self.memory.append(np.array(output).reshape(1,-1))
                if self.headless == False: 
                    print('random Move')
                return rand_move
        
        if len(d.keys()) == 0:
            #print('ai_suggest_move is out of possible moves')
            game.game_over = True #game is over
            return  -1

        if game.strict == True:
            if max(output) in d.keys(): #if the suggested move is not in valid_moves, it wont be in d, and so this will be negative, and the game will end
                move = d.get(max(d.keys()))
                self.memory.pop(0)
                output.append(move)
                self.long_memory.append(np.array(output).reshape(1,-1))
                self.memory.append(np.array(output).reshape(1,-1))
                return move
            else:
                game.game_over = True #game is over because this setting does not allow invaild moves:
                return  -1
        else:
            if max(output) not in d.keys():
                self.bad_move = -1 #triggers bad move if the thing has to take the next best move. 
                self.memory.pop()
                self.memory.insert(0, self.oldest_mem) #puts the oldest memory back on because invalid moves do not update board
            move = d.get(max(d.keys())) #takes the next best move that is valid
            self.oldest_mem = self.memory.pop(0)
            output.append(move)
            self.long_memory.append(np.array(output).reshape(1,-1))
            self.memory.append(np.array(output).reshape(1,-1))
            
            return move
    
    def calc_reward(self, game, move):
        '''
        Runs right after AI suggest move
        checks if reinforcement is merited after the last move, if so, updates the fit of model. 
        Attributes
        game (Game2048 object)
        move (int): int passed from ai_suggest_move()
        
        Updates reward
        
        '''
        self.reward = 0

        
        ## get old board and new board by checking move against the game's built in moves. 
        old_board = game.board
        
        if move == 2: 
            next_board = game.slide_down()
        elif move == 4:
            next_board = game.slide_left()
        elif move == 6:
            next_board = game.slide_right()
        elif move == 8:
            next_board = game.slide_up()
        else:
            next_board = np.array([[ 1,  1,  1,  1],  #simple full board for end-game board state comparison. 
                                   [ 1,  1, 1, 1],
                                    [ 1, 1, 1,  1],
                                   [ 1,  1,  1,  1]])
        
        tiles_combined = empties_state(old_board, next_board)
        
        
        big_tiles = dict({32:6, 64: 6, 128: 10, 256: 10, 512: 20, 1024: 30, 2048: 100, 4096: 1000})
        if np.amax(old_board) < np.amax(next_board): # checks to see if the bot has combined tiles or gotten a big one. 
            if np.amax(game.board) in big_tiles.keys():
                self.reward += big_tiles[np.amax(game.board)]*2
                if self.headless == False:    
                    print('big tiles')
            else: 
                self.reward +=4 
                if self.headless == False:
                    print('biggest tile yet')
        else:
            a = list(old_board.ravel())
            b = list(next_board.ravel())
            for i in big_tiles.keys():
            
                if b.count(i) - a.count(i) > 0:
                    self.reward+= big_tiles[i]
                    if self.headless == False:
                        print('big tiles')
        
        
        
        if tiles_combined > 2:
            self.reward += 1
            if self.headless == False:
                print('board managment bonus!')
        
        elif self.bad_move == -1 and len(game.history) > 10: # doesn't start penalizing for bad moves untill after a few turns of play
            self.reward += -2
            if self.headless == False:
                print('invalid move')
            
        if game.game_over == True:
            self.reward = -30
            if self.headless == False:
                print('game over penalty')
        self.bad_move == 0 # Reset bad move!
        old_score = game.score
        
    def give_reward(self, game):
        '''if there is a reward, this will get the game history and the memory of the outputs, multiply the outputs by the reward and its time discount
        
        '''
        values = [2, 4, 6, 8]
        discount = 1
        self.target = []
        
        if self.reward != 0:
            #print('hey, the reward is this {}, so triggering reward steps'.format(self.reward))
            if self.reward == -2:
                h = game.history[-1]
                m = self.memory[-2][0]
                
                self.debug = m
                move = m[-1]
                
                items_short = m[:4]
                self.debug1 = items_short
                k = values.index(move)
                #value in array that corresponds to the move taken  
                items_short[k] = items_short[k]*self.reward
                self.target.append(items_short)
                
                self.X = agent.tokenize_board(game.history[-1]).reshape(1,-1)
                self.model.fit(np.array(self.X),np.array(self.target))
                print ('trained on invalid move')
                reward = 0 
                self.debug2 = 0
                return
                
            
            elif len(game.history) <= self.reward_depth:
                h = game.history[:-1]
                m = self.memory[-(len(game.history)-1):] #indexes into the last last point in memory attached to the current game.
                return # do nothing here to avoid unsolved index crash in items_short[0][:4]
                self.debug2 = 1
            else:
                h = game.history[-(1+self.reward_depth):-1] #index into these arrays from the back, up to a height of however far the depth is
                m = self.memory[-(1+self.reward_depth):] # memory is np array, game.history is list
                self.debug2 = 2
            #get moves 
            
            
            
           
            
            for items in m:  ##make mem shotened to the length you want, currently its five. 
                self.debug1 = m
                 #hacky nonsense way to check for this because it seems like the way I'm hadling these tensors is casting them into higher and lower order tensors
                #self.debug = items
                move = items[0][-1]

                items_short = items[0][:4]
                k = values.index(move)
                #value in array that corresponds to the move taken

                if self.reward*discount > 1:
                    items_short[k] = items_short[k]*self.reward*discount
                elif self.reward < 0:
                    items_short[k] = items_short[k]*self.reward*discount
                else:
                    items_short[k] = items_short[k]*1.05
                self.target.append(items_short)
                discount -=(1/self.reward_depth)
            self.X =[self.tokenize_board(i) for i in h]
            print('reward: {}'.format(self.reward))
            self.model.fit(np.array(self.X),np.array(self.target))
            self.reward = 0
                
                
                
                
                
            
            
            
           
            
        
        
    def neural_net(self,):
        '''
        a method which creates a neural net for the agent
        Returns:
        model, a tensorflow nn
        '''

        model = Sequential()
        model.add(Dense(128, activation='relu', input_dim = 256 ))
        model.add(Dropout(0.3))
        
        model.add(Dense(64, activation='relu') )
        model.add(Dropout(0.3))
        
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.3))
        
        model.add(Dense(32, activation='relu'))
        model.add(Dropout(0.3))
        
        model.add(Dense(32, activation='relu'))
        model.add(Dropout(0.3))
        
        model.add(Dense(4, activation='softmax'))
        opt = Adam(self.learning_rate)
        
        model.compile(loss='mse', optimizer=opt)

        
        return model

In [3]:
agent = RL_Player(5)

In [4]:
game = Game2048(ai = True, strict = False)
scores = []
best_tiles = []

In [5]:
def print_move(x):
    if x == 6:
        print('slide right')
    if x == 8:
        print('slide up')
    if x ==4:
        print('slide left')
    if x == 2:
        print('slide down')

In [6]:


for i in range(50):
    agent.clear_memory(game)
    game = Game2048(ai = True, headless = True, strict = False)
    
    print('new game')
    print('')
            

    while game.game_over == False:
        last_board = game.board
        #print('Game Over: {}'.format(game.game_over))
        
        move = agent.ai_suggest_move(game)
        game.show_board()
        agent.calc_reward(game, move)
             
        agent.give_reward(game)
        
        game.get_move(move)
        game.game_step()
#         game.show_board()
        print_move(move)
#         print('')
#         print('new move')
#         print('')
        
        new_board = game.board
    agent.calc_reward(game, move) # gives game-over rewards
    #agent.give_reward(game)
    
    best_tiles.append(game.board.max())
    scores.append(game.score)
    update_progress(i/500)
    print(game.board.max())
        #print('reward: {}'.format(agent.reward))
    

Progress: [####------------------------------------] 9.8%
64


In [24]:
for i in game.history:
    print('')
    print(i.reshape(4,4))
    


[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 2 0 0]]

[[0 0 0 0]
 [0 0 2 0]
 [0 0 0 0]
 [0 0 0 2]]

[[0 0 0 0]
 [2 0 0 0]
 [0 0 0 0]
 [0 0 2 2]]

[[0 0 0 0]
 [0 0 0 2]
 [0 0 0 0]
 [2 0 2 2]]

[[0 0 2 0]
 [0 0 0 0]
 [0 0 0 0]
 [2 0 2 4]]

[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]
 [2 4 4 4]]

[[0 0 2 0]
 [0 0 0 0]
 [0 0 0 0]
 [0 2 4 8]]

[[0 0 0 0]
 [0 0 0 0]
 [0 2 2 0]
 [0 2 4 8]]

[[0 0 0 0]
 [0 0 2 0]
 [0 0 2 0]
 [0 4 4 8]]

[[0 0 0 0]
 [0 0 0 0]
 [4 0 4 0]
 [0 4 4 8]]

[[0 0 4 0]
 [0 0 0 0]
 [0 0 0 0]
 [4 4 8 8]]

[[0 0 0 0]
 [0 0 0 0]
 [0 0 4 2]
 [4 4 8 8]]

[[ 0  0  0  0]
 [ 0  0  0  0]
 [ 4  2  0  4]
 [ 8 16  0  0]]

[[ 0  0  0  0]
 [ 0  0  0  4]
 [ 4  2  0  0]
 [ 8 16  0  4]]

[[ 0  0  0  0]
 [ 4  2  0  0]
 [ 4  2  0  0]
 [ 8 16  4  0]]

[[ 0  0  4  0]
 [ 0  0  0  0]
 [ 8  4  0  0]
 [ 8 16  4  0]]

[[ 0  0  0  0]
 [ 0  0  2  0]
 [ 0  4  0  0]
 [16 16  8  0]]

[[ 0  0  0  0]
 [ 0  0  0  0]
 [ 2  4  2  0]
 [16 16  8  0]]

[[ 0  0  0  0]
 [ 0  0  0  0]
 [ 2  4  2  0]
 [32  8  2  0]]

[[ 0  0  0  0

In [23]:
agent.long_memory[-56:]

[array([[0.28457183, 0.22300322, 0.27599838, 0.21642654, 2.        ]]),
 array([[0.2905387 , 0.24591221, 0.27799472, 0.18555441, 2.        ]]),
 array([[0.28311852, 0.26175189, 0.26134905, 0.19378054, 2.        ]]),
 array([[0.28311852, 0.26175189, 0.26134905, 0.19378054, 4.        ]]),
 array([[0.28698263, 0.2540279 , 0.27408034, 0.18490906, 2.        ]]),
 array([[0.2734867 , 0.26305574, 0.27548823, 0.18796939, 6.        ]]),
 array([[0.2869986 , 0.22297493, 0.27774301, 0.21228354, 2.        ]]),
 array([[0.28033248, 0.28108048, 0.22507563, 0.21351129, 4.        ]]),
 array([[0.26865405, 0.2812303 , 0.26352659, 0.18658906, 4.        ]]),
 array([[0.26865405, 0.2812303 , 0.26352659, 0.18658906, 2.        ]]),
 array([[0.29412198, 0.24263068, 0.27950475, 0.18374263, 2.        ]]),
 array([[0.29412198, 0.24263068, 0.27950475, 0.18374263, 6.        ]]),
 array([[0.26174062, 0.27885544, 0.19247541, 0.26692849, 4.        ]]),
 array([[0.25526392, 0.28009632, 0.26458693, 0.20005278, 4.     

In [7]:
agent.model.save('fixed a prlbem.h5') 

In [8]:
kka +k

NameError: name 'kka' is not defined

In [None]:
j = [i for i in zip((scores, best_tiles))]
fig, ax = plt.subplots(1,3, figsize = (18,6))
ax[0].hist(j[0], bins = 12)
ax[0].set_title('histogram of game scores')
ax[0].set_xlabel('number of games')
ax[0].set_ylabel('final score')

ax[1].hist(j[1], bins = 10)
ax[1].set_title('histograms of best tiles reached')
ax[1].set_xlabel('tile value')
ax[1].set_ylabel('number of games')

move_avg = np.convolve(scores, np.ones((10,))/10, mode='valid')    
ax[2].plot(move_avg)
ax[2].set_title('score over evolutions')
ax[2].set_xlabel('number of games')
ax[2].set_ylabel('score')
plt.tight_layout()
plt.show()


In [None]:
gens = enumerate(scores)
fig, ax = plt.plot(gens)

In [None]:
j = [(i, j)for i, j in enumerate(scores)]

In [None]:
move_avg_tiles = np.convolve(best_tiles, np.ones((10,))/10, mode='valid')    

In [None]:
fig, ax = plt.subplots()
ax.plot(np.log2(move_avg_tiles))

In [None]:
agent.model.summary()

In [None]:
#agent.model.save('my_model.h5') 

#new_model = keras.models.load_model('my_model.h5')


In [None]:
def demo_QR_learner(ai_path, headless = True):
    '''
    function that runs the AI without training it for showcasing and metrics of the AI at specific training levels. 
    Attributes 
    ai_path (str): path of the ai model
    Returns
    tuple (biggest tile reached, score of game)
    '''
    agent = RL_Player(model = ai_path, demo = True)
    game = Game2048(ai = True, headless = False, strict = False)

    

    while game.game_over == False:
        last_board = game.board
        #print('Game Over: {}'.format(game.game_over))

        move = agent.ai_suggest_move(game)
        if headless == False:
            game.show_board()
        #agent.calc_reward(game, move)

        #agent.give_reward(game)

        game.get_move(move)
        game.game_step()
        if headless == False:
            print_move(move)
            print('')

        new_board = game.board
    return game.board.max(), game.score


In [None]:
def ai_averages(ai_path, num_runs, is_strict = False):

    
    scores = []
    best_tiles = []
    empties = []
    for i in range(num_runs):
        
        

        tile, score = demo_QR_learner(ai_path, headless = True)

        print(i)        
        
        
        best_tiles.append(tile)
        scores.append(score)
        update_progress(i/num_runs)  
    return best_tiles, scores

In [None]:
#tiles, scores = ai_averages('my_model.h5', 20)

In [None]:
#demo_QR_learner('my_model.h5', headless = False)

# 500 Gen Model

In [None]:
tiles500, scores500 = ai_averages('Q500.h5', 100)

In [None]:
#tiles500

In [None]:
tiles1000, scores1000 = ai_averages('Q1000.h5', 100)

In [None]:
#tiles1000

In [None]:
tiles1500, scores1500 = ai_averages('Q1500.h5', 100)

In [None]:
#tiles1500

In [None]:
tiles2000, scores2000 = ai_averages('Q2000.h5', 100)


In [None]:

fig, ax = plt.subplots(1,2, figsize = (10,4))

fig.suptitle('performance after 500 generations', fontsize=16)
ax[0].hist(scores500, bins = 12)
ax[0].set_title('number of games')
ax[0].set_xlabel('histogram of game scores')
ax[0].set_ylabel('final score')

ax[1].hist(tiles500, bins = 10)
ax[1].set_title('histograms of best tiles reached')
ax[1].set_xlabel('tile value')
ax[1].set_ylabel('number of games')
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

In [None]:

fig, ax = plt.subplots(1,2, figsize = (10,4))

fig.suptitle('performance after 1000 generations', fontsize=16)
ax[0].hist(scores1000, bins = 12)
ax[0].set_title('number of games')
ax[0].set_xlabel('histogram of game scores')
ax[0].set_ylabel('final score')

ax[1].hist(tiles1000, bins = 10)
ax[1].set_title('histograms of best tiles reached')
ax[1].set_xlabel('tile value')
ax[1].set_ylabel('number of games')
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

In [None]:

fig, ax = plt.subplots(1,2, figsize = (10,4))

fig.suptitle('performance after 1500 generations', fontsize=16)
ax[0].hist(scores1500, bins = 12)
ax[0].set_title('number of games')
ax[0].set_xlabel('histogram of game scores')
ax[0].set_ylabel('final score')

ax[1].hist(tiles1500, bins = 10)
ax[1].set_title('histograms of best tiles reached')
ax[1].set_xlabel('tile value')
ax[1].set_ylabel('number of games')
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

In [None]:

fig, ax = plt.subplots(1,3, figsize = (10,4))

fig.suptitle('performance after 2000 generations', fontsize=16)
ax[0].hist(scores2000, bins = 12)
ax[0].set_title('number of games')
ax[0].set_xlabel('histogram of game scores')
ax[0].set_ylabel('final score')

ax[1].hist(tiles2000, bins = 10)
ax[1].set_title('histograms of best tiles reached')
ax[1].set_xlabel('tile value')
ax[1].set_ylabel('number of games')

ax[2].bar(t3, v3, align='center', alpha=0.5)
#ax[0].xticks(y_pos, objects)
ax[2].set_ylabel('Frequency')
ax[2].set_xlabel('tile value')
ax[2].set_title('2000 games')


fig.tight_layout(rect=[0, 0.03, 1, 0.95])

plt.show()

In [None]:
len(tiles1500)

In [None]:
from collections import Counter

In [None]:
plt.hist(tiles2000)

In [None]:
tiles = Counter(tiles500)
tiles1 = Counter(tiles1000)
tiles2 = Counter(tiles1500)
tiles3 = Counter(tiles2000)

In [None]:
a = list(tiles.values())
b = list(tiles.keys())
s = sorted(zip(a,b))
t,v0 = map(list, zip(*s))
t0 = [str(i) for i in t]

In [None]:
tiles = Counter(tiles500)
v = list(tiles1.values())
t = list(tiles1.keys())
s = sorted(zip(t,v))
t,v1 = map(list, zip(*s))
t1 = [str(i) for i in t]

In [None]:
v = list(tiles2.values())
t = list(tiles2.keys())
s = sorted(zip(t,v))
t,v2 = map(list, zip(*s))
t2 = [str(i) for i in t]

In [None]:
v = list(tiles3.values())
t = list(tiles3.keys())
s = sorted(zip(t,v))
t,v3 = map(list, zip(*s))
t3 = [str(i) for i in t]

In [None]:

y_pos = t
performance = v

fig, ax = plt.subplots(1,4, figsize = (10,4))

ax[0].bar(t0, v0, align='center', color = 'k')
#ax[0].xticks(y_pos, objects)
ax[0].set_ylabel('Frequency')
ax[0].set_xlabel('tile value')
ax[0].set_title('by 500 games')

ax[1].bar(t1, v1, align='center', color = 'k')
#ax[0].xticks(y_pos, objects)
ax[1].set_ylabel('Frequency')
ax[1].set_xlabel('tile value')
ax[1].set_title('by of 1000 games')

ax[2].bar(t2, v2, align='center', color = 'k')
#ax[0].xticks(y_pos, objects)
ax[2].set_ylabel('Frequency')
ax[2].set_xlabel('tile value')
ax[2].set_title('by 1500 games')

ax[3].bar(t3, v3, align='center',  color = 'k')
#ax[0].xticks(y_pos, objects)
ax[3].set_ylabel('Frequency')
ax[3].set_xlabel('tile value')
ax[3].set_title('by 2000 games')

plt.suptitle('Best tiles reached with 100 trials')

plt.savefig('best tile reached by Q-Learner Bar.png')
plt.tight_layout(rect=[0, 0.03, 1, .90])
plt.show()


In [None]:
all_scores = scores500+scores1000+scores1500+scores2000

In [None]:
moving_avg = np.convolve(all_scores, np.ones((10,))/10, mode='valid')


In [None]:
fig, ax = plt.subplots()
ax.plot(moving_avg, color = 'k', linewidth = 1.2)

ax.set_xticklabels(xtickslist)


ax.set_ylabel('Score')
ax.set_xlabel('Game Iterations')
ax.set_title('Q-Learning Score per Games Played')
plt.savefig('Q-Learning Score per Games Played.png')
plt.tight_layout()
plt.show()


In [None]:
xtickslist = [str(200*i) for i in range(11)]

In [None]:
xtickslist