In [1]:
from game2048.game import Game
from game2048.displays import Display, IPythonDisplay
import numpy as np
import random
from collections import deque
#from game2048.agents import ExpectiMaxAgent

display1 = Display()
display2 = IPythonDisplay()

In [2]:
class Agent_train:
    '''Agent Base.'''

    def __init__(self, game, display=None):
        self.game = game
        self.display = display

    def play(self, max_iter=np.inf, verbose=False):
        n_iter = 0
        while (n_iter < max_iter) and (not self.game.end):
            direction = self.step()
            self.game.move(direction)
            n_iter += 1
            if verbose:
                print("Iter: {}".format(n_iter))
                print("======Direction: {}======".format(
                    ["left", "down", "right", "up"][direction]))
                if self.display is not None:
                    self.display.display(self.game)

    def step(self):
        direction = int(input("0: left, 1: down, 2: right, 3: up = ")) % 4
        return direction


class ExpectiMaxAgent_train(Agent_train):

    def __init__(self, game, display=None):
        if game.size != 4:
            raise ValueError(
                "`%s` can only work with game of `size` 4." % self.__class__.__name__)
        super().__init__(game, display)
        from game2048.expectimax import board_to_move
        self.search_func = board_to_move
        self.data = []

    def step(self):
        direction = self.search_func(self.game.board)
        self.data.append([game.board, direction])
        return direction

In [3]:
import keras
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization, AveragePooling2D, Input, GlobalAveragePooling2D

Using TensorFlow backend.


In [4]:
from tensorflow.python.client import device_lib
tf.device = '/GPU:0'
tf.test.gpu_device_name()
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 2842469107702107972,
 name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 14994975492029029114
 physical_device_desc: "device: XLA_CPU device",
 name: "/device:XLA_GPU:0"
 device_type: "XLA_GPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 13642446321367770659
 physical_device_desc: "device: XLA_GPU device",
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 7390920704
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 706276631005643353
 physical_device_desc: "device: 0, name: Tesla P4, pci bus id: 0000:00:08.0, compute capability: 6.1"]

In [7]:
X_train = deque(maxlen=5000)
Y_train = deque(maxlen=5000)
data_x = []
data_y = []
NUM_Y_CLASSES = 4
NUM_X_CLASSES = 12

In [6]:
model = Sequential()
model(Input(shape=(4, 4, NUM_X_CLASSES)))

#model.add(Conv2D(16, (4, 4), padding='same', activation='relu'))

model.add(Conv2D(8, (4, 4), padding='same', activation='relu'))
model.add(Conv2D(16, (1, 1), padding='same', activation='relu'))

model.add(GlobalAveragePooling2D())
#model.add(Flatten())

'''model.add(BatchNormalization())
model.add(Dropout(0.05))
model.add(Dense(units=384, activation='relu'))

model.add(BatchNormalization())
model.add(Dropout(0.05))
model.add(Dense(units=256, activation='relu'))

model.add(BatchNormalization())
model.add(Dropout(0.05))
model.add(Dense(units=256, activation='relu'))

model.add(BatchNormalization())
model.add(Dense(units=64, activation='relu'))'''

#model.add(BatchNormalization())
model.add(Dense(units=24, activation='relu'))
model.add(Dense(units=24, activation='relu'))

#model.add(BatchNormalization())
model.add(Dense(units=4, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()
history = []

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 4, 4, 8)           1544      
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 4, 4, 16)          144       
_________________________________________________________________
global_average_pooling2d_1 ( (None, 16)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 24)                408       
_________________________________________________________________
dense_2 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 100       
Total params: 2,796
Trainable params: 2,796
Non-trainable params: 0
____________________________________________________

In [None]:
history = []
tf.device = '/GPU:0'
tf.test.gpu_device_name()
device_lib.list_local_devices()
import matplotlib.pyplot as plt

'''for i in range(100):
    history = model.fit(X_train, Y_train, epochs=5, batch_size=128, verbose=0)
    # evaluate
    print('In episode %2d:' % (i))
    loss_train, score_train = model.evaluate(X_train, Y_train)
    print('Training loss: %.4f, Training accuracy: %.2f%%' %
          (loss_train, score_train * 100))
    loss_test, score_test = model.evaluate(X_test, Y_test)
    print('Testing loss: %.4f, Testing accuracy: %.2f%%' %
          (loss_test, score_test * 100))
    print()
'''
for i in range(100000):
    for j in range(4, 10):
        times = 2 ** j
        game = Game(4, score_to_win=times, random=False)
        agent = ExpectiMaxAgent_train(game, display=display2)
        agent.play(verbose=False)

        for x, y in agent.data:
            data_x.append(x)
            data_y.append(y)

        X_train.extend(keras.utils.np_utils.to_categorical(np.array(np.log2(np.maximum(data_x, 1))).reshape(len(data_x), 4, 4, 1), NUM_X_CLASSES))
        Y_train.extend(np.array(keras.utils.np_utils.to_categorical(data_y, NUM_Y_CLASSES)))

    history = model.fit(np.array(X_train), np.array(Y_train), epochs=10, batch_size=256, verbose=0).history
    loss_train, score_train = model.evaluate(np.array(X_train), np.array(Y_train))
    print('Training loss: %.4f, Training accuracy: %.2f%%' %
          (loss_train, score_train * 100))

#print(history)
plt.plot(history['accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

plt.plot(history['loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show() 

Training loss: 1.1144, Training accuracy: 47.27%
Training loss: 1.0492, Training accuracy: 51.14%
Training loss: 1.0412, Training accuracy: 52.38%
Training loss: 1.0279, Training accuracy: 52.08%
Training loss: 1.0471, Training accuracy: 53.02%
Training loss: 1.0661, Training accuracy: 51.60%
Training loss: 1.0725, Training accuracy: 51.64%
Training loss: 1.0884, Training accuracy: 50.96%
Training loss: 1.0899, Training accuracy: 50.20%
Training loss: 1.0831, Training accuracy: 49.82%
Training loss: 1.0841, Training accuracy: 48.58%
Training loss: 1.0846, Training accuracy: 49.34%
Training loss: 1.0966, Training accuracy: 47.64%
Training loss: 1.0968, Training accuracy: 47.48%
Training loss: 1.0937, Training accuracy: 47.52%
Training loss: 1.0881, Training accuracy: 47.88%
Training loss: 1.0951, Training accuracy: 47.34%
Training loss: 1.0905, Training accuracy: 47.42%
Training loss: 1.0844, Training accuracy: 48.16%
Training loss: 1.0822, Training accuracy: 48.52%
Training loss: 1.082

In [15]:
history1 = history

In [16]:
from game2048.agents import Agent

class MyAgent(Agent):

    def step(self):
        board = np.log2(np.maximum(np.array(self.game.board), 1)).reshape(1, 4, 4, 1)
        board = keras.utils.np_utils.to_categorical(board, NUM_X_CLASSES)
        direction=model.predict_classes(board)[0]
        return direction

In [17]:
%%time
def single_run(size, score_to_win, AgentClass, **kwargs):
    game = Game(size, score_to_win)
    agent = AgentClass(game, display=Display(), **kwargs)
    agent.play(verbose=False)
    return game.score

N_TESTS = 50
scores = []
for _ in range(N_TESTS):
    score = single_run(GAME_SIZE, SCORE_TO_WIN,
                       AgentClass=MyAgent)
    scores.append(score)

print("Average scores: @%s times" % N_TESTS, sum(scores) / len(scores))

NameError: name 'GAME_SIZE' is not defined

In [18]:
%%time
game = Game(4, score_to_win=2048, random=False)
display2.display(game)
agent = MyAgent(game, display=display2)
agent.play(verbose=True)

0,1,2,3
,,,4.0
,,,2.0
,,,
,,,


Iter: 1


0,1,2,3
4.0,,,
2.0,,,
,,,
,,2.0,


Iter: 2


0,1,2,3
,,2.0,4.0
,,,2.0
,,,
,,,2.0


Iter: 3


0,1,2,3
,,2.0,4.0
,,,4.0
,4.0,,
,,,


Iter: 4


0,1,2,3
,4.0,2.0,8.0
,,4.0,
,,,
,,,


Iter: 5


0,1,2,3
,,,
,4.0,,
,,2.0,
,4.0,4.0,8.0


Iter: 6


0,1,2,3
,8.0,2.0,8.0
,,4.0,2.0
,,,
,,,


Iter: 7


0,1,2,3
8.0,2.0,8.0,
4.0,2.0,,
,,,
2.0,,,


Iter: 8


0,1,2,3
,8.0,2.0,8.0
,4.0,4.0,2.0
,,,
,,,2.0


Iter: 9


0,1,2,3
8.0,2.0,8.0,
8.0,2.0,,2.0
,,,
2.0,,,


Iter: 10


0,1,2,3
16.0,4.0,8.0,2.0
2.0,,,
,,,
2.0,,,


Iter: 11


0,1,2,3
16.0,4.0,8.0,2.0
4.0,,,2.0
,,,
,,,


Iter: 12


0,1,2,3
16.0,4.0,8.0,2.0
,,4.0,2.0
,,,
,2.0,,


Iter: 13


0,1,2,3
16.0,4.0,8.0,4.0
,2.0,4.0,
,,,
,,,4.0


Iter: 14


0,1,2,3
16.0,4.0,8.0,4.0
2.0,4.0,,
,,4.0,
4.0,,,


Iter: 15


0,1,2,3
16,8.0,8.0,4.0
2,,4.0,
4,,,
2,,,


Iter: 16


0,1,2,3
16,16.0,4.0,
2,4.0,,
4,,4.0,
2,,,


Iter: 17


0,1,2,3
32,4.0,,
2,4.0,,
8,,,
2,,,4.0


Iter: 18


0,1,2,3
32,4.0,,2.0
2,4.0,,
8,,,
2,4.0,,


Iter: 19


0,1,2,3
32,8.0,,2.0
2,4.0,,
8,,4.0,
2,,,


Iter: 20


0,1,2,3
32,8.0,4.0,2.0
2,4.0,4.0,
8,,,
2,,,


Iter: 21


0,1,2,3
32,8.0,8.0,2.0
2,4.0,,
8,,2.0,
2,,,


Iter: 22


0,1,2,3
32,8.0,8.0,2.0
2,4.0,2.0,
8,,,
2,,2.0,


Iter: 23


0,1,2,3
32,16.0,2.0,
2,4.0,2.0,
8,,,
4,2.0,,


Iter: 24


0,1,2,3
32,16.0,4.0,
2,4.0,,
8,2.0,,2.0
4,,,


Iter: 25


0,1,2,3
32,16,4.0,2.0
2,4,,
8,2,,
4,2,,


Iter: 26


0,1,2,3
32,16.0,4.0,2.0
2,4.0,,
8,4.0,,4.0
4,,,


Iter: 27


0,1,2,3
32,16.0,4.0,2.0
2,8.0,,4.0
8,,,
4,,4.0,


Iter: 28


0,1,2,3
32,16.0,4.0,2.0
2,8.0,4.0,2.0
8,,,
8,,,


Iter: 29


0,1,2,3
32.0,16.0,8.0,4.0
2.0,8.0,2.0,
16.0,,,
,,,


Iter: 30


0,1,2,3
32.0,16.0,8.0,4.0
2.0,8.0,2.0,
16.0,4.0,,
,,,


Iter: 31


0,1,2,3
32.0,16,8.0,4.0
2.0,8,2.0,
16.0,4,,
,4,,


Iter: 32


0,1,2,3
32,16.0,8.0,4.0
2,8.0,2.0,
16,4.0,,
4,,,4.0


Iter: 33


0,1,2,3
32,16.0,8.0,4.0
2,8.0,2.0,
16,4.0,2.0,
8,,,


Iter: 34


0,1,2,3
32,16,8.0,4.0
2,8,4.0,
16,4,,
8,2,,


Iter: 35


0,1,2,3
32.0,16.0,8,4
,2.0,8,4
,4.0,16,4
,,8,2


Iter: 36


0,1,2,3
32.0,16.0,16.0,8.0
,2.0,16.0,4.0
,4.0,8.0,2.0
2.0,,,


Iter: 37


0,1,2,3
32,32.0,8.0,
2,16.0,4.0,
4,8.0,2.0,4.0
2,,,


Iter: 38


0,1,2,3
64,8.0,,
2,16.0,4.0,
4,8.0,2.0,4.0
2,,,2.0


Iter: 39


0,1,2,3
64,8.0,4.0,4.0
2,16.0,2.0,2.0
4,8.0,,2.0
2,,,


Iter: 40


0,1,2,3
64,8,8.0,
2,16,4.0,
4,8,2.0,
2,2,,


Iter: 41


0,1,2,3
64,16.0,2.0,
2,16.0,4.0,
4,8.0,2.0,
4,,,


Iter: 42


0,1,2,3
64,16.0,2.0,
2,16.0,4.0,
4,8.0,2.0,4.0
4,,,


Iter: 43


0,1,2,3
64,16.0,2.0,
2,16.0,4.0,2.0
4,8.0,2.0,4.0
4,,,


Iter: 44


0,1,2,3
64,16,2.0,
2,16,4.0,2.0
4,8,2.0,4.0
4,2,,


Iter: 45


0,1,2,3
64,16,2.0,
2,16,4.0,2.0
4,8,2.0,4.0
4,2,,4.0


Iter: 46


0,1,2,3
64,16,2,
2,16,4,2.0
4,8,2,4.0
4,2,4,2.0


Iter: 47


0,1,2,3
4,64,16,2
2,16,4,2
4,8,2,4
4,2,4,2


Iter: 48


0,1,2,3
4,64,16,2
2,16,4,2
4,8,2,4
4,2,4,2


You lose! Score: 64
CPU times: user 713 ms, sys: 56.5 ms, total: 770 ms
Wall time: 681 ms


In [19]:
model.save('my_model.h5')