In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
from copy import deepcopy
import random 
import math
import time
from tkinter import *

In [None]:
### Largely inspired by https://github.com/moporgic/2048-Framework-Python/blob/master/board.py
class board:
    """
    simple implementation of 2048 puzzle

    The 4x4 grid is stored row by row in a flat list of 16 entries (self.state).
    Each entry is a tile index: 0 marks an empty cell and a tile with index t
    is displayed as 2**t (see __str__).
    """

    def __init__(self, state = None, info = None):
        """
        create a board from a copy of the given 16-entry state (all empty if omitted)
        info is stored as-is and never read by this class
        """
        self.state = state[:] if state is not None else [0] * 16
        self.info = info
        return

    def __getitem__(self, pos):
        """ return the tile index stored at the 1-d position """
        return self.state[pos]

    def __setitem__(self, pos, tile):
        """ overwrite the tile index at the 1-d position (no validation) """
        self.state[pos] = tile
        return

    def place(self, pos, tile):
        """
        place a tile (index value) to the specific position (1-d index)
        return 0 if the action is valid, or -1 if not
        """
        # the position must be on the board and currently empty
        if pos >= 16 or pos < 0 or self.state[pos] != 0:
            return -1
        # only tile indexes 1 and 2 may be placed
        if tile != 1 and tile != 2:
            return -1
        self.state[pos] = tile
        return 0

    def slide(self, opcode):
        """
        apply an action to the board
        (0: up, 1: right, 2: down, 3: left)
        return the reward of the action, or -1 if the action is illegal
        """
        if opcode == 0:
            return self.slide_up()
        if opcode == 1:
            return self.slide_right()
        if opcode == 2:
            return self.slide_down()
        if opcode == 3:
            return self.slide_left()
        return -1

    def slide_left(self):
        """
        slide every row to the left, merging equal neighbours once per move
        return the sum of 2**index over all merged tiles, or -1 if nothing moved
        """
        move, score = [], 0
        for start in range(0, 16, 4):
            # drop the empty cells, then merge from the left
            tiles = [t for t in self.state[start:start + 4] if t]
            merged = []
            i = 0
            while i < len(tiles):
                # compare tile values with ==; 'is' would test object identity,
                # which is only accidentally true for small cached integers
                if i + 1 < len(tiles) and tiles[i] == tiles[i + 1]:
                    tile = tiles[i] + 1
                    score += 1 << tile
                    merged.append(tile)
                    i += 2
                else:
                    merged.append(tiles[i])
                    i += 1
            move += merged + [0] * (4 - len(merged))
        if move != self.state:
            self.state = move
            return score
        return -1

    def slide_right(self):
        """ slide right by mirroring the board, sliding left and mirroring back """
        self.reflect_horizontal()
        score = self.slide_left()
        self.reflect_horizontal()
        return score

    def slide_up(self):
        """ slide up by transposing the board, sliding left and transposing back """
        self.transpose()
        score = self.slide_left()
        self.transpose()
        return score

    def slide_down(self):
        """ slide down by transposing the board, sliding right and transposing back """
        self.transpose()
        score = self.slide_right()
        self.transpose()
        return score

    def rotate(self, rot = 1):
        """ rotate the board clockwise by given times """
        # Python's % already yields a value in 0..3, also for negative counts
        rot = rot % 4
        if rot == 1:
            self.rotate_clockwise()
            return
        if rot == 2:
            self.reverse()
            return
        if rot == 3:
            self.rotate_counterclockwise()
            return
        return

    def rotate_clockwise(self):
        """ transpose, then mirror every row """
        self.transpose()
        self.reflect_horizontal()
        return

    def rotate_counterclockwise(self):
        """ transpose, then reverse the order of the rows """
        self.transpose()
        self.reflect_vertical()
        return

    def reverse(self):
        """ mirror every row and reverse the order of the rows """
        self.reflect_horizontal()
        self.reflect_vertical()
        return

    def reflect_horizontal(self):
        """ mirror every row (the first column becomes the last) """
        self.state = [self.state[r + i] for r in range(0, 16, 4) for i in reversed(range(4))]
        return

    def reflect_vertical(self):
        """ reverse the order of the rows (the first row becomes the last) """
        self.state = [self.state[c + i] for c in reversed(range(0, 16, 4)) for i in range(4)]
        return

    def transpose(self):
        """ swap rows and columns """
        self.state = [self.state[r + i] for i in range(4) for r in range(0, 16, 4)]
        return

    def __str__(self):
        """ render the board as a framed 4x4 table of tile values """
        state = '+' + '-' * 24 + '+\n'
        for row in [self.state[r:r + 4] for r in range(0, 16, 4)]:
            # (1 << t) & -2 clears the lowest bit so an empty cell (t == 0) prints as 0
            state += ('|' + ''.join('{0:6d}'.format((1 << t) & -2) for t in row) + '|\n')
        state += '+' + '-' * 24 + '+'
        return state

In [None]:
class action:
    """ generic action: a 32-bit code whose top byte is the type and low 24 bits the event """

    def __init__(self, code = -1):
        self.code = code

    def apply(self, state):
        """ the base action cannot be applied to a board, so it always reports -1 """
        return -1

    def save(self, output):
        """ write the textual form of this action to a file object, always returns True """
        text = self.__str__()
        output.write(text)
        return True

    def load(self, input):
        """ consume two characters from a file object; the base action never matches """
        input.read(2)
        return False

    def __str__(self):
        return "??"

    def event(self):
        """ low 24 bits of the code """
        event_mask = 0x00ffffff
        return self.code & event_mask

    def type(self):
        """ top 8 bits of the code, left in place (not shifted down) """
        type_mask = 0xff000000
        return self.code & type_mask

# registry of action classes that parse() tries, in registration order
action.prototype = []

def parse(input):
    """
    read one action from a file object
    each registered prototype is instantiated and asked to load itself;
    the first one that succeeds is returned
    if none matches, two characters are skipped and a base action is returned
    """
    for make_action in action.prototype:
        candidate = make_action()
        if candidate.load(input):
            return candidate
    input.read(2)
    return action()

action.parse = parse

class slide(action):
    """ sliding action built from a board opcode, serialized as "#U", "#R", "#D" or "#L" """

    type = 0x73000000
    res = [ "#U", "#R", "#D", "#L", "#?" ]

    def __init__(self, code = -1):
        super().__init__(slide.type | code)

    def apply(self, state):
        """ slide the given board with this action's opcode and return its result """
        opcode = self.event()
        return state.slide(opcode)

    def __str__(self):
        # clamp the opcode into 0..4; anything above 3 is printed as "#?"
        index = self.event()
        index = max(min(index, 4), 0)
        return slide.res[index]

    def load(self, input):
        """
        try to read a two-character slide token from a file object
        on success adopt its code and return True,
        otherwise rewind the file object and return False
        """
        start = input.tell()
        token = input.read(2)
        # only the first four symbols are real moves; "#?" is never accepted
        if token in slide.res[:4]:
            self.code = slide(slide.res.index(token)).code
            return True
        input.seek(start)
        return False

# expose the class on the base action and register it for parsing
action.slide = slide
action.prototype.append(action.slide)


class place(action):
    """ placing action: position in the low 4 bits of the event, tile in the bits above """

    type = 0x70000000
    res = list("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ?")

    def __init__(self, pos = -1, tile = -1):
        packed = (pos & 0x0f) | (tile << 4)
        super().__init__(place.type | packed)

    def position(self):
        """ 1-d board position stored in the low 4 bits of the event """
        return self.event() & 0x0f

    def tile(self):
        """ tile index stored above the position bits """
        return self.event() >> 4

    def apply(self, state):
        """ place this action's tile on the given board and return its result """
        return state.place(self.position(), self.tile())

    def __str__(self):
        # the tile symbol is clamped into 0..36, so out-of-range tiles print as "?"
        tile_index = max(min(self.tile(), 36), 0)
        return place.res[self.position()] + place.res[tile_index]

    def load(self, input):
        """
        try to read a two-character place token (position symbol, tile symbol)
        on success adopt its code and return True,
        otherwise rewind the file object and return False
        """
        start = input.tell()
        token = input.read(2)
        symbols = place.res
        pos_char, tile_char = token[0], token[1]
        pos = symbols.index(pos_char) if pos_char in symbols else -1
        tile = symbols.index(tile_char) if tile_char in symbols else -1
        if 0 <= pos < 16 and 0 < tile < 36:
            self.code = place(pos, tile).code
            return True
        input.seek(start)
        return False

# expose the class on the base action and register it for parsing
action.place = place
action.prototype.append(action.place)

In [None]:
class episode:
    """
    container of actions and time usages of an episode

    ep_state is the board being played, ep_score the accumulated reward,
    ep_moves a list of (action, reward, time usage) records, and
    ep_open / ep_close are (flag, timestamp in milliseconds) pairs.
    """

    def __init__(self):
        self.clear()
        return

    def state(self):
        """ return the board of this episode """
        return self.ep_state

    def score(self):
        """ return the accumulated reward """
        return self.ep_score

    def open_episode(self, tag = ""):
        """ stamp the start of the episode with a tag and the current time """
        self.ep_open = tag, self.millisec()  # flag, time usage
        return

    def close_episode(self, tag = ""):
        """ stamp the end of the episode with a tag and the current time """
        self.ep_close = tag, self.millisec()  # flag, time usage
        return

    def apply_action(self, move):
        """
        apply an action to the board and record it
        return False (recording nothing) if the action reports -1, True otherwise
        """
        reward = move.apply(self.state())
        if reward == -1:
            return False
        # time elapsed since the last take_turns call
        usage = self.millisec() - self.ep_time
        record = move, reward, usage # action, reward, time usage
        self.ep_moves += [record]
        self.ep_score += reward
        return True

    def take_turns(self, slide, place):
        """
        return whichever of the two arguments moves next and restart the move timer
        the first two moves and every odd step go to place, the rest to slide
        """
        self.ep_time = self.millisec()
        return place if (self.step() < 2 or self.step() % 2 != 0) else slide

    def last_turns(self, slide, place):
        """ return whichever of the two arguments made the previous move """
        return self.take_turns(place, slide) if (self.step() > 2) else place

    def step(self, who = -1):
        """
        return the number of recorded moves,
        restricted to slides or placements when who is the matching action type
        """
        size = len(self.ep_moves)
        # int(x / 2) truncates toward zero, so an empty episode yields 0 here
        if who == action.slide.type:
            return int((size - 1) / 2)
        if who == action.place.type:
            return size - int((size - 1) / 2)
        return size

    def time(self, who = -1):
        """
        return the time usage of the slide moves or of the place moves,
        or the span between open and close stamps for any other who
        """
        if self.ep_moves:
            if who == action.slide.type:
                return sum([mv[2] for mv in self.ep_moves[2:self.step():2]]) # action, reward, time usage
            if who == action.place.type:
                return self.ep_moves[0][2] + sum([mv[2] for mv in self.ep_moves[1:self.step():2]]) # action, reward, time usage
        return self.ep_close[1] - self.ep_open[1] # flag, time usage

    def actions(self, who = -1):
        """
        return the recorded actions,
        restricted to slides or placements when who is the matching action type
        """
        if self.ep_moves:
            if who == action.slide.type:
                return [mv[0] for mv in self.ep_moves[2:self.step():2]] # action, reward, time usage
            if who == action.place.type:
                return [self.ep_moves[0][0]] + [mv[0] for mv in self.ep_moves[1:self.step():2]] # action, reward, time usage
        return [mv[0] for mv in self.ep_moves] # action, reward, time usage

    def save(self, output):
        """ serialize this episode to a file object """
        output.write(self.__str__())
        return True

    def load(self, input):
        """
        deserialize from a file object
        reads one line of the form open|moves|close and replays the moves
        return True on success, False if the line is missing or malformed
        """
        # imported locally so the method does not rely on a module-level 'import io'
        import io
        try:
            self.clear()
            line = input.readline()
            # line --> open|moves|close
            first = line.index("|")
            second = line.index("|", first + 1)
            head = line[0:first]
            tail = line[(second + 1):]
            moves = line[(first + 1):second]
            # open --> flag@time
            at = head.index("@")
            self.ep_open = head[0:at], int(head[(at + 1):])
            # close --> flag@time
            at = tail.index("@")
            self.ep_close = tail[0:at], int(tail[(at + 1):])
            # moves --> action[reward](time)...
            minput = io.StringIO(moves)
            while True:
                # check if EOF
                ipt = minput.tell()
                if minput.read(1) == "":
                    break
                minput.seek(ipt)
                # ?? --> action
                a = action.parse(minput)
                self.ep_score += a.apply(self.ep_state)
                # [?] --> reward
                r = self.load_optional_value(minput, "[]")
                # (?) --> time
                t = self.load_optional_value(minput, "()")
                # (action, reward, time)
                self.ep_moves += [(a, r, t)]
            return True
        except (RuntimeError, ValueError, IndexError):
            pass
        return False

    def load_optional_value(self, minput, flag):
        """
        read an optional integer enclosed by the two characters of flag
        return the integer and advance past the closing character,
        or return 0 and leave the position untouched if the opening character is absent
        """
        t = 0
        ipt = minput.tell()
        if minput.read(1) == flag[0]:
            buf = minput.read()
            t = buf[0:buf.index(flag[-1])]
            # skip the opening character, the digits and the closing character
            minput.seek(ipt + len(t) + 2)
            t = int(t)
        else:
            minput.seek(ipt)
            t = 0
        return t

    def __str__(self):
        """ format as open|moves|close; zero rewards and zero time usages are omitted """
        head = str(self.ep_open[0]) + "@" + str(self.ep_open[1])
        moves = "".join([str(m[0]) + ("[" + str(m[1]) + "]" if m[1] else "") + ("(" + str(m[2]) + ")" if m[2] else "") for m in self.ep_moves])
        tail = str(self.ep_close[0]) + "@" + str(self.ep_close[1])
        return head + "|" + moves + "|" + tail

    def clear(self):
        """ reset the episode to an empty board with no recorded moves """
        self.ep_state = self.initial_state()
        self.ep_score = 0
        self.ep_time = 0
        self.ep_moves = []
        self.ep_open = "N/A", 0 # flag, time usage
        self.ep_close = "N/A", 0 # flag, time usage
        return

    def initial_state(self):
        """ return a fresh empty board """
        return board()

    def millisec(self):
        """ return the current wall-clock time in whole milliseconds """
        return int(round(time.time() * 1000))

In [None]:
class statistics:
    """ container & statistics of episodes """

    def __init__(self, total, block = 0, limit = 0):
        """
        the total episodes to run
        the block size of statistics
        the limit of saving records

        note that total >= limit >= block
        """
        self.total = total
        # a block or limit of 0 means "use total"
        self.block = block if block else total
        self.limit = limit if limit else total
        self.data = []
        self.count = 0
        return

    def show(self, tstat = True, blk = int(0)):
        """
        show the statistics of last 'block' games
        (or of the last 'blk' games when blk is non-zero)

        the format is
        1000   avg = 273901, max = 382324, ops = 241563 (170543|896715)
               512     100%   (0.3%)
               1024    99.7%  (0.2%)
               2048    99.5%  (1.1%)
               4096    98.4%  (4.7%)
               8192    93.7%  (22.4%)
               16384   71.3%  (71.3%)

        where
         '1000': current index
         'avg = 273901, max = 382324': the average score is 273901
         'ops = 241563 (170543|896715)': the average speed is 241563
                                         the average speed of the slider is 170543
                                         the average speed of the placer is 896715
         '93.7%': 93.7% of the games reached 8192-tiles, i.e., win rate of 8192-tile
         '22.4%': 22.4% of the games terminated with 8192-tiles as the largest tile

        when no episode has been recorded the average is reported as 0
        and the per-tile table is empty
        """
        num = min(len(self.data), (blk if blk != 0 else self.block))
        stat = [0] * 64              # stat[t]: games that ended with tile index t as the largest
        sop, pop, eop = 0, 0, 0      # step counts: all moves, slider, placer
        sdu, pdu, edu = 0, 0, 0      # time usages: whole episode, slider, placer
        ssc, msc = 0, 0              # sum and maximum of the scores
        for i in range(1, num + 1):
            ep = self.data[-i]
            ssc += ep.score()
            msc = max(ep.score(), msc)
            stat[max(ep.state().state)] += 1
            sop += ep.step()
            pop += ep.step(action.slide.type)
            eop += ep.step(action.place.type)
            sdu += ep.time()
            pdu += ep.time(action.slide.type)
            edu += ep.time(action.place.type)

        # avoid dividing by zero when there is nothing to summarize
        avg = ssc / num if num else 0

        print("%d\t" "avg = %d, max = %d, ops = %s (%s|%s)" % (
            self.count,
            avg,
            msc,
            int(sop * 1000 / sdu) if sdu > 0 else "inf",
            int(pop * 1000 / pdu) if pdu > 0 else "inf",
            int(eop * 1000 / edu) if edu > 0 else "inf"))

        if not tstat:
            return
        c = 0
        for t in range(0, len(stat)):
            # stop once every counted game has been reported
            if c >= num:
                break
            if not stat[t]:
                continue
            accu = sum(stat[t:])
            print("\t" "%d" "\t" "%s%%" "\t" "(%s%%)" % (
                (1 << t) & -2,
                accu * 100.0 / num,
                stat[t] * 100.0 / num)) # type, win rate, % of ending
            c += stat[t]

        print()
        return

    def summary(self):
        """ show the statistics over every stored episode """
        self.show(tstat=True, blk=len(self.data))
        return

    def is_finished(self):
        """ return True once the number of opened episodes has reached total """
        return self.count >= self.total

    def open_episode(self, flag = ""):
        """ start a new episode, dropping the oldest stored one once the limit is reached """
        if self.count >= self.limit:
            self.data = self.data[1:]
        self.count += 1
        self.data += [episode()]
        self.data[-1].open_episode(flag)
        return

    def close_episode(self, flag = ""):
        """ close the current episode and print the statistics at every block boundary """
        self.data[-1].close_episode(flag)
        if self.count % self.block == 0:
            self.show()
        return

    def at(self, i):
        """ return the stored episode at index i """
        return self.data[i]

    def front(self):
        """ return the oldest stored episode """
        return self.data[0]

    def back(self):
        """ return the most recent episode """
        return self.data[-1]

    def step(self):
        """ return the number of episodes opened so far """
        return self.count

    def save(self, output):
        """ serialize the stored episodes to a file object """
        output.write(self.__str__())
        return True

    def load(self, input):
        """
        deserialize from a file object
        episodes are read one per line until a line fails to load
        """
        self.data = []
        while True:
            # load an episode
            ep = episode()
            if ep.load(input):
                self.data += [ep]
            else:
                break
        self.total = max(self.total, len(self.data))
        self.count = len(self.data)
        return True

    def __str__(self):
        """ one serialized episode per line, terminated by a newline """
        return "\n".join([str(ep) for ep in self.data]) + "\n"