In [382]:
import numpy as np
from itertools import permutations
import random as rn
from copy import deepcopy
from collections import Counter
import time

In [383]:
class Board(object):
    def __init__(self, size = 4):
        self.board = np.zeros((size,size,size))
        self.emptySquares = [(i,j) for i in range(4) for j in range(4)]
        
    def setTile(self, tile, pos):
        self.board[pos[0], :, pos[1]] = tile
        self.emptySquares.remove(pos)
        
        
    def getEmptySquares(self):
        emptySquares = tuple(np.where(self.board[:, 0] == 0))
        return emptySquares
        
    def winningPosition(self):
        return np.any(np.sum(self.board, 0) == 4) or np.any(np.sum(self.board, 2) == 4)
        

In [384]:
class Bag(object):
    def __init__(self, count = 16):
        self.createAllTiles()
        
    def removeTile(self, tile):
            for i, t in enumerate(self.tiles):
                if  np.all(t == tile):
                    self.tiles = np.delete(arr=self.tiles,obj= i, axis = 0)
                    break
            #self.tiles = np.delete(self.tiles, n, 0)
        
    def createAllTiles(self):
        self.tiles = np.array(([1,1,1,-1],[1,1,-1,1],[1,-1,1,1],
                             [-1,1,1,1],[1,1,-1,-1],[1,-1,1,-1],[-1,1,1,-1],
                            [1,-1,-1,1],[-1,1,-1,1],[-1,-1,1,1],[-1,-1,-1,-1],
                             [-1,-1,-1,1],[-1,-1,1,-1],[-1,1,-1,-1],[1,-1,-1,-1]))
        
                    
        

In [420]:
class State():
    def __init__(self):
        self.board = Board()
        self.bag = Bag()
        self.currentPlayer = 0
        self.currentTile = np.array([1,1,1,1])
        
    def chooseRandomTile(self):
        if len(self.bag.tiles) > 0:
            n = rn.randrange(len(self.bag.tiles))
            tile = self.bag.tiles[n]
            self.currentTile = tile
            self.bag.removeTile(tile)
            #return self.currentTile

    def playRandomTile(self):
        #print(self.board.emptySquares)
        pos = rn.choice(self.board.emptySquares)
        self.board.setTile(self.currentTile, pos)
        #self.board.emptySquares.remove(pos)
        #return self.board.board
        
    
    def __hash__(self):
        return hash(np.array2string(self.board.board) + np.array2string(self.currentTile))
    
    def __eq__(self, other):
        return self.__hash__() == other.__hash__()# and self.currentTile == other.currentTile
    

In [478]:
class MonteCarlo:
    def __init__(self):
        self.bigMap = {hash(state): {"count" : 1, "wins" : 0}}
    
    def getBestMove(self, state):
        score = 0
        currentBest = None
        tempMap = {}
        t = time.time()
        while (time.time() - t) < 10:
            for j in state.board.emptySquares:
                for i in state.bag.tiles:
                    virtualState = deepcopy(state)
                    #print(virtualState.board.emptySquares)
                    virtualState.board.setTile(virtualState.currentTile, j)
                    virtualState.currentTile = i
                    virtualState.bag.removeTile(i)
                    if virtualState not in self.bigMap:
                        self.rollOut(virtualState)
                        tempMap[hash(virtualState)] = self.bigMap[hash(virtualState)]["wins"]/self.bigMap[hash(virtualState)]["count"]
                    else:
                        tempMap[hash(virtualState)] = self.bigMap[hash(virtualState)]["wins"]/self.bigMap[hash(virtualState)]["count"]
            score =  max(tempMap.values())
            for j in state.board.emptySquares:
                for i in state.bag.tiles:
                    virtualState = deepcopy(state)
                    virtualState.board.setTile(virtualState.currentTile, j)
                    virtualState.currentTile = i
                    virtualState.bag.removeTile(i)
                    #print(self.bigMap[hash(virtualState)]["wins"], score)
                    if self.bigMap[hash(virtualState)]["wins"]/self.bigMap[hash(virtualState)]["count"] == score:
                        currentBest = virtualState
            self.rollOut(currentBest)
        
        return currentBest, score

    def rollOut(self, state2):
        #print("try to roll out")
        i = 0
        state = deepcopy(state2)
        tempMap = {hash(state) : {"count" : 1, "wins" : 0}}
        while (len(state.bag.tiles) > 0) and not state.board.winningPosition():
            #print(state.bag.tiles)
            state.playRandomTile()
            state.chooseRandomTile()
            if state in tempMap:
                tempMap[hash(state)]["count"] += 1
            else:
                tempMap[hash(state)] = {"count" : 1, "wins" : 0}
            i += 1
        if i % 2 == 0:
            for v in tempMap.values():
                v["wins"] = 1
        for k, v in tempMap.items():
            if k in self.bigMap:
                self.bigMap[k]["count"] += v["count"]
                self.bigMap[k]["wins"] += v["wins"]
            else:
                self.bigMap[k] = {"count" : v["count"], "wins" : v["wins"]}
                
            
            
                   
            
        

In [460]:
state = State()
mc = MonteCarlo()

In [461]:
a, b = mc.getBestMove(state)

In [462]:
a.currentTile

array([-1,  1, -1, -1])

In [428]:
a.bag.tiles

array([[ 1,  1,  1, -1],
       [ 1,  1, -1,  1],
       [ 1, -1,  1,  1],
       [-1,  1,  1,  1],
       [ 1,  1, -1, -1],
       [ 1, -1,  1, -1],
       [-1,  1,  1, -1],
       [ 1, -1, -1,  1],
       [-1,  1, -1,  1],
       [-1, -1,  1,  1],
       [-1, -1, -1, -1],
       [-1, -1, -1,  1],
       [-1, -1,  1, -1],
       [ 1, -1, -1, -1]])

In [429]:
mc.getBestMove(a)

(<__main__.State at 0x7f8e8b63cfd0>, 1)

In [474]:
def something():
    state = State()
    mc = MonteCarlo()
    sc = 0
    for i in range(16):
        state, sc = mc.getBestMove(state)
        print(state.board.board, state.currentTile, sc)
        if state.board.winningPosition():
            print("won!")
            break
    return state


In [473]:
a = something()

[[[0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]

 [[0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]

 [[0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]
  [0. 0. 0. 0.]]

 [[0. 0. 0. 1.]
  [0. 0. 0. 1.]
  [0. 0. 0. 1.]
  [0. 0. 0. 1.]]] [-1 -1 -1  1] 1.0
[[[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]

 [[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]

 [[ 0.  0. -1.  0.]
  [ 0.  0. -1.  0.]
  [ 0.  0. -1.  0.]
  [ 0.  0.  1.  0.]]

 [[ 0.  0.  0.  1.]
  [ 0.  0.  0.  1.]
  [ 0.  0.  0.  1.]
  [ 0.  0.  0.  1.]]] [-1  1  1 -1] 1.0
[[[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]

 [[ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]
  [ 0.  0.  0.  0.]]

 [[ 0.  0. -1.  0.]
  [ 0.  0. -1.  0.]
  [ 0.  0. -1.  0.]
  [ 0.  0.  1.  0.]]

 [[ 0.  0. -1.  1.]
  [ 0.  0.  1.  1.]
  [ 0.  0.  1.  1.]
  [ 0.  0. -1.  1.]]] [-1 -1  1  1] 1.0
[[[ 0.  0.  0.  0.]
  [ 0.

In [475]:
a.board.board

array([[[ 1., -1.,  1.,  1.],
        [-1.,  1.,  1., -1.],
        [ 1., -1.,  1., -1.],
        [-1.,  1., -1., -1.]],

       [[-1.,  1., -1., -1.],
        [-1.,  1.,  1., -1.],
        [-1., -1.,  1.,  1.],
        [-1., -1.,  1.,  1.]],

       [[ 0.,  1., -1.,  1.],
        [ 0., -1., -1., -1.],
        [ 0.,  1., -1., -1.],
        [ 0.,  1.,  1.,  1.]],

       [[-1., -1., -1.,  1.],
        [-1.,  1.,  1.,  1.],
        [ 1., -1.,  1.,  1.],
        [-1., -1., -1.,  1.]]])

In [477]:
a.currentTile

array([ 1,  1, -1,  1])

In [481]:
a = np.sum(a, 0)

In [484]:
a.board.board

array([[[ 1., -1.,  1.,  1.],
        [-1.,  1.,  1., -1.],
        [ 1., -1.,  1., -1.],
        [-1.,  1., -1., -1.]],

       [[-1.,  1., -1., -1.],
        [-1.,  1.,  1., -1.],
        [-1., -1.,  1.,  1.],
        [-1., -1.,  1.,  1.]],

       [[ 0.,  1., -1.,  1.],
        [ 0., -1., -1., -1.],
        [ 0.,  1., -1., -1.],
        [ 0.,  1.,  1.,  1.]],

       [[-1., -1., -1.,  1.],
        [-1.,  1.,  1.,  1.],
        [ 1., -1.,  1.,  1.],
        [-1., -1., -1.,  1.]]])

In [486]:
np.sum(1, 0)

1

In [493]:
a.board.setTile(a.currentTile, (2,0))

In [494]:
a.board.board

array([[[ 1., -1.,  1.,  1.],
        [-1.,  1.,  1., -1.],
        [ 1., -1.,  1., -1.],
        [-1.,  1., -1., -1.]],

       [[-1.,  1., -1., -1.],
        [-1.,  1.,  1., -1.],
        [-1., -1.,  1.,  1.],
        [-1., -1.,  1.,  1.]],

       [[ 1.,  1., -1.,  1.],
        [ 1., -1., -1., -1.],
        [-1.,  1., -1., -1.],
        [ 1.,  1.,  1.,  1.]],

       [[-1., -1., -1.,  1.],
        [-1.,  1.,  1.,  1.],
        [ 1., -1.,  1.,  1.],
        [-1., -1., -1.,  1.]]])

In [496]:
print(np.sum(a.board.board,0))

[[ 0.  0. -2.  2.]
 [-2.  2.  2. -2.]
 [ 0. -2.  2.  0.]
 [-2.  0.  0.  2.]]


In [497]:
len(mc.bigMap)

3073