In [None]:
import random

import numpy as np

# Mancala

[Kalah is solved](http://kalaha.krus.dk/) 

# Awalé
Also known as awari and owari

## Solved

Awari appears to have been solved: see [Solving awari with parallel retrograde analysis](http://ieeexplore.ieee.org/abstract/document/1236468/).

In [None]:
class Game:
    """Two-player Awalé board: sowing, captures and end-of-game detection.

    The board is a flat integer array of ``2 * pits`` pits. Indices
    ``0 .. pits-1`` belong to player 0 and ``pits .. 2*pits-1`` to player 1.
    Seeds are sown one by one towards increasing indices, wrapping around.

    Attributes:
        pits: number of pits per player.
        seeds: number of seeds initially placed in every pit.
        actions: array ``0 .. pits-1`` of the action ids a player can choose from.
        state: current number of seeds in each pit (absolute indexing).
        current_player: ``0`` or ``1``, the player who moves next.
        captures: seeds captured so far by player 0 and player 1.
        history: set of the boards (as tuples) reached after each move.
        loop_detected: True when the last move recreated a board from ``history``.
    """

    def __init__(self, pits=6, seeds=4):
        self.pits = pits
        self.seeds = seeds
        self.reset()
        self.actions = np.arange(self.pits)

    def reset(self):
        """Put the game back in its initial position and forget all history."""
        self.state = np.full(self.pits * 2, self.seeds, dtype=int)
        self.current_player = 0
        self.captures = np.zeros((2,), dtype=int)
        self.history = set()
        self.loop_detected = False

    def copy(self):
        """Return an independent copy of the game (same size, board, score and history)."""
        # Build the copy with the same dimensions: a default-sized Game would
        # disagree with the copied board whenever pits/seeds are not 6/4.
        g = Game(pits=self.pits, seeds=self.seeds)

        g.state = self.state.copy()
        g.current_player = self.current_player
        g.captures = self.captures.copy()
        g.history = self.history.copy()
        g.loop_detected = self.loop_detected

        return g

    @property
    def state_from_current_player(self):
        """Board rotated so that the current player's pits come first."""
        # The board has 2 * pits entries, so rolling by `pits` swaps the two halves.
        shift = 0 if self.current_player == 0 else self.pits
        return np.roll(self.state, shift)

    @property
    def legal_actions(self):
        """Actions (``0 .. pits-1``) whose pit, on the current player's side, is not empty."""
        # todo : add the Let the opponent play rule
        own_pits = self.state_from_current_player[:self.pits]
        return [x for x in range(self.pits) if own_pits[x] != 0]

    def step(self, action):
        """Play one move for the current player.

        Args:
            action: pit to sow, counted from 0 on the current player's own side.

        Returns:
            A tuple ``(captures, state, game_finished)``: the number of seeds
            captured by this move, the board array (the live internal array,
            not a copy) and whether the game is over after the move.

        Raises:
            AssertionError: if ``action`` is out of range or the chosen pit is empty.
        """
        assert 0 <= action < self.pits, "action out of range"

        n_pits = self.pits * 2
        # Absolute, non-negative index of the played pit. A negative index would
        # read the same pit but would never compare equal to `pit_to_sow` below.
        origin = action if self.current_player == 0 else action + self.pits

        seeds = self.state[origin]
        assert seeds != 0, "cannot play an empty pit"

        # empty the target pit
        self.state[origin] = 0

        # fill the next pits
        pit_to_sow = origin
        while seeds > 0:
            pit_to_sow = (pit_to_sow + 1) % n_pits
            if pit_to_sow != origin:  # do not fill the target pit ever
                self.state[pit_to_sow] += 1
                seeds -= 1

        # capture: starting from the last sown pit and walking backwards, take
        # every pit holding 2 or 3 seeds, but only while still on the opponent's side
        captures = 0
        adverse_pits = self.adverse_pits_idx
        while pit_to_sow in adverse_pits and self.state[pit_to_sow] in (2, 3):
            taken = int(self.state[pit_to_sow])
            self.captures[self.current_player] += taken
            captures += taken
            self.state[pit_to_sow] = 0

            # go backwards
            pit_to_sow = (pit_to_sow - 1) % n_pits

        # change player
        self.current_player = (self.current_player + 1) % 2

        # record state; a board that was already reached ends the game.
        # The flag must be computed before the board is added to the history,
        # otherwise every freshly played position would look like a repetition.
        tstate = tuple(self.state.tolist())
        self.loop_detected = tstate in self.history
        self.history.add(tstate)

        return captures, self.state, self.game_finished

    @property
    def adverse_pits_idx(self):
        """Absolute indices of the pits owned by the opponent of the current player."""
        if self.current_player == 1:
            return list(range(self.pits))
        else:
            return list(range(self.pits, self.pits * 2))

    @property
    def game_finished(self):
        """True when the player to move has no seeds, a player holds more than
        half of all seeds, both players hold exactly half, or the last move
        repeated an earlier board."""
        # The board starts with 2 * pits * seeds seeds, so half of them is pits * seeds.
        half_of_all_seeds = self.pits * self.seeds
        no_moves_left = not self.state_from_current_player[:self.pits].any()
        enough_captures = bool(np.any(self.captures > half_of_all_seeds))
        draw = bool(np.all(self.captures == half_of_all_seeds))
        return bool(no_moves_left or enough_captures or draw or self.loop_detected)

    def show_state(self):
        """Print the score and the board as seen by the current player
        (own row at the bottom, opponent's row on top, reversed)."""
        if self.game_finished:
            print("Game finished")
        print("Current player: {} - Score: {}/{}\n{}".format(
            self.current_player,
            self.captures[self.current_player],
            self.captures[(self.current_player + 1) % 2],
            "-" * self.pits * 3
        ))

        pits = ["{:3}".format(seeds) for seeds in self.state_from_current_player]

        print("".join(reversed(pits[self.pits:])))
        print("".join(pits[:self.pits]))
        
        

Shortest game ever recorded according to [Mancala World](http://mancala.wikia.com/wiki/Oware): 19 steps

In [None]:
# Game instance with the default board (6 pits per player, 4 seeds per pit);
# the replay cell below resets it before every run.
g = Game()

In [None]:
%%timeit

# Replay the recorded game from a fresh board, checking every capture count.
g.reset()

# Each entry is (pit number counted from 1, seeds captured by that move).
shortest_game = [
    (6, 0), (6, 0), (3, 0), (5, 0), (3, 0),
    (6, 0), (2, 0), (4, 2), (4, 4), (4, 0),
    (2, 0), (5, 0), (3, 0), (3, 0), (5, 7),
    (1, 6), (1, 7), (2, 0), (6, 9),
]

for pit_number, expected_captures in shortest_game:
    # step() expects a 0-based pit index
    outcome = g.step(pit_number - 1)
    observed_captures = outcome[0]
    assert expected_captures == observed_captures
#g.show_state()

# Tree Search

# MCTS

In [None]:
class Node:
    """One position of the search tree.

    Attributes:
        game: the game position held by this node.
        parent: the node this one was expanded from (``None`` for the root).
        visits: number of playouts that went through this node.
        wins: number of those playouts counted as won.
        children: child nodes, keyed by the action that leads to them.
    """

    def __init__(self, game, parent=None):
        self.game = game
        self.parent = parent
        self.visits = 0
        self.wins = 0
        self.children = {}

    @property
    def is_fully_expanded(self):
        """True when the node has children and every legal action of its position has one."""
        # Compare against the legal actions rather than the number of pits:
        # a position with an empty pit has fewer than `pits` possible children.
        # A node without children is never "fully expanded", so a caller that
        # descends while this is True always has a child to pick.
        return bool(self.children) and len(self.children) >= len(self.game.legal_actions)

    def add_child(self, game, action=None):
        """Create a child holding a copy of ``game`` and register it in ``children``.

        Args:
            game: position of the new child; it is copied, not shared.
            action: key under which the child is stored. When omitted, the
                smallest non-negative integer not already used as a key is taken.

        Returns:
            The newly created child node.
        """
        child = Node(game.copy(), self)

        key = action
        if key is None:
            key = len(self.children)
            while key in self.children:
                key += 1

        self.children[key] = child
        return child

In [None]:
# Play one full game, choosing every move with a small Monte-Carlo tree search.
game = Game()
print(game.game_finished)
BUDGET = 10  # number of playouts run before each real move

# Fixed seed so that re-running the cell replays the same game.
random.seed(0)

while not game.game_finished:
    # The search tree is rebuilt from the current position before every move.
    tree = Node(game.copy())

    for i in range(BUDGET):

        # selection: walk down at random until a node still has an untried action
        node = tree
        untried = []
        while not node.game.game_finished:
            untried = [a for a in node.game.legal_actions if a not in node.children]
            if untried or not node.children:
                break
            # random.choice needs a sequence, dict views are not indexable
            node = random.choice(list(node.children.values()))

        # expansion: add one child for an action that has not been tried yet
        if untried:
            action = random.choice(untried)
            child = Node(node.game.copy(), node)
            child.game.step(action)
            node.children[action] = child
            node = child

        # simulation: random playout on a copy, the tree positions stay untouched
        playout = node.game.copy()
        while not playout.game_finished:
            playout.step(random.choice(playout.legal_actions))

        # the playout is won by whoever captured more seeds (no winner on a tie)
        winner = None
        if playout.captures[0] != playout.captures[1]:
            winner = int(np.argmax(playout.captures))

        # backpropagation: a node scores a win when the player who moved into it
        # (the opponent of the player to move there) won the playout
        while node is not None:
            node.visits += 1
            if winner is not None and winner != node.game.current_player:
                node.wins += 1
            node = node.parent

    if not tree.children:
        # nothing was explored (e.g. BUDGET == 0): stop instead of looping forever
        break

    # play the most visited move for real
    best_action = max(tree.children, key=lambda a: tree.children[a].visits)
    game.step(best_action)

game.show_state()

# MCTS + UCB

# Learning

# DeepMind

 * AlphaGo: [Mastering the game of Go with deep neural networks and tree search](https://storage.googleapis.com/deepmind-media/alphago/AlphaGoNaturePaper.pdf)
 * AlphaGo Zero: [Mastering the game of Go without human knowledge](https://www.nature.com/articles/nature24270.epdf?author_access_token=VJXbVjaSHxFoctQQ4p2k4tRgN0jAjWel9jnR3ZoTv0PVW4gB86EEpGqTRDtpIz-2rmo8-KG06gqVobU5NSCFeHILHcVFUeMsbvwS-lxjqQGg98faovwjxeTUgZAUMnRQ)
 * AlphaZero: [Mastering Chess and Shogi by Self-Play with a General Reinforcement Learning Algorithm](https://arxiv.org/pdf/1712.01815.pdf)