In [1]:
#%matplotlib inline

import numpy as np
import itertools
import random
import math
import matplotlib.pyplot as plt

import import_ipynb
from rules import Game

importing Jupyter notebook from rules.ipynb


AssertionError: 

In [None]:
def max_rand(iterable, key=lambda x: x):
    """Return a random element among those of ``iterable`` that maximise ``key``.

    Ties for the maximum are broken with ``random.choice``.

    Args:
        iterable: Any iterable, including one-shot iterators and generators.
        key: One-argument function mapping an element to a comparable score.
            It is called exactly once per element.

    Returns:
        One of the elements whose score equals the maximum score.

    Raises:
        ValueError: If ``iterable`` is empty (raised by ``max``).
    """
    # Materialise once: a generator would be exhausted by the first pass,
    # leaving nothing to select from in the second one.
    items = list(iterable)
    # Score each element a single time so the maximum and the filter below
    # always agree, even if ``key`` is expensive or not deterministic.
    scores = [key(item) for item in items]
    maximum_value = max(scores)
    keep = [item for item, score in zip(items, scores) if score == maximum_value]
    return random.choice(keep)

In [None]:
class Player:
    """Base class for agents that track the game through their own ``self.root``.

    Subclasses are expected to set ``self.root`` and ``self.player_id`` and to
    provide ``get_action()``; this class only supplies the turn bookkeeping.
    """

    def play(self, their_action):
        """Replay the opponent's move, choose our move, apply it and return it.

        ``their_action`` is ``-1`` when there is no previous move, which is only
        accepted for the player whose ``player_id`` is 0.
        """
        opponent_has_moved = their_action != -1
        if not opponent_has_moved:
            # No previous action exists, so we must be the one opening the game
            assert self.player_id == 0, "Only the first player can have their_action=-1"
        else:
            # Bring our private state up to date with the opponent's move
            self.root, _, _ = self.root.step(their_action)

        action = self.get_action()
        # Advance our private state with the move we are about to report
        state_after_move, _, _ = self.root.step(action)
        self.root = state_after_move
        return action


In [None]:
class RandomPlayer(Player):
    """Player that answers every turn with a randomly chosen legal move."""

    def __init__(self, player_id):
        """Start from a fresh game and remember which side we play."""
        self.player_id = player_id
        self.root = Game.start_game()

    def get_action(self):
        """Return one of the currently legal actions, picked with ``random.choice``."""
        candidates = self.root.legal_actions
        return random.choice(candidates)

In [None]:
class GreedyPlayer(Player):
    """One-move-lookahead player with optional epsilon-random moves.

    Every legal move is tried once; moves are ranked by immediate win first,
    then by the capture count reported by ``step``, with random tie-breaking.
    With probability ``eps`` a random legal move is played instead.
    """

    def __init__(self, player_id, eps=0):
        """Start from a fresh game; ``eps`` is the probability of a random move."""
        self.player_id = player_id
        self.eps = eps
        self.root = Game.start_game()

    def get_action(self):
        """Return the best-ranked move, or a random legal move with probability ``eps``."""
        candidates = []
        for move in self.root.legal_actions:
            state, captured, _ = self.root.step(move)
            # +1: we win, -1: the opponent wins, 0: no winner reported
            if state.winner is None:
                outcome = 0
            else:
                outcome = 1 if state.winner == self.player_id else -1
            candidates.append((move, captured, outcome))

        def priority(entry):
            # Ascending sort: negate so wins and large captures come first;
            # the random last component shuffles otherwise equal moves.
            _move, captured, outcome = entry
            return (-outcome, -captured, random.random())

        candidates.sort(key=priority)

        # The ranking above is computed even when the random branch is taken
        if random.random() < self.eps:
            return random.choice(self.root.legal_actions)
        return candidates[0][0]

In [None]:
class MCTSPlayer(Player):
    """Monte-Carlo tree search player with random tree descent and random playouts.

    ``budget`` is the number of ``explore_tree`` iterations run before each move.
    ``c`` is stored but not read by this class; ``UCTPlayer.node_score`` uses it.
    """

    def __init__(self, player_id, budget, c=1 / math.sqrt(2)):
        """Start from a fresh game and store the search parameters."""
        self.player_id = player_id
        self.budget = budget
        self.c = c
        self.root = Game.start_game()

    def tree_policy(self, node):
        """Walk down from ``node`` until a node with ``is_leaf_game`` set is reached.

        Fully expanded nodes hand over to a random expanded child; otherwise a
        random unvisited action is stepped into. Note that the walk continues
        after such a step rather than returning the new node immediately.
        """
        while True:
            if node.is_leaf_game:
                return node
            if not node.is_fully_expanded:
                action = random.choice(node.legal_unvisited_actions)
                node, _, _ = node.step(action)
            else:
                node = random.choice(node.expanded_children)

    def explore_tree(self):
        """Run one iteration: select a node, play it out, then record the result."""
        # Selection
        state = self.tree_policy(self.root)

        # Playout driven by the default policy until ``step`` reports the end
        done = state.game_finished
        while not done:
            move = self.default_policy(state)
            state, _, done = state.step(move)

        # Hand the winner of the final state to the node's stats update
        state.update_stats(state.winner)

    def default_policy(self, node):
        """Playout policy: a random legal action."""
        options = node.legal_actions
        return random.choice(options)

    def action_score(self, x):
        """Score root action ``x`` by the wins recorded for us on its child.

        Actions whose child entry is ``None`` receive a random non-positive score.
        """
        child = self.root.children[x]
        if child is not None:
            assert self.root.current_player == self.player_id
            assert child.current_player != self.player_id
            return child.wins[self.player_id]
        return -random.random()

    def get_action(self):
        """Spend the search budget, then return the legal action with the top score."""
        for _iteration in range(self.budget):
            self.explore_tree()

        return max(self.root.legal_actions, key=self.action_score)

In [None]:
class UCTPlayer(MCTSPlayer):
    """MCTS player whose tree descent ranks children with a UCB-style score."""

    def node_score(self, node):
        """Return win rate plus ``self.c`` times an exploration bonus for ``node``.

        The win rate uses the wins recorded for ``node.current_opponent``; both
        denominators are ``n_playouts + 1``, which also avoids division by zero.
        """
        win_rate = node.wins[node.current_opponent] / (node.n_playouts + 1)
        log_parent_playouts = math.log(node.parent.n_playouts)
        exploration_bonus = math.sqrt(log_parent_playouts / (node.n_playouts + 1))
        return win_rate + self.c * exploration_bonus

    def tree_policy(self, node):
        """Walk down from ``node`` until a node with ``is_leaf_game`` set is reached.

        Fully expanded nodes hand over to their best-scoring expanded child
        (ties broken randomly by ``max_rand``); otherwise a random unvisited
        action is stepped into and the walk continues from there.
        """
        while True:
            if node.is_leaf_game:
                return node
            if not node.is_fully_expanded:
                action = random.choice(node.legal_unvisited_actions)
                node, _, _ = node.step(action)
            else:
                node = max_rand(node.expanded_children, key=self.node_score)

In [None]:
class GreedyUCTPlayer(UCTPlayer):
    """UCT player whose playouts favour moves with a higher capture count."""

    def default_policy(self, node):
        """Sample a legal action with weight ``captures + 1``.

        The capture count is the second value returned by ``node.step``; the
        ``+ 1`` presumably keeps zero-capture moves selectable (assumes counts
        are non-negative - confirm against ``Game.step``).
        """
        assert len(node.legal_actions) != 0
        weights = []
        for action in node.legal_actions:
            weights.append(node.step(action)[1] + 1)
        picks = random.choices(node.legal_actions, weights=weights)
        return picks[0]

In [None]:
 class HumanPlayer(Player):
    """Interactive player that reads its moves from standard input."""

    def __init__(self, player_id):
        """Start from a fresh game and remember which side we play."""
        self.root = Game.start_game()
        self.player_id = player_id

    def get_action(self):
        """Show the current state and prompt until a legal move is entered.

        Input that is not an integer is reported as an illegal move and the
        prompt is repeated, instead of aborting the game with ``ValueError``.

        Returns:
            The chosen action, guaranteed to be in ``self.root.legal_actions``.
        """
        self.root.show_state()
        action = -1
        while action not in self.root.legal_actions:
            try:
                action = int(input("Input move [0-5]: "))
            except ValueError:
                # Not a number: fall back to the sentinel so it is handled
                # exactly like any other illegal move below.
                action = -1
            if action not in self.root.legal_actions:
                print("Illegal move")
            print("\n")
        return action