In [160]:
#%matplotlib inline

import numpy as np
import itertools
import random
import math
import matplotlib.pyplot as plt

import import_ipynb
from rules import Game
from players import RandomPlayer, GreedyPlayer, MCTSPlayer, UCTPlayer, GreedyUCTPlayer, HumanPlayer

# Print up to 12 items at each end of a dimension before NumPy summarizes an array with "...".
np.set_printoptions(edgeitems=12)

# Generate a dataset

We play with our best player (GreedyUCTPlayer) and use the real game states to fill our datasets.

If the features are the board and the value to guess is the ratio of wins, we will learn nothing: the outcome depends a lot on the score. If we add the score, we might then learn that a high score leads to a high ratio of wins, which is not very interesting. (Or it might not, since the hypothesis "high score leads to a high ratio of wins" is the hypothesis of the greedy agent).

We will start with X as the game state and Y the probabilities of wins for each move.

In [5]:
def play_game(player, opponent):
    """Play one game from the starting position, alternating between two players.

    Args:
        player: Object with a ``play(previous_action)`` method; moves first.
        opponent: Object with the same ``play`` method; moves second.

    Returns:
        The game state for which ``game_finished`` is true.
    """
    state = Game.start_game()

    # -1 is what the first mover receives as "previous action"
    # (presumably a sentinel for "no move yet" -- confirm in the players module).
    last_move = -1

    # `mover` acts on this turn, `waiting` acts on the next one.
    mover, waiting = player, opponent
    while not state.game_finished:
        last_move = mover.play(last_move)
        # step() yields three values; only the new state is needed here.
        state, _captures, _finished = state.step(last_move)
        mover, waiting = waiting, mover

    return state

In [63]:
def extract_states(endstate):
    """Collect (features, targets) pairs by walking from ``endstate`` up its parents.

    For each visited state the feature is ``state.view_from_current_player`` and
    the target is a list with one entry per element of ``state.children``:
    ``child.wins[state.current_player] / child.n_playouts``, or ``0`` when the
    child is falsy or has no playouts.

    The walk stops as soon as it reaches a state whose ``parent`` is falsy, so
    that topmost state is *not* included in the output.

    Args:
        endstate: State to start from. If it is finished, the walk starts at
            its parent because a finished state has no children to score.

    Returns:
        ``(X, Y)``: two lists of equal length, ordered from the latest state
        back towards the earliest one.
    """
    node = endstate.parent if endstate.game_finished else endstate

    features, targets = [], []
    while node.parent:
        features.append(node.view_from_current_player)

        win_ratios = []
        for child in node.children:
            if child and child.n_playouts:
                win_ratios.append(child.wins[node.current_player] / child.n_playouts)
            else:
                # Missing or never-simulated move: no estimate available.
                win_ratios.append(0)
        targets.append(win_ratios)

        node = node.parent

    return features, targets

TODO: Maybe we should draw the budget from a normal distribution so that players of different strengths compete.
We could also pick different agents at random?

In [111]:
# Budget handed to each GreedyUCTPlayer as its second constructor argument.
BUDGET = 50


def generate_data(*args):
    """Play one GreedyUCTPlayer-vs-GreedyUCTPlayer game and return its samples.

    Args:
        *args: Ignored. Accepted so the function can be used directly as the
            callable of ``pool.map``, which passes one item per task.

    Returns:
        ``(X, Y)``: the first player's samples followed by the second
        player's, each extracted from that player's ``root``.

    NOTE(review): if the players draw from a global RNG whose state is copied
    into worker processes, parallel calls could replay identical games --
    confirm how the players module seeds its randomness.
    """
    first = GreedyUCTPlayer(0, BUDGET)
    second = GreedyUCTPlayer(1, BUDGET)

    # The returned end state is not needed; the players keep their own trees.
    play_game(first, second)

    first_X, first_Y = extract_states(first.root)
    second_X, second_Y = extract_states(second.root)

    X = first_X + second_X
    Y = first_Y + second_Y
    return X, Y
    

In [112]:
import multiprocessing

In [146]:
NWORKERS = 4

# Use the pool as a context manager so its worker processes are shut down once
# the games are collected; otherwise every re-run of this cell leaves NWORKERS
# processes behind. map() blocks until all results are in, so nothing is lost.
with multiprocessing.Pool(NWORKERS) as pool:
    # One self-play game per task; generate_data ignores the task index.
    games_data = pool.map(generate_data, range(NWORKERS * 3))

# Each entry of games_data is an (X, Y) pair of lists: flatten them across games.
X = np.array(list(itertools.chain.from_iterable(x for x, y in games_data)))
Y = np.array(list(itertools.chain.from_iterable(y for x, y in games_data)))

In [148]:
# Boolean mask with the same shape as Y: True wherever a move's value equals
# its row's maximum (several entries per row can be True when values tie).
Y_bestmove_as_1 = (Y == Y.max(axis=1, keepdims=True))

# Column index of the first maximal value in each row.
Y_bestmove_i = Y.argmax(axis=1)

We have data; let's split it into a train set and a test set.

In [114]:
import sklearn
from sklearn.model_selection import train_test_split

In [152]:
from sklearn.tree import DecisionTreeClassifier

In [150]:
# Hold out 33% of the samples for testing; the fixed seed keeps the split
# identical across runs for a given dataset.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y_bestmove_i,
    test_size=0.33,
    random_state=42,
)

In [157]:
# Seed the classifier: scikit-learn permutes features randomly at each split,
# so without a fixed random_state the fitted tree (and the accuracy below) can
# change between runs on the very same train/test split.
tree = DecisionTreeClassifier(random_state=42)
tree.fit(X_train, Y_train)

# Mean accuracy of the predicted best-move index on the held-out samples.
tree.score(X_test, Y_test)

0.5470941883767535

In [158]:
# Number of held-out labels produced by the train/test split.
Y_test.shape

(499,)

In [159]:
# (number of training samples, number of features per game state).
X_train.shape

(1013, 12)