In [8]:
import tensorflow as tf
import numpy as np
from src.ml.model import get_model
import pandas as pd

In [13]:
from src.alphazero.game_player import GamePlayer


from src.robot_reboot.game import get_game_from_matrix
from src.alphazero.game_player import GamePlayer
from src.robot_reboot.model import RobotRebootModel
from src.ml.util import get_test_data

# Test data read from the TFRecord file. `evaluate` reads this name as a
# global, each entry is passed to `get_game_from_matrix`, and the benchmark
# section below slices the first 30 entries.
states = get_test_data(['robot_reboot_data/test.tfrecords'])


def evaluate(cnn, test_states=None, max_depth=20):
    """Play every test state with a network-backed player and record the results.

    For each entry a game is rebuilt with ``get_game_from_matrix``, the
    network is wrapped in a ``RobotRebootModel``, and a ``GamePlayer`` plays
    from the returned state with ``max_depth`` as its depth argument.

    Args:
        cnn: Network handed to ``RobotRebootModel`` (the notebook passes the
            models built by ``get_model()``).
        test_states: Iterable of entries accepted by ``get_game_from_matrix``.
            When omitted, the notebook-level ``states`` is used, which is
            what this function always did before the parameter existed.
        max_depth: Depth argument forwarded to ``GamePlayer.play``. Defaults
            to 20, the value that used to be hard-coded here.

    Returns:
        ``{'history': {'score': [...], 'outcome': [...]}}`` where entry ``i``
        of each list is ``game.get_score`` / ``game.get_value`` of the final
        state reached from the i-th input.
    """
    if test_states is None:
        # Keep the original behaviour: evaluate on the notebook-wide test set.
        test_states = states

    scores = []
    outcomes = []
    for s in test_states:
        game, state = get_game_from_matrix(s)
        model = RobotRebootModel(game, cnn)
        game_player = GamePlayer(model, game)
        final_state = game_player.play(state, max_depth)
        # Same call order as before: value first, then score.
        outcomes.append(game.get_value(final_state))
        scores.append(game.get_score(final_state))
    return {'history': {'score': scores, 'outcome': outcomes}}

def summary(history):
    """Reduce one evaluation history to ``(total outcome, mean score)``.

    Args:
        history: Dict shaped like the return value of ``evaluate``:
            ``{'history': {'score': [...], 'outcome': [...]}}``.

    Returns:
        Tuple ``(sum of outcomes, mean of scores)``; ``get_df`` reports the
        first element as "Wins". For a history with no games the mean is NaN
        instead of raising ``ZeroDivisionError``.
    """
    results = history['history']
    score = results['score']
    outcome = results['outcome']
    if len(score) == 0:
        # No games played: there is no meaningful average to report.
        return sum(outcome), float('nan')
    return sum(outcome), sum(score) / len(score)



def get_df(histories, labels=None):
    """Tabulate the summary of several evaluation histories.

    Args:
        histories: Iterable of history dicts as accepted by ``summary``.
        labels: Optional iterable with one entry per history, used for the
            "Model" column. When omitted the position (0, 1, ...) is used,
            which matches the previous behaviour.

    Returns:
        ``pandas.DataFrame`` with one row per history and the columns
        "Model", "Wins" and "Average Score".

    Raises:
        ValueError: If ``labels`` is given and its length differs from the
            number of histories.
    """
    histories = list(histories)
    if labels is None:
        labels = range(len(histories))
    else:
        labels = list(labels)
        if len(labels) != len(histories):
            raise ValueError(
                'labels has %d entries but %d histories were given'
                % (len(labels), len(histories))
            )

    rows = []
    for label, h in zip(labels, histories):
        wins, avg_score = summary(h)
        rows.append({'Model': label, 'Wins': wins, 'Average Score': avg_score})
    return pd.DataFrame(data=rows)

## First iteration

In [31]:
# Build the network and restore the weights stored under model_0/. This is
# the model that the later checkpoints are compared against in the tables.
# `load_weights` is the last expression, so its status object is displayed.
model_0 = get_model()
model_0.load_weights('robot_reboot_model/model_0/')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x170ab4210>

In [32]:
# Play the whole test set with model_0. The bare name on the last line
# displays the complete per-game history dict, so the cell output is long.
model_0_history = evaluate(model_0)
model_0_history

{'history': {'score': [-20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -1,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -1,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20

In [33]:
# Build a second network instance and restore the weights stored under
# model_1/. `load_weights` is the last expression, so its status object is
# displayed as the cell output.
model_1 = get_model()
model_1.load_weights('robot_reboot_model/model_1/')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x171285ed0>

In [34]:
# Play the whole test set with model_1. The bare name on the last line
# displays the complete per-game history dict, so the cell output is long.
model_1_history = evaluate(model_1)
model_1_history

{'history': {'score': [-20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -1,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -1,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -1,
   -20,

After the first iteration, the model slightly improved both its number of wins (10 → 12) and its average score (−19.53 → −19.43).

In [35]:
# Rows are numbered by position in the list: Model 0 is model_0, Model 1 is model_1.
get_df([model_0_history, model_1_history])

Unnamed: 0,Model,Wins,Average Score
0,0,10,-19.53
1,1,12,-19.43


## Second iteration

In [36]:
# Build a third network instance and restore the weights stored under
# model_2/. This model is also the one wrapped for the AlphaZero benchmark
# further down.
model_2 = get_model()
model_2.load_weights('robot_reboot_model/model_2/')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x170e7a8d0>

In [37]:
# Play the whole test set with model_2. The bare name on the last line
# displays the complete per-game history dict, so the cell output is long.
model_2_history = evaluate(model_2)
model_2_history

{'history': {'score': [-20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -1,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -1,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -1,
   -20,
   -1,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,
   -20,


In [38]:
# Rows are numbered by position in the list: Model i is model_i (0, 1, 2).
get_df([model_0_history, model_1_history, model_2_history])

Unnamed: 0,Model,Wins,Average Score
0,0,10,-19.53
1,1,12,-19.43
2,2,12,-19.43


### Benchmark

In [43]:
# The benchmark uses only the first 30 entries of the test set.
benchmark_states = states[0:30]
# Depth value passed both to the UCT / AlphaZero constructors and to
# GamePlayer.play in the benchmark cells below.
MAX_DEPTH = 20

#### UCT

In [45]:
from src.uct.uct import UCT

# Same layout as the dict returned by `evaluate`, so `summary` and `get_df`
# accept it unchanged.
uct_history = {
    'history': {
        'score': list(),
        'outcome': list()
    }
}

# Mirrors the loop in `evaluate`, with a UCT object (playouts=50) built for
# every benchmark state in place of the network-backed RobotRebootModel.
# NOTE(review): this rebinds the notebook-level names `model`, `game_player`,
# `game`, `state` and `final_state`; later cells see the values from the
# last benchmark state.
for s in benchmark_states:
    game, state = get_game_from_matrix(s)
    model = UCT(game, MAX_DEPTH, playouts=50)
    game_player = GamePlayer(model, game)
    final_state = game_player.play(state, MAX_DEPTH)
    uct_history['history']['outcome'].append(game.get_value(final_state))
    uct_history['history']['score'].append(game.get_score(final_state))

In [51]:
from src.alphazero.alphazero import AlphaZero
from src.robot_reboot.model import Model

# Same layout as the dict returned by `evaluate`, so `summary` and `get_df`
# accept it unchanged.
alphazero_history = {
    'history': {
        'score': list(),
        'outcome': list()
    }
}

# For every benchmark state: wrap model_2 in a RobotRebootModel / GamePlayer
# pair, hand that player to AlphaZero (playouts=50), then play the game with
# a second GamePlayer driven by the AlphaZero object.
# NOTE(review): presumably AlphaZero uses the inner player to guide or
# evaluate its playouts - confirm against src.alphazero.alphazero.
# NOTE(review): this rebinds the notebook-level names `model` and
# `game_player` set by earlier cells.
for s in benchmark_states:
    game, state = get_game_from_matrix(s)
    rr_model = RobotRebootModel(game, model_2)
    rr_game_player = GamePlayer(rr_model, game)
    
    model = AlphaZero(MAX_DEPTH, rr_game_player, playouts=50)
    game_player = GamePlayer(model, game)
    final_state = game_player.play(state, MAX_DEPTH)
    alphazero_history['history']['outcome'].append(game.get_value(final_state))
    alphazero_history['history']['score'].append(game.get_score(final_state))

In [62]:
# Rows are numbered by position in the list: Model 0 is the UCT run and
# Model 1 is the AlphaZero run.
get_df([uct_history, alphazero_history])

Unnamed: 0,Model,Wins,Average Score
0,0,18,-9.133333
1,1,17,-9.6
