In [1]:
import tensorflow as tf
import numpy as np
from src.ml.model import get_model

In [2]:
# Build a network with get_model() and restore its weights from the model_0 checkpoint path.
current_model = get_model()
current_model.load_weights('robot_reboot_model/model_0/')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x176f27050>

In [3]:
# Second network instance, restored from the same model_0 checkpoint path as current_model.
# NOTE(review): both models load identical weights here — confirm this is intended.
trained_model = get_model()
trained_model.load_weights('robot_reboot_model/model_0/')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x1771d3650>

In [4]:
from src.alphazero.heuristic_function import heuristic_fn
from src.alphazero.montecarlo_tree_search import MonteCarloTreeSearch
from src.robot_reboot.factory import RobotRebootFactory
from src.robot_reboot.model import RobotRebootModel
from src.alphazero.game_player import GamePlayer


# Dimensions of a state tensor; used below to flatten states (rows * cols * layers)
# and to reshape parsed records to (rows, cols, layers).
rows, cols, layers = 31, 31, 9

def build_mcts(nn, game, max_depth, playouts):
    """Assemble a Monte Carlo tree search driven by the network ``nn``.

    The network is wrapped in a ``RobotRebootModel`` for ``game``, that model
    is handed to a ``GamePlayer``, and the player is given to
    ``MonteCarloTreeSearch`` together with ``heuristic_fn``.

    Args:
        nn: network object passed through to ``RobotRebootModel``.
        game: game object shared by the model wrapper and the player.
        max_depth: depth argument forwarded to ``MonteCarloTreeSearch``.
        playouts: forwarded as the ``playouts`` keyword of the search.

    Returns:
        The configured ``MonteCarloTreeSearch`` instance.
    """
    player = GamePlayer(RobotRebootModel(game, nn), game)
    return MonteCarloTreeSearch(heuristic_fn, max_depth, player, playouts=playouts)
    
def play(state, mcts, game, depth=0, max_depth=2):
    """Advance ``state`` by repeatedly applying the search's top-ranked action.

    While ``depth <= max_depth`` the search is queried for the current state,
    the action whose index sorts last in ``np.argsort`` of the search output
    (i.e. a highest-valued entry) is looked up in ``game.actions`` and applied
    through ``game.apply``. Starting from ``depth=0`` this applies
    ``max_depth + 1`` actions.

    Args:
        state: state to start from.
        mcts: object exposing ``search(state)``.
        game: object exposing ``actions`` and ``apply(action, state)``.
        depth: step counter to start from.
        max_depth: last step counter value for which an action is still applied.

    Returns:
        The state reached after the final applied action, or ``state`` itself
        when ``depth`` already exceeds ``max_depth``.
    """
    current = state
    step = depth
    while step <= max_depth:
        ranking = np.argsort(mcts.search(current))
        # The last index of the ascending ranking points at a maximal entry.
        chosen = game.actions[ranking[-1]]
        current = game.apply(chosen, current)
        step += 1
    return current

def evaluate(nn, state, i, game, max_depth=20, playouts=50, output_dir='results'):
    """Play one game from ``state`` with ``nn`` and persist the outcome.

    A search is built with ``build_mcts``, ``play`` is run from ``state``,
    and the value and score that ``game`` reports for the final state are
    printed and written, together with the flattened final state, as a single
    ``tf.train.Example`` to ``<output_dir>/<i>_result.tfrecords``.

    Args:
        nn: network passed to ``build_mcts``.
        state: state the play-out starts from.
        i: index of this evaluation; stored in the record and used in the
            output file name.
        game: game object used for the search, the play-out and the scoring.
        max_depth: forwarded to both ``build_mcts`` and ``play``.
        playouts: forwarded to ``build_mcts``.
        output_dir: directory receiving the record file. It is created when
            missing, so the function also works on a fresh checkout.

    Returns:
        None. The result is printed and written to disk.
    """
    mcts = build_mcts(nn, game, max_depth, playouts)
    final_state = play(state, mcts, game, max_depth=max_depth)
    v = game.get_value(final_state)
    score = game.get_score(final_state)

    def _float_feature(values):
        # Every field of the record is stored as a list of floats.
        return tf.train.Feature(float_list=tf.train.FloatList(value=values))

    sample = tf.train.Example(
        features=tf.train.Features(
            feature={
                'i': _float_feature([i]),
                'v': _float_feature([v]),
                'score': _float_feature([score]),
                # assumes final_state offers a numpy-style reshape and holds
                # rows * cols * layers values — TODO confirm against game.apply
                's': _float_feature(final_state.reshape(rows * cols * layers, )),
            }
        )
    )
    print(f'{i}: score {score} and v {v}')
    # TFRecordWriter does not create missing parent directories.
    tf.io.gfile.makedirs(output_dir)
    with tf.io.TFRecordWriter(f'{output_dir}/{i}_result.tfrecords') as f:
        f.write(sample.SerializeToString())

In [6]:
from src.robot_reboot.game import get_game_from_matrix

# Stream the serialized examples stored in the testing TFRecord file.
test_dataset = tf.data.TFRecordDataset('robot_reboot_data/testing.tfrecords')
# Each example is parsed for a single variable-length float feature named 's'.
feature_desc = {
    's': tf.io.VarLenFeature(tf.float32)
}
# Every iteration rebinds `s`, so once the loop ends `s` holds only the array
# decoded from the last record; `s` stays unbound if the dataset yields nothing.
for serialized in test_dataset:
    parsed = tf.io.parse_single_example(serialized, feature_desc)
    # Densify the sparse feature and reshape it to a (rows, cols, layers) numpy array.
    s = tf.reshape(tf.sparse.to_dense(parsed['s']), (rows, cols, layers)).numpy()

In [7]:
# Alias the array left in `s` by the dataset loop; it is passed to get_game_from_matrix below.
matrix = s

In [17]:
# Build the game and a state from the matrix, then run one short evaluation stored under index 0.
# NOTE(review): the TypeError recorded under this cell does not follow from the evaluate()
# signature defined in this notebook, which accepts this call — presumably stale output
# from an earlier definition; re-run to confirm.
game, state = get_game_from_matrix(matrix)
evaluate(trained_model, state, 0, game, max_depth=2, playouts=2)

TypeError: evaluate() got multiple values for argument 'max_depth'

In [14]:
# Show the string form of the game's goal house.
str(game.goal_house)

'Robot 2 needs to get to its house on (20, 28)'

In [16]:
# Show the robot positions stored on the state.
state.robots_positions

[(0, 1), (6, 17), (0, 22), (10, 6)]

In [None]:
# NOTE(review): `robots` is not assigned in any visible cell of this notebook; this cell
# raises NameError on a fresh kernel unless the name is defined elsewhere — confirm or remove.
robots

In [None]:
# evaluate() requires the game as its fourth positional argument; the call is aligned
# with the earlier evaluation cell, which passes the `state` and `game` returned by
# get_game_from_matrix rather than the raw matrix `s`.
evaluate(trained_model, state, 0, game, max_depth=3, playouts=2)

In [None]:
# Scratch example of a nested history record with a 'value' and a 'score' series.
# NOTE(review): 'value' has five entries and 'score' four — confirm the lengths are meant to differ.
value_series = [0, 1, 1, -1, -1]
score_series = [12, 12, 12, 12]
history = dict(history=dict(value=value_series, score=score_series))
history

In [None]:
import multiprocessing
from multiprocessing import Value
def a_function(ret_value):
    """Store 3.14 in the ``value`` attribute of ``ret_value``.

    Args:
        ret_value: object with a writable ``value`` attribute; below it is a
            shared ``multiprocessing.Value`` so the parent can read the result.
    """
    ret_value.value = 3.14


# The shared double must be bound to the same name that is handed to the child
# process and read back afterwards (it was previously bound to `index`, which
# left `ret_value` undefined).
ret_value = Value("d", 0.0)

reader_process = multiprocessing.Process(target=a_function, args=(ret_value,))
reader_process.start()
# Wait for the child to finish before reading the shared value.
reader_process.join()

print(ret_value.value)
