In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.optimizers import Adam

from hex import HexGame
from neuralnet import createModel, loadModel
from player import NeuralNetPlayer, RandomPlayer, MCTSPlayer, NeuralMCTSPlayer
from tournament import Tournament
import pickle
from mcts import Mcts

# Run tf.function-wrapped code eagerly rather than as traced graphs.
tf.config.run_functions_eagerly(True)

# Board size used throughout the notebook: passed to HexGame/Tournament and
# used to pick the saved model and replay-buffer file names.
boardSize = 3

# Qt backend so that figures open in a separate window instead of inline.
%matplotlib qt

In [6]:
# Load model.{boardSize} and bestmodel.{boardSize} (the suffix follows boardSize).
model = loadModel(f'model.{boardSize}')
bestModel = loadModel(f'bestmodel.{boardSize}')  # NOTE(review): not referenced again in this notebook

# Plain MCTS and network-guided MCTS get the same maxIters / maxTime settings.
mctsPlayer = MCTSPlayer(maxIters=25, maxTime=1000)
nnMctsPlayer = NeuralMCTSPlayer(model=model, maxIters=25, maxTime=1000)
# NOTE(review): the players are passed as one list here, whereas the other
# Tournament calls in this notebook pass them as two positional arguments --
# confirm which form Tournament expects.
tournament = Tournament(HexGame, [nnMctsPlayer, mctsPlayer], boardSize=boardSize, plot=True)
tournament.run(4)
tournament.printResults()



NeuralMCTS won 3 times, lost 5 times, and drew 0 times
MCTS won 5 times, lost 3 times, and drew 0 times


### Sequential games comparison

In [9]:
# Tournament between the network player (argmax=True) and the random player.
rounds = 4
# model = createModel(size=boardSize)
model = loadModel(f'model.{boardSize}')
neuralNetPlayer = NeuralNetPlayer(model=model, argmax=True)
randomPlayer = RandomPlayer()
tournament = Tournament(
    HexGame,
    neuralNetPlayer,
    randomPlayer,
    boardSize=boardSize,
    plot=True,
)
tournament.run(rounds)
results = tournament.getResults()
wins, losses, draws = results
print(f"Neuralnet Player: {wins} wins, {losses} losses, {draws} draws")

# The buffer is taken from nnMctsPlayer (created in the NeuralMCTS-vs-MCTS
# cell), not from the two players of the tournament above.
replay = nnMctsPlayer.mcts.replayBuffer
# TODO: flip both axes and double the replay buffer
print(f'Length of replay buffer: {len(replay)}')



Neuralnet Player: 2 wins, 2 losses, 0 draws
Length of replay buffer: 26


In [None]:
# Display the whole replay buffer (every entry is printed).
replay

In [None]:
# Fit the current model on the replay buffer. Element 0 of each entry is used
# as the network input and element 1 as the target; both are flattened to
# boardSize*boardSize values per sample.
sampleCount = len(replay)
cellCount = boardSize * boardSize
X = np.array([entry[0] for entry in replay]).reshape(sampleCount, cellCount)
y = np.array([entry[1] for entry in replay]).reshape(sampleCount, cellCount)
model.fit(X, y, epochs=10, verbose=1)

In [None]:
# Plot the action distribution that a plain MCTS search produces for a fresh
# game.

# create new game; follow the notebook-wide boardSize instead of a fixed 3 so
# this cell stays consistent with the models and buffers loaded elsewhere
game = HexGame(None, None, size=boardSize)
board = game.getNNState()
# prediction = model.predict(board)

# plot distribution of actions predictions of empty board
# plt.scatter(range(len(prediction[0])), prediction[0])

# plot distribution actions of empty board with mcts
mc = Mcts(maxIters=5000, maxTime=15)
mc.search(game)
dist = mc.replayBuffer
# Last element of the first buffer entry.
# NOTE(review): assumed to be the entry recorded for the empty board -- confirm.
rootDist = dist[0][-1]
plt.scatter(range(len(rootDist)), rootDist)
plt.title(f'MCTS action distribution, empty {boardSize}x{boardSize} board')
plt.xlabel('action index')
plt.ylabel('MCTS distribution value')
plt.show()

In [None]:
# TODO: check who wins by counting 1's and -1's in last layer of y

In [None]:
# Load the pickled replay buffer for a size-`s` board and flatten it into
# training arrays. Note that this rebinds `replay`, `X` and `y`.
s = 5
dataName = f'replayBuffer{s}.pickle'
with open(dataName, 'rb') as f:
    # pickle.load can execute arbitrary code -- only open files you created.
    replay = pickle.load(f)

sampleCount = len(replay)
inputs = np.array([entry[0] for entry in replay])
targets = np.array([entry[1] for entry in replay])
# One row per buffer entry; the row width is inferred from the data.
X = inputs.reshape(sampleCount, -1)
y = targets.reshape(sampleCount, -1)

In [None]:
# Create a new network for a size-`s` board.
# NOTE(review): this rebinds the notebook-wide `model`; the training cell
# further down passes `model` to clone_model while reshaping its data with
# boardSize -- confirm this is intended when s != boardSize.
model = createModel(size=s)

In [None]:
# Gather element 0 of game.getNNState() ten times into a list.
games = []
for sampleIndex in range(10):
    games.append(game.getNNState()[0])

In [None]:
# Inspect the collected states.
games

In [None]:
# Convert the collected states into one NumPy array (rebinds `games`).
games = np.array(games)

In [None]:
# First collected state.
games[0]

In [None]:
# First ten input rows.
X[:10]

In [None]:
# First ten target rows.
y[:10]

### Load replay buffer

In [None]:
# Load the pickled replay buffer whose file name matches boardSize
# (rebinds `replay`).
dataName = f'replayBuffer{boardSize}.pickle'
with open(dataName, 'rb') as f:
    # pickle.load can execute arbitrary code -- only open files you created.
    replay = pickle.load(f)

### Create training data and train

In [None]:
# Build the training arrays: element 0 of each replay entry is used as the
# network input and element 1 as the target, both flattened to
# boardSize*boardSize values per sample.
X = np.array([x[0] for x in replay]).reshape(len(replay), boardSize*boardSize)
y = np.array([x[1] for x in replay]).reshape(len(replay), boardSize*boardSize)
print(X.shape)
print(y.shape)

numModels = 5
models = []

# Take the architecture from boardSize rather than from whatever the global
# `model` currently is: an earlier cell rebinds `model` to a network created
# for a different board size, which would not fit the boardSize-shaped X / y.
# clone_model only copies the architecture (weights are freshly initialised),
# so no trained weights are lost by using a new template.
templateModel = createModel(size=boardSize)

# Train numModels independently initialised copies on the same data.
for i in range(numModels):
    newModel = tf.keras.models.clone_model(templateModel)
    newModel.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.01))
    newModel.fit(X, y, epochs=20, verbose=0)
    models.append(newModel)

### Test vs random and MCTS

In [None]:
# Evaluate every trained model against a random opponent and against plain
# MCTS. Wins of every tournament are collected in total_wins (random first,
# then MCTS, per model); "better" counts are kept per opponent so the summary
# lines report what they say.
evalRounds = 11  # value passed to Tournament.run for every match-up

# A new opponent instance is created for each tournament.
opponentFactories = {
    'random': RandomPlayer,
    'mcts': lambda: MCTSPlayer(maxIters=50, maxTime=20),
}
betterCount = {label: 0 for label in opponentFactories}
total_wins = []

for i in range(numModels):
    for label, makeOpponent in opponentFactories.items():
        tournament = Tournament(HexGame, NeuralNetPlayer(model=models[i]), makeOpponent(), boardSize=boardSize)
        tournament.run(evalRounds)
        wins, losses, draws = tournament.getResults()
        total_wins.append(wins)
        if wins > losses:
            betterCount[label] += 1
        print(f"Model {i} vs {label}: {wins} wins, {losses} losses, {draws} draws")

# nn_better now counts only the tournaments against the random player, which
# is what the summary message has always claimed.
nn_better = betterCount['random']
for label, count in betterCount.items():
    print(f"Neuralnet Player: {count} of {numModels} models better than {label} player")

In [None]:
# Histogram of the wins per tournament, followed by the overall win percentage.
plt.hist(total_wins)
# 11 is the value passed to tournament.run in the evaluation cell.
# NOTE(review): this treats it as the number of games per tournament -- confirm.
winPercent = 100*sum(total_wins)/len(total_wins)/11
print(f'winrate: {int(winPercent)}%')

### Visualize a model

In [None]:
# Watch the first trained model play one plotted round against the random
# player. Rebinds the notebook-wide `model` to models[0].
model = models[0]
# The `%matplotlib qt` magic in the first cell makes the plots pop up in a
# separate window.
tournament = Tournament(HexGame, NeuralNetPlayer(model=model), RandomPlayer(), boardSize=boardSize, plot=True)
tournament.run(1)

### Save model

In [None]:
# model.save('model.h5')