In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.optimizers import Adam

from hex import HexGame
from neuralnet import createModel, loadModel, createCriticModel
from player import NeuralNetPlayer, RandomPlayer, MCTSPlayer, NeuralMCTSPlayer
from tournament import Tournament
import pickle
from mcts import Mcts
from keras.callbacks import EarlyStopping, ModelCheckpoint

tf.config.run_functions_eagerly(True)
%matplotlib qt

### Test if NN mcts is better than normal mcts

In [None]:
boardSize = 4
bestModel = loadModel(f'bestmodel.{boardSize}')

mctsPlayer = MCTSPlayer(maxIters=25, maxTime=1000)
nnMctsPlayer = NeuralMCTSPlayer(model=bestModel, maxIters=25, maxTime=1000)
tournament = Tournament(HexGame, [nnMctsPlayer, mctsPlayer], boardSize=boardSize, plot=True)
tournament.run(4)
tournament.printResults()

### Test critic

In [123]:
# create data for critic
boardSize = 4
# Let two mcts players play against each other
mctsPlayer1 = MCTSPlayer(maxIters=100, maxTime=1000, argmax=True)
mctsPlayer2 = MCTSPlayer(maxIters=100, maxTime=1000, argmax=True)
tournament = Tournament(HexGame, [mctsPlayer1, mctsPlayer2], boardSize=boardSize, plot=False)
tournament.run(200)
tournament.printResults()

Starting player won 259 times and lost 141 times
MCTS won 199 times, lost 201 times, and drew 0 times
MCTS won 201 times, lost 199 times, and drew 0 times


In [124]:
# get replay buffer from each players
replayBuffer = mctsPlayer1.mcts.replayBuffer + mctsPlayer2.mcts.replayBuffer
print(f'Length of replay buffer: {len(replayBuffer)}')

Length of replay buffer: 3615


In [125]:
X = [r[0] for r in replayBuffer]
y = [r[-1] for r in replayBuffer]
X = np.array(X).reshape(len(X), -1)
y = np.array(y).reshape(-1, 1)
# for all -1's in y set it to 0
y[y == -1] = 0

In [126]:
import numpy as np
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
from tensorflow.keras.layers import Dropout, BatchNormalization
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import RMSprop

# CRITIC
def createCriticModel(size):
    model = Sequential()
    model.add(Dense(size*size, input_dim=size*size+size, activation='relu'))
    model.add(BatchNormalization())
    # dropout layer
    model.add(Dropout(0.3))
    model.add(Dense(size, activation='relu'))
    model.add(BatchNormalization())
    # Add another Dense layer
    model.add(Dense(size, activation='relu'))
    model.add(BatchNormalization())
    # Add another dropout layer
    model.add(Dropout(0.3))
    # final layer is a singular value from 0 to 1
    model.add(Dense(1, activation='sigmoid', kernel_initializer='he_uniform'))
    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [127]:
# Early stopping callback to stop training when validation accuracy has stopped improving
early_stopping = EarlyStopping(monitor='val_accuracy', patience=10, verbose=1, mode='max', restore_best_weights=True)
# Model checkpoint callback to save the best model weights during training
model_checkpoint = ModelCheckpoint('best_model_weights.h5', monitor='val_accuracy', save_best_only=True, verbose=0, mode='max')

critic = createCriticModel(boardSize)
# split into train and test
X_train = X[:int(len(X)*0.8)]
y_train = y[:int(len(y)*0.8)]
X_test = X[int(len(X)*0.8):]
y_test = y[int(len(y)*0.8):]
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

# train model until convergence
history = critic.fit(X_train, y_train, epochs=200, validation_data=(X_test, y_test), verbose=0, batch_size=64, callbacks=[early_stopping, model_checkpoint], shuffle=True)
critic.load_weights('best_model_weights.h5')
# print val accuracy and loss
print(f'Val accuracy: {history.history["val_accuracy"][-1]}')
print(f'Val loss: {history.history["val_loss"][-1]}')
print(f'Mean of y_test: {np.mean(y_test)}')

(2892, 20)
(2892, 1)
(723, 20)
(723, 1)
Restoring model weights from the end of the best epoch: 53.
Epoch 63: early stopping
Val accuracy: 0.7123098373413086
Val loss: 0.5574069619178772
Mean of y_test: 0.6113416320885201


In [128]:
# plot accuracy 
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [129]:
# plot loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

### critic in real-time game

In [131]:
player1 = MCTSPlayer(maxIters=100, maxTime=1000, argmax=True)
player2 = MCTSPlayer(maxIters=100, maxTime=1000, argmax=True)

game = HexGame(None, None, boardSize, plot=False)
turn = 0
while not game.isTerminal():
    print(critic.predict(game.getNNState(), verbose=0)[0][0])
    if turn % 2 == 0:
        player1.playAction(game)
    else:
        player2.playAction(game)
    turn += 1
print("winner is: ", game.getResult())

0.6491817
0.46109477
0.4824482
0.33125588
0.32222277
0.2847189
0.22229151
0.2580883
winner is:  -1


### test how good on last move

In [132]:
player1 = MCTSPlayer(maxIters=100, maxTime=1000, argmax=True)
player2 = MCTSPlayer(maxIters=100, maxTime=1000, argmax=True)

correct = 0
for _ in range(20):
    game = HexGame(None, None, boardSize, plot=False)
    turn = 0
    while not game.isTerminal():
        lastState = game.getNNState()
        if turn % 2 == 0:
            player1.playAction(game)
        else:
            player2.playAction(game)
        turn += 1
    pred = critic.predict(lastState, verbose=0)[0][0]
    print(f'Prediction: {pred}, Actual: {game.getResult()}')
    if pred > 0.5 and game.getResult() == 1:
        correct += 1
    elif pred < 0.5 and game.getResult() == -1:
        correct += 1

print(f'Accuracy: {correct/20}')

Prediction: 0.8352087140083313, Actual: 1
Prediction: 0.6477511525154114, Actual: 1
Prediction: 0.9251713156700134, Actual: 1
Prediction: 0.30397605895996094, Actual: -1
Prediction: 0.20696748793125153, Actual: 1
Prediction: 0.8370585441589355, Actual: 1
Prediction: 0.598253071308136, Actual: 1
Prediction: 0.9804055690765381, Actual: 1
Prediction: 0.8845412731170654, Actual: 1
Prediction: 0.20940864086151123, Actual: -1
Prediction: 0.669664204120636, Actual: 1
Prediction: 0.3395485281944275, Actual: 1
Prediction: 0.3352910876274109, Actual: -1
Prediction: 0.7537873983383179, Actual: 1
Prediction: 0.5791076421737671, Actual: -1
Prediction: 0.8391754627227783, Actual: 1
Prediction: 0.36929917335510254, Actual: -1
Prediction: 0.6987243890762329, Actual: 1
Prediction: 0.8374888300895691, Actual: 1
Prediction: 0.9558698534965515, Actual: 1
Accuracy: 0.85


### Sequential games comparison

In [None]:
rounds = 4
# model = createModel(size=boardSize)
model = loadModel(f'model.{boardSize}')
neuralNetPlayer = NeuralNetPlayer(model=model, argmax=True)
randomPlayer = RandomPlayer()
tournament = Tournament(HexGame, neuralNetPlayer, randomPlayer, boardSize=boardSize, plot=True)
tournament.run(rounds)
wins, losses, draws = tournament.getResults()
print(f"Neuralnet Player: {wins} wins, {losses} losses, {draws} draws")

replay = nnMctsPlayer.mcts.replayBuffer
# TODO: flip both axis and double the replay buffer
print(f'Length of replay buffer: {len(replay)}')

TypeError: Tournament.__init__() got multiple values for argument 'boardSize'

In [None]:
replay

In [None]:
# train model on replay buffer
X = np.array([x[0] for x in replay]).reshape(len(replay), boardSize*boardSize)
y = np.array([x[1] for x in replay]).reshape(len(replay), boardSize*boardSize)
model.fit(X, y, epochs=10, verbose=1)

In [None]:
# create new game
game = HexGame(None, None, size=3)
board = game.getNNState()
# prediction = model.predict(board)

# plot distribution of actions predictions of empty board
# plt.scatter(range(len(prediction[0])), prediction[0])

# plot distribution actions of empty board with mcts
mc = Mcts(maxIters=5000, maxTime=15)
mc.search(game)
dist = mc.replayBuffer
plt.scatter(range(len(dist[0][-1])), dist[0][-1])
plt.show()

In [None]:
# TODO: check who wins by counting 1's and -1's in last layer of y

In [None]:
s = 5
dataName = f'replayBuffer{s}.pickle'
with open(dataName, 'rb') as f:
    replay = pickle.load(f)

X = np.array([x[0] for x in replay]).reshape(len(replay),-1)
y = np.array([x[1] for x in replay]).reshape(len(replay),-1)

In [None]:
model = createModel(size=s)

In [None]:
games = [game.getNNState()[0] for _ in range(10)]

In [None]:
games

In [None]:
games = np.array(games)

In [None]:
games[0]

In [None]:
X[:10]

In [None]:
y[:10]

### Load replaybuffer

In [None]:
dataName = f'replayBuffer{boardSize}.pickle'
with open(dataName, 'rb') as f:
    replay = pickle.load(f)

### Create training data and train

In [None]:
X = np.array([x[0] for x in replay]).reshape(len(replay), boardSize*boardSize)
y = np.array([x[1] for x in replay]).reshape(len(replay), boardSize*boardSize)
print(X.shape)
print(y.shape)

numModels = 5
models = []

for i in range(numModels):
    newModel = tf.keras.models.clone_model(model)
    newModel.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.01))
    newModel.fit(X, y, epochs=20, verbose=0)
    models.append(newModel)

### Test vs random

In [None]:
nn_better = 0
total_wins = []
for i in range(numModels):
    tournament = Tournament(HexGame, NeuralNetPlayer(model=models[i]), RandomPlayer(), boardSize=boardSize)
    tournament.run(11)
    wins, losses, draws = tournament.getResults()
    total_wins.append(wins)
    if wins > losses:
        nn_better += 1
    print(f"Model {i} vs random: {wins} wins, {losses} losses, {draws} draws")

    tournament = Tournament(HexGame, NeuralNetPlayer(model=models[i]),  MCTSPlayer(maxIters=50, maxTime=20), boardSize=boardSize)
    tournament.run(11)
    wins, losses, draws = tournament.getResults()
    total_wins.append(wins)
    if wins > losses:
        nn_better += 1
    print(f"Model {i} vs mcts: {wins} wins, {losses} losses, {draws} draws")

print(f"NN MCTS Player: {nn_better} models better than random player")

In [None]:
plt.hist(total_wins)
print(f'winrate: {int(100*sum(total_wins)/len(total_wins)/11)}%')

### Visualize a model

In [None]:
model = models[0]
# use %magic to make plots pop up in a separate window
tournament = Tournament(HexGame, NeuralNetPlayer(model=model), RandomPlayer(), boardSize=boardSize, plot=True)
tournament.run(1)

### Save model

In [None]:
# model.save('model.h5')