In [82]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from copy import deepcopy
import pandas as pd
import numpy as np
import pickle
import random

from TicTacToe import Board, GameTools as gt

Initializing the neural network multiclassifier model.

This particular model has 3 hidden layers, with each layer containing 30 neurons. The activation function for the hidden layers is set to be ReLu.

In [83]:
agent = MLPClassifier(hidden_layer_sizes=(30, 30, 30), activation='relu')
agent

Demonstrating how to use the partial_fit() method.

You must provide a matrix of features, an array of labels, and an array of all classes that your model may need to predict.

In [84]:
features = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0]]) # 2d array since features are always in a matrix format
label = np.array([5])
classes = [1, 2, 3, 4, 5, 6, 7, 8, 9]

agent.partial_fit(X=features, y=label, classes=classes)
agent.loss_

2.5767843886770314

Let's write a function that competes our agent against a random player.

In order to keep this demonstration as simple as possible, we will build this training environment with a limited number of settings in the parameters. You may want to consider adding additional parameters so you can adjust how your model interacts with the environment.

Be very careful of what you code into the training environment. It's always a good idea to hand trace a game or two in your environment to make sure you aren't feeding your model erroneous data.

In [85]:
def environment(model: MLPClassifier, model_plays_first: bool, training_type: str = "dense", train_model: bool = False, proba_threshold: float = 1.0, threshold_type: str = "lower", drop_multiplier: float = 0.0, drop_constant: float = 0.0, print_board: bool = False):
    game = Board() # initializing a new game of TTT
    
    match_states = list() # a list to store the state of the board for each play made by our model
    match_labels = list() # a list to store the labels associated with each match state
    alt_labels = list() # a list to store alternative labels for the case where our model loses
    
    status = game.status() # function specific to TTT. Will be used to check if the game has ended
    
    trained = False # to determine if our model recieved training during a given match
    
    moves_remaining = 9
    
    # while loop which plays a game of TTT
    while moves_remaining > 0 and not status[0]:
        state = deepcopy(game.vector) # getting copy of the board state in a vectorized format - Ex: [0, 0, 0, 0, 0, 0, 0, 0, 0]

        label = None
        
        coords = gt.avail_moves(game.board) # using a tool specific to our TTT game to get the (row, column) coordinates for available positions on the board

        matrix_state = np.array([state]) # putting our state into a numpy matrix. we need to use a consistent data structure and format for training the model and numpy works well
        
        # if model is playing first and it is first player's turn
        # this is the model's move
        if model_plays_first and moves_remaining % 2 != 0:
            prediction = model.predict(matrix_state)[0] # predicting the best move based on the current game board
            proba = model.predict_proba(matrix_state)[0][prediction - 1]
            
            move = ((prediction - 1) // 3, (prediction - 1) % 3) # converting an integer into (row, column) coordinates. This is specific to our TTT
            # it is possible the model will try to make a move that isn't possible because the space is taken. We will validate that the move was made successfully here.
            out = game.move(row=move[0], column=move[1], player=1) # move() function returns True if move was successful, false otherwise
            
            if out:
                label = prediction
            else:
                move = random.choice(coords) # choosing randomly from known available positions
                coords.remove(move) # dropping our selected move from the available positions on the board
                
                label = (3 * move[0]) + move[1] + 1 # converting coordinates to integer value
                game.move(row=move[0], column=move[1], player=1) # placing random move on the board
                
            if (proba < proba_threshold and threshold_type == "lower") or (proba > proba_threshold and threshold_type == "upper"): # we only add this data to the training set if it does not meet the threshold
                match_states.append(state) # saving state
                match_labels.append(label) # saving associated label
            
                if len(coords) > 0:
                    move = random.choice(coords)
                    pos = (3 * move[0]) + move[1] + 1
                    alt_labels.append(pos) # saving alternate label
                else:
                    alt_labels.append(label) # there was no alternative because all other positions were taken
                    
        # if model is playing second and it is first player's turn
        # this is the random players move
        elif not model_plays_first and moves_remaining % 2 != 0:
            move = random.choice(coords)
            game.move(row=move[0], column=move[1], player=-1)
        
        # if model is playing first and it is the second player's turn
        # this is the random players move
        elif model_plays_first and moves_remaining % 2 == 0:
            move = random.choice(coords)
            game.move(row=move[0], column=move[1], player=-1)
            
        # if model is playing second and it is the second player's turn
        # this is the model's move
        else:
            prediction = model.predict(matrix_state)[0] # predicting the best move based on the current game board
            proba = model.predict_proba(matrix_state)[0][prediction - 1] # getting the probability to determine how confident the agent is
            
            move = ((prediction - 1) // 3, (prediction - 1) % 3) # converting an integer into (row, column) coordinates. This is specific to our TTT
            
            # it is possible the model will try to make a move that isn't possible because the space is taken. We will validate that the move was made successfully here.
            out = game.move(row=move[0], column=move[1], player=1) # move() function returns True if move was successful, false otherwise
            
            if out:
                label = prediction
            else:
                move = random.choice(coords) # choosing randomly from known available positions
                coords.remove(move) # dropping our selected move from the available positions on the board
                
                label = (3 * move[0]) + move[1] + 1 # converting coordinates to integer value
                game.move(row=move[0], column=move[1], player=1) # placing random move on the board
                
            if (proba < proba_threshold and threshold_type == "lower") or (proba > proba_threshold and threshold_type == "upper"): # we only add this data to the training set if it does not meet the threshold
                match_states.append(state) # saving state
                match_labels.append(label) # saving associated label
            
                if len(coords) > 0:
                    move = random.choice(coords)
                    pos = (3 * move[0]) + move[1] + 1
                    alt_labels.append(pos) # saving alternate label
                else:
                    alt_labels.append(label) # there was no alternative because all other positions were taken
        
        
        moves_remaining -= 1 # decrementing the remaining move count
        status = game.status() # updating the status of the game
        
    # end of the while loop for playing a game of TTT
    
    # if our setting for training the model is set to True, then the model will be partially fit to the data from this game
    # if set to false, this environment just allows a model to compete against a random player to evaluate performance
    # also checking that match_states is not an empty list before going into drops
    if train_model and match_states:
        
        # dropping training data based on how early in the game it was
        # starting at 0.0, each subsequent sample has an additional 0.2 chance of being kept as training data
        drop_indexes = list()
        for n in range(len(match_states)):
            if random.random() > (n * drop_multiplier + drop_constant):
                drop_indexes.append(n)
                
        for idx in drop_indexes[::-1]:
            match_states.pop(idx)
            match_labels.pop(idx)
            alt_labels.pop(idx)
            
        if match_states:
            trained = True
            # converting our features, the match states, into a numpy matrix
            features = np.array(match_states)
            
            # converting our labels into a numpy array
            # if the model won, we use the winning labels
            # if the model lost, we use the alternate labels
            if status[1] == 1:
                labels = np.array(match_labels)
            else:
                labels = np.array(alt_labels)
            
            # a list explicitly stating the possible classes for our model - a required parameter for partial fitting
            classes = [x for x in range(1, 10)]
        
            if training_type == "sparse_winner" and status[1] == 1:
                model.partial_fit(features, labels, classes)
                
            elif training_type == "sparse_loser" and status[1] == -1:
                model.partial_fit(features, labels, classes)
                
            elif training_type == "dense":
                model.partial_fit(features, labels, classes)
    
    # if print_board set to true, will print a nice string formatting version of the board
    if print_board:
        print(game.print())
    
    return status[1], trained, match_states, match_labels

Now we have a fully functional training environment for our agent.

We can run a single match without training and examine the output from our environment.

In [86]:
t = environment(model=agent, model_plays_first=True, train_model=False, print_board=True)
t

 X |   |   
-----------
 X | O |   
-----------
 X | O |   


(1,
 False,
 [[0, 0, 0, 0, 0, 0, 0, 0, 0],
  [0, 0, 0, 0, 0, 0, 1, -1, 0],
  [1, 0, 0, 0, -1, 0, 1, -1, 0]],
 [7, 1, 4])

It looks like our model is successfully playing a game of Tic Tac Toe! We can also see that our environment is able to successfully track all of the game states and decisions made by our model. This is good news!

Now, we can iteratively train the model and then see how it performs. In this case, we set the model to only play as the second player.

In [87]:
training_passes = 0
# 10000 training iterations
for _ in range(10000):
    # Most Effective parameters so far:
    # model=agent, model_plays_first=False, train_model=True, training_type="dense", proba_threshold=0.5, threshold_type="lower")
    t = environment(model=agent, model_plays_first=False, train_model=True, training_type="dense", proba_threshold=0.5, threshold_type="lower", drop_multiplier=0.1, drop_constant=0.05) 
    if t[1]:
        training_passes += 1
        
training_passes

3322

Our agent has now been trained on 10000 matches of TTT.

We can ask our agent to predict the best move from a random board as a test of its functionality.

In [88]:
test_board = np.array([[1, 0, 0, 0, 1, 0, -1, -1, 0]])
zero_board = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0]])

pred = agent.predict(zero_board)

agent.classes_, agent.predict_proba(zero_board)[0][pred - 1][0], pred

(array([1, 2, 3, 4, 5, 6, 7, 8, 9]), 0.5108077194125298, array([5]))

We can now put our agent into matches against a random player and evaluate how the agent performs. Notice that the "train_model" parameter is set to "False". Our agent will strictly play, it will not learn.

We will observe its performance when it plays first and when it plays second.

Anything other than a win is considered a loss.

In [89]:
win_ct = 0
loss_ct = 0
draw_ct = 0

for _ in range(5000):
    f = environment(model=agent, model_plays_first=True, train_model=False, print_board=False)
    
    if f[0] == 1:
        win_ct += 1
    elif f[0] == 0:
        draw_ct += 1
    else:
        loss_ct += 1
    
for _ in range(5000):
    s = environment(model=agent, model_plays_first=False, train_model=False, print_board=False)

    if s[0] == 1:
        win_ct += 1
    elif s[0] == 0:
        draw_ct += 1
    else:
        loss_ct += 1
        
win_ct, loss_ct, draw_ct

(8630, 1036, 334)

Now that we have a functioning environment for our agent to play against a random opponent, let's build another environment where we can compete models against eachother to further advance their skill.

In [90]:
def comp_environment(model: MLPClassifier, opponent: MLPClassifier, model_plays_first: bool, training_type: str = "dense", train_model: bool = False, proba_threshold: float = 1.0, threshold_type: str = "lower", drop_multiplier: float = 0.0, drop_constant: float = 0.0, print_board: bool = False):
    game = Board() # initializing a new game of TTT
    
    match_states = list() # a list to store the state of the board for each play made by our model
    match_labels = list() # a list to store the labels associated with each match state
    alt_labels = list() # a list to store alternative labels for the case where our model loses
    
    status = game.status() # function specific to TTT. Will be used to check if the game has ended
    
    trained = False # to determine if our model recieved training during a given match
    
    moves_remaining = 9
    
    # while loop which plays a game of TTT
    while moves_remaining > 0 and not status[0]:
        state = deepcopy(game.vector) # getting copy of the board state in a vectorized format - Ex: [0, 0, 0, 0, 0, 0, 0, 0, 0]

        label = None
        
        coords = gt.avail_moves(game.board) # using a tool specific to our TTT game to get the (row, column) coordinates for available positions on the board

        matrix_state = np.array([state]) # putting our state into a numpy matrix. we need to use a consistent data structure and format for training the model and numpy works well
        
        # if model is playing first and it is first player's turn
        # this is the model's move
        if model_plays_first and moves_remaining % 2 != 0:
            prediction = model.predict(matrix_state)[0] # predicting the best move based on the current game board
            proba = model.predict_proba(matrix_state)[0][prediction - 1]
            
            move = ((prediction - 1) // 3, (prediction - 1) % 3) # converting an integer into (row, column) coordinates. This is specific to our TTT
            # it is possible the model will try to make a move that isn't possible because the space is taken. We will validate that the move was made successfully here.
            out = game.move(row=move[0], column=move[1], player=1) # move() function returns True if move was successful, false otherwise
            
            if out:
                label = prediction
            else:
                move = random.choice(coords) # choosing randomly from known available positions
                coords.remove(move) # dropping our selected move from the available positions on the board
                
                label = (3 * move[0]) + move[1] + 1 # converting coordinates to integer value
                game.move(row=move[0], column=move[1], player=1) # placing random move on the board
                
            if (proba < proba_threshold and threshold_type == "lower") or (proba > proba_threshold and threshold_type == "upper"): # we only add this data to the training set if it does not meet the threshold
                match_states.append(state) # saving state
                match_labels.append(label) # saving associated label
            
                if len(coords) > 0:
                    move = random.choice(coords)
                    pos = (3 * move[0]) + move[1] + 1
                    alt_labels.append(pos) # saving alternate label
                else:
                    alt_labels.append(label) # there was no alternative because all other positions were taken
                    
        # if model is playing second and it is first player's turn
        # this is the opponent's move
        elif not model_plays_first and moves_remaining % 2 != 0:
            prediction = opponent.predict(matrix_state)[0] # predicting the best move based on the current game board
            proba = opponent.predict_proba(matrix_state)[0][prediction - 1]
            
            move = ((prediction - 1) // 3, (prediction - 1) % 3) # converting an integer into (row, column) coordinates. This is specific to our TTT
            # it is possible the model will try to make a move that isn't possible because the space is taken. We will validate that the move was made successfully here.
            out = game.move(row=move[0], column=move[1], player=-1) # move() function returns True if move was successful, false otherwise
            
            if not out:
                move = random.choice(coords) # choosing randomly from known available positions
                coords.remove(move) # dropping our selected move from the available positions on the board
                
                game.move(row=move[0], column=move[1], player=-1) # placing random move on the board
        
        # if model is playing first and it is the second player's turn
        # this is the opponent's move
        elif model_plays_first and moves_remaining % 2 == 0:
            prediction = opponent.predict(matrix_state)[0] # predicting the best move based on the current game board
            proba = opponent.predict_proba(matrix_state)[0][prediction - 1]
            
            move = ((prediction - 1) // 3, (prediction - 1) % 3) # converting an integer into (row, column) coordinates. This is specific to our TTT
            # it is possible the model will try to make a move that isn't possible because the space is taken. We will validate that the move was made successfully here.
            out = game.move(row=move[0], column=move[1], player=-1) # move() function returns True if move was successful, false otherwise
            
            if not out:
                move = random.choice(coords) # choosing randomly from known available positions
                coords.remove(move) # dropping our selected move from the available positions on the board
                
                game.move(row=move[0], column=move[1], player=-1) # placing random move on the board
            
        # if model is playing second and it is the second player's turn
        # this is the model's move
        else:
            prediction = model.predict(matrix_state)[0] # predicting the best move based on the current game board
            proba = model.predict_proba(matrix_state)[0][prediction - 1] # getting the probability to determine how confident the agent is
            
            move = ((prediction - 1) // 3, (prediction - 1) % 3) # converting an integer into (row, column) coordinates. This is specific to our TTT
            
            # it is possible the model will try to make a move that isn't possible because the space is taken. We will validate that the move was made successfully here.
            out = game.move(row=move[0], column=move[1], player=1) # move() function returns True if move was successful, false otherwise
            
            if out:
                label = prediction
            else:
                move = random.choice(coords) # choosing randomly from known available positions
                coords.remove(move) # dropping our selected move from the available positions on the board
                
                label = (3 * move[0]) + move[1] + 1 # converting coordinates to integer value
                game.move(row=move[0], column=move[1], player=1) # placing random move on the board
                
            if (proba < proba_threshold and threshold_type == "lower") or (proba > proba_threshold and threshold_type == "upper"): # we only add this data to the training set if it does not meet the threshold
                match_states.append(state) # saving state
                match_labels.append(label) # saving associated label
            
                if len(coords) > 0:
                    move = random.choice(coords)
                    pos = (3 * move[0]) + move[1] + 1
                    alt_labels.append(pos) # saving alternate label
                else:
                    alt_labels.append(label) # there was no alternative because all other positions were taken
        
        
        moves_remaining -= 1 # decrementing the remaining move count
        status = game.status() # updating the status of the game
        
    # end of the while loop for playing a game of TTT
    
    # if our setting for training the model is set to True, then the model will be partially fit to the data from this game
    # if set to false, this environment just allows a model to compete against a random player to evaluate performance
    # also checking that match_states is not an empty list
    if train_model and match_states:
        
        # dropping training data based on how early in the game it was
        # starting at 0.0, each subsequent sample has an additional 0.2 chance of being kept as training data
        drop_indexes = list()
        for n in range(len(match_states)):
            if random.random() > (n * drop_multiplier + drop_constant):
                drop_indexes.append(n)
                
        for idx in drop_indexes[::-1]:
            match_states.pop(idx)
            match_labels.pop(idx)
            alt_labels.pop(idx)
            
        if match_states:
            trained = True
            # converting our features, the match states, into a numpy matrix
            features = np.array(match_states)
            
            # converting our labels into a numpy array
            # if the model won, we use the winning labels
            # if the model lost, we use the alternate labels
            if status[1] == 1:
                labels = np.array(match_labels)
            else:
                labels = np.array(alt_labels)
            
            # a list explicitly stating the possible classes for our model - a required parameter for partial fitting
            classes = [x for x in range(1, 10)]
        
            if training_type == "sparse_winner" and status[1] == 1:
                model.partial_fit(features, labels, classes)
                
            elif training_type == "sparse_loser" and status[1] == -1:
                model.partial_fit(features, labels, classes)
                
            elif training_type == "dense":
                model.partial_fit(features, labels, classes)
    
    # if print_board set to true, will print a nice string formatting version of the board
    if print_board:
        print(game.print())
    
    return status[1], trained, match_states, match_labels, alt_labels

In [91]:
# agent_main = pickle.load(open("model_binaries/demo_agent_main.sav", "rb"))
agent_secondary = pickle.load(open("model_binaries/demo_agent_secondary.sav", "rb"))

In [97]:
training_wins = 0
training_loss = 0
training_draw = 0
passes = 0

for _ in range(10000):
    t = comp_environment(model=agent, opponent=agent_secondary, model_plays_first=False, train_model=True, training_type="sparse_winner", proba_threshold=1, threshold_type="lower", drop_multiplier=0.05, drop_constant=0.0, print_board=False)
    if t[0] == 1:
        training_wins += 1
    elif t[0] == 0:
        training_draw += 1
    else:
        training_loss += 1
    
    if t[1]:
        passes += 1

print("Passes:", passes, "wins:", training_wins, "losses:", training_loss, "draws:", training_draw)
    
win_ct = 0
loss_ct = 0
draw_ct = 0

for _ in range(5000):
    f = environment(model=agent, model_plays_first=True, train_model=False, print_board=False)
    
    if f[0] == 1:
        win_ct += 1
    elif f[0] == 0:
        draw_ct += 1
    else:
        loss_ct += 1
    
for _ in range(5000):
    s = environment(model=agent, model_plays_first=False, train_model=False, print_board=False)

    if s[0] == 1:
        win_ct += 1
    elif s[0] == 0:
        draw_ct += 1
    else:
        loss_ct += 1
        
win_ct, loss_ct, draw_ct

Passes: 2267 wins: 576 losses: 6226 draws: 3198


(6708, 2954, 338)