<a href="https://colab.research.google.com/github/Isamaoz/CE888---Data-Science-and-Decision-Making/blob/master/Assignment/OXO_Game_10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# First collection of data

This part uses the code given by the lecturers, it records the data from 500 games with itermax = 1000 for the first player and itermax = 100 for the second player

In [14]:
import pandas as pd
df = pd.DataFrame(columns=['Game No.','Past_Player','New_Player','Move_NP'])
df.head()

Unnamed: 0,Game No.,Past_Player,New_Player,Move_NP


In [15]:
# This is a very simple implementation of the UCT Monte Carlo Tree Search algorithm in Python 2.7.
# The function UCT(rootstate, itermax, verbose = False) is towards the bottom of the code.
# It aims to have the clearest and simplest possible code, and for the sake of clarity, the code
# is orders of magnitude less efficient than it could be made, particularly by using a 
# state.GetRandomMove() or state.DoRandomRollout() function.
# 
# Example GameState classes for Nim, OXO and Othello are included to give some idea of how you
# can write your own GameState use UCT in your 2-player game. Change the game to be played in 
# the UCTPlayGame() function at the bottom of the code.
# 
# Written by Peter Cowling, Ed Powley, Daniel Whitehouse (University of York, UK) September 2012.
# 
# Licence is granted to freely use and distribute for any sensible/legal purpose so long as this comment
# remains in any distributed code.
# 
# For more information about Monte Carlo Tree Search check out our web site at www.mcts.ai

from math import *
import random

class GameState:
    """ A state of the game, i.e. the game board. These are the only functions which are
        absolutely necessary to implement UCT in any 2-player complete information deterministic 
        zero-sum game, although they can be enhanced and made quicker, for example by using a 
        GetRandomMove() function to generate a random move during rollout.
        By convention the players are numbered 1 and 2.
    """
    def __init__(self):
            self.playerJustMoved = 2 # At the root pretend the player just moved is player 2 - player 1 has the first move
        
    def Clone(self):
        """ Create a deep clone of this game state.
        """
        st = GameState()
        st.playerJustMoved = self.playerJustMoved
        return st

    def DoMove(self, move):
        """ Update a state by carrying out the given move.
            Must update playerJustMoved.
        """
        self.playerJustMoved = 3 - self.playerJustMoved
        
    def GetMoves(self):
        """ Get all possible moves from this state.
        """
    
    def GetResult(self, playerjm):
        """ Get the game result from the viewpoint of playerjm. 
        """

    def __repr__(self):
        """ Don't need this - but good style.
        """
        pass


class OXOState:
    """ A state of the game, i.e. the game board.
        Squares in the board are in this arrangement
        012
        345
        678
        where 0 = empty, 1 = player 1 (X), 2 = player 2 (O)
    """
    def __init__(self):
        self.playerJustMoved = 2 # At the root pretend the player just moved is p2 - p1 has the first move
        self.board = [0,0,0,0,0,0,0,0,0] # 0 = empty, 1 = player 1, 2 = player 2
        
    def Clone(self):
        """ Create a deep clone of this game state.
        """
        st = OXOState()
        st.playerJustMoved = self.playerJustMoved
        st.board = self.board[:]
        return st

    def DoMove(self, move):
        """ Update a state by carrying out the given move.
            Must update playerToMove.
        """
        assert move >= 0 and move <= 8 and move == int(move) and self.board[move] == 0
        self.playerJustMoved = 3 - self.playerJustMoved
        self.board[move] = self.playerJustMoved
        
    def GetMoves(self):
        """ Get all possible moves from this state.
        """
        return [i for i in range(9) if self.board[i] == 0]
    
    def GetResult(self, playerjm):
        """ Get the game result from the viewpoint of playerjm. 
        """
        for (x,y,z) in [(0,1,2),(3,4,5),(6,7,8),(0,3,6),(1,4,7),(2,5,8),(0,4,8),(2,4,6)]:
            if self.board[x] == self.board[y] == self.board[z]:
                if self.board[x] == playerjm:
                    return 1.0
                else:
                    return 0.0
        if self.GetMoves() == []: return 0.5 # draw
        return False # Should not be possible to get here

    def __repr__(self):
        s= ""
        for i in range(9): 
            s += ".XO"[self.board[i]]
            if i % 3 == 2: s += "\n"
        return s


class Node:
    """ A node in the game tree. Note wins is always from the viewpoint of playerJustMoved.
        Crashes if state not specified.
    """
    def __init__(self, move = None, parent = None, state = None):
        self.move = move # the move that got us to this node - "None" for the root node
        self.parentNode = parent # "None" for the root node
        self.childNodes = []
        self.wins = 0
        self.visits = 0
        self.untriedMoves = state.GetMoves() # future child nodes
        self.playerJustMoved = state.playerJustMoved # the only part of the state that the Node needs later
        
    def UCTSelectChild(self):
        """ Use the UCB1 formula to select a child node. Often a constant UCTK is applied so we have
            lambda c: c.wins/c.visits + UCTK * sqrt(2*log(self.visits)/c.visits to vary the amount of
            exploration versus exploitation.
        """
        s = sorted(self.childNodes, key = lambda c: c.wins/c.visits + sqrt(2*log(self.visits)/c.visits))[-1]
        return s
    
    def AddChild(self, m, s):
        """ Remove m from untriedMoves and add a new child node for this move.
            Return the added child node
        """
        n = Node(move = m, parent = self, state = s)
        self.untriedMoves.remove(m)
        self.childNodes.append(n)
        return n
    
    def Update(self, result):
        """ Update this node - one additional visit and result additional wins. result must be from the viewpoint of playerJustmoved.
        """
        self.visits += 1
        self.wins += result

    def __repr__(self):
        return "[M:" + str(self.move) + " W/V:" + str(self.wins) + "/" + str(self.visits) + " U:" + str(self.untriedMoves) + "]"

    def TreeToString(self, indent):
        s = self.IndentString(indent) + str(self)
        for c in self.childNodes:
             s += c.TreeToString(indent+1)
        return s

    def IndentString(self,indent):
        s = "\n"
        for i in range (1,indent+1):
            s += "| "
        return s

    def ChildrenToString(self):
        s = ""
        for c in self.childNodes:
             s += str(c) + "\n"
        return s


def UCT(rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()

        # Select
        while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expand
        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves) 
            state.DoMove(m)
            node = node.AddChild(m, state)  # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.GetMoves() != []: # while state is non-terminal
            state.DoMove(random.choice(state.GetMoves()))

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # Output some information about the tree - can be omitted
    #if verbose: print(rootnode.TreeToString(0))
    #else: print(rootnode.ChildrenToString())

    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
                
def UCTPlayGame(x, counter):
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """
    state = OXOState() # uncomment to play OXO
    while state.GetMoves() != []:
        #print(str(state))
        for i in range (9):
          df.loc[counter,i] = state.board[i]  
        if state.playerJustMoved == 1:
            m = UCT(rootstate=state, itermax=1000, verbose=False)  # play with values for itermax and verbose = True
        else:
            m = UCT(rootstate=state, itermax=100, verbose=False)
        #print("Best Move: " + str(m) + "\n")
        df.loc[counter,'Move_NP'] = m                                 #Prints the best move
        df.loc[counter,'Past_Player'] = state.playerJustMoved         #Prints the player that made the last move
        df.loc[counter,'New_Player'] = 3 - state.playerJustMoved      #Prints the player thai is about to play the 'best move'
        df.loc[counter,'Game No.'] = x + 1                            #Pints the numer of game
        counter += 1
        #print("Game no. ", x)
        state.DoMove(m)
        if state.GetResult(state.playerJustMoved) != False:
            #print(str(state))
            break
    if state.GetResult(state.playerJustMoved) == 1.0:
        flag = state.playerJustMoved
        #print("Player " + str(state.playerJustMoved) + " wins!")
    elif state.GetResult(state.playerJustMoved) == 0.0:
        #print("Player " + str(3 - state.playerJustMoved) + " wins!")
        flag = 3 - state.playerJustMoved
    else: 
        #print("Nobody wins!")
        flag = 0
    return counter, flag
if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players. 
    """
    p1_wins = 0
    p2_wins = 0
    no_wins = 0
    counter = 0
    for x in range(500):
        new_row, flag = UCTPlayGame(x,counter)
        if (flag == 1):
          p1_wins = p1_wins + 1
        elif(flag == 2):
          p2_wins = p2_wins + 1
        elif (flag == 0):
          no_wins = no_wins + 1
        counter = new_row
    print("Player 1 won", p1_wins, "times")
    print("Player 2 won", p2_wins, "times")
    print("Number of draws:", no_wins)

Player 1 won 34 times
Player 2 won 30 times
Number of draws: 436


In [16]:
df

Unnamed: 0,Game No.,Past_Player,New_Player,Move_NP,0,1,2,3,4,5,6,7,8
0,1,2,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,1,2,4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,2,1,8,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0
3,1,1,2,1,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0
4,1,2,1,7,1.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4365,500,2,1,1,2.0,0.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0
4366,500,1,2,7,2.0,1.0,0.0,1.0,1.0,2.0,0.0,0.0,0.0
4367,500,2,1,2,2.0,1.0,0.0,1.0,1.0,2.0,0.0,2.0,0.0
4368,500,1,2,6,2.0,1.0,1.0,1.0,1.0,2.0,0.0,2.0,0.0


In [0]:
import pandas as pd

df.to_csv(r'OXO.csv',index = None, header=True)

In [18]:
from sklearn import tree

model = tree.DecisionTreeClassifier()
inputs = df.drop(['Move_NP', 'Game No.', 'Past_Player', 'New_Player'], axis='columns')
inputs.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0
4,1.0,2.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0


In [19]:
type(inputs)

pandas.core.frame.DataFrame

In [20]:
df.columns

Index(['Game No.', 'Past_Player', 'New_Player', 'Move_NP', 0, 1, 2, 3, 4, 5, 6,
       7, 8],
      dtype='object')

In [21]:
target = df['Move_NP']
target=target.astype('int')
target

0       0
1       4
2       8
3       1
4       7
       ..
4365    1
4366    7
4367    2
4368    6
4369    8
Name: Move_NP, Length: 4370, dtype: int64

In [22]:
model.fit(inputs, target)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

#Second data recording

Create new dataframe for new data

In [23]:
df2 = pd.DataFrame(columns=['Game No.','Past_Player','New_Player','Move_NP'])
df2.head()

Unnamed: 0,Game No.,Past_Player,New_Player,Move_NP


Change definition of Rollout to use the Decision Tree

In [37]:
import numpy as np

def UCT(selector, rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()

        # Select
        while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expand
        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves) 
            state.DoMove(m)
            node = node.AddChild(m, state)  # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.GetMoves() != []: # while state is non-terminal
            #print("Moves", state.GetMoves())
            if(selector <= 8):
              board_re = np.reshape(state.board,(-1,9))
              #print(board_re)
              prediction = model.predict(board_re)
              pred = prediction.astype(np.int64)
              #print(pred[0])

              if (pred[0] in state.GetMoves()):
                state.DoMove(pred[0])
              else:
                state.DoMove(random.choice(state.GetMoves()))
            else:
              state.DoMove(random.choice(state.GetMoves()))

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # Output some information about the tree - can be omitted
    #if verbose: print(rootnode.TreeToString(0))
    #else: print(rootnode.ChildrenToString())

    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
                
def UCTPlayGame(x, counter):
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """
    state = OXOState() # uncomment to play OXO
    while state.GetMoves() != []:
        #print(str(state))
        for i in range (9):
          df2.loc[counter,i] = state.board[i]  

        randomSel = random.randint(0,9)   
        if state.playerJustMoved == 1:
            m = UCT(selector=randomSel, rootstate=state, itermax=1000, verbose=False)  # play with values for itermax and verbose = True
        else:
            m = UCT(selector=randomSel, rootstate=state, itermax=100, verbose=False)
        #print("Best Move: " + str(m) + "\n")
        df2.loc[counter,'Move_NP'] = m                                 #Prints the best move
        df2.loc[counter,'Past_Player'] = state.playerJustMoved         #Prints the player that made the last move
        df2.loc[counter,'New_Player'] = 3 - state.playerJustMoved      #Prints the player thai is about to play the 'best move'
        df2.loc[counter,'Game No.'] = x + 1                            #Pints the numer of game
        counter += 1
        #print("Game no. ", x)
        state.DoMove(m)
        if state.GetResult(state.playerJustMoved) != False:
            #print(str(state))
            break
    if state.GetResult(state.playerJustMoved) == 1.0:
        flag = state.playerJustMoved
        #print("Player " + str(state.playerJustMoved) + " wins!")
    elif state.GetResult(state.playerJustMoved) == 0.0:
        #print("Player " + str(3 - state.playerJustMoved) + " wins!")
        flag = 3 - state.playerJustMoved
    else: 
        #print("Nobody wins!")
        flag = 0
    return counter, flag
if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players. 
    """
    p1_wins = 0
    p2_wins = 0
    no_wins = 0
    counter = 0
    for x in range(500):
        new_row, flag = UCTPlayGame(x,counter)
        if (flag == 1):
          p1_wins = p1_wins + 1
        elif(flag == 2):
          p2_wins = p2_wins + 1
        elif (flag == 0):
          no_wins = no_wins + 1
        counter = new_row
    print("Player 1 won", p1_wins, "times")
    print("Player 2 won", p2_wins, "times")
    print("Number of draws:", no_wins)

Player 1 won 7 times
Player 2 won 4 times
Number of draws: 489


In [0]:
#Save second dataset as csv file
df2.to_csv(r'OXO_2.csv',index = None, header=True)

In [39]:
model_2 = tree.DecisionTreeClassifier()
inputs_2 = df2.drop(['Move_NP', 'Game No.', 'Past_Player', 'New_Player'], axis='columns')
target_2 = df2['Move_NP']
target_2 = target_2.astype('int')

model_2.fit(inputs_2, target_2)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

#Third data recording

In [40]:
df3 = pd.DataFrame(columns=['Game No.','Past_Player','New_Player','Move_NP'])
df3.head()

Unnamed: 0,Game No.,Past_Player,New_Player,Move_NP


In [42]:
def UCT(selector, rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()

        # Select
        while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expand
        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves) 
            state.DoMove(m)
            node = node.AddChild(m, state)  # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.GetMoves() != []: # while state is non-terminal
            #print("Moves", state.GetMoves())
            if(selector <= 8):
              board_re = np.reshape(state.board,(-1,9))
              #print(board_re)
              # Emulate do-while to continue predicting if the answer is not within the possible moves
              #while True:
              prediction = model_2.predict(board_re)
              pred = prediction.astype(np.int64)
              #print(pred[0])
                #if (pred[0] in state.GetMoves()):
                  #break;
              #print(prediction)

              #print(type(pred[0]))
              if (pred[0] in state.GetMoves()):
                state.DoMove(pred[0])
              else:
                state.DoMove(random.choice(state.GetMoves()))
            else:
            #print(type(random.choice(state.GetMoves)))
              state.DoMove(random.choice(state.GetMoves()))

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # Output some information about the tree - can be omitted
    #if verbose: print(rootnode.TreeToString(0))
    #else: print(rootnode.ChildrenToString())

    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
                
def UCTPlayGame(x, counter):
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """
    state = OXOState() # uncomment to play OXO
    while state.GetMoves() != []:
        #print(str(state))
        for i in range (9):
          df3.loc[counter,i] = state.board[i]  

        randomSel = random.randint(0,9)   
        if state.playerJustMoved == 1:
            m = UCT(selector=randomSel, rootstate=state, itermax=1000, verbose=False)  # play with values for itermax and verbose = True
        else:
            m = UCT(selector=randomSel, rootstate=state, itermax=100, verbose=False)
        #print("Best Move: " + str(m) + "\n")
        df3.loc[counter,'Move_NP'] = m                                 #Prints the best move
        df3.loc[counter,'Past_Player'] = state.playerJustMoved         #Prints the player that made the last move
        df3.loc[counter,'New_Player'] = 3 - state.playerJustMoved      #Prints the player thai is about to play the 'best move'
        df3.loc[counter,'Game No.'] = x + 1                            #Pints the numer of game
        counter += 1
        #print("Game no. ", x)
        state.DoMove(m)
        if state.GetResult(state.playerJustMoved) != False:
            #print(str(state))
            break
    if state.GetResult(state.playerJustMoved) == 1.0:
        flag = state.playerJustMoved
        #print("Player " + str(state.playerJustMoved) + " wins!")
    elif state.GetResult(state.playerJustMoved) == 0.0:
        #print("Player " + str(3 - state.playerJustMoved) + " wins!")
        flag = 3 - state.playerJustMoved
    else: 
        #print("Nobody wins!")
        flag = 0
    return counter, flag
if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players. 
    """
    p1_wins = 0
    p2_wins = 0
    no_wins = 0
    counter = 0
    for x in range(500):
        new_row, flag = UCTPlayGame(x,counter)
        if (flag == 1):
          p1_wins = p1_wins + 1
        elif(flag == 2):
          p2_wins = p2_wins + 1
        elif (flag == 0):
          no_wins = no_wins + 1
        counter = new_row
    print("Player 1 won", p1_wins, "times")
    print("Player 2 won", p2_wins, "times")
    print("Number of draws:", no_wins)

Player 1 won 10 times
Player 2 won 5 times
Number of draws: 485


In [0]:
#Write dataframe in csv file
df3.to_csv(r'OXO_3.csv',index = None, header=True)

In [44]:
#Decision Tree Classifier after 2 iterations
model_3 = tree.DecisionTreeClassifier()
inputs_3 = df3.drop(['Move_NP', 'Game No.', 'Past_Player', 'New_Player'], axis='columns')
target_3 = df3['Move_NP']
target_3 = target_3.astype('int')

model_3.fit(inputs_3, target_3)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

#Fourth data collection

In [46]:
df4 = pd.DataFrame(columns=['Game No.','Past_Player','New_Player','Move_NP'])

def UCT(selector, rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()

        # Select
        while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expand
        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves) 
            state.DoMove(m)
            node = node.AddChild(m, state)  # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.GetMoves() != []: # while state is non-terminal
            #print("Moves", state.GetMoves())
            if(selector <= 8):
              board_re = np.reshape(state.board,(-1,9))
              #print(board_re)
              # Emulate do-while to continue predicting if the answer is not within the possible moves
              #while True:
              prediction = model_3.predict(board_re)
              pred = prediction.astype(np.int64)
              #print(pred[0])
                #if (pred[0] in state.GetMoves()):
                  #break;
              #print(prediction)

              #print(type(pred[0]))
              if (pred[0] in state.GetMoves()):
                state.DoMove(pred[0])
              else:
                state.DoMove(random.choice(state.GetMoves()))
            else:
            #print(type(random.choice(state.GetMoves)))
              state.DoMove(random.choice(state.GetMoves()))

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # Output some information about the tree - can be omitted
    #if verbose: print(rootnode.TreeToString(0))
    #else: print(rootnode.ChildrenToString())

    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
                
def UCTPlayGame(x, counter):
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """
    state = OXOState() # uncomment to play OXO
    while state.GetMoves() != []:
        #print(str(state))
        for i in range (9):
          df4.loc[counter,i] = state.board[i]  

        randomSel = random.randint(0,9)   
        if state.playerJustMoved == 1:
            m = UCT(selector=randomSel, rootstate=state, itermax=1000, verbose=False)  # play with values for itermax and verbose = True
        else:
            m = UCT(selector=randomSel, rootstate=state, itermax=50, verbose=False)
        #print("Best Move: " + str(m) + "\n")
        df4.loc[counter,'Move_NP'] = m                                 #Prints the best move
        df4.loc[counter,'Past_Player'] = state.playerJustMoved         #Prints the player that made the last move
        df4.loc[counter,'New_Player'] = 3 - state.playerJustMoved      #Prints the player thai is about to play the 'best move'
        df4.loc[counter,'Game No.'] = x + 1                            #Pints the numer of game
        counter += 1
        #print("Game no. ", x)
        state.DoMove(m)
        if state.GetResult(state.playerJustMoved) != False:
            #print(str(state))
            break
    if state.GetResult(state.playerJustMoved) == 1.0:
        flag = state.playerJustMoved
        #print("Player " + str(state.playerJustMoved) + " wins!")
    elif state.GetResult(state.playerJustMoved) == 0.0:
        #print("Player " + str(3 - state.playerJustMoved) + " wins!")
        flag = 3 - state.playerJustMoved
    else: 
        #print("Nobody wins!")
        flag = 0
    return counter, flag
if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players. 
    """
    p1_wins = 0
    p2_wins = 0
    no_wins = 0
    counter = 0
    for x in range(500):
        new_row, flag = UCTPlayGame(x,counter)
        if (flag == 1):
          p1_wins = p1_wins + 1
        elif(flag == 2):
          p2_wins = p2_wins + 1
        elif (flag == 0):
          no_wins = no_wins + 1
        counter = new_row
    print("Player 1 won", p1_wins, "times")
    print("Player 2 won", p2_wins, "times")
    print("Number of draws:", no_wins)

    #Write dataframe in csv file
    df4.to_csv(r'OXO_4.csv',index = None, header=True)

Player 1 won 29 times
Player 2 won 30 times
Number of draws: 441


In [56]:
#Decision Tree Classifier after 3 iterations
model_4 = tree.DecisionTreeClassifier()
inputs_4 = df4.drop(['Move_NP', 'Game No.', 'Past_Player', 'New_Player'], axis='columns')
target_4 = df4['Move_NP']
target_4 = target_4.astype('int')

model_4.fit(inputs_4, target_4)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

#Fifth data collection

In [58]:
df5 = pd.DataFrame(columns=['Game No.','Past_Player','New_Player','Move_NP'])

def UCT(selector, rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()

        # Select
        while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expand
        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves) 
            state.DoMove(m)
            node = node.AddChild(m, state)  # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.GetMoves() != []: # while state is non-terminal
            #print("Moves", state.GetMoves())
            if(selector <= 8):
              board_re = np.reshape(state.board,(-1,9))
              #print(board_re)
              # Emulate do-while to continue predicting if the answer is not within the possible moves
              #while True:
              prediction = model_4.predict(board_re)
              pred = prediction.astype(np.int64)
              #print(pred[0])
                #if (pred[0] in state.GetMoves()):
                  #break;
              #print(prediction)

              #print(type(pred[0]))
              if (pred[0] in state.GetMoves()):
                state.DoMove(pred[0])
              else:
                state.DoMove(random.choice(state.GetMoves()))
            else:
            #print(type(random.choice(state.GetMoves)))
              state.DoMove(random.choice(state.GetMoves()))

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # Output some information about the tree - can be omitted
    #if verbose: print(rootnode.TreeToString(0))
    #else: print(rootnode.ChildrenToString())

    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
                
def UCTPlayGame(x, counter):
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """
    state = OXOState() # uncomment to play OXO
    while state.GetMoves() != []:
        #print(str(state))
        for i in range (9):
          df5.loc[counter,i] = state.board[i]  

        randomSel = random.randint(0,9)   
        if state.playerJustMoved == 1:
            m = UCT(selector=randomSel, rootstate=state, itermax=1000, verbose=False)  # play with values for itermax and verbose = True
        else:
            m = UCT(selector=randomSel, rootstate=state, itermax=100, verbose=False)
        #print("Best Move: " + str(m) + "\n")
        df5.loc[counter,'Move_NP'] = m                                 #Prints the best move
        df5.loc[counter,'Past_Player'] = state.playerJustMoved         #Prints the player that made the last move
        df5.loc[counter,'New_Player'] = 3 - state.playerJustMoved      #Prints the player thai is about to play the 'best move'
        df5.loc[counter,'Game No.'] = x + 1                            #Pints the numer of game
        counter += 1
        #print("Game no. ", x)
        state.DoMove(m)
        if state.GetResult(state.playerJustMoved) != False:
            #print(str(state))
            break
    if state.GetResult(state.playerJustMoved) == 1.0:
        flag = state.playerJustMoved
        #print("Player " + str(state.playerJustMoved) + " wins!")
    elif state.GetResult(state.playerJustMoved) == 0.0:
        #print("Player " + str(3 - state.playerJustMoved) + " wins!")
        flag = 3 - state.playerJustMoved
    else: 
        #print("Nobody wins!")
        flag = 0
    return counter, flag
if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players. 
    """
    p1_wins = 0
    p2_wins = 0
    no_wins = 0
    counter = 0
    for x in range(500):
        new_row, flag = UCTPlayGame(x,counter)
        if (flag == 1):
          p1_wins = p1_wins + 1
        elif(flag == 2):
          p2_wins = p2_wins + 1
        elif (flag == 0):
          no_wins = no_wins + 1
        counter = new_row
    print("Player 1 won", p1_wins, "times")
    print("Player 2 won", p2_wins, "times")
    print("Number of draws:", no_wins)

    #Write dataframe in csv file
    df5.to_csv(r'OXO_5.csv',index = None, header=True)

Player 1 won 6 times
Player 2 won 4 times
Number of draws: 490


In [59]:
#Decision Tree Classifier after 3 iterations
model_5 = tree.DecisionTreeClassifier()
inputs_5 = df5.drop(['Move_NP', 'Game No.', 'Past_Player', 'New_Player'], axis='columns')
target_5 = df5['Move_NP']
target_5 = target_5.astype('int')

model_5.fit(inputs_5, target_5)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

#Sixth data collection

In [60]:
df6 = pd.DataFrame(columns=['Game No.','Past_Player','New_Player','Move_NP'])

def UCT(selector, rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()

        # Select
        while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expand
        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves) 
            state.DoMove(m)
            node = node.AddChild(m, state)  # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.GetMoves() != []: # while state is non-terminal
            #print("Moves", state.GetMoves())
            if(selector <= 8):
              board_re = np.reshape(state.board,(-1,9))
              #print(board_re)
              # Emulate do-while to continue predicting if the answer is not within the possible moves
              #while True:
              prediction = model_5.predict(board_re)
              pred = prediction.astype(np.int64)
              #print(pred[0])
                #if (pred[0] in state.GetMoves()):
                  #break;
              #print(prediction)

              #print(type(pred[0]))
              if (pred[0] in state.GetMoves()):
                state.DoMove(pred[0])
              else:
                state.DoMove(random.choice(state.GetMoves()))
            else:
            #print(type(random.choice(state.GetMoves)))
              state.DoMove(random.choice(state.GetMoves()))

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # Output some information about the tree - can be omitted
    #if verbose: print(rootnode.TreeToString(0))
    #else: print(rootnode.ChildrenToString())

    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
                
def UCTPlayGame(x, counter):
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """
    state = OXOState() # uncomment to play OXO
    while state.GetMoves() != []:
        #print(str(state))
        for i in range (9):
          df6.loc[counter,i] = state.board[i]  

        randomSel = random.randint(0,9)   
        if state.playerJustMoved == 1:
            m = UCT(selector=randomSel, rootstate=state, itermax=1000, verbose=False)  # play with values for itermax and verbose = True
        else:
            m = UCT(selector=randomSel, rootstate=state, itermax=100, verbose=False)
        #print("Best Move: " + str(m) + "\n")
        df6.loc[counter,'Move_NP'] = m                                 #Prints the best move
        df6.loc[counter,'Past_Player'] = state.playerJustMoved         #Prints the player that made the last move
        df6.loc[counter,'New_Player'] = 3 - state.playerJustMoved      #Prints the player thai is about to play the 'best move'
        df6.loc[counter,'Game No.'] = x + 1                            #Pints the numer of game
        counter += 1
        #print("Game no. ", x)
        state.DoMove(m)
        if state.GetResult(state.playerJustMoved) != False:
            #print(str(state))
            break
    if state.GetResult(state.playerJustMoved) == 1.0:
        flag = state.playerJustMoved
        #print("Player " + str(state.playerJustMoved) + " wins!")
    elif state.GetResult(state.playerJustMoved) == 0.0:
        #print("Player " + str(3 - state.playerJustMoved) + " wins!")
        flag = 3 - state.playerJustMoved
    else: 
        #print("Nobody wins!")
        flag = 0
    return counter, flag
if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players. 
    """
    p1_wins = 0
    p2_wins = 0
    no_wins = 0
    counter = 0
    for x in range(500):
        new_row, flag = UCTPlayGame(x,counter)
        if (flag == 1):
          p1_wins = p1_wins + 1
        elif(flag == 2):
          p2_wins = p2_wins + 1
        elif (flag == 0):
          no_wins = no_wins + 1
        counter = new_row
    print("Player 1 won", p1_wins, "times")
    print("Player 2 won", p2_wins, "times")
    print("Number of draws:", no_wins)

    #Write dataframe in csv file
    df6.to_csv(r'OXO_6.csv',index = None, header=True)

Player 1 won 64 times
Player 2 won 27 times
Number of draws: 409


In [61]:
#Decision Tree Classifier after 3 iterations
model_6 = tree.DecisionTreeClassifier()
inputs_6 = df6.drop(['Move_NP', 'Game No.', 'Past_Player', 'New_Player'], axis='columns')
target_6 = df6['Move_NP']
target_6 = target_6.astype('int')

model_6.fit(inputs_6, target_6)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

#Seventh Data Collection

In [63]:
df7 = pd.DataFrame(columns=['Game No.','Past_Player','New_Player','Move_NP'])

def UCT(selector, rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()

        # Select
        while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expand
        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves) 
            state.DoMove(m)
            node = node.AddChild(m, state)  # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.GetMoves() != []: # while state is non-terminal
            #print("Moves", state.GetMoves())
            if(selector <= 8):
              board_re = np.reshape(state.board,(-1,9))
              #print(board_re)
              # Emulate do-while to continue predicting if the answer is not within the possible moves
              #while True:
              prediction = model_6.predict(board_re)
              pred = prediction.astype(np.int64)
              #print(pred[0])
                #if (pred[0] in state.GetMoves()):
                  #break;
              #print(prediction)

              #print(type(pred[0]))
              if (pred[0] in state.GetMoves()):
                state.DoMove(pred[0])
              else:
                state.DoMove(random.choice(state.GetMoves()))
            else:
            #print(type(random.choice(state.GetMoves)))
              state.DoMove(random.choice(state.GetMoves()))

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # Output some information about the tree - can be omitted
    #if verbose: print(rootnode.TreeToString(0))
    #else: print(rootnode.ChildrenToString())

    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
                
def UCTPlayGame(x, counter):
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """
    state = OXOState() # uncomment to play OXO
    while state.GetMoves() != []:
        #print(str(state))
        for i in range (9):
          df7.loc[counter,i] = state.board[i]  

        randomSel = random.randint(0,9)   
        if state.playerJustMoved == 1:
            m = UCT(selector=randomSel, rootstate=state, itermax=1000, verbose=False)  # play with values for itermax and verbose = True
        else:
            m = UCT(selector=randomSel, rootstate=state, itermax=100, verbose=False)
        #print("Best Move: " + str(m) + "\n")
        df7.loc[counter,'Move_NP'] = m                                 #Prints the best move
        df7.loc[counter,'Past_Player'] = state.playerJustMoved         #Prints the player that made the last move
        df7.loc[counter,'New_Player'] = 3 - state.playerJustMoved      #Prints the player thai is about to play the 'best move'
        df7.loc[counter,'Game No.'] = x + 1                            #Pints the numer of game
        counter += 1
        #print("Game no. ", x)
        state.DoMove(m)
        if state.GetResult(state.playerJustMoved) != False:
            #print(str(state))
            break
    if state.GetResult(state.playerJustMoved) == 1.0:
        flag = state.playerJustMoved
        #print("Player " + str(state.playerJustMoved) + " wins!")
    elif state.GetResult(state.playerJustMoved) == 0.0:
        #print("Player " + str(3 - state.playerJustMoved) + " wins!")
        flag = 3 - state.playerJustMoved
    else: 
        #print("Nobody wins!")
        flag = 0
    return counter, flag
if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players. 
    """
    p1_wins = 0
    p2_wins = 0
    no_wins = 0
    counter = 0
    for x in range(500):
        new_row, flag = UCTPlayGame(x,counter)
        if (flag == 1):
          p1_wins = p1_wins + 1
        elif(flag == 2):
          p2_wins = p2_wins + 1
        elif (flag == 0):
          no_wins = no_wins + 1
        counter = new_row
    print("Player 1 won", p1_wins, "times")
    print("Player 2 won", p2_wins, "times")
    print("Number of draws:", no_wins)

Player 1 won 10 times
Player 2 won 9 times
Number of draws: 481


In [0]:
#Write dataframe 6 in csv file
df6.to_csv(r'OXO_6.csv',index = None, header=True)

In [0]:
#Write dataframe in csv file
df7.to_csv(r'OXO_7.csv',index = None, header=True)

In [65]:
#Decision Tree Classifier after 3 iterations
model_7 = tree.DecisionTreeClassifier()
inputs_7 = df7.drop(['Move_NP', 'Game No.', 'Past_Player', 'New_Player'], axis='columns')
target_7 = df7['Move_NP']
target_7 = target_7.astype('int')

model_7.fit(inputs_7, target_7)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

#Eight Data Collection

In [69]:
df8 = pd.DataFrame(columns=['Game No.','Past_Player','New_Player','Move_NP'])

def UCT(selector, rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()

        # Select
        while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expand
        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves) 
            state.DoMove(m)
            node = node.AddChild(m, state)  # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.GetMoves() != []: # while state is non-terminal
            #print("Moves", state.GetMoves())
            if(selector <= 8):
              board_re = np.reshape(state.board,(-1,9))
              #print(board_re)
              # Emulate do-while to continue predicting if the answer is not within the possible moves
              #while True:
              prediction = model_7.predict(board_re)
              pred = prediction.astype(np.int64)
              #print(pred[0])
                #if (pred[0] in state.GetMoves()):
                  #break;
              #print(prediction)

              #print(type(pred[0]))
              if (pred[0] in state.GetMoves()):
                state.DoMove(pred[0])
              else:
                state.DoMove(random.choice(state.GetMoves()))
            else:
            #print(type(random.choice(state.GetMoves)))
              state.DoMove(random.choice(state.GetMoves()))

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # Output some information about the tree - can be omitted
    #if verbose: print(rootnode.TreeToString(0))
    #else: print(rootnode.ChildrenToString())

    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
                
def UCTPlayGame(x, counter):
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """
    state = OXOState() # uncomment to play OXO
    while state.GetMoves() != []:
        #print(str(state))
        for i in range (9):
          df8.loc[counter,i] = state.board[i]  

        randomSel = random.randint(0,9)   
        if state.playerJustMoved == 1:
            m = UCT(selector=randomSel, rootstate=state, itermax=1000, verbose=False)  # play with values for itermax and verbose = True
        else:
            m = UCT(selector=randomSel, rootstate=state, itermax=100, verbose=False)
        #print("Best Move: " + str(m) + "\n")
        df8.loc[counter,'Move_NP'] = m                                 #Prints the best move
        df8.loc[counter,'Past_Player'] = state.playerJustMoved         #Prints the player that made the last move
        df8.loc[counter,'New_Player'] = 3 - state.playerJustMoved      #Prints the player thai is about to play the 'best move'
        df8.loc[counter,'Game No.'] = x + 1                            #Pints the numer of game
        counter += 1
        #print("Game no. ", x)
        state.DoMove(m)
        if state.GetResult(state.playerJustMoved) != False:
            #print(str(state))
            break
    if state.GetResult(state.playerJustMoved) == 1.0:
        flag = state.playerJustMoved
        #print("Player " + str(state.playerJustMoved) + " wins!")
    elif state.GetResult(state.playerJustMoved) == 0.0:
        #print("Player " + str(3 - state.playerJustMoved) + " wins!")
        flag = 3 - state.playerJustMoved
    else: 
        #print("Nobody wins!")
        flag = 0
    return counter, flag
if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players. 
    """
    p1_wins = 0
    p2_wins = 0
    no_wins = 0
    counter = 0
    for x in range(500):
        new_row, flag = UCTPlayGame(x,counter)
        if (flag == 1):
          p1_wins = p1_wins + 1
        elif(flag == 2):
          p2_wins = p2_wins + 1
        elif (flag == 0):
          no_wins = no_wins + 1
        counter = new_row
    print("Player 1 won", p1_wins, "times")
    print("Player 2 won", p2_wins, "times")
    print("Number of draws:", no_wins)

    #Write dataframe in csv file
    df8.to_csv(r'OXO_8.csv',index = None, header=True)

Player 1 won 78 times
Player 2 won 7 times
Number of draws: 415


In [0]:
#Write dataframe in csv file
df8.to_csv(r'OXO_8.csv',index = None, header=True)

In [73]:
#Decision Tree Classifier after 3 iterations
model_8 = tree.DecisionTreeClassifier()
inputs_8 = df8.drop(['Move_NP', 'Game No.', 'Past_Player', 'New_Player'], axis='columns')
target_8 = df8['Move_NP']
target_8 = target_8.astype('int')

model_8.fit(inputs_8, target_8)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

#Ninth Data Collection

In [77]:
df9 = pd.DataFrame(columns=['Game No.','Past_Player','New_Player','Move_NP'])

def UCT(selector, rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()

        # Select
        while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expand
        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves) 
            state.DoMove(m)
            node = node.AddChild(m, state)  # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.GetMoves() != []: # while state is non-terminal
            #print("Moves", state.GetMoves())
            if(selector <= 8):
              board_re = np.reshape(state.board,(-1,9))
              #print(board_re)
              # Emulate do-while to continue predicting if the answer is not within the possible moves
              #while True:
              prediction = model_8.predict(board_re)
              pred = prediction.astype(np.int64)
              #print(pred[0])
                #if (pred[0] in state.GetMoves()):
                  #break;
              #print(prediction)

              #print(type(pred[0]))
              if (pred[0] in state.GetMoves()):
                state.DoMove(pred[0])
              else:
                state.DoMove(random.choice(state.GetMoves()))
            else:
            #print(type(random.choice(state.GetMoves)))
              state.DoMove(random.choice(state.GetMoves()))

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # Output some information about the tree - can be omitted
    #if verbose: print(rootnode.TreeToString(0))
    #else: print(rootnode.ChildrenToString())

    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
                
def UCTPlayGame(x, counter):
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """
    state = OXOState() # uncomment to play OXO
    while state.GetMoves() != []:
        #print(str(state))
        for i in range (9):
          df9.loc[counter,i] = state.board[i]  

        randomSel = random.randint(0,9)   
        if state.playerJustMoved == 1:
            m = UCT(selector=randomSel, rootstate=state, itermax=1000, verbose=False)  # play with values for itermax and verbose = True
        else:
            m = UCT(selector=randomSel, rootstate=state, itermax=100, verbose=False)
        #print("Best Move: " + str(m) + "\n")
        df9.loc[counter,'Move_NP'] = m                                 #Prints the best move
        df9.loc[counter,'Past_Player'] = state.playerJustMoved         #Prints the player that made the last move
        df9.loc[counter,'New_Player'] = 3 - state.playerJustMoved      #Prints the player thai is about to play the 'best move'
        df9.loc[counter,'Game No.'] = x + 1                            #Pints the numer of game
        counter += 1
        #print("Game no. ", x)
        state.DoMove(m)
        if state.GetResult(state.playerJustMoved) != False:
            #print(str(state))
            break
    if state.GetResult(state.playerJustMoved) == 1.0:
        flag = state.playerJustMoved
        #print("Player " + str(state.playerJustMoved) + " wins!")
    elif state.GetResult(state.playerJustMoved) == 0.0:
        #print("Player " + str(3 - state.playerJustMoved) + " wins!")
        flag = 3 - state.playerJustMoved
    else: 
        #print("Nobody wins!")
        flag = 0
    return counter, flag
if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players. 
    """
    p1_wins = 0
    p2_wins = 0
    no_wins = 0
    counter = 0
    for x in range(500):
        new_row, flag = UCTPlayGame(x,counter)
        if (flag == 1):
          p1_wins = p1_wins + 1
        elif(flag == 2):
          p2_wins = p2_wins + 1
        elif (flag == 0):
          no_wins = no_wins + 1
        counter = new_row
    print("Player 1 won", p1_wins, "times")
    print("Player 2 won", p2_wins, "times")
    print("Number of draws:", no_wins)

    #Write dataframe in csv file
    df9.to_csv(r'OXO_9.csv',index = None, header=True)

Player 1 won 10 times
Player 2 won 5 times
Number of draws: 485


In [78]:
#Decision Tree Classifier after 3 iterations
model_9 = tree.DecisionTreeClassifier()
inputs_9 = df9.drop(['Move_NP', 'Game No.', 'Past_Player', 'New_Player'], axis='columns')
target_9 = df9['Move_NP']
target_9 = target_9.astype('int')

model_9.fit(inputs_9, target_9)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

#Tenth Model

In [79]:
df10 = pd.DataFrame(columns=['Game No.','Past_Player','New_Player','Move_NP'])

def UCT(selector, rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()

        # Select
        while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expand
        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves) 
            state.DoMove(m)
            node = node.AddChild(m, state)  # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.GetMoves() != []: # while state is non-terminal
            #print("Moves", state.GetMoves())
            if(selector <= 8):
              board_re = np.reshape(state.board,(-1,9))
              #print(board_re)
              # Emulate do-while to continue predicting if the answer is not within the possible moves
              #while True:
              prediction = model_9.predict(board_re)
              pred = prediction.astype(np.int64)
              #print(pred[0])
                #if (pred[0] in state.GetMoves()):
                  #break;
              #print(prediction)

              #print(type(pred[0]))
              if (pred[0] in state.GetMoves()):
                state.DoMove(pred[0])
              else:
                state.DoMove(random.choice(state.GetMoves()))
            else:
            #print(type(random.choice(state.GetMoves)))
              state.DoMove(random.choice(state.GetMoves()))

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # Output some information about the tree - can be omitted
    #if verbose: print(rootnode.TreeToString(0))
    #else: print(rootnode.ChildrenToString())

    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
                
def UCTPlayGame(x, counter):
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """
    state = OXOState() # uncomment to play OXO
    while state.GetMoves() != []:
        #print(str(state))
        for i in range (9):
          df10.loc[counter,i] = state.board[i]  

        randomSel = random.randint(0,9)   
        if state.playerJustMoved == 1:
            m = UCT(selector=randomSel, rootstate=state, itermax=1000, verbose=False)  # play with values for itermax and verbose = True
        else:
            m = UCT(selector=randomSel, rootstate=state, itermax=100, verbose=False)
        #print("Best Move: " + str(m) + "\n")
        df10.loc[counter,'Move_NP'] = m                                 #Prints the best move
        df10.loc[counter,'Past_Player'] = state.playerJustMoved         #Prints the player that made the last move
        df10.loc[counter,'New_Player'] = 3 - state.playerJustMoved      #Prints the player thai is about to play the 'best move'
        df10.loc[counter,'Game No.'] = x + 1                            #Pints the numer of game
        counter += 1
        #print("Game no. ", x)
        state.DoMove(m)
        if state.GetResult(state.playerJustMoved) != False:
            #print(str(state))
            break
    if state.GetResult(state.playerJustMoved) == 1.0:
        flag = state.playerJustMoved
        #print("Player " + str(state.playerJustMoved) + " wins!")
    elif state.GetResult(state.playerJustMoved) == 0.0:
        #print("Player " + str(3 - state.playerJustMoved) + " wins!")
        flag = 3 - state.playerJustMoved
    else: 
        #print("Nobody wins!")
        flag = 0
    return counter, flag
if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players. 
    """
    p1_wins = 0
    p2_wins = 0
    no_wins = 0
    counter = 0
    for x in range(500):
        new_row, flag = UCTPlayGame(x,counter)
        if (flag == 1):
          p1_wins = p1_wins + 1
        elif(flag == 2):
          p2_wins = p2_wins + 1
        elif (flag == 0):
          no_wins = no_wins + 1
        counter = new_row
    print("Player 1 won", p1_wins, "times")
    print("Player 2 won", p2_wins, "times")
    print("Number of draws:", no_wins)

Player 1 won 19 times
Player 2 won 43 times
Number of draws: 438


In [0]:
#Write dataframe in csv file
df10.to_csv(r'OXO_10.csv',index = None, header=True)

In [80]:
#Decision Tree Classifier after 3 iterations
model_10 = tree.DecisionTreeClassifier()
inputs_10 = df10.drop(['Move_NP', 'Game No.', 'Past_Player', 'New_Player'], axis='columns')
target_10 = df10['Move_NP']
target_10 = target_10.astype('int')

model_10.fit(inputs_10, target_10)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

#Eleventh Model

In [81]:
df11 = pd.DataFrame(columns=['Game No.','Past_Player','New_Player','Move_NP'])

def UCT(selector, rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()

        # Select
        while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expand
        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves) 
            state.DoMove(m)
            node = node.AddChild(m, state)  # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.GetMoves() != []: # while state is non-terminal
            #print("Moves", state.GetMoves())
            if(selector <= 8):
              board_re = np.reshape(state.board,(-1,9))
              #print(board_re)
              # Emulate do-while to continue predicting if the answer is not within the possible moves
              #while True:
              prediction = model_10.predict(board_re)
              pred = prediction.astype(np.int64)
              #print(pred[0])
                #if (pred[0] in state.GetMoves()):
                  #break;
              #print(prediction)

              #print(type(pred[0]))
              if (pred[0] in state.GetMoves()):
                state.DoMove(pred[0])
              else:
                state.DoMove(random.choice(state.GetMoves()))
            else:
            #print(type(random.choice(state.GetMoves)))
              state.DoMove(random.choice(state.GetMoves()))

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # Output some information about the tree - can be omitted
    #if verbose: print(rootnode.TreeToString(0))
    #else: print(rootnode.ChildrenToString())

    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
                
def UCTPlayGame(x, counter):
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """
    state = OXOState() # uncomment to play OXO
    while state.GetMoves() != []:
        #print(str(state))
        for i in range (9):
          df11.loc[counter,i] = state.board[i]  

        randomSel = random.randint(0,9)   
        if state.playerJustMoved == 1:
            m = UCT(selector=randomSel, rootstate=state, itermax=1000, verbose=False)  # play with values for itermax and verbose = True
        else:
            m = UCT(selector=randomSel, rootstate=state, itermax=100, verbose=False)
        #print("Best Move: " + str(m) + "\n")
        df11.loc[counter,'Move_NP'] = m                                 #Prints the best move
        df11.loc[counter,'Past_Player'] = state.playerJustMoved         #Prints the player that made the last move
        df11.loc[counter,'New_Player'] = 3 - state.playerJustMoved      #Prints the player thai is about to play the 'best move'
        df11.loc[counter,'Game No.'] = x + 1                            #Pints the numer of game
        counter += 1
        #print("Game no. ", x)
        state.DoMove(m)
        if state.GetResult(state.playerJustMoved) != False:
            #print(str(state))
            break
    if state.GetResult(state.playerJustMoved) == 1.0:
        flag = state.playerJustMoved
        #print("Player " + str(state.playerJustMoved) + " wins!")
    elif state.GetResult(state.playerJustMoved) == 0.0:
        #print("Player " + str(3 - state.playerJustMoved) + " wins!")
        flag = 3 - state.playerJustMoved
    else: 
        #print("Nobody wins!")
        flag = 0
    return counter, flag
if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players. 
    """
    p1_wins = 0
    p2_wins = 0
    no_wins = 0
    counter = 0
    for x in range(500):
        new_row, flag = UCTPlayGame(x,counter)
        if (flag == 1):
          p1_wins = p1_wins + 1
        elif(flag == 2):
          p2_wins = p2_wins + 1
        elif (flag == 0):
          no_wins = no_wins + 1
        counter = new_row
    print("Player 1 won", p1_wins, "times")
    print("Player 2 won", p2_wins, "times")
    print("Number of draws:", no_wins)

Player 1 won 4 times
Player 2 won 20 times
Number of draws: 476


In [0]:
#Write dataframe in csv file
df11.to_csv(r'OXO_11.csv',index = None, header=True)

In [85]:
#Decision Tree Classifier after 3 iterations
model_11 = tree.DecisionTreeClassifier()
inputs_11 = df11.drop(['Move_NP', 'Game No.', 'Past_Player', 'New_Player'], axis='columns')
target_11 = df11['Move_NP']
target_11 = target_11.astype('int')

model_11.fit(inputs_11, target_11)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=None, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

#First Evaluation
Agent 11 vs the past 10 agents

In [0]:
df11 = pd.DataFrame(columns=['Game No.','Past_Player','New_Player','Move_NP'])

def UCT(selector, rootstate, itermax, verbose = False):
    """ Conduct a UCT search for itermax iterations starting from rootstate.
        Return the best move from the rootstate.
        Assumes 2 alternating players (player 1 starts), with game results in the range [0.0, 1.0]."""

    rootnode = Node(state = rootstate)

    for i in range(itermax):
        node = rootnode
        state = rootstate.Clone()

        # Select
        while node.untriedMoves == [] and node.childNodes != []: # node is fully expanded and non-terminal
            node = node.UCTSelectChild()
            state.DoMove(node.move)

        # Expand
        if node.untriedMoves != []:  # if we can expand (i.e. state/node is non-terminal)
            m = random.choice(node.untriedMoves) 
            state.DoMove(m)
            node = node.AddChild(m, state)  # add child and descend tree

        # Rollout - this can often be made orders of magnitude quicker using a state.GetRandomMove() function
        while state.GetMoves() != []: # while state is non-terminal
            #print("Moves", state.GetMoves())
            if(selector <= 8):
              board_re = np.reshape(state.board,(-1,9))
              #print(board_re)
              # Emulate do-while to continue predicting if the answer is not within the possible moves
              #while True:
              prediction = model_10.predict(board_re)
              pred = prediction.astype(np.int64)
              #print(pred[0])
                #if (pred[0] in state.GetMoves()):
                  #break;
              #print(prediction)

              #print(type(pred[0]))
              if (pred[0] in state.GetMoves()):
                state.DoMove(pred[0])
              else:
                state.DoMove(random.choice(state.GetMoves()))
            else:
            #print(type(random.choice(state.GetMoves)))
              state.DoMove(random.choice(state.GetMoves()))

        # Backpropagate
        while node != None: # backpropagate from the expanded node and work back to the root node
            node.Update(state.GetResult(node.playerJustMoved)) # state is terminal. Update node with result from POV of node.playerJustMoved
            node = node.parentNode

    # Output some information about the tree - can be omitted
    #if verbose: print(rootnode.TreeToString(0))
    #else: print(rootnode.ChildrenToString())

    return sorted(rootnode.childNodes, key = lambda c: c.visits)[-1].move # return the move that was most visited
                
def UCTPlayGame(x, counter):
    """ Play a sample game between two UCT players where each player gets a different number 
        of UCT iterations (= simulations = tree nodes).
    """
    state = OXOState() # uncomment to play OXO
    while state.GetMoves() != []:
        #print(str(state))
        for i in range (9):
          df11.loc[counter,i] = state.board[i]  

        randomSel = random.randint(0,9)   
        if state.playerJustMoved == 1:
            m = UCT(selector=randomSel, rootstate=state, itermax=1000, verbose=False)  # play with values for itermax and verbose = True
        else:
            m = UCT(selector=randomSel, rootstate=state, itermax=100, verbose=False)
        #print("Best Move: " + str(m) + "\n")
        df11.loc[counter,'Move_NP'] = m                                 #Prints the best move
        df11.loc[counter,'Past_Player'] = state.playerJustMoved         #Prints the player that made the last move
        df11.loc[counter,'New_Player'] = 3 - state.playerJustMoved      #Prints the player thai is about to play the 'best move'
        df11.loc[counter,'Game No.'] = x + 1                            #Pints the numer of game
        counter += 1
        #print("Game no. ", x)
        state.DoMove(m)
        if state.GetResult(state.playerJustMoved) != False:
            #print(str(state))
            break
    if state.GetResult(state.playerJustMoved) == 1.0:
        flag = state.playerJustMoved
        #print("Player " + str(state.playerJustMoved) + " wins!")
    elif state.GetResult(state.playerJustMoved) == 0.0:
        #print("Player " + str(3 - state.playerJustMoved) + " wins!")
        flag = 3 - state.playerJustMoved
    else: 
        #print("Nobody wins!")
        flag = 0
    return counter, flag
if __name__ == "__main__":
    """ Play a single game to the end using UCT for both players. 
    """
    p1_wins = 0
    p2_wins = 0
    no_wins = 0
    counter = 0
    for x in range(500):
        new_row, flag = UCTPlayGame(x,counter)
        if (flag == 1):
          p1_wins = p1_wins + 1
        elif(flag == 2):
          p2_wins = p2_wins + 1
        elif (flag == 0):
          no_wins = no_wins + 1
        counter = new_row
    print("Player 1 won", p1_wins, "times")
    print("Player 2 won", p2_wins, "times")
    print("Number of draws:", no_wins)