# Assignment 6

### Karl & Kailash - Group 45

In [1]:
import numpy as np
import math
import random
import copy

In [2]:
import numpy as np
import math
import random

class TicTacToeNode:
    """One node of the Monte Carlo search tree, wrapping a single game state."""

    def __init__(self, state, parent=None, move=None):
        """Create a node for ``state``.

        Args:
            state: Game state. Must provide ``get_available_moves()`` and
                ``make_move(move)``. If it has a ``winner`` attribute that is
                not ``None`` the state is treated as finished.
            parent: Node this one was expanded from (``None`` for the root).
            move: Move that was played on the parent's state to reach ``state``.
        """
        self.state = state
        self.parent = parent
        self.move = move
        self.visits = 0      # number of simulations that passed through this node
        self.wins = 0        # sum of the results passed to update()
        self.children = []
        # A finished game must not be expanded further, even when empty cells
        # remain on the board (e.g. a win before the board is full).
        if getattr(state, "winner", None) is not None:
            self.untried_moves = []
        else:
            self.untried_moves = list(state.get_available_moves())

    def select_child(self, exploration=1.4):
        """Return the child with the highest UCT score.

        Args:
            exploration: Weight of the exploration term (the constant ``C`` in
                the UCT formula).

        Raises:
            ValueError: If the node has no children.
        """
        # max(..., 1) keeps log() defined if this node has not been updated yet.
        log_parent_visits = math.log(max(self.visits, 1))

        def uct(child):
            if child.visits == 0:
                # Unvisited children are always tried first; this also avoids
                # dividing by zero.
                return math.inf
            exploitation_term = child.wins / child.visits
            exploration_term = exploration * math.sqrt(log_parent_visits / child.visits)
            return exploitation_term + exploration_term

        return max(self.children, key=uct)

    def expand(self):
        """Play one random untried move and return the resulting new child node.

        Raises:
            IndexError: If there are no untried moves left.
        """
        move = random.choice(self.untried_moves)
        # Work on a copy so the state stored in this node is left untouched.
        child_state = copy.deepcopy(self.state)
        child_state.make_move(move)

        new_node = TicTacToeNode(child_state, parent=self, move=move)
        self.children.append(new_node)
        self.untried_moves.remove(move)
        return new_node

    def update(self, result):
        """Record one simulation; ``result`` is added to ``wins`` unless it is ``None``."""
        self.visits += 1
        if result is not None:
            self.wins += result

    def get_best_move(self):
        """Return the move of the most visited child.

        Ties on the visit count are broken in favour of the larger move value.

        Raises:
            IndexError: If the node has no children.
        """
        if not self.children:
            raise IndexError("get_best_move() called on a node without children")
        best_child = max(self.children, key=lambda child: (child.visits, child.move))
        return best_child.move

class MCSTAgent:
    """Chooses moves with Monte Carlo tree search (selection, expansion, rollout, backpropagation)."""

    def __init__(self, iterations=10000):
        """Create an agent.

        Args:
            iterations: Number of search iterations run per call to ``get_move``.
        """
        self.root = None
        self.iterations = iterations

    def get_move(self, state):
        """Return the move to play from ``state``.

        A fresh search tree rooted at ``state`` is built on every call. The
        state object itself is never modified: expansion and rollouts work on
        copies.

        Args:
            state: Current game state (a ``TicTacToe``-like object).

        Returns:
            The immediate winning/blocking move reported by
            ``state.find_move()`` if there is one, otherwise the move of the
            most visited child of the root.
        """
        self.root = TicTacToeNode(state)

        for _ in range(self.iterations):
            node = self.root

            # Selection: descend with UCT while the node is fully expanded.
            while not node.untried_moves and node.children:
                node = node.select_child()

            # Expansion: add one child, but never below a finished game.
            if node.untried_moves and node.state.winner is None:
                node = node.expand()

            # Simulation: play the game out on a private copy of the state.
            winner = self._rollout(node.state)

            # Backpropagation: every node on the path receives the same
            # rollout outcome, scored from the view of the player who moved
            # into that node.
            while node is not None:
                node.update(self._reward(node, winner))
                node = node.parent

        # An immediate win/block found by the heuristic takes precedence over
        # the search result.
        best_move = self.root.state.find_move()
        if best_move is None:
            best_move = self.root.get_best_move()

        return best_move

    @staticmethod
    def _rollout(state):
        """Play ``state`` to the end on a copy and return the resulting ``winner`` value."""
        if state.winner is not None:
            return state.winner

        # Copy first: the tree node keeps its own, unmodified state.
        sim = copy.deepcopy(state)
        while sim.winner is None:
            # Player 1 follows the win/block heuristic when it offers a move;
            # player -1 (and player 1 when the heuristic has nothing) moves
            # at random.
            move = sim.find_move()
            if sim.current_player == -1 or move is None:
                move = random.choice(sim.get_available_moves())
            sim.make_move(move)
        return sim.winner

    @staticmethod
    def _reward(node, winner):
        """Score ``winner`` for ``node``: 1 win, 0 draw, -1 loss for the player who moved into it."""
        if winner == 0:
            return 0
        # make_move() switches the player after every move, so the player who
        # produced this node's state is the opposite of its current player.
        mover = -node.state.current_player
        return 1 if winner == mover else -1


In [3]:
class TicTacToe:
    """Tic-tac-toe game state.

    The board is a flat list of nine cells in row-major order: 0 is empty,
    1 and -1 are the two players. ``winner`` is ``None`` while the game is
    running, 1 or -1 once that player has three in a line, and 0 for a tie.
    """

    # Index triples of all eight lines, in scan order: row i and column i for
    # i = 0..2, followed by the two diagonals.
    _LINES = (
        (0, 1, 2), (0, 3, 6),
        (3, 4, 5), (1, 4, 7),
        (6, 7, 8), (2, 5, 8),
        (0, 4, 8), (2, 4, 6),
    )

    def __init__(self):
        self.board = [0] * 9
        self.current_player = 1
        self.winner = None

    def get_available_moves(self):
        """Return the indices of all empty cells."""
        return [i for i, val in enumerate(self.board) if val == 0]

    def make_move(self, move):
        """Place the current player's mark on cell ``move``, update ``winner`` and switch player."""
        self.board[move] = self.current_player
        self.check_gameover()
        self.switch_player()

    def find_move(self):
        """Return a cell that completes or blocks a line of three, or ``None``.

        A move that wins for the current player is preferred over a move that
        only blocks the opponent.
        """
        own_target = 2 * self.current_player
        # First pass looks for the current player's own two-in-a-line, the
        # second pass for the opponent's.
        for target in (own_target, -own_target):
            for line in self._LINES:
                cells = [self.board[k] for k in line]
                if sum(cells) == target and 0 in cells:
                    return line[cells.index(0)]
        return None

    def switch_player(self):
        """Hand the turn to the other player."""
        self.current_player = -self.current_player

    def current_state(self):
        """Return the board as a 3x3 list of rows."""
        return [self.board[row * 3:row * 3 + 3] for row in range(3)]

    def check_gameover(self):
        """Set ``winner`` if a player has three in a line or the board is full.

        A completed line takes precedence over a full board, so a win on the
        last free cell is reported as a win rather than a tie.
        """
        for a, b, c in self._LINES:
            if self.board[a] == self.board[b] == self.board[c] != 0:
                self.winner = self.board[a]
                return
        if all(val != 0 for val in self.board):
            self.winner = 0

    def display(self):
        """Print the board as a grid of raw cell values."""
        print("-------------")
        for i in range(3):
            print(f"| {self.board[i*3]} | {self.board[i*3+1]} | {self.board[i*3+2]} |")
            print("-------------")

In [4]:
def play_game(c_start = True):
    """Play an interactive console game between a human (-1) and the MCST agent (1).

    Args:
        c_start: If true the agent makes the first move, otherwise the human does.
    """
    # Create a new TicTacToe game and an MCST agent
    game = TicTacToe()
    agent = MCSTAgent()

    # A new game starts with player 1 (the agent); switch once to let the human begin.
    if not c_start:
        game.switch_player()

    # Main game loop
    while game.winner is None:
        # Display the current state of the game
        game.display()

        if game.current_player == -1:
            # Human turn: keep asking until a free cell index is entered.
            while True:
                try:
                    move = int(input("Enter your move (0-8): "))
                except ValueError:
                    # Non-numeric input is treated like any other invalid move
                    # instead of aborting the game.
                    print("Invalid move. Try again.")
                    continue
                if move in game.get_available_moves():
                    game.make_move(move)
                    break
                print("Invalid move. Try again.")
        else:
            # Agent turn: search on a copy so the live game is not touched.
            game_copy = copy.deepcopy(game)
            move = agent.get_move(game_copy)
            print(f"Agent plays move {move}")
            game.make_move(move)

    # Display the final state of the game and the winner
    game.display()
    if game.winner == 0:
        print("Tie game!")
    elif game.winner == -1:
        print("Player wins!")
    else:
        print("Agent wins!")


In [5]:
# Start an interactive game with the default c_start=True, so the agent (player 1) moves first.
play_game()

-------------
| 0 | 0 | 0 |
-------------
| 0 | 0 | 0 |
-------------
| 0 | 0 | 0 |
-------------
Agent plays move 3
-------------
| 0 | 0 | 0 |
-------------
| 1 | 0 | 0 |
-------------
| 0 | 0 | 0 |
-------------


KeyboardInterrupt: Interrupted by user

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=1262dda2-abb7-4af7-a1b6-72164064af5a' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>