<a href="https://colab.research.google.com/github/MahdiTheGreat/Game-playing-systems/blob/main/AI_TTT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:

 # MCTS self-play

import numpy as np
import random
import time
from math import sqrt, log

class MCTSNode:
    def __init__(self, state, parent=None):
        self.state = state  # Tic-Tac-Toe board state
        self.parent = parent
        self.children = []
        self.visits = 0
        self.value = 0
        self.untried_moves = self.get_legal_moves()

    def get_legal_moves(self):
        return [(i, j) for i in range(3) for j in range(3) if self.state[i, j] == ""]

    def best_child(self, exploration_weight=1.41):
        return max(self.children, key=lambda child: child.value / (child.visits + 1e-6) + exploration_weight * sqrt(log(self.visits + 1) / (child.visits + 1e-6)))

    def expand(self):
        if not self.untried_moves:
            return self  # Return self if no moves left to expand
        move = self.untried_moves.pop()
        new_state = self.state.copy()
        new_state[move] = "X" if np.count_nonzero(self.state == "X") <= np.count_nonzero(self.state == "O") else "O"
        child_node = MCTSNode(new_state, parent=self)
        self.children.append(child_node)
        return child_node

    def update(self, result):
        self.visits += 1
        self.value += result

class MCTS:
    def __init__(self, time_limit=5.0, max_iterations=1000):
        self.time_limit = time_limit
        self.max_iterations = max_iterations

    def search(self, root):
        start_time = time.time()
        iterations = 0

        if root.untried_moves:
            root = root.expand()  # Ensure root expands at least once

        while time.time() - start_time < self.time_limit and iterations < self.max_iterations:
            node = self.select(root)
            if node.untried_moves:
                node = node.expand()
            result = self.simulate(node.state)
            self.backpropagate(node, result)
            iterations += 1

        return max(root.children, key=lambda c: c.visits, default=root)  # Choose best move by visits

    def select(self, node):
        while node.children and not node.untried_moves:
            node = node.best_child()
        return node

    def simulate(self, state):
        current_player = "X" if np.count_nonzero(state == "X") <= np.count_nonzero(state == "O") else "O"
        while True:
            legal_moves = [(i, j) for i in range(3) for j in range(3) if state[i, j] == ""]
            if not legal_moves:
                return 0  # Draw
            move = random.choice(legal_moves)
            state[move] = current_player
            if self.check_winner(state, current_player):
                return 1 if current_player == "X" else -1
            current_player = "O" if current_player == "X" else "X"

    def backpropagate(self, node, result):
        while node:
            node.update(result)
            result = -result  # Alternate win/loss perspective
            node = node.parent

    def check_winner(self, state, player):
        for i in range(3):
            if all(state[i, :] == player) or all(state[:, i] == player):
                return True
        if all(state.diagonal() == player) or all(np.fliplr(state).diagonal() == player):
            return True
        return False

class TicTacToe:
    def __init__(self):
        self.state = np.full((3, 3), "")
        self.mcts = MCTS()

    def play_game(self):
        while True:
            print(self.state)
            root = MCTSNode(self.state)
            best_move = self.mcts.search(root)
            self.state = best_move.state
            if self.mcts.check_winner(self.state, "X"):
                print("X wins!")
                break
            elif self.mcts.check_winner(self.state, "O"):
                print("O wins!")
                break
            elif "" not in self.state:
                print("It's a draw!")
                break

if __name__ == "__main__":
    game = TicTacToe()
    game.play_game()


[['' '' '']
 ['' '' '']
 ['' '' '']]
X wins!


In [8]:
# MCTS vs user, doesnt work yet

import numpy as np
import random
import time
from math import sqrt, log
from IPython.display import clear_output
from google.colab import output

class MCTSNode:
    def __init__(self, state, parent=None):
        self.state = state  # Tic-Tac-Toe board state
        self.parent = parent
        self.children = []
        self.visits = 0
        self.value = 0
        self.untried_moves = self.get_legal_moves()

    def get_legal_moves(self):
        return [(i, j) for i in range(3) for j in range(3) if self.state[i, j] == ""]

    def best_child(self, exploration_weight=1.41):
        return max(self.children, key=lambda child: child.value / (child.visits + 1e-6) + exploration_weight * sqrt(log(self.visits + 1) / (child.visits + 1e-6)))

    def expand(self):
        if not self.untried_moves:
            return self  # Return self if no moves left to expand
        move = self.untried_moves.pop()
        new_state = self.state.copy()
        new_state[move] = "X" if np.count_nonzero(self.state == "X") <= np.count_nonzero(self.state == "O") else "O"
        child_node = MCTSNode(new_state, parent=self)
        self.children.append(child_node)
        return child_node

    def update(self, result):
        self.visits += 1
        self.value += result

class MCTS:
    def __init__(self, time_limit=5.0, max_iterations=1000):
        self.time_limit = time_limit
        self.max_iterations = max_iterations

    def search(self, root):
        start_time = time.time()
        iterations = 0

        if root.untried_moves:
            root = root.expand()  # Ensure root expands at least once

        while time.time() - start_time < self.time_limit and iterations < self.max_iterations:
            node = self.select(root)
            if node.untried_moves:
                node = node.expand()
            result = self.simulate(node.state)
            self.backpropagate(node, result)
            iterations += 1

        return max(root.children, key=lambda c: c.visits, default=root)  # Choose best move by visits

    def select(self, node):
        while node.children and not node.untried_moves:
            node = node.best_child()
        return node

    def simulate(self, state):
        current_player = "X" if np.count_nonzero(state == "X") <= np.count_nonzero(state == "O") else "O"
        while True:
            legal_moves = [(i, j) for i in range(3) for j in range(3) if state[i, j] == ""]
            if not legal_moves:
                return 0  # Draw
            move = random.choice(legal_moves)
            state[move] = current_player
            if self.check_winner(state, current_player):
                return 1 if current_player == "X" else -1
            current_player = "O" if current_player == "X" else "X"

    def backpropagate(self, node, result):
        while node:
            node.update(result)
            result = -result  # Alternate win/loss perspective
            node = node.parent

    def check_winner(self, state, player):
        for i in range(3):
            if all(state[i, :] == player) or all(state[:, i] == player):
                return True
        if all(state.diagonal() == player) or all(np.fliplr(state).diagonal() == player):
            return True
        return False

class TicTacToe:
    def __init__(self):
        self.state = np.full((3, 3), "")
        self.mcts = MCTS()
        self.human_player = "X"
        self.ai_player = "O"
        self.current_player = "X"

    def display_board(self):
        print("  0 1 2")
        for i in range(3):
            print(f"{i} {self.state[i][0]}|{self.state[i][1]}|{self.state[i][2]}")
            if i < 2:
                print("  -+-+-")
        print("\n")

    def play_game(self):
        while True:
            clear_output()
            self.display_board()

            if self.current_player == self.human_player:
                # Human's turn
                while True:
                    try:
                        row = int(input("Enter row (0-2): "))
                        col = int(input("Enter column (0-2): "))
                        if self.state[row, col] == "":
                            self.state[row, col] = self.human_player
                            self.current_player = self.ai_player  # Switch turn
                            break
                        else:
                            print("Invalid move, try again!")
                    except (ValueError, IndexError):
                        print("Invalid input! Enter numbers between 0 and 2.")
            else:
                # AI's turn (MCTS)
                print("AI is thinking...")
                time.sleep(1)
                root = MCTSNode(self.state)
                best_move = self.mcts.search(root)
                self.state = best_move.state
                self.current_player = self.human_player  # Switch turn

            if self.mcts.check_winner(self.state, "X"):
                self.display_board()
                print("X wins!")
                break
            elif self.mcts.check_winner(self.state, "O"):
                self.display_board()
                print("O wins!")
                break
            elif "" not in self.state:
                self.display_board()
                print("It's a draw!")
                break

if __name__ == "__main__":
    game = TicTacToe()
    game.play_game()


  0 1 2
0 X||
  -+-+-
1 ||
  -+-+-
2 ||


AI is thinking...
  0 1 2
0 X||
  -+-+-
1 X||
  -+-+-
2 X|O|O


X wins!
