# New Code for importing TicTacToe class

In [None]:
# Download the raw TicTacToe.py from GitHub
!curl -o TicTacToe.py https://raw.githubusercontent.com/Godunicornizek/SelfPlayAI/main/TicTacToe.py

# Reload the module in Colab
import importlib
import TicTacToe
importlib.reload(TicTacToe)

from TicTacToe import TicTacToe

tictactoe = TicTacToe()
neutral_state = tictactoe.change_perspective(state, player)

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100  9845  100  9845    0     0  45014      0 --:--:-- --:--:-- --:--:-- 45160
Mounted at /content/drive
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


NameError: name 'state' is not defined

# Old code for cloning into Git

In [None]:
# Clone the SelfPlayAI repository into the current working directory.
# git refuses to clone when the destination directory already exists and is
# not empty, so this is a one-time setup step per runtime.
!git clone https://github.com/Godunicornizek/SelfPlayAI.git

fatal: destination path 'SelfPlayAI' already exists and is not an empty directory.


In [None]:
# Fetch and merge the latest `main` branch from the `origin` remote.
!git pull origin main

From https://github.com/Godunicornizek/SelfPlayAI
 * branch            main       -> FETCH_HEAD
Already up to date.


In [None]:
# Discard uncommitted changes to tracked files (resets the working tree to HEAD),
# then bring the checkout up to date with origin/main.
# NOTE(review): `reset --hard` is destructive for local edits — run deliberately.
!git reset --hard
!git pull origin main

HEAD is now at 764f8dc Update TicTacToe game logic
From https://github.com/Godunicornizek/SelfPlayAI
 * branch            main       -> FETCH_HEAD
Already up to date.


In [None]:
# Make the cloned repository the working directory and list its contents.
%cd /content/SelfPlayAI
!ls

/content/SelfPlayAI
README.md  SelfPlayAI  TicTacToe.ipynb	TicTacToe.py


In [None]:
# Add the cloned repository to the import search path so its modules can be
# imported from any working directory (presumably for TicTacToe.py — confirm).
import sys
sys.path.append('/content/SelfPlayAI')

In [None]:
# Prompt for a GitHub token without echoing it, then point `origin` at an
# authenticated HTTPS URL.
import getpass
token = getpass.getpass("Enter GitHub token: ")

# NOTE(review): `git remote set-url` records this URL, token included, in the
# repository's git config in plain text. Consider a credential helper or a
# one-off authenticated push URL instead, and revoke the token if the repo
# directory is ever shared.
!git remote set-url origin https://GodunicornIzek:{token}@github.com/Godunicornizek/SelfPlayAI.git

Enter GitHub token: ··········


# TicTacToe Monte Carlo Tree Search

In [None]:
import numpy as np
import math
import random

In [None]:
from TicTacToe import TicTacToe

In [None]:
# Create the game object used by the cells below.
tictactoe = TicTacToe()

In [None]:
# Print whatever get_init() returns (the recorded output is a 3x3 array of zeros).
print(tictactoe.get_init())

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


In [None]:
class Node:
    """One node of the Monte Carlo search tree.

    Attributes:
        game: Game object providing get_valid_moves, get_next_state,
            change_perspective, get_value_and_terminated, get_opponent and
            get_opponent_value.
        args: Search settings; this class reads ``args['C']`` (exploration weight).
        state: Board stored at this node.
        parent: Parent Node, or None for the root.
        action_taken: Action that led from the parent to this node (None for the root).
        children: Child nodes created so far by expand().
        expandable_moves: Mask from game.get_valid_moves(state); an entry is
            set to 0 once the corresponding action has been expanded.
        visit_count: Number of times backpropagate() has passed through this node.
        value_sum: Sum of the values backpropagated through this node.
    """

    def __init__(self, game, args, state, parent=None, action_taken=None):
        self.game = game
        self.args = args
        self.state = state
        self.parent = parent
        self.action_taken = action_taken

        self.children = []
        self.expandable_moves = game.get_valid_moves(state)

        self.visit_count = 0
        self.value_sum = 0

    def is_fully_expanded(self):
        """Return True when no untried action remains and the node has at least one child.

        A node without children (for example one with no valid moves) is
        therefore never reported as fully expanded.
        """
        return np.sum(self.expandable_moves) == 0 and len(self.children) > 0

    def select(self):
        """Return the child with the highest UCB score (first one wins ties).

        Returns None when the node has no children.
        """
        best_child = None
        best_ucb = -np.inf

        for child in self.children:
            ucb = self.get_ucb(child)
            if ucb > best_ucb:
                best_ucb = ucb
                best_child = child

        return best_child

    def get_ucb(self, child):
        """Return the UCB1 score of ``child`` as seen from this node.

        The child's mean value (value_sum / visit_count) is mapped from
        [-1, 1] to [0, 1] and then inverted with ``1 - ...``: the child's
        statistics are accumulated with the sign flipped relative to this
        node (see backpropagate), so a low child value is a good outcome for
        the player choosing here.

        An unvisited child returns +inf so it is always tried first; this
        also avoids the division by zero that ``child.visit_count == 0``
        would otherwise cause.
        """
        if child.visit_count == 0:
            return math.inf

        q_value = 1 - ((child.value_sum / child.visit_count) + 1) / 2
        exploration = math.sqrt(math.log(self.visit_count) / child.visit_count)
        return q_value + self.args['C'] * exploration

    def expand(self):
        """Create, attach and return a child for one randomly chosen untried action."""
        action = np.random.choice(np.where(self.expandable_moves == 1)[0])
        # Mark the action as tried so it is not expanded twice.
        self.expandable_moves[action] = 0

        # Work on a copy so this node's own board is left untouched.
        child_state = self.state.copy()
        child_state = self.game.get_next_state(child_state, action, 1)
        # The move is played as player 1 and the board is then passed through
        # change_perspective(player=-1); presumably this makes the side to
        # move in the child appear as player 1 again — confirm in TicTacToe.py.
        child_state = self.game.change_perspective(child_state, player = -1)

        child = Node(self.game, self.args, child_state, self, action)
        self.children.append(child)
        return child

    def simulate(self):
        """Play random moves from this node's state until the game ends and return the result.

        If this node's state is already terminal, the value reported by the
        game is passed through get_opponent_value and returned. Otherwise a
        random rollout starts with player 1; the final value is returned
        unchanged when player 1 made the last move and passed through
        get_opponent_value when player -1 did.
        """
        value, is_terminal = self.game.get_value_and_terminated(self.state, self.action_taken)
        value = self.game.get_opponent_value(value)

        if is_terminal:
            return value

        rollout_state = self.state.copy()
        rollout_player = 1
        while True:
            valid_moves = self.game.get_valid_moves(rollout_state)
            action = np.random.choice(np.where(valid_moves == 1)[0])
            rollout_state = self.game.get_next_state(rollout_state, action, rollout_player)
            value, is_terminal = self.game.get_value_and_terminated(rollout_state, action)
            if is_terminal:
                if rollout_player == -1:
                    value = self.game.get_opponent_value(value)
                return value

            rollout_player = self.game.get_opponent(rollout_player)

    def backpropagate(self, value):
        """Add ``value`` to this node's statistics, then pass the opponent value up to the parent."""
        self.value_sum += value
        self.visit_count += 1

        # Each level up the tree receives the value via get_opponent_value.
        value = self.game.get_opponent_value(value)
        if self.parent is not None:
            self.parent.backpropagate(value)



In [None]:
class MCTS:
    """Monte Carlo Tree Search driver built on top of Node.

    Attributes:
        game: Game object handed to every Node; this class also reads
            ``game.action_size`` and calls ``game.get_opponent_value``.
        args: Search settings; ``args['num_searches']`` is the number of
            iterations per search() call (Node additionally reads ``args['C']``).
    """

    def __init__(self, game, args: dict):
        self.game = game
        self.args = args

    def search(self, state):
        """Run ``args['num_searches']`` iterations from ``state`` and return visit-based move weights.

        Returns:
            Array of length ``game.action_size`` holding each root child's
            visit count divided by the total. If the root ends up with no
            visited children (terminal ``state`` or ``num_searches == 0``)
            the array is all zeros rather than NaN.
        """
        root = Node(self.game, self.args, state)

        for _ in range(self.args['num_searches']):
            node = root

            # Phase 1: selection. Descend while the current node has no
            # untried actions and already has children, picking the child
            # with the best UCB score each time.
            while node.is_fully_expanded():
                node = node.select()

            # The value returned here refers to the player who made
            # node.action_taken, i.e. the opponent of the player to move at
            # this node, so it is passed through get_opponent_value before
            # being backpropagated. The terminal flag decides whether the
            # expansion/simulation step below runs at all.
            value, is_terminal = node.game.get_value_and_terminated(node.state, node.action_taken)
            value = self.game.get_opponent_value(value)

            # Phases 2 and 3: expand one new child and simulate from it.
            if not is_terminal:
                node = node.expand()
                value = node.simulate()

            # Phase 4: propagate the result back up to the root.
            node.backpropagate(value)

        action_probs = np.zeros(self.game.action_size)
        for child in root.children:
            action_probs[child.action_taken] = child.visit_count

        # Normalise only when something was visited; dividing by a zero total
        # would fill the result with NaN.
        total_visits = np.sum(action_probs)
        if total_visits > 0:
            action_probs /= total_visits
        return action_probs



# Testing MCTS

In [None]:
# Interactive test: the human plays as player 1, MCTS plays as player -1.
tictactoe = TicTacToe()
player = 1

args = {
    'C': 1.41,
    'num_searches': 1000
}
mcts = MCTS(tictactoe, args)

state = tictactoe.get_init()

while True:
    print(state)

    if player == 1:
        valid_moves = tictactoe.get_valid_moves(state)
        print("valid moves", [i for i in range(tictactoe.action_size) if valid_moves[i] == 1])

        # Non-numeric input is treated as an invalid move instead of
        # crashing the loop with a ValueError.
        try:
            action = int(input(f"{player}:"))
        except ValueError:
            print("action not valid")
            continue

        # Check the range first: an index >= action_size would raise
        # IndexError, and a negative index would silently wrap around to a
        # different cell.
        if not 0 <= action < tictactoe.action_size or valid_moves[action] == 0:
            print("action not valid")
            continue
    else:
        # The search is given the board after change_perspective for the
        # current player.
        neutral_state = tictactoe.change_perspective(state, player)
        mcts_probs = mcts.search(neutral_state)
        action = np.argmax(mcts_probs)


    state = tictactoe.get_next_state(state, action, player)

    value, is_terminal = tictactoe.get_value_and_terminated(state, action)

    if is_terminal:
        print(state)
        if value == 1:
            print(player, "won")
        else:
            print("draw")
        break

    player = tictactoe.get_opponent(player)

# Code for pushing to Github

The following code does not need to be run again:

In [None]:
#!git clone https://github.com/Godunicornizek/SelfPlayAI.git

Cloning into 'SelfPlayAI'...
remote: Enumerating objects: 17, done.[K
remote: Counting objects: 100% (17/17), done.[K
remote: Compressing objects: 100% (17/17), done.[K
remote: Total 17 (delta 6), reused 8 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (17/17), 10.84 KiB | 1.35 MiB/s, done.
Resolving deltas: 100% (6/6), done.


After finishing a round of work, run the following cells to push the changes to GitHub:

In [None]:
# Set the author name and e-mail that git records on commits made from this runtime.
!git config --global user.name "GodunicornIzek"
!git config --global user.email "godunicornizek@gmail.com"

In [None]:
if __name__ == "__main__":
    # Publish workflow: mount Google Drive, convert the notebook to a .py
    # script, then commit and push to GitHub. The steps are IPython magics
    # and shell escapes, so this only runs inside a notebook kernel.
    from google.colab import drive
    drive.mount('/content/drive')

    %cd /content/drive/MyDrive/Projects

    %cd /content/drive/MyDrive/Projects/SelfPlayAI
    # NOTE(review): this converts TicTacToeMCTS.ipynb, but the `git add`
    # below stages TicTacToe.ipynb / TicTacToe.py — confirm which notebook
    # this cell is meant to publish.
    !jupyter nbconvert --to python TicTacToeMCTS.ipynb

    !git status

    !git add TicTacToe.ipynb TicTacToe.py

    !git commit -m "Create TicTacToe MCTS class"

    import getpass
    token = getpass.getpass("Enter GitHub token: ")

    # NOTE(review): set-url stores the token inside the remote URL in the
    # repository's git config (here on Drive) in plain text — consider a
    # credential helper or resetting the URL after the push.
    !git remote set-url origin https://GodUnicornIzek:{token}@github.com/GodUnicornizek/SelfPlayAI.git

    !git push origin main

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive
/content/drive/MyDrive/Projects
mv: cannot stat '/content/drive/MyDrive/SelfPlayAI/TicTacToe.ipynb': No such file or directory
/content/drive/MyDrive/Projects/SelfPlayAI
[NbConvertApp] Converting notebook TicTacToe.ipynb to python
[NbConvertApp] Writing 9803 bytes to TicTacToe.py
On branch main
Your branch is up to date with 'origin/main'.

Changes to be committed:
  (use "git restore --staged <file>..." to unstage)
	[32mmodified:   TicTacToe.ipynb[m
	[32mmodified:   TicTacToe.py[m

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   TicTacToe.ipynb[m
	[31mmodified:   TicTacToe.py[m

[main 3563596] Update TicTacToe game logic
 2 files changed, 15 insertions(+), 11 deletions(-)
 rewrite TicTacToe.ipynb 

# New Code for pushing to Github

The following code does not need to be run again:

In [1]:
# One-time setup: clone the SelfPlayAI repository into the current working directory.
!git clone https://github.com/Godunicornizek/SelfPlayAI.git

Cloning into 'SelfPlayAI'...
remote: Enumerating objects: 37, done.[K
remote: Counting objects: 100% (37/37), done.[K
remote: Compressing objects: 100% (28/28), done.[K
remote: Total 37 (delta 15), reused 28 (delta 9), pack-reused 0 (from 0)[K
Receiving objects: 100% (37/37), 26.46 KiB | 3.78 MiB/s, done.
Resolving deltas: 100% (15/15), done.


After finishing a round of work, run the following cells to push the changes to GitHub:

In [2]:
# Set the author name and e-mail that git records on commits made from this runtime.
!git config --global user.name "GodunicornIzek"
!git config --global user.email "godunicornizek@gmail.com"

In [6]:
# Switch to the repository checkout on Google Drive (requires Drive to be mounted first).
%cd /content/drive/MyDrive/Projects/SelfPlayAI

/content/drive/MyDrive/Projects/SelfPlayAI


In [8]:
# Move the notebook from its old Drive location into the current directory.
# NOTE(review): mv fails with "cannot stat" when the source file is not at this
# path (e.g. it was already moved) — check before relying on the later steps.
!mv /content/drive/MyDrive/SelfPlayAI/TicTacToeMCTS.ipynb ./

mv: cannot stat '/content/drive/MyDrive/SelfPlayAI/TicTacToeMCTS.ipynb': No such file or directory


In [7]:
# Stage the notebook for the next commit; the file must exist in the current directory.
!git add TicTacToeMCTS.ipynb

fatal: pathspec 'TicTacToeMCTS.ipynb' did not match any files


In [3]:
if __name__ == "__main__":
    # Publish workflow: mount Google Drive, move the notebook into the
    # repository checkout, convert it to a .py script, then commit and push.
    # The steps are IPython magics and shell escapes, so this only runs
    # inside a notebook kernel.
    # NOTE(review): no step checks the result of the previous one, so
    # status/add/commit/push still run when the mv or nbconvert step did not
    # find the notebook — verify the file exists before committing.
    from google.colab import drive
    drive.mount('/content/drive')

    %cd /content/drive/MyDrive
    !mkdir -p Projects
    %cd /content/drive/MyDrive/Projects

    # Move the notebook from its old Drive location into the checkout.
    !mv /content/drive/MyDrive/SelfPlayAI/TicTacToeMCTS.ipynb \
    /content/drive/MyDrive/Projects/SelfPlayAI/

    %cd /content/drive/MyDrive/Projects/SelfPlayAI
    # Writes TicTacToeMCTS.py next to the notebook.
    !jupyter nbconvert --to python TicTacToeMCTS.ipynb

    !git status

    !git add TicTacToeMCTS.ipynb TicTacToeMCTS.py

    !git commit -m "Create MCTS class and Node class"

    import getpass
    token = getpass.getpass("Enter GitHub token: ")

    # NOTE(review): set-url stores the token inside the remote URL in the
    # repository's git config (here on Drive) in plain text — consider a
    # credential helper or resetting the URL after the push.
    !git remote set-url origin https://GodUnicornIzek:{token}@github.com/GodUnicornizek/SelfPlayAI.git

    !git push origin main

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive
/content/drive/MyDrive/Projects
mv: cannot stat '/content/drive/MyDrive/SelfPlayAI/TicTacToeMCTS.ipynb': No such file or directory
/content/drive/MyDrive/Projects/SelfPlayAI
This application is used to convert notebook files (*.ipynb)
        to various other formats.


Options
The options below are convenience aliases to configurable class-options,
as listed in the "Equivalent to" description-line of the aliases.
To see all configurable class-options for some <cmd>, use:
    <cmd> --help-all

--debug
    set log level to logging.DEBUG (maximize logging output)
    Equivalent to: [--Application.log_level=10]
--show-config
    Show the application's configuration (human-readable format)
    Equivalent to: [--Application.show_config=True]
--show-config-json
    Show the application's configuration (json format)
    Equivalent to: [--Appli

KeyboardInterrupt: Interrupted by user