In [97]:
import numpy as np
import random
import math

In [72]:
EMPTY = 0
CROSSES = 1
CIRCLES = 2

ROWS = 3
COLUMNS = 3
DIAGONALS = 2

In [73]:
start_board = np.array((EMPTY, EMPTY, EMPTY, EMPTY, CROSSES, EMPTY, EMPTY, EMPTY, EMPTY))
converter = lambda _: {EMPTY: " ", CROSSES: "X", CIRCLES: "O"}[_]

In [74]:
game_moves = [("X", 4)]

In [75]:
def possible_move(board: np.array) -> list:
    return [i for i in range(len(board)) if board[i] == EMPTY]

In [76]:
def opponent_move(board: np.array) -> np.array:
    possible_moves = possible_move(board)
    if len(possible_moves) != 0:
        move = random.choice(possible_moves)
        game_moves.append(("X", move))
        board[move] = CROSSES
        return board
    else:
        raise KeyError("Board is full")

In [77]:
def print_board( board: np.array) -> None:
    data = np.reshape(board, (ROWS, COLUMNS))

    for i, row in enumerate(data):
        print(
            f" {converter(row[0])} | {converter(row[1])} | {converter(row[2])}"
        )
        if i != len(data) - 1:
            print("---+---+---")

In [78]:
def winning(board: np.array) -> tuple[bool, int]:
    # ROWS
    for i in range(ROWS):
        if board[i] == board[i + 1] == board[i + 2] and board[i] != EMPTY:
            return True, board[i]

    # COLUMNS
    for j in range(COLUMNS):
        if board[j] == board[j + 3] == board[j + 6] and board[j] != EMPTY:
            return True, board[j]

    # DIAGONALS (start top left to bottom right, then top right to bottom left)
    for k in range(DIAGONALS):
        if board[0 if k == 0 else 3] == board[4] == board[8 if k == 0 else 6]:
            return True, board[k]

    # No winner yet
    return False, None

In [79]:
def selection(board: np.array, node: list = [0]) -> tuple[list, np.array]:
    if random.choice([True, False]):
        return node, board
    else:
        move = random.choice(possible_move(board))
        board[move] = CIRCLES
        node.append(move)
        game_moves.append(("O", move))
        # Stop if this is winning terminating node
        if winning(board)[0]: return node, board
        board = opponent_move(board)
        # Stop if this is losing terminating node
        if winning(board)[0]: return node, board
        return selection(board, node)

In [80]:
game_moves = [("X", 4)]
selection_moves, selection_board = selection(start_board.copy())
print_board(selection_board)
print(selection_moves)
print(game_moves)

 X |   |  
---+---+---
 O | X |  
---+---+---
 O | X |  
[0, 6, 3]
[('X', 4), ('O', 6), ('X', 7), ('O', 3), ('X', 0)]


In [87]:
def expansion(board: np.array) -> list:
    children = []
    for move in possible_move(board):
        # Copy data
        child_board = board.copy()
        child_moves = game_moves.copy()

        # Adjust for child
        child_board[move] = CIRCLES
        child_moves.append(("O", move))
        children.append((child_moves, child_board))

    return children

In [89]:
for m, b in expansion(selection_board):
    print_board(b)
    print(m)

 X | O |  
---+---+---
 O | X |  
---+---+---
 O | X |  
[('X', 4), ('O', 6), ('X', 7), ('O', 3), ('X', 0), ('O', 1)]
 X |   | O
---+---+---
 O | X |  
---+---+---
 O | X |  
[('X', 4), ('O', 6), ('X', 7), ('O', 3), ('X', 0), ('O', 2)]
 X |   |  
---+---+---
 O | X | O
---+---+---
 O | X |  
[('X', 4), ('O', 6), ('X', 7), ('O', 3), ('X', 0), ('O', 5)]
 X |   |  
---+---+---
 O | X |  
---+---+---
 O | X | O
[('X', 4), ('O', 6), ('X', 7), ('O', 3), ('X', 0), ('O', 8)]


In [101]:
def rollout(board: np.array) -> tuple[float, int]:
    score = 0
    #first opponent move
    for opm in possible_move(board):
        # Update board
        new_board = board.copy()
        new_board[opm] = CROSSES

        t, w = winning(new_board)
        if t: 
            score += 1 if w == CIRCLES else 0
        elif len(possible_move(new_board)) == 0:
            score += 0.5
            
        else:
            # our move
            for usm in possible_move(new_board):
                # Update board
                newer_board = new_board.copy()
                newer_board[usm] = CIRCLES

                t, w = winning(newer_board)
                if t: 
                    score += 1 if w == CIRCLES else 0
                elif len(possible_move(newer_board)) == 0:
                    score += 0.5
                else:
                    score += rollout(new_board)[0] 

    n = math.factorial(len(possible_move(board)))
    return score, n

In [105]:
for m, b in expansion(selection_board):
    print_board(b)
    # print(m)

    score, n = rollout(b)
    # print(score)
    # print(n)
    print(f"win percentage: {score/n}")

 X | O |  
---+---+---
 O | X |  
---+---+---
 O | X |  
win percentage: 0.4166666666666667
 X |   | O
---+---+---
 O | X |  
---+---+---
 O | X |  
win percentage: 0.16666666666666666
 X |   |  
---+---+---
 O | X | O
---+---+---
 O | X |  
win percentage: 0.0
 X |   |  
---+---+---
 O | X |  
---+---+---
 O | X | O
win percentage: 0.3333333333333333
