In [None]:
import numpy as np
import random

from copy import copy
from functools import partial

from kaggle_environments import make

from pypad.kaggle_sandbox import get_heuristic
from pypad.solvers.minimax import Solver as MiniMaxSolver
from pypad.solvers.mcts import MctsSolver
from pypad.games.connectx import ConnectX
from pypad.games.tictactoe import TicTacToe

# Tic-Tac-Toe validation

In [None]:
def agent_tictactoe_mcts(obs, config):
    """Kaggle Tic-Tac-Toe agent that chooses its move with ``MctsSolver``.

    Args:
        obs: Kaggle observation; only ``obs.board`` is read and it is
            reshaped to a 3x3 grid.
        config: Kaggle environment configuration (not used here).

    Returns:
        The value returned by ``MctsSolver.solve`` for the current position.
    """
    board_3x3 = np.asarray(obs.board).reshape(3, 3)
    position = TicTacToe.from_grid(board_3x3)
    # 1_000 is the solver's second argument -- presumably its search budget;
    # confirm against MctsSolver.solve.
    return MctsSolver().solve(position, 1_000)

# Set up a Tic-Tac-Toe environment, let the MCTS agent play against itself,
# and render the finished game inline.
env = make("tictactoe", debug=True)
env.run([agent_tictactoe_mcts, agent_tictactoe_mcts])
env.render(mode="ipython")

# ConnectX Simulation

In [None]:
# Helper function for score_move: calculates value of heuristic for grid
def get_heuristic(board) -> int:
    rows = board.rows + 1
    directions = (1, rows - 1, rows, rows + 1)
    bitboard = board.position ^ board.mask
    bitboard2 = board.position
    score = 0
    for dir in directions:
        if result := bitboard & (bitboard >> dir) & (bitboard >> 2 * dir):
            score += 0.2 * result.bit_count()
            
        if result := bitboard2 & (bitboard2 >> dir) & (bitboard2 >> 2 * dir):
            score -= 0.1 * result.bit_count()
            
    return score


def shallow_negamax(board: ConnectX, alpha: int, beta: int, depth: int) -> int:
    """Depth-limited negamax that falls back to ``get_heuristic`` at depth 0.

    Args:
        board: Position to evaluate. It is never mutated; each candidate move
            is played on a copy.
        alpha: Lower bound of the caller's window. It is only consulted for
            the early exit on the best achievable score; the move loop then
            starts from its own floor of -100_000_000.
        beta: Upper bound of the window, clamped to the best achievable score
            before the move loop.
        depth: Remaining plies to search; at 0 the heuristic value is returned.

    Returns:
        ``0`` for a full board; ``(num_slots - num_moves + 1) // 2`` when
        ``win_mask()`` and ``possible_moves_mask()`` overlap; the best
        achievable score when it does not exceed ``alpha``; the heuristic
        value at depth 0; otherwise the best negated child score, returned as
        soon as one reaches ``beta``.
    """
    if board.is_full():
        return 0

    # Overlap between the win mask and the playable mask ends the search here.
    winning_cells = board.win_mask()
    playable_cells = board.possible_moves_mask()
    if winning_cells & playable_cells:
        return (board.num_slots - board.num_moves + 1) // 2

    score_ceiling = (board.num_slots - board.num_moves - 1) // 2
    if alpha >= score_ceiling:
        return score_ceiling

    if depth == 0:
        return get_heuristic(board)

    # The incoming alpha is deliberately dropped from here on.
    best = -100_000_000
    upper = min(beta, score_ceiling)

    for candidate in board.possible_moves():
        child = copy(board)
        child.play_move(candidate)
        value = -shallow_negamax(child, -upper, -best, depth - 1)
        if value > best:
            best = value
        if value >= upper:
            return best

    return best

def agent_negamax(obs, config, depth):
    """Kaggle ConnectX agent: take an immediate win, else pick by shallow negamax.

    Args:
        obs: Kaggle observation; only ``obs.board`` is read.
        config: Kaggle configuration providing ``rows`` and ``columns``.
        depth: Depth budget handed to ``shallow_negamax`` for every candidate.

    Returns:
        The chosen column, as yielded by ``board.possible_col_moves()``.
    """
    grid = np.asarray(obs.board).reshape(config.rows, config.columns)
    board = ConnectX.from_grid(grid)

    # Fallback: the first playable column paired with a very low sentinel score.
    chosen_col = next(board.possible_col_moves())
    chosen_score = -1_000_000

    # Pass 1: if any column wins on the spot, play it without searching.
    for candidate in board.possible_col_moves():
        trial = copy(board)
        trial.play_col(candidate)
        if trial.is_won():
            return candidate

    # Pass 2: score every column with a fixed (-1, 1) window and keep the
    # first column that achieves the highest score.
    window_low, window_high = -1, 1
    for candidate in board.possible_col_moves():
        trial = copy(board)
        trial.play_col(candidate)
        value = -shallow_negamax(trial, window_low, window_high, depth)
        if value > chosen_score:
            chosen_col, chosen_score = candidate, value

    return chosen_col

def agent_mcts(obs, config):
    """Kaggle ConnectX agent that chooses its column with ``MctsSolver``.

    Args:
        obs: Kaggle observation; only ``obs.board`` is read.
        config: Kaggle configuration providing ``rows`` and ``columns``.

    Returns:
        The column obtained by passing the solver's move through
        ``state.bitboard_util.move_to_col``.
    """
    grid = np.asarray(obs.board).reshape(config.rows, config.columns)
    position = ConnectX.from_grid(grid)
    # 1_000 is the solver's second argument -- presumably its search budget;
    # confirm against MctsSolver.solve.
    chosen_move = MctsSolver().solve(position, 1_000)
    return position.bitboard_util.move_to_col(chosen_move)

# Two negamax agents that differ only in the depth handed to shallow_negamax.
# NOTE(review): each name's numeric suffix is one more than its `depth` --
# presumably the root move played by agent_negamax is counted as a ply; confirm.
agent_negamax5 = partial(agent_negamax, depth=4)
agent_negamax2 = partial(agent_negamax, depth=1)

# Set up a ConnectX environment, play the deeper negamax agent (first seat)
# against the MCTS agent (second seat), and render the game inline.
env = make("connectx", debug=True)
env.run([agent_negamax5, agent_mcts])
env.render(mode="ipython")

In [None]:
def diagnose(n: int, verbose: bool = True):
    """Rebuild the ConnectX position recorded in ``env.steps[n - 1]``.

    Reads the module-level ``env``, so a game must already have been run in
    this kernel.

    Args:
        n: Step number; the observation is taken from the first entry of
            ``env.steps[n - 1]``.
        verbose: When true, print who played last, who plays next, and the grid.

    Returns:
        Tuple ``(obs, config, state)``: the recorded observation, the
        environment configuration, and the ``ConnectX`` state built from it.
    """
    step_entry = env.steps[n - 1][0]
    obs = step_entry['observation']
    config = env.configuration

    grid = np.asarray(obs.board).reshape(config.rows, config.columns)
    state = ConnectX.from_grid(grid)

    if not verbose:
        return obs, config, state

    # The next player is printed as 3 - played_by (i.e. 1 <-> 2).
    print(f'Last played by: {state.played_by}')
    print(f'Next to play:   {3 - state.played_by}\n')
    print(grid)
    return obs, config, state

# Inspect one recorded step of the last ConnectX game and ask both agents
# which column they would play from that position.
n = 12
depth = 4
obs, config, state = diagnose(n)

# Keep the MCTS answer under its own name: previously it was assigned to `col`
# and overwritten by the negamax answer before anyone could see it.
col_mcts = agent_mcts(obs, config)
col = agent_negamax(obs, config, depth)
print(f'\nMCTS suggested col = {col_mcts}')
print(f'Suggested col = {col}')
state

In [None]:
ROWS, COLS = 6, 7
# Position under study, passed to ConnectX.create as the list of moves
# (presumably the columns played so far -- confirm).
# Another position previously tried here: [2, 3, 2, 5, 7].
moves = [3, 3, 4, 4]
state = ConnectX.create(ROWS, COLS, moves)

# Search window and depth are identical for every candidate column, so they
# are set once instead of being rebound on each iteration.
alpha, beta, depth = -1, 1, 9

# Play each available column on a copy and print the negated search score.
for col in state.possible_col_moves():
    b = copy(state)
    b.play_col(col)
    score = -shallow_negamax(b, alpha, beta, depth)
    print(f'[{col}] Score={score}')


In [None]:
# Minimax solver imported from pypad.solvers.minimax.
solver = MiniMaxSolver()

# NOTE: relies on ROWS and COLS defined in an earlier cell.
moves = [6,4,1,1,5,4,4,2,2,6,5,7,5,7,2,5,3,6,1,5]
state = ConnectX.create(ROWS, COLS, moves)

# Play each available column on a copy, then print the negated solver result
# for the resulting position (search window -1..1).
for col in state.possible_col_moves():
    b = copy(state)
    b.play_col(col)
    score = -solver.minimax(b, -1, 1)
    print(f'[{col}] Score={score}')