In [1]:
from dataclasses import dataclass
@dataclass
class Config:
    """Board settings exposing the attribute names the search helpers read from `config`.

    NOTE(review): this class is not instantiated in the visible cells; it
    presumably mirrors the configuration object the ConnectX environment hands
    to agents -- confirm.
    """
    rows: int  # number of board rows (read as `config.rows`)
    columns: int  # number of board columns (read as `config.columns`)
    inarow: int  # length of the windows scanned for lines of discs (`config.inarow`)
@dataclass
class Params:
    """Heuristic weights and look-ahead depth consumed by the minimax agent."""
    # Multiplier for the opponent's windows counted with num_discs=3 (subtracted in get_heuristic).
    weight_num_threes_opp: float
    # Multiplier for the agent's own windows counted with num_discs=4 (added in get_heuristic).
    weight_num_fours: float
    # Multiplier for the opponent's windows counted with num_discs=4 (subtracted in get_heuristic).
    weight_num_fours_opp: float
    # Look-ahead depth: score_move plays one move and searches n_steps - 1 further plies.
    n_steps: int

In [2]:
from kaggle_environments import evaluate, make, utils

# Play a single ConnectX game between two look-ahead agents that share the
# same heuristic weights and differ only in search depth (3 vs 5), then render
# the finished game inline. `Params` and `LookNAheadAgent` must already be
# defined in the kernel when this cell runs.
env = make("connectx", debug=True)

env.run([
    LookNAheadAgent(
        Params(
            weight_num_threes_opp=1e2,
            weight_num_fours=1e6,
            weight_num_fours_opp=1e4,
            n_steps=depth,
        )
    )
    for depth in (3, 5)
])

env.render(mode="ipython")

ModuleNotFoundError: No module named 'kaggle_environments'

In [1]:
import numpy as np
import random

def score_move(grid, col, piece, config, params):
    """Score the move of dropping `piece` into column `col`.

    The move is applied to a copy of `grid`; the resulting position is then
    evaluated by `minmax` with `params.n_steps - 1` plies left and the
    opponent to move (maximizingPlayer=False).

    Returns the minimax value of the position from `piece`'s point of view.
    """
    board_after_move = drop_piece(grid, col, piece, config)
    remaining_plies = params.n_steps - 1
    return minmax(board_after_move, remaining_plies, False, piece, config, params)

def drop_piece(grid, col, piece, config):
    """Return a copy of `grid` with `piece` dropped into column `col`.

    The column is scanned from the bottom row (`config.rows - 1`) upwards and
    the piece is placed in the first empty (0) cell found. `grid` itself is
    not modified; the result comes from `grid.copy()`.

    Args:
        grid: 2-D board indexed as grid[row][col], with 0 marking an empty cell.
        col: column index to play.
        piece: mark to place.
        config: object exposing `rows`.

    Returns:
        The new board containing the dropped piece.

    Raises:
        ValueError: if the column has no empty cell. Previously the top cell
            was silently overwritten in that case.
    """
    for row in range(config.rows - 1, -1, -1):
        if grid[row][col] == 0:
            ngrid = grid.copy()
            ngrid[row][col] = piece
            return ngrid
    raise ValueError(f"column {col} is full; cannot drop piece {piece}")

def check_window(window, num_discs, piece, config):
    """Tell whether `window` holds exactly `num_discs` of `piece` and is otherwise empty.

    "Otherwise empty" means the number of 0 entries equals
    `config.inarow - num_discs`.
    """
    if window.count(piece) != num_discs:
        return False
    empties_required = config.inarow - num_discs
    return window.count(0) == empties_required
    
def count_window(grid, num_discs, piece, config):
    """Count the windows of length `config.inarow` accepted by `check_window`.

    Every horizontal, vertical and diagonal (both directions) run of
    `config.inarow` consecutive cells is extracted from `grid` and tested with
    `check_window(window, num_discs, piece, config)`.

    Returns the number of windows that pass the check.
    """
    span = config.inarow
    row_starts = range(config.rows - (span - 1))
    col_starts = range(config.columns - (span - 1))
    steps = range(span)

    windows = []

    # Horizontal runs: every row, every starting column that leaves room.
    for row in range(config.rows):
        for col in col_starts:
            windows.append([grid[row][col + k] for k in steps])

    # Vertical runs: every column, every starting row that leaves room.
    for col in range(config.columns):
        for row in row_starts:
            windows.append([grid[row + k][col] for k in steps])

    # Diagonals: one descending and one ascending run per anchor cell.
    for row in row_starts:
        for col in col_starts:
            windows.append([grid[row + k][col + k] for k in steps])
            windows.append([grid[row + span - 1 - k][col + k] for k in steps])

    return sum(1 for cells in windows if check_window(cells, num_discs, piece, config))

def get_heuristic(grid, piece, config, params):
    """Static evaluation of `grid` from the point of view of `piece`.

    Combines four `count_window` tallies (num_discs 3 and 4, for `piece` and
    for the other mark `piece % 2 + 1`):

        own_threes
        - params.weight_num_threes_opp * opp_threes
        + params.weight_num_fours      * own_fours
        - params.weight_num_fours_opp  * opp_fours
    """
    opponent = piece % 2 + 1

    own_threes = count_window(grid, 3, piece, config)
    opp_threes = count_window(grid, 3, opponent, config)
    own_fours = count_window(grid, 4, piece, config)
    opp_fours = count_window(grid, 4, opponent, config)

    return (
        own_threes
        - params.weight_num_threes_opp * opp_threes
        + params.weight_num_fours * own_fours
        - params.weight_num_fours_opp * opp_fours
    )

def is_terminal_window(window, config):
    """Return True when `window` contains `config.inarow` copies of mark 1 or of mark 2."""
    needed = config.inarow
    return any(window.count(mark) == needed for mark in (1, 2))

def is_terminal_node(grid, config):
    """Return True when the game on `grid` is over.

    The position is terminal if the board has no empty (0) cell left, or if
    any horizontal, vertical or diagonal window of `config.inarow` cells
    satisfies `is_terminal_window`.
    """
    # A board without a single empty cell is a tie.
    if 0 not in list(grid.ravel()):
        return True

    span = config.inarow
    row_starts = range(config.rows - (span - 1))
    col_starts = range(config.columns - (span - 1))
    steps = range(span)

    def candidate_windows():
        # Horizontal
        for row in range(config.rows):
            for col in col_starts:
                yield [grid[row][col + k] for k in steps]
        # Vertical
        for col in range(config.columns):
            for row in row_starts:
                yield [grid[row + k][col] for k in steps]
        # Both diagonal directions
        for row in row_starts:
            for col in col_starts:
                yield [grid[row + k][col + k] for k in steps]
                yield [grid[row + span - 1 - k][col + k] for k in steps]

    # The generator is lazy, so scanning stops at the first winning window.
    return any(is_terminal_window(cells, config) for cells in candidate_windows())

def minmax(node, depth, maximizingPlayer, piece, config, params):
    """Depth-limited minimax value of `node` from the point of view of `piece`.

    Args:
        node: board indexed as node[row][col], with 0 marking an empty cell.
        depth: remaining plies to search; 0 evaluates `node` directly.
        maximizingPlayer: True when `piece` is to move, False when the other
            mark (`piece % 2 + 1`) is to move.
        piece: mark of the agent the score is computed for.
        config: object exposing `columns` (plus whatever the helpers read).
        params: heuristic weights forwarded to `get_heuristic`.

    Returns:
        `get_heuristic(node, piece, ...)` at leaves (depth 0 or terminal
        position); otherwise the max (own turn) or min (opponent turn) of the
        children's values.
    """
    if depth == 0 or is_terminal_node(node, config):
        return get_heuristic(node, piece, config, params)

    # A column is playable while its top cell is still empty. This is only
    # needed for interior nodes, so it is computed after the leaf check.
    valid_moves = [c for c in range(config.columns) if node[0][c] == 0]

    # `np.inf` is the supported spelling; the `np.Inf` alias was removed in NumPy 2.0.
    if maximizingPlayer:
        val = -np.inf
        for c in valid_moves:
            child = drop_piece(node, c, piece, config)
            val = max(val, minmax(child, depth - 1, False, piece, config, params))
        return val
    else:
        val = np.inf
        for c in valid_moves:
            child = drop_piece(node, c, piece % 2 + 1, config)
            val = min(val, minmax(child, depth - 1, True, piece, config, params))
        return val


In [2]:
class LookNAheadAgent:
    """Callable agent that picks the column with the best `score_move` value."""

    def __init__(self, params):
        """Store the heuristic weights / search depth used when scoring moves."""
        self._params = params

    def __call__(self, obs, config):
        """Choose a column for the current observation.

        Args:
            obs: observation exposing `board` (flat sequence, row-major) and
                `mark` (this agent's piece).
            config: object exposing `rows` and `columns`.

        Returns:
            One of the highest-scoring playable columns, picked at random.

        Raises:
            IndexError: if no column is playable (from `random.choice` on an
                empty list).
        """
        # The first `columns` entries of the flat board are the top row, so a
        # column is playable while its top cell is empty.
        valid_moves = [c for c in range(config.columns) if obs.board[c] == 0]

        # Convert the board to a 2D grid
        grid = np.asarray(obs.board).reshape(config.rows, config.columns)

        # Score the board reached by each playable move
        scores = {
            col: score_move(grid, col, obs.mark, config, self._params)
            for col in valid_moves
        }

        # Compute the best score once instead of once per candidate column.
        # `default=None` keeps the empty case flowing into random.choice below.
        best_score = max(scores.values(), default=None)
        max_cols = [col for col, score in scores.items() if score == best_score]

        # Select at random from the maximizing columns
        return random.choice(max_cols)

In [3]:
def get_win_percentages(agent1, agent2, n_rounds=100):
    """Play `n_rounds` ConnectX games between two agents and print a summary.

    Half of the rounds (rounded down) are played with `agent1` moving first;
    the rest with `agent2` moving first, with those outcomes flipped so that
    every entry reads [agent1_result, agent2_result].

    The printed "Win Percentage" figures are fractions of all games, rounded
    to two decimals.

    Returns:
        agent1's win fraction (same value as the first printed line).
    """
    # Use default Connect Four setup
    config = {'rows': 6, 'columns': 7, 'inarow': 4}
    rounds_agent1_first = n_rounds // 2
    rounds_agent2_first = n_rounds - rounds_agent1_first

    outcomes = evaluate("connectx", [agent1, agent2], config, [], rounds_agent1_first)
    swapped = evaluate("connectx", [agent2, agent1], config, [], rounds_agent2_first)
    outcomes += [[b, a] for [a, b] in swapped]

    total_games = len(outcomes)
    agent_1_win_rate = np.round(outcomes.count([1, -1]) / total_games, 2)
    agent_2_win_rate = np.round(outcomes.count([-1, 1]) / total_games, 2)

    print("Agent 1 Win Percentage:", agent_1_win_rate)
    print("Agent 2 Win Percentage:", agent_2_win_rate)
    print("Number of Invalid Plays by Agent 1:", outcomes.count([None, 0]))
    print("Number of Invalid Plays by Agent 2:", outcomes.count([0, None]))
    return agent_1_win_rate

In [42]:


# Depth-3 agent (agent 1) against a depth-1 agent (agent 2), identical weights, 10 games.
get_win_percentages(
    agent1=LookNAheadAgent(
        Params(weight_num_threes_opp=1e2, weight_num_fours=1e6, weight_num_fours_opp=1e4, n_steps=3)
    ),
    agent2=LookNAheadAgent(
        Params(weight_num_threes_opp=1e2, weight_num_fours=1e6, weight_num_fours_opp=1e4, n_steps=1)
    ),
    n_rounds=10,
)

Agent 1 Win Percentage: 0.9
Agent 2 Win Percentage: 0.1
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0


In [None]:
import time
def grid_search(w3o_list, w4_list, w4o_list, n_rounds=10):
    """Pit every weight combination against a fixed baseline agent.

    For each (w3o, w4, w4o) in the Cartesian product of the three lists, a
    depth-3 agent with those weights plays `n_rounds` games against the
    baseline (weights 1e2 / 1e6 / 1e4, depth 3) via `get_win_percentages`.
    The combination, the win summary and the wall-clock time of the match-up
    are printed. The baseline is passed as agent 1 and the experimental agent
    as agent 2. Nothing is returned.
    """
    baseline_agent = LookNAheadAgent(
        Params(weight_num_threes_opp=1e2, weight_num_fours=1e6, weight_num_fours_opp=1e4, n_steps=3)
    )
    weight_combos = (
        (w3o, w4, w4o)
        for w3o in w3o_list
        for w4 in w4_list
        for w4o in w4o_list
    )
    for w3o, w4, w4o in weight_combos:
        started_at = time.time()
        candidate = LookNAheadAgent(
            Params(weight_num_threes_opp=w3o, weight_num_fours=w4, weight_num_fours_opp=w4o, n_steps=3)
        )
        print(w3o, w4, w4o)
        get_win_percentages(baseline_agent, candidate, n_rounds=n_rounds)
        print('Elasped', time.time() - started_at)

In [61]:
# Sweep a 3 x 3 x 3 grid of heuristic weights (opponent-threes, own-fours,
# opponent-fours) against the baseline agent, using the default n_rounds.
grid_search([10, 1e2, 1e3], [1e5, 1e6, 1e7], [1e3, 1e4, 1e5])

# NOTE(review): grid_search as defined in this notebook takes three required
# list arguments, so this bare call would raise a TypeError if executed --
# confirm whether it is a leftover that should be removed.
grid_search()

Agent 1 Win Percentage: 0.4
Agent 2 Win Percentage: 0.6
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Elasped 37.575748920440674
Agent 1 Win Percentage: 0.5
Agent 2 Win Percentage: 0.4
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Elasped 38.97400140762329
Agent 1 Win Percentage: 0.4
Agent 2 Win Percentage: 0.5
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Elasped 36.477999687194824
Agent 1 Win Percentage: 0.3
Agent 2 Win Percentage: 0.6
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Elasped 36.008325815200806
Agent 1 Win Percentage: 0.6
Agent 2 Win Percentage: 0.4
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Elasped 38.321876764297485
Agent 1 Win Percentage: 0.4
Agent 2 Win Percentage: 0.5
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
Elasped 36.386603116989136
Agent 1 Win Percentage: 0.4
Agent 2 Win Percent

KeyboardInterrupt: 

In [62]:
# Pairwise comparison of search depths 3, 4 and 5 with identical heuristic
# weights, 50 games per pairing. In each pairing the shallower agent is agent 1.
print("3 against 4")
get_win_percentages(
    agent1=LookNAheadAgent(
        Params(weight_num_threes_opp=1e2, weight_num_fours=1e6, weight_num_fours_opp=1e4, n_steps=3)
    ),
    agent2=LookNAheadAgent(
        Params(weight_num_threes_opp=1e2, weight_num_fours=1e6, weight_num_fours_opp=1e4, n_steps=4)
    ),
    n_rounds=50,
)

print("3 against 5")
get_win_percentages(
    agent1=LookNAheadAgent(
        Params(weight_num_threes_opp=1e2, weight_num_fours=1e6, weight_num_fours_opp=1e4, n_steps=3)
    ),
    agent2=LookNAheadAgent(
        Params(weight_num_threes_opp=1e2, weight_num_fours=1e6, weight_num_fours_opp=1e4, n_steps=5)
    ),
    n_rounds=50,
)

print("4 against 5")
get_win_percentages(
    agent1=LookNAheadAgent(
        Params(weight_num_threes_opp=1e2, weight_num_fours=1e6, weight_num_fours_opp=1e4, n_steps=4)
    ),
    agent2=LookNAheadAgent(
        Params(weight_num_threes_opp=1e2, weight_num_fours=1e6, weight_num_fours_opp=1e4, n_steps=5)
    ),
    n_rounds=50,
)

3 against 4
Agent 1 Win Percentage: 0.3
Agent 2 Win Percentage: 0.48
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 0
3 against 5
Agent 1 Win Percentage: 0.24
Agent 2 Win Percentage: 0.62
Number of Invalid Plays by Agent 1: 0
Number of Invalid Plays by Agent 2: 4
4 against 5


KeyboardInterrupt: 

In [6]:
# Depth-4 look-ahead agent with the baseline heuristic weights.
agent4 = LookNAheadAgent(
    params=Params(
        weight_num_threes_opp=1e2,
        weight_num_fours=1e6,
        weight_num_fours_opp=1e4,
        n_steps=4,
    )
)

In [4]:
# Depends on kernel state from other cells: `env` (created by the cell that
# calls make("connectx", ...)) and `agent4`. The NameError recorded below this
# cell shows `env` was not defined when it was last run.
# NOTE(review): passing None as the first player presumably makes that seat
# interactive -- confirm against the kaggle_environments documentation.
env.play([None, agent4])

NameError: name 'env' is not defined