In [25]:
class TicTacToe:
    """A tic-tac-toe position stored as a 9-character, row-major string.

    Each character of ``state`` is ``p1``'s mark, ``p2``'s mark, or ``'.'``
    for an empty square. None of the methods mutate the instance;
    ``getValidMoves`` returns new ``TicTacToe`` objects for the successors.
    """

    # Magnitude eval() returns when one side has completed a line.
    WIN_SCORE = 100

    # Index triples (into ``state``) of the eight winning lines: three rows,
    # three columns and two diagonals. Shared by all instances so successor
    # objects do not rebuild it.
    terminals = ((0, 1, 2), (0, 3, 6), (0, 4, 8),
                 (1, 4, 7), (2, 4, 6), (2, 5, 8),
                 (3, 4, 5), (6, 7, 8))

    def __init__(self, state='.........', p1='x', p2='o'):
        """Create a position.

        Args:
            state: 9-character board string, row by row; ``'.'`` is empty.
            p1: mark of the first player.
            p2: mark of the second player.
        """
        self.state = state
        self.p1 = p1
        self.p2 = p2

    def get_opp(self, p):
        """Return ``p1`` if ``p`` is ``p2``; otherwise return ``p2``."""
        return self.p1 if p == self.p2 else self.p2

    def getValidMoves(self, player='x'):
        """Return every position reachable by placing ``player`` on an empty square.

        Args:
            player: mark to place. Defaults to the literal ``'x'``, not ``p1``.

        Returns:
            A list of new ``TicTacToe`` objects, ordered by square index,
            carrying the same ``p1``/``p2`` as this position.
        """
        return [
            TicTacToe(self.state[:i] + player + self.state[i + 1:],
                      self.p1, self.p2)
            for i, cell in enumerate(self.state)
            if cell == "."
        ]

    def _winner(self):
        """Return the mark that owns the first completed line, or ``None``."""
        for a, b, c in self.terminals:
            mark = self.state[a]
            if mark in (self.p1, self.p2) and mark == self.state[b] == self.state[c]:
                return mark
        return None

    def eval(self, player=None, max_player=None):
        """Score the position from ``p1``'s point of view.

        Args:
            player: accepted but ignored.
            max_player: accepted but ignored.

        Returns:
            ``100`` if ``p1`` has completed a line, ``-100`` if ``p2`` has.
            Otherwise the number of lines ``p1`` can still complete minus the
            number ``p2`` can still complete. An untouched line is open to
            both sides and therefore contributes zero, which keeps the score
            symmetric under swapping the two players.
        """
        winner = self._winner()
        if winner == self.p1:
            return self.WIN_SCORE
        if winner == self.p2:
            return -self.WIN_SCORE

        open_wins = 0
        for terminal in self.terminals:
            marks = {self.state[i] for i in terminal}
            if self.p2 not in marks:
                open_wins += 1  # p1 can still take this line
            if self.p1 not in marks:
                open_wins -= 1  # p2 can still take this line
        return open_wins

    def is_terminal(self):
        """Return True when a line is complete or no empty square remains."""
        return self._winner() is not None or '.' not in self.state

    def pretty_print(self, state=None):
        """Return a board string formatted as three newline-separated rows.

        Args:
            state: board string to format; defaults to this position's own
                ``state`` when omitted.
        """
        if state is None:
            state = self.state
        return state[:3] + '\n' + state[3:6] + '\n' + state[6:]

In [29]:
from agents import AlphaBetaAgent, RandomAgent
from tqdm import trange
import numpy as np

# Play `episodes` games of alpha-beta vs. random tic-tac-toe and report how
# often the alpha-beta agent completes a line. Draws count as non-wins.
episodes = 1000
wins = 0
TTT_WIN_SCORE = 100  # magnitude TicTacToe.eval() returns for a completed line

for i in trange(episodes):

    game = TicTacToe()
    # +1: the alpha-beta agent moves first (plays game.p1);
    # -1: it moves second (plays game.p2).
    smartAgent = np.random.choice([-1, 1])
    # NOTE(review): max_player is 'x' even when the alpha-beta agent plays
    # game.p2 ('o') -- confirm this is what AlphaBetaAgent expects.
    if smartAgent == 1:
        agent1 = AlphaBetaAgent(max_player='x')
        agent2 = RandomAgent()
    else:
        agent1 = RandomAgent()
        agent2 = AlphaBetaAgent(max_player='x')

    a1_turn = True
    while not game.is_terminal():
        # The second value returned by find_opt_move is not needed here.
        if a1_turn:
            game, reward = agent1.find_opt_move(game, game.p1)
        else:
            game, reward = agent2.find_opt_move(game, game.p2)
        a1_turn = not a1_turn

    # eval() scores the final board from p1's side (+100 for a p1 line, -100
    # for a p2 line); multiplying by smartAgent turns that into +100 exactly
    # when the alpha-beta agent is the side that completed a line.
    if smartAgent * game.eval(game.p1, game.p1) == TTT_WIN_SCORE:
        wins += 1

print("\nWin Rate:", wins/episodes)

100%|██████████| 1000/1000 [00:03<00:00, 297.76it/s]


Win Rate: 0.761





In [13]:
import importlib
import agents
import ninemensmorris
importlib.reload(agents)
importlib.reload(ninemensmorris)
from agents import AlphaBetaAgent, RandomAgent
from ninemensmorris import NineMensMorris
from tqdm import trange
import numpy as np

# Play `episodes` games of alpha-beta vs. random Nine Men's Morris and report
# how often the alpha-beta agent ends with the decisive score.
episodes = 5
wins = 0
# The same magnitude bounds the agent's search window and is the value the
# final eval() is compared against below.
NMM_SCORE_LIMIT = 1_000_000_000
NMM_SEARCH_DEPTH = 3
smart_agent_kwargs = dict(upper_lim=NMM_SCORE_LIMIT,
                          lower_lim=-NMM_SCORE_LIMIT,
                          max_depth=NMM_SEARCH_DEPTH)

for i in trange(episodes):

    game = NineMensMorris()
    # +1: the alpha-beta agent moves first (player 1);
    # -1: it moves second (player 2).
    smartAgent = np.random.choice([-1, 1])
    if smartAgent == 1:
        agent1 = AlphaBetaAgent(**smart_agent_kwargs)
        agent2 = RandomAgent()
    else:
        agent1 = RandomAgent()
        agent2 = AlphaBetaAgent(**smart_agent_kwargs)

    a1_turn = True
    # NOTE(review): the loop only exits once one side has won; if a game can
    # stall or draw, this never terminates -- confirm against NineMensMorris.
    while not (game.isWin(1) or game.isWin(2)):
        # The second value returned by find_opt_move is not needed here.
        if a1_turn:
            game, reward = agent1.find_opt_move(game, 1)
        else:
            game, reward = agent2.find_opt_move(game, 2)
        a1_turn = not a1_turn

    # Presumably eval(1, 1) is +NMM_SCORE_LIMIT when player 1 has won and
    # -NMM_SCORE_LIMIT when player 2 has; the sign flip by smartAgent then
    # selects the games the alpha-beta agent won -- verify in ninemensmorris.
    if smartAgent * game.eval(1, 1) == NMM_SCORE_LIMIT:
        wins += 1

print("\nWin Rate:", wins/episodes)

100%|██████████| 5/5 [01:12<00:00, 14.45s/it]


Win Rate: 1.0



