In [1]:
class TicTacToe:
    """Tic-tac-toe board represented as a 9-character string.

    The string lists the cells row by row (indices 0-8). ``'.'`` marks an
    empty cell; any other character is a player's mark.
    """

    def __init__(self, state='.........', p1='x', p2='o'):
        """Store the current board and the two players' marks.

        Args:
            state: 9-character board string, ``'.'`` for an empty cell.
            p1: mark of the player that ``eval`` scores positively.
            p2: mark of the player that ``eval`` scores negatively.
        """
        self.state = state
        self.p1 = p1
        self.p2 = p2

        # Index triples that make up a winning line (rows, columns, diagonals).
        self.terminals = [(0, 1, 2), (0, 3, 6), (0, 4, 8),
                          (1, 4, 7), (2, 4, 6), (2, 5, 8),
                          (3, 4, 5), (6, 7, 8)]

    def get_opp(self, p):
        """Return the opposing mark: ``p1`` when ``p`` is ``p2``, else ``p2``.

        Reads the marks from the instance, so it stays correct if ``p1`` or
        ``p2`` is reassigned after construction.
        """
        return self.p1 if p == self.p2 else self.p2

    def get_valid_moves(self, state, player='x'):
        """Return every board reachable from ``state`` by one move of ``player``.

        Args:
            state: board string to expand.
            player: mark written into each empty cell in turn.

        Returns:
            List of board strings, one per empty cell, in cell-index order.
        """
        return [state[:i] + player + state[i + 1:]
                for i, cell in enumerate(state)
                if cell == "."]

    def eval(self, state):
        """Score ``state`` from ``p1``'s point of view.

        Returns:
            100 if some line is filled entirely by ``p1``, -100 if entirely by
            ``p2``; otherwise the number of lines still open for ``p1`` (no
            ``p2`` mark on them) minus the number still open for ``p2``.
        """
        p1_line = {self.p1}
        p2_line = {self.p2}
        open_wins = 0
        for terminal in self.terminals:
            elements = {state[i] for i in terminal}
            if elements == p1_line:
                return 100
            if elements == p2_line:
                return -100
            # Two independent checks: a completely empty line is open for
            # BOTH players and must cancel out, so the score is antisymmetric
            # under swapping the marks (the empty board evaluates to 0).
            if self.p2 not in elements:
                open_wins += 1
            if self.p1 not in elements:
                open_wins -= 1
        return open_wins

    def is_terminal(self, state=None):
        """Return True if the game is over (a win for either side or no empty cell).

        Args:
            state: board to test; defaults to the instance's current ``state``.
        """
        if state is None:
            state = self.state
        return self.eval(state) in (100, -100) or '.' not in state

    def pretty_print(self, state):
        """Return ``state`` formatted as three newline-separated rows.

        Despite the name, nothing is printed; the caller prints the result.
        """
        return state[:3] + '\n' + state[3:6] + '\n' + state[6:]

In [2]:
from agents import AlphaBetaAgent, RandomAgent
from tqdm import trange
import numpy as np

# Number of complete games to play; the win rate below is averaged over them.
episodes = 1000
wins = 0

for _episode in trange(episodes):

    game = TicTacToe()
    # Randomly pick which seat the alpha-beta agent takes:
    #   +1 -> it is agent1 and plays game.p1 (moves first),
    #   -1 -> it is agent2 and plays game.p2 (moves second).
    smartAgent = np.random.choice([-1, 1])
    if smartAgent == 1:
        agent1 = AlphaBetaAgent(game)
        agent2 = RandomAgent(game)
    else:
        agent1 = RandomAgent(game)
        agent2 = AlphaBetaAgent(game)

    a1_turn = True
    while not game.is_terminal():
        # find_opt_move returns a (next_state, reward) pair; only the state
        # is needed to advance the game.
        if a1_turn:
            state, _reward = agent1.find_opt_move(game.p1)
        else:
            state, _reward = agent2.find_opt_move(game.p2)
        game.state = state
        a1_turn = not a1_turn

    # eval() is +100 for a p1 win and -100 for a p2 win, so multiplying by the
    # seat sign yields 100 exactly when the alpha-beta agent won. Draws and
    # losses both count as "not a win".
    wins += 1 if smartAgent*game.eval(game.state) == 100 else 0

print("\nWin Rate:", wins/episodes)

100%|██████████| 1000/1000 [00:02<00:00, 434.89it/s]


Win Rate: 0.751



