# AlphaBeta Demo

This notebook contains a few demos of the alpha-beta search agent (`AlphaBetaAgent`) playing Tic-Tac-Toe and Nine Men's Morris against a random agent.

## TicTacToe

### Open Wins Strategy

In [1]:
from games import TTT as TicTacToe
from agents import AlphaBetaAgent, RandomAgent
from tqdm import trange
import numpy as np

# Play `episodes` games of TicTacToe between an alpha-beta agent using the
# 'open_wins' strategy and a random agent, then report the alpha-beta win rate.
episodes = 1000
wins = 0
WIN_SCORE = 100  # used for the search limits and for the final win check

for _episode in trange(episodes):

    game = TicTacToe()

    # +1: the alpha-beta agent moves first (agent1); -1: it moves second (agent2).
    smartAgent = np.random.choice([-1, 1])
    alpha_beta = AlphaBetaAgent(upper_lim=WIN_SCORE, lower_lim=-WIN_SCORE, max_depth=3,
                                max_player='x', strategy='open_wins', weights=[])
    opponent = RandomAgent()
    agent1, agent2 = (alpha_beta, opponent) if smartAgent == 1 else (opponent, alpha_beta)

    a1_turn = True
    while not game.is_terminal():
        # find_opt_move returns a pair; only the resulting game state is needed,
        # the second element is deliberately ignored.
        if a1_turn:
            game, _reward = agent1.find_opt_move(game, game.p1)
        else:
            game, _reward = agent2.find_opt_move(game, game.p2)
        a1_turn = not a1_turn

    # Multiplying by smartAgent flips the sign of the final score when the
    # alpha-beta agent moved second, so a win always shows up as +WIN_SCORE.
    # NOTE(review): presumably eval(1, 1) scores the board from the first
    # player's perspective -- confirm against games.TTT.
    if smartAgent * game.eval(1, 1) == WIN_SCORE:
        wins += 1

print("\nWin Rate:", wins/episodes)

100%|██████████| 1000/1000 [00:03<00:00, 313.02it/s]


Win Rate: 0.736





### Weighted Open Wins Strategy

In [3]:
from games import TTT as TicTacToe
from agents import AlphaBetaAgent, RandomAgent
from tqdm import trange
import numpy as np

# Play `episodes` games of TicTacToe between an alpha-beta agent using the
# 'weighted_open_wins' strategy and a random agent, then report the alpha-beta
# win rate.
episodes = 1000
wins = 0
WIN_SCORE = 100  # used for the search limits and for the final win check
STRATEGY_WEIGHTS = [1, 1, 1, 1, 1, 1, 1, 1]  # uniform weights passed to the strategy

for _episode in trange(episodes):

    game = TicTacToe()

    # +1: the alpha-beta agent moves first (agent1); -1: it moves second (agent2).
    smartAgent = np.random.choice([-1, 1])
    # A fresh list is passed per episode, as in the original per-episode literal.
    alpha_beta = AlphaBetaAgent(upper_lim=WIN_SCORE, lower_lim=-WIN_SCORE, max_depth=3,
                                max_player='x', strategy='weighted_open_wins',
                                weights=list(STRATEGY_WEIGHTS))
    opponent = RandomAgent()
    agent1, agent2 = (alpha_beta, opponent) if smartAgent == 1 else (opponent, alpha_beta)

    a1_turn = True
    while not game.is_terminal():
        # find_opt_move returns a pair; only the resulting game state is needed,
        # the second element is deliberately ignored.
        if a1_turn:
            game, _reward = agent1.find_opt_move(game, game.p1)
        else:
            game, _reward = agent2.find_opt_move(game, game.p2)
        a1_turn = not a1_turn

    # Multiplying by smartAgent flips the sign of the final score when the
    # alpha-beta agent moved second, so a win always shows up as +WIN_SCORE.
    # NOTE(review): presumably eval(1, 1) scores the board from the first
    # player's perspective -- confirm against games.TTT.
    if smartAgent * game.eval(1, 1) == WIN_SCORE:
        wins += 1

print("\nWin Rate:", wins/episodes)

100%|██████████| 1000/1000 [00:03<00:00, 307.66it/s]


Win Rate: 0.726





### Captured Strategy

In [5]:
from games import TTT as TicTacToe
from agents import AlphaBetaAgent, RandomAgent
from tqdm import trange
import numpy as np

# Play `episodes` games of TicTacToe between an alpha-beta agent using the
# 'captured' strategy and a random agent, then report the alpha-beta win rate.
episodes = 1000
wins = 0
WIN_SCORE = 100  # used for the search limits and for the final win check

for _episode in trange(episodes):

    game = TicTacToe()

    # +1: the alpha-beta agent moves first (agent1); -1: it moves second (agent2).
    smartAgent = np.random.choice([-1, 1])
    alpha_beta = AlphaBetaAgent(upper_lim=WIN_SCORE, lower_lim=-WIN_SCORE, max_depth=3,
                                max_player='x', strategy='captured', weights=[])
    opponent = RandomAgent()
    agent1, agent2 = (alpha_beta, opponent) if smartAgent == 1 else (opponent, alpha_beta)

    a1_turn = True
    while not game.is_terminal():
        # find_opt_move returns a pair; only the resulting game state is needed,
        # the second element is deliberately ignored.
        if a1_turn:
            game, _reward = agent1.find_opt_move(game, game.p1)
        else:
            game, _reward = agent2.find_opt_move(game, game.p2)
        a1_turn = not a1_turn

    # Multiplying by smartAgent flips the sign of the final score when the
    # alpha-beta agent moved second, so a win always shows up as +WIN_SCORE.
    # NOTE(review): presumably eval(1, 1) scores the board from the first
    # player's perspective -- confirm against games.TTT.
    if smartAgent * game.eval(1, 1) == WIN_SCORE:
        wins += 1

print("\nWin Rate:", wins/episodes)

100%|██████████| 1000/1000 [00:03<00:00, 309.47it/s]


Win Rate: 0.646





### Weighted Captured Strategy

In [6]:
from games import TTT as TicTacToe
from agents import AlphaBetaAgent, RandomAgent
from tqdm import trange
import numpy as np

# Play `episodes` games of TicTacToe between an alpha-beta agent using the
# 'weighted_captured' strategy and a random agent, then report the alpha-beta
# win rate.
episodes = 1000
wins = 0
WIN_SCORE = 100  # used for the search limits and for the final win check
STRATEGY_WEIGHTS = [1, 1, 1, 1, 1, 1, 1, 1, 1]  # uniform weights passed to the strategy

for _episode in trange(episodes):

    game = TicTacToe()

    # +1: the alpha-beta agent moves first (agent1); -1: it moves second (agent2).
    smartAgent = np.random.choice([-1, 1])
    # A fresh list is passed per episode, as in the original per-episode literal.
    alpha_beta = AlphaBetaAgent(upper_lim=WIN_SCORE, lower_lim=-WIN_SCORE, max_depth=3,
                                max_player='x', strategy='weighted_captured',
                                weights=list(STRATEGY_WEIGHTS))
    opponent = RandomAgent()
    agent1, agent2 = (alpha_beta, opponent) if smartAgent == 1 else (opponent, alpha_beta)

    a1_turn = True
    while not game.is_terminal():
        # find_opt_move returns a pair; only the resulting game state is needed,
        # the second element is deliberately ignored.
        if a1_turn:
            game, _reward = agent1.find_opt_move(game, game.p1)
        else:
            game, _reward = agent2.find_opt_move(game, game.p2)
        a1_turn = not a1_turn

    # Multiplying by smartAgent flips the sign of the final score when the
    # alpha-beta agent moved second, so a win always shows up as +WIN_SCORE.
    # NOTE(review): presumably eval(1, 1) scores the board from the first
    # player's perspective -- confirm against games.TTT.
    if smartAgent * game.eval(1, 1) == WIN_SCORE:
        wins += 1

print("\nWin Rate:", wins/episodes)

100%|██████████| 1000/1000 [00:03<00:00, 296.85it/s]


Win Rate: 0.638





## Nine Men's Morris

In [12]:
from agents import AlphaBetaAgent, RandomAgent
from ninemensmorris import NineMensMorris
from tqdm import trange
import numpy as np

# Play `episodes` games of Nine Men's Morris between an alpha-beta agent and a
# random agent, capping each game at `max_iter` moves, then report the
# alpha-beta win rate.
episodes = 15
wins = 0
max_iter = 100
WIN_SCORE = 1_000_000_000  # used for the search limits and for the final win check

for _episode in range(episodes):

    game = NineMensMorris()

    # +1: the alpha-beta agent moves first (agent1); -1: it moves second (agent2).
    smartAgent = np.random.choice([-1, 1])
    # The nested weights literal is rebuilt every episode, as in the original.
    alpha_beta = AlphaBetaAgent(upper_lim=WIN_SCORE, lower_lim=-WIN_SCORE, max_depth=3,
                                max_player=1, strategy=None,
                                weights=[[1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1]])
    opponent = RandomAgent()
    agent1, agent2 = (alpha_beta, opponent) if smartAgent == 1 else (opponent, alpha_beta)

    a1_turn = True
    # One progress bar per game; the bar stops early when either player has won.
    for _ in trange(max_iter):
        # find_opt_move returns a pair; only the resulting game state is needed,
        # the second element is deliberately ignored.
        if a1_turn:
            game, _reward = agent1.find_opt_move(game, 1)
        else:
            game, _reward = agent2.find_opt_move(game, 2)
        a1_turn = not a1_turn
        if game.isWin(1) or game.isWin(2):
            break

    # Multiplying by smartAgent flips the sign of the final score when the
    # alpha-beta agent moved second, so a win always shows up as +WIN_SCORE.
    # A game cut off at max_iter is counted as a win only if eval still
    # returns exactly the win score.
    # NOTE(review): presumably eval(1, 1) scores the board from player 1's
    # perspective -- confirm against ninemensmorris.NineMensMorris.
    if smartAgent * game.eval(1, 1) == WIN_SCORE:
        wins += 1

print("\nWin Rate:", wins/episodes)

 62%|██████▏   | 62/100 [00:07<00:04,  8.48it/s]
 58%|█████▊    | 58/100 [00:11<00:08,  4.92it/s]
 60%|██████    | 60/100 [00:11<00:07,  5.42it/s]
 91%|█████████ | 91/100 [00:13<00:01,  6.95it/s]
 55%|█████▌    | 55/100 [00:11<00:09,  4.81it/s]
 63%|██████▎   | 63/100 [00:11<00:06,  5.50it/s]
 38%|███▊      | 38/100 [00:06<00:11,  5.45it/s]
100%|██████████| 100/100 [02:17<00:00,  1.38s/it]
100%|██████████| 100/100 [01:10<00:00,  1.42it/s]
 52%|█████▏    | 52/100 [00:09<00:09,  5.20it/s]
 99%|█████████▉| 99/100 [00:18<00:00,  5.23it/s]
 44%|████▍     | 44/100 [00:08<00:10,  5.28it/s]
 64%|██████▍   | 64/100 [00:10<00:05,  6.35it/s]
 50%|█████     | 50/100 [00:08<00:08,  5.56it/s]
 48%|████▊     | 48/100 [00:08<00:08,  5.90it/s]


Win Rate: 0.8666666666666667



