# Adverserial Search and Games

## Outline
1. Problem setup
2. Minimax algorithm
3. Alpha-beta algorithm

## 1. Problem setup

In [1]:
## Problem: Tic-Tac-Toe game
# 'x' = 1 
# 'o' = -1
# empty = 0
# state: [[0,0,1],[0,-1,0],[0,0,0]]
# 'x' starts first
# The one who makes a row, a column, 
# or a diagonal of the same type wins

class TicTacToe():
    # initialize game with some state
    def __init__(self, state=[[0,0,0],[0,0,0],[0,0,0]]):
        self.state = state

# make a real move: set val to the cell
# with coordinates [row, col]
def make_move(state, row, col, val):
        made_move = False
        if (isinstance(row, int)) and (row>=0) and (row<=2):
            if (isinstance(col, int)) and (col>=0) and (col<=2):
                if state[row][col] == 0:
                    if (val == -1) or (val == 1):
                        state[row][col] = val
                        made_move = True

        return made_move

# try a move: set val to the cell
# with coordinates [row, col]
def try_move(state, row, col, val):
        if (isinstance(row, int)) and (row>=0) and (row<=2):
            if (isinstance(col, int)) and (col>=0) and (col<=2):
                if state[row][col] == 0:
                    if (val == -1) or (val == 1):
                        state[row][col] = val
                        
        return state

# check if the terminal node
def terminal_node(state):
        # result of the game
        # win1 = +10, win2 = -10, tie=0
        result = 0
        isGameOver = False
    
        # check if there is an empty cell
        emptyCells = False
        for i in range(3):
            for j in range(3):
                if state[i][j] == 0:
                    emptyCells = True

        # check rows if there is a winner
        isWinner = False
        for i in range(3):
            sum_p1 = 0
            sum_p2 = 0
            for j in range(3):
                if state[i][j] == 1:
                    sum_p1 += 1
                if state[i][j] == -1:
                    sum_p2 += -1
            if (sum_p1 == 3) or (sum_p2 == -3):
                isWinner = True 
                if (sum_p1 == 3):
                    result = 10
                if (sum_p2 == -3):
                    result = -10

        # check cols if there is a winner
        for j in range(3):
            sum_p1 = 0
            sum_p2 = 0
            for i in range(3):
                if state[i][j] == 1:
                    sum_p1 += 1
                if state[i][j] == -1:
                    sum_p2 += -1
            if (sum_p1 == 3) or (sum_p2 == -3):
                isWinner = True 
                if (sum_p1 == 3):
                    result = 10
                if (sum_p2 == -3):
                    result = -10

        # check diagonals if there is a winner
        sum_p1 = 0
        sum_p2 = 0
        for i in range(3):
            if state[i][i] == 1:
                sum_p1 += 1
            if state[i][i] == -1:
                sum_p2 += -1
        if (sum_p1 == 3) or (sum_p2 == -3):
            isWinner = True 
            if (sum_p1 == 3):
               result = 10
            if (sum_p2 == -3):
               result = -10
            
        sum_p1 = 0
        sum_p2 = 0
        for i in range(3):
            if state[i][2-i] == 1:
                sum_p1 += 1
            if state[i][2-i] == -1:
                sum_p2 += -1
        if (sum_p1 == 3) or (sum_p2 == -3):
            isWinner = True 
            if (sum_p1 == 3):
               result = 10
            if (sum_p2 == -3):
               result = -10

        isGameOver = isWinner or not emptyCells
        return {"gameover": isGameOver, "result": result}
                
# find the children of the given state
# returns the coordinates (x,y) of empty cells
def expand_state(state):
    children = []
    for i in range(3):
        for j in range(3):
            if state[i][j] == 0:
                child = [i,j]
                children.append(child)
    return children

In [2]:
# setup the game
state = [[0, 1, 1],[-1,0,1],[0,1,-1]]
x,y = terminal_node(state)
print(x,y)
ch = expand_state(state)
print(ch)

gameover result
[[0, 0], [1, 1], [2, 0]]


## 2. Minimax algorithm

In [3]:
import math, copy

# minimax algorithm for 2 players
count_terminal = 0
def minimax(state, depth, isMaxPlayer):
    global count_terminal
    #print("depth: ", state, depth, isMaxPlayer)
    if (depth == 0) or terminal_node(state)["gameover"]: 
       count_terminal += 1 
       #print("depth: ", state, depth, isMaxPlayer, terminal_node(state)["result"])
       return  terminal_node(state)["result"]

    if isMaxPlayer: # player maximizes his score
        v_max = -math.inf
        children = expand_state(state)
        for pos in children:
            child = copy.deepcopy(state)
            child[pos[0]][pos[1]] = 1
            v = minimax(child, depth - 1, not isMaxPlayer)
            v_max = max(v_max, v)
        return v_max
    else: # player minimizes his score
        v_min = math.inf
        children = expand_state(state)
        for pos in children:
            child = copy.deepcopy(state)
            child[pos[0]][pos[1]] = -1
            v = minimax(child, depth - 1, not isMaxPlayer)
            v_min = min(v_min, v)
        return v_min

In [4]:
count_terminal = 0
state = [[0,0,0],[0,0,-1],[0,0,0]]
depth = 8
isMaxPlayer = True
v = minimax(state, depth, isMaxPlayer)
print(v)
print(count_terminal)

0
29592


## 3. Alpha-Beta algorithm

In [5]:
import math, copy

# minimax algorithm for 2 players
count_terminal = 0
def alphabeta(state, depth, alpha, beta, isMaxPlayer):
    global count_terminal
    if (depth == 0) or terminal_node(state)["gameover"]: 
       count_terminal += 1
       return  terminal_node(state)["result"]

    if isMaxPlayer: # player maximizes his score
        v_max = -math.inf
        children = expand_state(state)
        for pos in children:
            child = copy.deepcopy(state)
            child[pos[0]][pos[1]] = 1
            v = alphabeta(child, depth - 1, alpha, beta, not isMaxPlayer)
            v_max = max(v_max, v)
            alpha = max(alpha, v)
            if beta <= alpha:
                break
        return v_max
    else: # player minimizes his score
        v_min = math.inf
        children = expand_state(state)
        for pos in children:
            child = copy.deepcopy(state)
            child[pos[0]][pos[1]] = -1
            v = alphabeta(child, depth - 1, alpha, beta, not isMaxPlayer)
            v_min = min(v_min, v)
            beta = min(beta, v)
            if beta <= alpha:
                break
        return v_min

In [6]:
count_terminal = 0
state = [[0, 0, 0],[0,0,0],[0,0,-1]]
depth = 8
alpha = -math.inf
beta = math.inf
isMaxPlayer = True
v = alphabeta(state, depth, alpha, beta, isMaxPlayer)
print(v)
print(count_terminal)

0
1622
