[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/real-itu/modern-ai-course/blob/master/lecture-02/lab.ipynb)

# Lab 2 - Adversarial Search

[Connect 4](https://en.wikipedia.org/wiki/Connect_Four) is a classic board game in which two players alternate placing markers in columns, and the goal is to get 4 in a row, either horizontally, vertically or diagonally. See the short video below.

In [None]:
# Embed the short Connect 4 video referenced in the text above in the cell output.
from IPython.display import YouTubeVideo
YouTubeVideo("ylZBRUJi3UQ")

The game is implemented below. It will play a game where both players take random (legal) actions. The MAX player is represented with an X and the MIN player with an O. The MAX player starts. Execute the code.

In [None]:
import random
from copy import deepcopy
from typing import Sequence

NONE = '.'  # marker stored in an empty cell
MAX = 'X'  # marker of the MAX player, who makes the first move
MIN = 'O'  # marker of the MIN player
COLS = 7  # number of columns on the board
ROWS = 6  # number of cells in each column
N_WIN = 4  # run length utility() looks for to declare a winner


class ArrayState:
    """Snapshot of a Connect 4 game, stored column by column.

    Attributes:
        board: list of columns; `init()` builds COLS lists of ROWS markers.
        heights: per-column count of pieces already dropped.
        n_moves: number of moves played so far.
    """

    def __init__(self, board, heights, n_moves):
        self.n_moves = n_moves
        self.heights = heights
        self.board = board

    @staticmethod
    def init():
        """Return the starting state: an all-empty board and zero moves."""
        empty_heights = [0] * COLS
        empty_board = []
        for _ in range(COLS):
            # Each column needs its own list so columns never alias each other.
            empty_board.append([NONE] * ROWS)
        return ArrayState(empty_board, empty_heights, 0)


def result(state: ArrayState, action: int) -> ArrayState:
    """Return the state reached by dropping a piece into column `action`.

    The input state is not modified. The piece is MAX when an even number of
    moves has been played so far and MIN otherwise.

    Raises:
        AssertionError: if `action` is not a valid column index.
        Exception: if the chosen column is already full.
    """
    assert 0 <= action < COLS, "action must be a column number"

    filled = state.heights[action]
    if filled >= ROWS:
        raise Exception('Column is full')

    player = MAX if state.n_moves % 2 == 0 else MIN

    # Cells are immutable one-character strings, so copying each column list
    # gives a fully independent board without the overhead of deepcopy. This
    # function runs once per node of the game-tree search.
    board = [list(column) for column in state.board]
    # Pieces stack from the end of the column list (index ROWS - 1) upwards.
    board[action][ROWS - filled - 1] = player

    heights = list(state.heights)
    heights[action] += 1

    return ArrayState(board, heights, state.n_moves + 1)


def actions(state: ArrayState) -> Sequence[int]:
    """Return the indices of all columns that still have room for a piece."""
    open_columns = []
    for column in range(COLS):
        if state.heights[column] < ROWS:
            open_columns.append(column)
    return open_columns


def utility(state: ArrayState) -> float:
    """Return 1 if MAX has N_WIN in a row, -1 if MIN has, and 0 otherwise.

    Lines are examined in this order: columns, rows, negative diagonals,
    positive diagonals. The first line containing a winning run decides.
    """
    grid = state.board

    # Columns first, then rows (the transposed board).
    candidates = grid + list(zip(*grid))

    diagonal_ids = range(COLS + ROWS - 1)

    # Negative diagonals: the row index grows together with the column index.
    for shift in diagonal_ids:
        cells = ((col, shift - COLS + col + 1) for col in range(COLS))
        candidates.append([grid[col][row] for col, row in cells if 0 <= row < ROWS])

    # Positive diagonals: the row index shrinks as the column index grows.
    for shift in diagonal_ids:
        cells = ((col, shift - col) for col in range(COLS))
        candidates.append([grid[col][row] for col, row in cells if 0 <= row < ROWS])

    x_run = MAX * N_WIN
    o_run = MIN * N_WIN
    for cells in candidates:
        text = "".join(cells)
        if x_run in text:
            return 1
        if o_run in text:
            return -1
    return 0


def terminal_test(state: ArrayState) -> bool:
    """Return True when the board is full or one of the players has won."""
    if state.n_moves >= COLS * ROWS:
        return True
    return utility(state) != 0


def printBoard(state: ArrayState):
    """Print the board.

    Output is a header row of column indices, then one line per board row
    (cells separated by two spaces), then a blank line. Index 0 of each
    column is printed first, so pieces appear to stack from the bottom.
    """
    board = state.board
    print('  '.join(map(str, range(COLS))))
    for y in range(ROWS):
        print('  '.join(str(board[x][y]) for x in range(COLS)))
    print()


if __name__ == '__main__':
    # Play one complete game in which both sides pick a random legal column.
    s = ArrayState.init()
    while not terminal_test(s):
        a = random.choice(actions(s))
        s = result(s, a)
        printBoard(s)  # show the board after every move
    # Final score: 1 if X (MAX) won, -1 if O (MIN) won, 0 otherwise.
    print(utility(s))


The last number printed (0, -1 or 1) is the utility, or score, of the game: 0 means it was a draw, 1 means the MAX player won and -1 means the MIN player won.

### Exercise 1

Modify the code so that you can play manually as the MIN player against the random AI.

In [None]:
if __name__ == '__main__':
    # Human ("Programmer") versus a random-move AI; who starts is random.
    s = ArrayState.init()
    player = random.choice(['AI','Programmer'])
    while not terminal_test(s):
        if player == 'AI':
            a = random.choice(actions(s))
            player = 'Programmer'
        else:
            # Keep asking until the input is an integer naming a column that
            # still has room. Non-numeric text and full columns previously
            # crashed the game.
            legal = actions(s)
            a = None
            while a not in legal:
                try:
                    a = int(input())
                except ValueError:
                    a = None
                if a not in legal:
                    print('Invalid value, insert new value')
            player = 'AI'
        s = result(s, a)
        printBoard(s)
    # Final score: 1 if X (MAX) won, -1 if O (MIN) won, 0 otherwise.
    print(utility(s))


### Exercise 2

Implement standard minimax with a fixed depth search. Modify the utility function to handle non-terminal positions using heuristics. Find a value for the depth such that moves don't take longer than approx. 1s to evaluate. See if you can beat your connect4 AI.

In [None]:
def check_utility(state: ArrayState, action, isMaximizingPlayer) -> int:
    """Score `action` by the longest new run of one player's pieces it creates.

    The move is applied with result(), and every line of the resulting board
    is compared with the same line of the current board. The score is the
    largest length in 4..1 for which some line contains that many consecutive
    pieces after the move but did not before, or 0 if there is none.

    Args:
        state: position before the move.
        action: column to drop the piece into.
        isMaximizingPlayer: truthy to look for runs of MAX, falsy for MIN.
    """

    def collect_lines(grid):
        """Return columns, rows, negative and positive diagonals of `grid`."""
        found = grid + list(zip(*grid))
        diagonal_ids = range(COLS + ROWS - 1)
        # Negative diagonals: row index grows with the column index.
        for shift in diagonal_ids:
            cells = ((col, shift - COLS + col + 1) for col in range(COLS))
            found.append([grid[col][row] for col, row in cells if 0 <= row < ROWS])
        # Positive diagonals: row index shrinks as the column index grows.
        for shift in diagonal_ids:
            cells = ((col, shift - col) for col in range(COLS))
            found.append([grid[col][row] for col, row in cells if 0 <= row < ROWS])
        return found

    after_state = result(state, action)
    after = ["".join(line) for line in collect_lines(after_state.board)]
    before = ["".join(line) for line in collect_lines(state.board)]

    piece = MAX if isMaximizingPlayer else MIN

    # Try the longest run first so the best improvement wins.
    for run in range(4, 0, -1):
        pattern = piece * run
        for new_text, old_text in zip(after, before):
            if pattern in new_text and pattern not in old_text:
                return run
    return 0

In [None]:
def minimax(board, isMaximizingPlayer):
    """Pick the column with the highest one-ply check_utility() score.

    Ties between equally scored columns are broken at random. `board` is the
    game state (not just the grid).
    """
    columns = actions(board)
    scores = [check_utility(board, col, isMaximizingPlayer) for col in columns]

    top_score = max(scores, default=-float('inf'))
    candidates = [col for col, score in zip(columns, scores) if score == top_score]

    # Only consult the random generator when there really is a tie.
    if len(candidates) > 1:
        return random.choice(candidates)
    return candidates[0]

In [None]:
if __name__ == '__main__':
    # Human ("Programmer") versus the one-ply greedy AI; who starts is random.
    s = ArrayState.init()
    player = random.choice(['AI','Programmer'])
    # The side that moves first places X, so the AI is MAX exactly when it starts.
    isMaximizingPlayer = player == 'AI'
    while not terminal_test(s):
        if player == 'AI':
            # minimax() returns only the chosen column, so there is nothing
            # to unpack. The old `_ , move = ...` raised TypeError.
            move = minimax(s, isMaximizingPlayer)
            s = result(s, move)
            printBoard(s)
            player = 'Programmer'
        else:
            # Keep asking until the input is an integer naming a column that
            # still has room. Non-numeric text and full columns previously
            # crashed the game.
            legal = actions(s)
            a = None
            while a not in legal:
                try:
                    a = int(input())
                except ValueError:
                    a = None
                if a not in legal:
                    print('Invalid value, insert new value')
            s = result(s, a)
            printBoard(s)
            player = 'AI'
    # Final score: 1 if X (MAX) won, -1 if O (MIN) won, 0 otherwise.
    print(utility(s))


## Deep Minimax

In [None]:
def check_utility(state: ArrayState, old_board) -> int:
    """Return a heuristic score of `state`; positive favours MAX.

    Every line containing four MAX pieces in a row adds 20 and every line
    containing four MIN pieces in a row subtracts 20. When `old_board` is
    given, each line that contains a run of 2 (or 3) MAX pieces which the
    same line of `old_board` lacked adds 2 (or 3); MIN runs subtract likewise.

    Args:
        state: position to evaluate.
        old_board: grid (list of columns) to compare against, or None to
            score completed fours only.
    """

    def collect_lines(grid):
        """Return columns, rows, negative and positive diagonals of `grid`."""
        found = grid + list(zip(*grid))
        diagonal_ids = range(COLS + ROWS - 1)
        # Negative diagonals: row index grows with the column index.
        for shift in diagonal_ids:
            cells = ((col, shift - COLS + col + 1) for col in range(COLS))
            found.append([grid[col][row] for col, row in cells if 0 <= row < ROWS])
        # Positive diagonals: row index shrinks as the column index grows.
        for shift in diagonal_ids:
            cells = ((col, shift - col) for col in range(COLS))
            found.append([grid[col][row] for col, row in cells if 0 <= row < ROWS])
        return found

    # Join each line once instead of once per run length.
    new_texts = ["".join(line) for line in collect_lines(state.board)]

    score = 0
    for text in new_texts:
        if MAX * 4 in text:
            score += 20
        if MIN * 4 in text:
            score -= 20

    if old_board is None:
        return score

    old_texts = ["".join(line) for line in collect_lines(old_board)]

    for run in (2, 3):
        max_run = MAX * run
        min_run = MIN * run
        for new_text, old_text in zip(new_texts, old_texts):
            if max_run in new_text and max_run not in old_text:
                score += run
            if min_run in new_text and min_run not in old_text:
                score -= run

    return score

In [None]:
def minimax(board, maxDepth, old_board, is_Maximizing_Player):
    """Return the depth-limited minimax value of the state `board`.

    Args:
        board: game state to evaluate (not just the grid).
        maxDepth: remaining plies to search; 0 evaluates `board` directly.
        old_board: reference grid, passed unchanged to check_utility() at
            every leaf.
        is_Maximizing_Player: truthy if the side to move maximises the value.

    Returns:
        check_utility(board, old_board) at a leaf, otherwise the max or min
        of the children's values.
    """
    if maxDepth == 0 or terminal_test(board):
        return check_utility(board, old_board)

    # result() already returns an independent state, so the children need no
    # additional deepcopy.
    child_values = (
        minimax(result(board, action), maxDepth - 1, old_board, not is_Maximizing_Player)
        for action in actions(board)
    )

    # The defaults reproduce the neutral start values of the explicit loops.
    if is_Maximizing_Player:
        return max(child_values, default=-float('inf'))
    return min(child_values, default=float('inf'))

In [None]:
import time

if __name__ == '__main__':
    # Human ("Programmer") versus the depth-limited minimax AI.
    s = ArrayState.init()
    depth = 5
    player = random.choice(['AI','Programmer'])
    # The side that moves first places X, so the AI is MAX exactly when it starts.
    isMaximizingPlayer = player == 'AI'

    while not terminal_test(s):
        if player == 'AI':
            start_time = time.time()
            actions_s = actions(s)
            # result() never modifies its input, so the current grid can be
            # used as the reference board without copying it.
            old_board = s.board
            # After the AI's move the opponent is to play, hence the negation.
            values = [
                minimax(result(s, action), depth, old_board, not isMaximizingPlayer)
                for action in actions_s
            ]

            # Check the values
            print(values)
            best_val = max(values) if isMaximizingPlayer else min(values)
            # Break ties between equally good columns at random.
            all_best = [action for action, value in zip(actions_s, values) if value == best_val]
            if len(all_best) > 1:
                move = random.choice(all_best)
            else:
                move = all_best[0]

            print("--- %s seconds ---" % (time.time() - start_time))
            s = result(s, move)
            printBoard(s)
            player = 'Programmer'
        else:
            # Keep asking until the input is an integer naming a column that
            # still has room. Non-numeric text and full columns previously
            # crashed the game.
            legal = actions(s)
            a = None
            while a not in legal:
                try:
                    a = int(input())
                except ValueError:
                    a = None
                if a not in legal:
                    print('Invalid value, insert new value')
            s = result(s, a)
            printBoard(s)
            player = 'AI'
    print(check_utility(s, None))


Depth = 5 keeps each move within the one-minute maximum.

### Exercise 3

Add alpha/beta pruning to your minimax. Change your depth so that moves still take approx. 1 second to evaluate. How much deeper can you search? See if you can beat your connect4 AI.

In [None]:
def check_utility(state: ArrayState) -> int:
    """Return a heuristic score of `state`; positive favours MAX.

    If the balance of completed fours is non-zero the score is 60 per line
    (20 * 3), positive for MAX and negative for MIN. Otherwise every line
    containing a run of 2 or 3 pieces adds (MAX) or subtracts (MIN) the run
    length.
    """

    def collect_lines(grid):
        """Return columns, rows, negative and positive diagonals of `grid`."""
        found = grid + list(zip(*grid))
        diagonal_ids = range(COLS + ROWS - 1)
        # Negative diagonals: row index grows with the column index.
        for shift in diagonal_ids:
            cells = ((col, shift - COLS + col + 1) for col in range(COLS))
            found.append([grid[col][row] for col, row in cells if 0 <= row < ROWS])
        # Positive diagonals: row index shrinks as the column index grows.
        for shift in diagonal_ids:
            cells = ((col, shift - col) for col in range(COLS))
            found.append([grid[col][row] for col, row in cells if 0 <= row < ROWS])
        return found

    texts = ["".join(line) for line in collect_lines(state.board)]

    max_four = MAX * 4
    min_four = MIN * 4
    max_lines = sum(1 for text in texts if max_four in text)
    min_lines = sum(1 for text in texts if min_four in text)
    win = 20 * (max_lines - min_lines)
    if win:
        # A decided game outweighs any combination of shorter runs.
        return win * 3

    score = 0
    for run in (2, 3):
        max_run = MAX * run
        min_run = MIN * run
        score += run * sum(1 for text in texts if max_run in text)
        score -= run * sum(1 for text in texts if min_run in text)
    return score

In [None]:
def alphabeta(board, maxDepth, alpha, beta, is_Maximizing_Player):
    """Return the depth-limited minimax value of `board` using alpha-beta pruning.

    Args:
        board: game state to evaluate (not just the grid).
        maxDepth: remaining plies to search; 0 evaluates `board` directly.
        alpha: best value the maximising side is already assured of.
        beta: best value the minimising side is already assured of.
        is_Maximizing_Player: truthy if the side to move maximises the value.

    Returns:
        check_utility(board) at a leaf, otherwise the best child value found
        before a cutoff.
    """
    if maxDepth == 0 or terminal_test(board):
        return check_utility(board)

    # result() already returns an independent state, so children are used
    # directly instead of being deep-copied a second time.
    if is_Maximizing_Player:
        best_val = -float('inf')
        for action in actions(board):
            child = result(board, action)
            best_val = max(best_val, alphabeta(child, maxDepth - 1, alpha, beta, False))
            alpha = max(alpha, best_val)
            if best_val >= beta:
                # The minimiser already has a better option elsewhere.
                break
        return best_val

    # Minimizing player
    best_val = float('inf')
    for action in actions(board):
        child = result(board, action)
        best_val = min(best_val, alphabeta(child, maxDepth - 1, alpha, beta, True))
        beta = min(beta, best_val)
        if best_val <= alpha:
            # The maximiser already has a better option elsewhere.
            break
    return best_val

In [None]:
def make_move(isMaximizingPlayer, s):
    """Choose a column for the side to move using alphabeta().

    Every legal column is searched to the module-level `depth` with a full
    (-inf, +inf) window. The column with the highest value (MAX) or lowest
    value (MIN) is returned, and ties are broken at random.

    Args:
        isMaximizingPlayer: truthy if the side to move is MAX.
        s: current game state.

    Raises:
        ValueError: if `s` has no legal moves.
    """
    actions_s = actions(s)
    if not actions_s:
        raise ValueError('no legal moves available')

    # After our move the opponent is to play, hence the negated flag.
    # result() returns an independent state, so no deepcopy is needed.
    values = [
        alphabeta(result(s, action), depth, -float('inf'), float('inf'), not isMaximizingPlayer)
        for action in actions_s
    ]

    best_val = max(values) if isMaximizingPlayer else min(values)
    all_best = [action for action, value in zip(actions_s, values) if value == best_val]

    # Only consult the random generator when there really is a tie.
    if len(all_best) > 1:
        return random.choice(all_best)
    return all_best[0]

In [None]:
import time

if __name__ == '__main__':
    # Self-play with alpha-beta search: both sides are driven by make_move();
    # `player` only tracks whose turn it is.
    s = ArrayState.init()
    depth = 6  # read as a global by make_move()
    player = random.choice(['AI','Programmer'])
    # True when the 'AI' side moves first and therefore plays MAX.
    if player == 'AI':
        isMaximizingPlayer = True
    else:
         isMaximizingPlayer = False
            
    while not terminal_test(s):
        if player == 'AI':
            start_time = time.time()
            move = make_move(isMaximizingPlayer, s)
            print("--- %s seconds ---" % (time.time() - start_time))    
            s = result(s, move)
            printBoard(s)
            player = 'Programmer'
        else:
            # The other side searches with the opposite objective.
            start_time = time.time()
            a = make_move(not isMaximizingPlayer, s)
            print("--- %s seconds ---" % (time.time() - start_time))  
            s = result(s, a)
            printBoard(s)
            player = 'AI'
    # Heuristic score of the final position.
    print(check_utility(s))


### Exercise 4

Add move ordering. The middle columns are often "better" since there are more winning positions that contain them. Evaluate the moves in this order: [3,2,4,1,5,0,6]. How much deeper can you search now? See if you can beat your connect4 AI.

In [None]:
def make_move_optim(isMaximizingPlayer, s):
    """Choose a column with alphabeta(), trying centre columns first.

    Legal columns are searched in the order 3, 2, 4, 1, 5, 0, 6 to the
    module-level `depth`. The best value found so far is passed on as the
    search window for the remaining columns, so a column that cannot beat it
    is cut off early. Among equally good columns the first in that order is
    returned.

    Args:
        isMaximizingPlayer: truthy if the side to move is MAX.
        s: current game state.

    Raises:
        ValueError: if `s` has no legal moves.
    """
    legal = actions(s)
    ordered = [column for column in (3, 2, 4, 1, 5, 0, 6) if column in legal]
    if not ordered:
        raise ValueError('no legal moves available')

    if isMaximizingPlayer:
        best_val = -float('inf')
        for action in ordered:
            # alpha = best_val: a child that cannot exceed it returns a value
            # <= best_val and is rejected by the strict comparison below, so
            # the chosen move is the same as with a full window.
            value = alphabeta(result(s, action), depth, best_val, float('inf'), False)
            if value > best_val:
                best_val = value
                move = action
    else:
        best_val = float('inf')
        for action in ordered:
            # Mirror image: beta = best_val bounds the search from above.
            value = alphabeta(result(s, action), depth, -float('inf'), best_val, True)
            if value < best_val:
                best_val = value
                move = action

    return move

In [None]:
import time

if __name__ == '__main__':
    # Self-play with move ordering: both sides are driven by
    # make_move_optim(); `player` only tracks whose turn it is.
    s = ArrayState.init()
    depth = 7  # read as a global by make_move_optim()
    player = random.choice(['AI','Programmer'])
    # True when the 'AI' side moves first and therefore plays MAX.
    if player == 'AI':
        isMaximizingPlayer = True
    else:
         isMaximizingPlayer = False
            
    while not terminal_test(s):
        if player == 'AI':
            start_time = time.time()
            move = make_move_optim(isMaximizingPlayer, s)
            print("--- %s seconds ---" % (time.time() - start_time))    
            s = result(s, move)
            printBoard(s)
            player = 'Programmer'
        else:
            # The other side searches with the opposite objective.
            start_time = time.time()
            a = make_move_optim(not isMaximizingPlayer, s)
            print("--- %s seconds ---" % (time.time() - start_time))  
            s = result(s, a)
            printBoard(s)
            player = 'AI'
    # Heuristic score of the final position.
    print(check_utility(s))


### Exercise 5 - Optional

Improve your AI somehow. Consider 


* Better heuristics
* Faster board representations (look up bitboards)
* Adding a transposition table (see class below)
* Better move ordering

In [None]:
class TranspositionTable:
    """Fixed-size cache mapping board positions to utilities.

    Each position hashes to one slot. A later position that lands in the same
    slot overwrites the earlier entry.
    """

    def __init__(self, size=1_000_000):
        self.size = size
        self.vals = [None] * size

    def board_str(self, state: ArrayState):
        """Return all cells of the board, column after column, as one string."""
        return ''.join(cell for column in state.board for cell in column)

    def put(self, state: ArrayState, utility: float):
        """Store `utility` for `state`, replacing whatever occupied its slot."""
        key = self.board_str(state)
        slot = hash(key) % self.size
        self.vals[slot] = (key, utility)

    def get(self, state: ArrayState):
        """Return the utility stored for `state`, or None if it is not cached."""
        key = self.board_str(state)
        entry = self.vals[hash(key) % self.size]
        # The key is stored with the value so a different position that
        # collided into this slot is not mistaken for a hit.
        if entry is not None and entry[0] == key:
            return entry[1]
        return None