Copyright **`(c)`** 2021 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see 'LICENCE.md' for details.

# Connect 4

In [1]:
from collections import Counter
import numpy as np
import math
import copy

In [2]:
class Game:
    """Connect-4 game state for two players encoded as ``1`` and ``-1``.

    The board is a ``(cols, rows)`` array indexed as ``_board[column, row]``;
    ``0`` marks an empty cell and a dropped disc occupies the lowest-index
    empty row of its column.
    """

    def __init__(self, rows=6, cols=7):
        self._rows = rows
        self._cols = cols
        self._nb = 4  # number of aligned discs required to win
        self._board = None
        self._starter = None
        self._turn = None
        self._won = None
        self.last_move = None
        self.create_game()

    def create_game(self):
        """Creates the game state (board and variables)"""
        self._board = np.zeros((self._cols, self._rows), dtype=np.byte)
        self._starter = 1
        self._turn = self._starter
        self._won = None
        self.last_move = None

    def valid_moves(self):
        """Returns columns where a disc may be played"""
        return [n for n in range(self._cols) if self._board[n, self._rows - 1] == 0]

    def _check_column(self, column):
        """Raises `ValueError` unless `column` is a board column index"""
        if column not in range(self._cols):
            raise ValueError(f"Invalid column: {column!r}")

    def play(self, column, player):
        """Updates `board` as `player` drops a disc in `column`

        Raises `ValueError` if `column` is not a board column or is full.
        """
        self._check_column(column)
        free_rows = np.flatnonzero(self._board[column] == 0)
        if free_rows.size == 0:
            raise ValueError(f"Column {column!r} is full")
        index = int(free_rows[0])
        self._board[column, index] = player
        self.last_move = [column, index]

    def take_back(self, column):
        """Updates `board` removing top disc from `column`

        Raises `ValueError` if `column` is not a board column and
        `IndexError` if the column holds no disc. Resets `last_move`.
        """
        self._check_column(column)
        filled_rows = np.flatnonzero(self._board[column])
        if filled_rows.size == 0:
            raise IndexError(f"Column {column!r} is empty")
        self._board[column, filled_rows[-1]] = 0
        self.last_move = None

    def four_in_a_row(self, player):
        """Checks if `player` has a 4-piece line"""
        nb = self._nb
        mine = self._board == player
        col_starts = range(self._cols - nb + 1)
        row_starts = range(self._rows - nb + 1)

        # Lines inside a single column
        if any(mine[c, r:r + nb].all() for c in range(self._cols) for r in row_starts):
            return True
        # Lines across adjacent columns at the same height
        if any(mine[c:c + nb, r].all() for r in range(self._rows) for c in col_starts):
            return True
        # Both diagonal directions of every nb x nb window
        steps = np.arange(nb)
        for c in col_starts:
            for r in row_starts:
                if mine[c + steps, r + steps].all():
                    return True
                if mine[c + steps, r + nb - 1 - steps].all():
                    return True
        return False

    def move(self, move):
        """Plays column `move` for the player to move, then passes the turn

        The winner is taken from the board itself rather than from
        `eval_board`, whose Monte Carlo estimate can be exactly 1 or -1
        even when nobody has four in a row.
        """
        self.play(move, self._turn)
        if self.four_in_a_row(1):
            self._won = 1
        elif self.four_in_a_row(-1):
            self._won = -1
        self._turn = -self._turn

    def _mc(self, player):
        """Runs one random playout in which `player` moves first

        Returns the winning player, or 0 when the board fills up first.
        The board and `last_move` are restored before returning.
        """
        saved_board = self._board.copy()
        saved_last_move = self.last_move
        try:
            p = -player
            while True:
                moves = self.valid_moves()
                if not moves:
                    return 0
                p = -p
                self.play(np.random.choice(moves), p)
                if self.four_in_a_row(p):
                    return p
        finally:
            self._board = saved_board
            self.last_move = saved_last_move

    def montecarlo(self, player, montecarlo_samples=100):
        """Estimates the position with `montecarlo_samples` random playouts

        Returns (wins of player 1 - wins of player -1) / number of playouts,
        a value in [-1, 1]; `player` is the one moving first in each playout.
        """
        if montecarlo_samples < 1:
            raise ValueError("montecarlo_samples must be at least 1")
        cnt = Counter(self._mc(player) for _ in range(montecarlo_samples))
        return (cnt[1] - cnt[-1]) / montecarlo_samples

    def eval_board(self, player):
        """Scores the board: 1 or -1 if that player has won, else a Monte Carlo estimate"""
        if self.four_in_a_row(1):
            return 1
        elif self.four_in_a_row(-1):
            return -1
        else:
            return self.montecarlo(player)

    def copy_state(self):
        """Returns an independent deep copy of the game"""
        return copy.deepcopy(self)

    def get_win(self):
        """Returns the winner recorded by `move` (1 or -1), or None"""
        return self._won

    def get_turn(self):
        """Returns the player whose turn it is"""
        return self._turn

    def get_board(self):
        """Returns a copy of the board"""
        return self._board.copy()

## Minimax

Implementation of the Minimax algorithm with a configurable search depth and alpha-beta pruning.

The goal of the Minimax algorithm is to tell a player which move to make in order to maximize its chances of winning the whole game. At each turn of a player, starting from the current board, the code alternately simulates the possible moves of the player and those of its adversary, with the goal of maximizing the winning chances of the former while minimizing those of the latter. At each depth of the simulation, the algorithm simulates up to 7 boards, each having a piece dropped in one of the free columns. Each board is given a score through the provided Monte Carlo evaluation. After all the boards of the tree have been created, a board is selected at each depth, starting from the bottom of the tree, based on its reward and on the type of node (min or max): the rewards are minimized or maximized depending on whether the depth is odd or even, i.e. whether it corresponds to the player or to its adversary. The final choice is made among the (up to) 7 boards reachable from the current one, using the scores updated through the reward procedure described above.

To get the adversary to play as an optimal player, each of the two players is in turn considered as the maximizing player, hence trying to maximize the reward when a max node is encountered.

In [3]:
class Minimax:
    """Depth-limited minimax search with alpha-beta pruning.

    The search plays and takes back discs directly on the wrapped game
    object; the player returned by `game.get_turn()` is the maximizing one.
    """

    def __init__(self, game):
        self._game = game

    def minimax(self, depth, alpha, beta, maximizingPlayer):
        """Searches `depth` plies ahead and returns `(column, value)`.

        `alpha` and `beta` are the pruning bounds (start with -inf / +inf).
        `maximizingPlayer` is True when the player returned by `get_turn()`
        moves at this node and False when its adversary does.

        `value` is always expressed from the point of view of the player
        returned by `get_turn()`: +inf / -inf for a won / lost position,
        0 for a full board, otherwise the board evaluation. `column` is
        None when no move is searched (win, full board or depth 0).
        """
        game = self._game
        me = game.get_turn()

        # `play` never updates the recorded winner, so look at the board itself
        if game.four_in_a_row(me):
            return None, float("inf")
        if game.four_in_a_row(-me):
            return None, float("-inf")

        valid_locations = game.valid_moves()
        if not valid_locations:
            return None, 0

        if depth == 0:
            # `eval_board` scores for player 1; flip the sign for player -1.
            # Its random playouts start with the player moving at this node.
            to_move = me if maximizingPlayer else -me
            return None, me * game.eval_board(to_move)

        # Random fallback, kept when no move improves on the initial value
        column = np.random.choice(valid_locations)

        if maximizingPlayer:
            val = float("-inf")
            for col in valid_locations:
                game.play(col, me)
                new_score = self.minimax(depth - 1, alpha, beta, False)[1]
                game.take_back(col)
                if new_score > val:
                    val = new_score
                    column = col
                alpha = max(alpha, val)
                if alpha >= beta:
                    break
            return column, val

        val = float("inf")
        for col in valid_locations:
            game.play(col, -me)
            new_score = self.minimax(depth - 1, alpha, beta, True)[1]
            game.take_back(col)
            if new_score < val:
                val = new_score
                column = col
            beta = min(beta, val)
            if alpha >= beta:
                break
        return column, val


### AI vs AI

In [4]:
game = Game()
depth = 2
solver = Minimax(game)

# Stop on a win, or when the board is full (draw) and no move can be played
while not game.get_win() and game.valid_moves():
    move, val = solver.minimax(depth, float("-inf"), float("inf"), True)
    game.move(move)

if game.get_win() == 1:
    print("Player 1 won !")
elif game.get_win() == -1:
    print("Player 1 lost...")
else:
    print("Draw.")
print(game.get_board())

Player 1 won !
[[-1 -1  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [-1  0  0  0  0  0]]


### AI vs Human

In [5]:
game = Game()
depth = 2
solver = Minimax(game)

# Stop on a win, or when the board is full (draw) and no move can be played
while not game.get_win() and game.valid_moves():
    # AI (player 1) moves first
    move, val = solver.minimax(depth, float("-inf"), float("inf"), True)
    game.move(move)
    print(game.get_board())
    if game.get_win() or not game.valid_moves():
        break  # the AI's move ended the game: do not ask for another move

    # Human (player -1): keep asking until a playable column is entered
    move = None
    while move not in game.valid_moves():
        try:
            move = int(input())
        except ValueError:
            move = None
        if move not in game.valid_moves():
            print("Invalid move, choose one of", game.valid_moves())
    game.move(move)

if game.get_win() == 1:
    print("Player 1 won !")
elif game.get_win() == -1:
    print("Player 1 lost...")
else:
    print("Draw.")
print(game.get_board())

[[0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [1 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]]
0
[[-1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
5
[[-1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
2
[[-1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [-1  1  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
0
[[-1 -1  1  0  0  0]
 [ 1  0  0  0  0  0]
 [-1  1  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 0  0  0  0  0  0]]
5
[[-1 -1  1  0  0  0]
 [ 1  0  0  0  0  0]
 [-1  1  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [-1 -1  1  0  0  0]
 [ 0  0  0  0  0  0]]
4
[[-1 -1  1  0  0  0]
 [ 1  0  0  0  0  0]
 [-1  1  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1 -1  0  0  0  0]
 [-1 -1  1  1  0  0]
 [ 0  0  0  0  0

## MonteCarlo Tree Search

### Node

To implement the Monte-Carlo Tree Search algorithm, we first need to create the nodes of the tree of boards used during Monte-Carlo Tree Search.

In [6]:
class Node:
    """One vertex of the search tree: a game state plus its statistics."""

    def __init__(self, state, parent=None):
        self.state = state
        self.parent = parent
        # children[i] is the node reached by playing children_moves[i]
        self.children = []
        self.children_moves = []
        # The visit counter starts at 1, the accumulated reward at 0
        self.visits = 1
        self.reward = 0.0

    def add_child(self, child_state, move):
        """ Attach a new child holding `child_state`, reached by playing `move` """
        self.children_moves.append(move)
        self.children.append(Node(child_state, parent=self))

    def update(self, reward):
        """ Count one more visit and accumulate `reward` """
        self.visits += 1
        self.reward += reward

    def fully_explored(self):
        """ Tell whether there are as many children as valid moves in this state """
        return len(self.state.valid_moves()) == len(self.children)

### MCTS

Implementation of the Monte Carlo Tree Search algorithm.

This algorithm performs a series of selection, expansion, random simulation and backpropagation steps for a given number of
iterations (the more iterations, the better the result, but the slower the execution).
A reward system is used to decide which move is the best one to make next.

In [23]:
class MonteCarlo:
    """Monte-Carlo Tree Search over a tree of nodes holding game states.

    Throughout the class, `turn` is the player (1 or -1) who made the move
    leading to the node or state being examined.
    """

    def __init__(self, game):
        self._game = game

    def monte_carlo_tree_search(self, iterations, root, exploration_parameter):
        """ Monte-Carlo Tree Search algorithm

        Runs `iterations` rounds of selection/expansion, simulation and
        backpropagation from `root` and returns the column of the root's
        best child (chosen without exploration bonus).
        Raises `ValueError` if the root has no child to choose from.
        """
        # The last player to move at the root is the adversary of the one to move
        root_turn = -root.state.get_turn()
        for _ in range(iterations):
            node, turn = self.selection(root, root_turn, exploration_parameter)
            reward = self.simulation(node.state, turn)
            self.backpropagation(node, reward, turn)

        if not root.children:
            raise ValueError("No move available: the root node has no children")
        # Read the move from the tree, not from the state's `last_move`
        return root.children_moves[self._best_child_index(root, 0)]

    def selection(self, node, turn, exploration_parameter):
        """ Descend through best children until a node can be expanded

        Returns `(node, turn)`: either a freshly expanded child, or a node
        whose state is won by `turn` or has no valid move left.
        """
        while not node.state.four_in_a_row(turn):
            if not node.state.valid_moves():
                break  # full board: nothing to expand or descend into
            if not node.fully_explored():
                return self.expansion(node), -turn
            node = self.best_child(node, exploration_parameter)
            turn *= -1

        return node, turn

    def expansion(self, node):
        """ Add a child state to the node for its first untried valid move """
        for col in node.state.valid_moves():
            if col not in node.children_moves:
                new_state = node.state.copy_state()
                new_state.move(col)
                node.add_child(new_state, col)
                return node.children[-1]
        raise ValueError("expansion() called on a fully explored node")

    def simulation(self, state_init, turn):
        """ Simulates random moves until the game is over and returns a reward

        The reward is 1 if player -1 wins, -1 if player 1 wins and 0 if the
        board fills up without a winner. `state_init` is left untouched.
        """
        state = state_init.copy_state()
        while not state.four_in_a_row(turn):
            free_cols = state.valid_moves()
            if not free_cols:
                return 0  # draw
            state.move(np.random.choice(free_cols))
            turn *= -1

        if turn == -1:
            return 1
        if turn == 1:
            return -1
        return 0

    def backpropagation(self, node, reward, turn):
        """ Update the visits and rewards of a node and all its ancestors

        With the reward convention of `simulation`, each node's reward
        increases when the player who moved into it won, and decreases
        when that player lost.
        """
        while node is not None:
            node.visits += 1
            node.reward -= turn * reward
            node = node.parent
            turn *= -1

    def _best_child_index(self, node, exploration_parameter):
        """ Returns the index of a best-scoring child, ties broken at random """
        best_score = float('-inf')
        best_indices = []
        for i, c in enumerate(node.children):
            exploitation = c.reward / c.visits
            exploration = math.sqrt(math.log(2.0 * node.visits) / float(c.visits))
            score = exploitation + exploration_parameter * exploration
            if score > best_score:
                best_indices = [i]
                best_score = score
            elif score == best_score:
                best_indices.append(i)
        # Draw a position rather than passing node objects to NumPy
        return best_indices[np.random.choice(len(best_indices))]

    def best_child(self, node, exploration_parameter):
        """ Returns the best child of a node based on a scoring system proposed by Auer, Cesa-Bianchi and Fischer """
        return node.children[self._best_child_index(node, exploration_parameter)]

In [24]:
game = Game()
iteration = 2
solver = MonteCarlo(game)

# Stop on a win, or when the board is full (draw) and no move can be played
while not game.get_win() and game.valid_moves():
    # Each search starts from a fresh tree rooted at a copy of the current state
    o = Node(game.copy_state())
    move = solver.monte_carlo_tree_search(iteration, o, 2.0)
    game.move(move)

if game.get_win() == 1:
    print("Player 1 won !")
elif game.get_win() == -1:
    print("Player 1 lost...")
else:
    print("Draw.")
print(game.get_board())

Player 1 won !
[[ 1  0  0  0  0  0]
 [ 1 -1  0  0  0  0]
 [ 1  0  0  0  0  0]
 [ 1  0  0  0  0  0]
 [-1  0  0  0  0  0]
 [ 0  0  0  0  0  0]
 [-1  0  0  0  0  0]]
