Copyright **`(c)`** 2021 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see 'LICENCE.md' for details.

# Connect 4

In [None]:
from collections import Counter
import numpy as np
import time

In [None]:
# Board geometry and winning condition
NUM_COLUMNS = 7  # number of columns of the board
COLUMN_HEIGHT = 6  # number of cells in each column
FOUR = 4  # length of the line of discs looked for by `four_in_a_row`

# Board can be initialized with `board = np.zeros((NUM_COLUMNS, COLUMN_HEIGHT), dtype=np.byte)`
# Nota Bene: Connect 4 "columns" are actually NumPy "rows"

## Basic Functions

In [None]:
def valid_moves(board):
    """List the columns that can still accept a disc.

    A column is playable as long as its topmost cell (height
    ``COLUMN_HEIGHT - 1``) is empty, i.e. holds 0. Columns are returned in
    increasing order.
    """
    top = COLUMN_HEIGHT - 1
    open_columns = []
    for column in range(NUM_COLUMNS):
        if board[column, top] == 0:
            open_columns.append(column)
    return open_columns


def play(board, column, player):
    """Drop a disc for `player` into `column`, updating `board` in place.

    The disc is stored in the lowest empty cell (value 0) of the column.

    Raises:
        ValueError: if `column` has no empty cell left.
    """
    empty_cells = np.flatnonzero(board[column] == 0)
    if empty_cells.size == 0:
        # Fail with an explicit error: a bare StopIteration escaping from a
        # `next()` call is easily swallowed by an enclosing iteration.
        raise ValueError(f"column {column} is full")
    board[column, empty_cells[0]] = player


def take_back(board, column):
    """Remove the topmost disc of `column`, updating `board` in place.

    The highest non-empty cell of the column is reset to 0. An empty column
    raises IndexError.
    """
    occupied = np.flatnonzero(board[column] != 0)
    highest = occupied[-1]
    board[column, highest] = 0


def four_in_a_row(board, player):
    """Tell whether `player` owns a straight line of FOUR discs.

    The board is indexed as ``board[column, height]``. Lines are searched
    along a column, along a row and along both diagonals.
    """
    # (column step, height step) for: vertical, horizontal, rising and
    # falling diagonal
    directions = ((0, 1), (1, 0), (1, 1), (1, -1))
    reach = FOUR - 1
    for col_step, row_step in directions:
        for col in range(NUM_COLUMNS):
            last_col = col + reach * col_step
            if last_col >= NUM_COLUMNS:
                continue
            for row in range(COLUMN_HEIGHT):
                last_row = row + reach * row_step
                if last_row < 0 or last_row >= COLUMN_HEIGHT:
                    continue
                if all(
                    board[col + k * col_step, row + k * row_step] == player
                    for k in range(FOUR)
                ):
                    return True
    return False

## Monte Carlo Evaluation

In [None]:
def _can_land(board, col, row):
    """Tell whether a disc dropped in `col` would come to rest at height `row`."""
    if not (0 <= col < NUM_COLUMNS and 0 <= row < COLUMN_HEIGHT):
        return False
    if board[col, row] != 0:
        return False
    # Either the cell sits on the floor or the cell right below is occupied
    return row == 0 or board[col, row - 1] != 0


def _mc(board, player):
    """Play one randomized game to the end on `board` and return the winner.

    Moves alternate, the first one being played by `player`. Each move
    defaults to a uniformly random valid column; it is overridden by the
    column that most often extends one of the mover's own lines (three discs
    vertically or diagonally, two discs horizontally), provided the dropped
    disc would actually land on the extending cell.

    `board` is indexed as ``board[column, height]`` and is modified in place:
    pass a copy to preserve the caller's position.

    Returns the player that completed a four-in-a-row, or 0 if the board
    fills up without a winner.
    """
    p = -player
    while True:
        valid_moves_list = valid_moves(board)
        if not valid_moves_list:
            break
        p = -p
        value = np.random.choice(valid_moves_list)

        # Vertical: three stacked discs with a free cell right on top
        stop_v = [
            col
            for col in range(NUM_COLUMNS)
            for row in range(COLUMN_HEIGHT - FOUR + 1)
            if np.all(board[col, row:row + 3] == p) and _can_land(board, col, row + 3)
        ]

        # Horizontal: two adjacent discs at (col, row) and (col + 1, row);
        # candidate cells are the ones just left and just right of the pair
        stop_h = []
        for row in range(COLUMN_HEIGHT):
            for col in range(NUM_COLUMNS - FOUR + 1):
                if board[col, row] == p and board[col + 1, row] == p:
                    for target in (col - 1, col + 2):
                        if _can_land(board, target, row):
                            stop_h.append(target)

        # Diagonals: three discs in line; candidate cells are the two ends
        stop_d1 = []
        stop_d2 = []
        for col in range(NUM_COLUMNS - FOUR + 1):
            for low in range(COLUMN_HEIGHT - FOUR + 1):
                # Rising diagonal: (col, low), (col + 1, low + 1), (col + 2, low + 2)
                if all(board[col + k, low + k] == p for k in range(3)):
                    for t_col, t_row in ((col - 1, low - 1), (col + 3, low + 3)):
                        if _can_land(board, t_col, t_row):
                            stop_d1.append(t_col)
                # Falling diagonal: (col, high), (col + 1, high - 1), (col + 2, high - 2)
                high = low + 2
                if all(board[col + k, high - k] == p for k in range(3)):
                    for t_col, t_row in ((col - 1, high + 1), (col + 3, high - 3)):
                        if _can_land(board, t_col, t_row):
                            stop_d2.append(t_col)

        stopping_list = Counter(stop_v + stop_h + stop_d1 + stop_d2)
        if stopping_list:
            # if the argument of most_common is > 1, the AI can make sub-optimal choices (human-like)
            candidates, _ = zip(*stopping_list.most_common(1))
            t = np.random.choice(candidates)
            if t in valid_moves_list:
                value = t
        play(board, value, p)
        if four_in_a_row(board, p):
            return p
    return 0


def montecarlo(board, player):
    """Estimate the value of `board` from a batch of random playouts.

    Runs a fixed number of `_mc` simulations, each on a fresh copy of the
    board, and returns (wins of player 1 - wins of player -1) divided by the
    number of simulations.
    """
    montecarlo_samples = 100
    balance = 0
    for _ in range(montecarlo_samples):
        outcome = _mc(np.copy(board), player)
        if outcome == 1:
            balance += 1
        elif outcome == -1:
            balance -= 1
    return balance / montecarlo_samples


def eval_board(board, player):
    """Score `board`: 1 if Alice (player 1) has won, -1 if Bob (player -1) has.

    Positions without a winner are scored by `montecarlo` simulation.
    """
    # Alice is checked first, then Bob
    for winner in (1, -1):
        if four_in_a_row(board, winner):
            return winner
    # Not terminal, let's simulate...
    return montecarlo(board, player)

## Implementation

In [None]:
def print_board(board):
    """Print `board` as a grid of emoji, one text line per board row.

    The board is rotated with `np.rot90` before printing, so each printed
    line spans all the columns. Cells holding 1 are blue, cells holding -1
    are red, anything else is drawn as an empty white cell.
    """
    symbols = {1: "🔵|", -1: "🔴|"}
    upright = np.rot90(board, 1)
    print("--------------------")
    for row in range(COLUMN_HEIGHT):
        cells = "".join(
            symbols.get(upright[row][col], "⚪|") for col in range(NUM_COLUMNS)
        )
        print("|" + cells)
        print("-------------------")


In [None]:
# Self-play: both sides pick the column with the best simulated score.
board = np.zeros((NUM_COLUMNS, COLUMN_HEIGHT), dtype=np.byte)
player = 1
turn = 0

# Stop on a win or when the board is full: without the last check a drawn
# game would reach `np.random.choice` with no candidate move and crash.
while (
    not four_in_a_row(board, 1)
    and not four_in_a_row(board, -1)
    and valid_moves(board)
):
    turn += 1
    t1 = time.time()
    valid_moves_list = valid_moves(board)
    # -2.0 marks columns that cannot be played: real scores lie in [-1, 1]
    possible_moves = np.array([-2.0] * NUM_COLUMNS)
    for i in valid_moves_list:
        # Try the move, score the resulting position, then undo it
        play(board, i, player)
        # `eval_board` scores from player 1's point of view; multiplying by
        # `player` makes "higher is better" hold for whoever is moving
        possible_moves[i] = eval_board(board, player) * player
        take_back(board, i)
    best_score = np.max(possible_moves)
    best_moves_list = [m for m in valid_moves_list if possible_moves[m] == best_score]
    # Ties are broken at random
    best_move = np.random.choice(best_moves_list)
    t2 = time.time()
    print("Probaility Moves Vector", possible_moves, "Chosen Move", best_move, "Player", player, "Turn Time", t2-t1, "Turn", turn)
    play(board, best_move, player)
    player = -player
    print_board(board)

# Decide the result from the board itself so that a draw is not reported as a win
if four_in_a_row(board, 1):
    print(f"Blue player won at turn {turn}!")
elif four_in_a_row(board, -1):
    print(f"Red player won at turn {turn}!")
else:
    print(f"Draw after {turn} turns!")

Probaility Moves Vector [0.4  0.52 0.34 0.46 0.28 0.24 0.36] Chosen Move 1 Player 1 Turn Time 18.317399740219116 Turn 1
--------------------
|⚪|⚪|⚪|⚪|⚪|⚪|⚪|
-------------------
|⚪|⚪|⚪|⚪|⚪|⚪|⚪|
-------------------
|⚪|⚪|⚪|⚪|⚪|⚪|⚪|
-------------------
|⚪|⚪|⚪|⚪|⚪|⚪|⚪|
-------------------
|⚪|⚪|⚪|⚪|⚪|⚪|⚪|
-------------------
|⚪|🔵|⚪|⚪|⚪|⚪|⚪|
-------------------
Probaility Moves Vector [ 0.14  0.3   0.2   0.38  0.22  0.28 -0.02] Chosen Move 3 Player -1 Turn Time 17.617565631866455 Turn 2
--------------------
|⚪|⚪|⚪|⚪|⚪|⚪|⚪|
-------------------
|⚪|⚪|⚪|⚪|⚪|⚪|⚪|
-------------------
|⚪|⚪|⚪|⚪|⚪|⚪|⚪|
-------------------
|⚪|⚪|⚪|⚪|⚪|⚪|⚪|
-------------------
|⚪|⚪|⚪|⚪|⚪|⚪|⚪|
-------------------
|⚪|🔵|⚪|🔴|⚪|⚪|⚪|
-------------------
Probaility Moves Vector [0.1  0.26 0.46 0.28 0.16 0.22 0.14] Chosen Move 2 Player 1 Turn Time 16.238693475723267 Turn 3
--------------------
|⚪|⚪|⚪|⚪|⚪|⚪|⚪|
-------------------
|⚪|⚪|⚪|⚪|⚪|⚪|⚪|
-------------------
|⚪|⚪|⚪|⚪|⚪|⚪|⚪|
-------------------
|⚪|⚪|⚪|⚪|⚪|⚪|⚪|
-------------