In [103]:
import numpy as np
import random

In [104]:
NUM_COLUMNS = 7
COLUMN_HEIGHT = 6
FOUR = 4
ITERATIONS = 5
Player1 = 1
Player2 = 2
node = None

# Board can be initiatilized with `board = np.zeros((NUM_COLUMNS, COLUMN_HEIGHT), dtype=np.byte)`
# Notez Bien: Connect 4 "columns" are actually NumPy "rows"

In [105]:
def valid_moves(board):
    """Returns columns where a disc may be played"""
    return [n for n in range(NUM_COLUMNS) if board[n, COLUMN_HEIGHT - 1] == 0]


def play(board, column, player):
    """Updates `board` as `player` drops a disc in `column`"""
    (index,) = next((i for i, v in np.ndenumerate(board[column]) if v == 0))
    board[column, index] = player


def take_back(board, column):
    """Updates `board` removing top disc from `column`"""
    (index,) = [i for i, v in np.ndenumerate(board[column]) if v != 0][-1]
    board[column, index] = 0


def four_in_a_row(board, player):
    """Checks if `player` has a 4-piece line"""
    return (
        any(
            all(board[c, r] == player)
            for c in range(NUM_COLUMNS)
            for r in (list(range(n, n + FOUR)) for n in range(COLUMN_HEIGHT - FOUR + 1))
        )
        or any(
            all(board[c, r] == player)
            for r in range(COLUMN_HEIGHT)
            for c in (list(range(n, n + FOUR)) for n in range(NUM_COLUMNS - FOUR + 1))
        )
        or any(
            np.all(board[diag] == player)
            for diag in (
                (range(ro, ro + FOUR), range(co, co + FOUR))
                for ro in range(0, NUM_COLUMNS - FOUR + 1)
                for co in range(0, COLUMN_HEIGHT - FOUR + 1)
            )
        )
        or any(
            np.all(board[diag] == player)
            for diag in (
                (range(ro, ro + FOUR), range(co + FOUR - 1, co - 1, -1))
                for ro in range(0, NUM_COLUMNS - FOUR + 1)
                for co in range(0, COLUMN_HEIGHT - FOUR + 1)
            )
        )
    )
def eval_terminal(board,hero, villain):
    if four_in_a_row(board,hero):
        return 1
    elif four_in_a_row(board,villain):
        return -1
    else:
        return 0

'''def minimax(state, max_depth, is_player_minimizer):
    if max_depth == 0 or state.is_end_state():
        # We're at the end. Time to evaluate the state we're in
        return evaluation_function(state)

    # Is the current player the minimizer?
    if is_player_minimizer:
        value = -math.inf
        for move in state.possible_moves():
            evaluation = minimax(move, max_depth - 1, False)
            min = min(value, evaluation)
        return value

    # Or the maximizer?
    value = math.inf
    for move in state.possible_moves():
        evaluation = minimax(move, max_depth - 1, True)
        max = max(value, evaluation)
    return value'''

def minmax(board,hero, villain,iterations):
    if iterations == 0:
        return None,0
    val = eval_terminal(board,hero, villain)
    possible = valid_moves(board)
    if val != 0 or not possible:
        return None, val
    evaluations = list()
    for ply in possible:
        play(board, ply, hero)
        _, val = minmax(board,villain,hero,iterations-1)
        take_back(board, ply)
        evaluations.append((ply, -val))
        if val==-1:
            break
        
    return max(evaluations, key=lambda k: k[1])
    

In [110]:
class Board(object):
  def __init__(self):
    self.__last_row = 0
    self.__last_column = 0
    self.board = np.zeros((NUM_COLUMNS, COLUMN_HEIGHT), dtype=np.byte)

  def make_copy(self):
    b = Board()
    for row in range(NUM_COLUMNS):
      for col in range(COLUMN_HEIGHT):
        b.board[row][col] = self.board[row][col]
    return b

  def add_piece(self, piece, column):
    if self.__column_filled(column):
      return False

    for row in range(1, len(self.board)):
      if self.board[row][column] != 0:
        self.board[row - 1][column] = piece
        self.__last_row = row - 1
        self.__last_column = column
        return True

    # If this is the first piece in the column
    self.board[-1][column] = piece
    self.__last_row = len(self.board) - 1
    self.__last_column = column
    return True

  @property
  def coordinate_of_most_recent_piece(self):
    return (self.__last_row, self.__last_column)

  @property
  def last_added_piece(self):
    return self.board[self.__last_row][self.__last_column]

  """
  This function gives the weights to wins(1.0), losses(0.0), and draws(0.5)
  """
  def get_result_for_player(self, player_piece):
    if self.is_draw():
      return 0.5
    elif self.last_added_piece == player_piece:
      return 1.0
    else:
      return 0.0

  def is_draw(self):
    not self.winner_found and self.spaces_left == 0

  def winner_found(self):
    row, column = self.coordinate_of_most_recent_piece
    return self.vertical_winner(column) or self.horizontal_winner(row) or self.diagonal_winner(row, column)

  def possible_moves(self, piece):
    if four_in_a_row(self.board,piece):
      return []
    return  valid_moves(self.board) #[i for i in range(COLUMN_HEIGHT) if not self.__column_filled(i)]

  def spaces_left(self):
    count = 0
    for row in self.board:
      for space in row:
        if space == 0:
          count += 1
    return count

  def __column_filled(self, col):
    for row in self.board:
      if row[col] == 0:
        return False
    return True

  '''def vertical_winner(self, column):
    for row in xrange(len(self.board)-3):
      if (self.board[row][column] == 'X' and
          self.board[row+1][column] == 'X' and
          self.board[row+2][column] == 'X' and
          self.board[row+3][column] == 'X'):
        return True
      if (self.board[row][column] == 'O' and
          self.board[row+1][column] == 'O' and
          self.board[row+2][column] == 'O' and
          self.board[row+3][column] == 'O'):
        return True
    return False


  def horizontal_winner(self, row):
    for col in xrange(len(self.board[row])-3):
      if (self.board[row][col] == 'X' and
          self.board[row][col+1] == 'X' and
          self.board[row][col+2] == 'X' and
          self.board[row][col+3] == 'X'):
        return True
      if (self.board[row][col] == 'O' and
          self.board[row][col+1] == 'O' and
          self.board[row][col+2] == 'O' and
          self.board[row][col+3] == 'O'):
        return True
    return False


  def diagonal_winner(self, row, column):
    return self.__check_upper_right_diagonal(row, column) or self.__check_upper_left_diagonal(row, column)

  def __check_upper_left_diagonal(self, row, column):
    tmp_row, tmp_col = row, column
    run = []
    while tmp_row < ROWS-1 and tmp_col < COLUMNS-1:
      tmp_row += 1
      tmp_col += 1
    while tmp_col >= 0 and tmp_row >= 0:
      run.append(self.board[tmp_row][tmp_col])
      tmp_row -= 1
      tmp_col -= 1
    return self.check_run(run)

  def __check_upper_right_diagonal(self, row, column):
    tmp_row, tmp_col = row, column
    run = []
    while tmp_col < COLUMNS-1 and tmp_row > 0:
      tmp_col += 1
      tmp_row -= 1

    while tmp_col >= 0 and tmp_row < ROWS:
      run.append(self.board[tmp_row][tmp_col])
      tmp_row += 1
      tmp_col -= 1
    return self.check_run(run)


  def check_run(self, run):
    if len(run) < 4:
      return False

    for i in xrange(len(run)-3):
      if (run[i] == 'X' and
          run[i+1] == 'X' and
          run[i+2] == 'X' and
          run[i+3] == 'X'):
        return True
      if (run[i] == 'O' and
          run[i+1] == 'O' and
          run[i+2] == 'O' and
          run[i+3] == 'O'):
        return True
    return False
    '''

In [111]:
class Node:
  def __init__(self, state, piece, column=None, parent=None):
    self.column = column
    self.parentNode = parent
    self.current_player_piece = piece
    self.untried_moves = state.possible_moves(piece)
    self.children = list()
    self.wins = 0
    self.visits = 0

  def uct_select_child(self):
    max_score, max_child = 0, None
    for child in self.children:
      score = child.wins / child.visits + sqrt(2*log(self.visits) / child.visits)
      if score > max_score:
        max_child = child
        max_score = score
    return max_child

  def add_child(self, col, state):
    node = Node(state.make_copy(), self.get_next_piece(self.current_player_piece), column=col, parent=self)
    self.untried_moves.remove(col)
    self.children.append(node)
    return node

  def update(self, result):
    self.visits += 1
    self.wins += result

  def get_next_piece(self, piece):
    if piece == Player1:
      return Player2
    return Player1

In [116]:
class MonteCarlo(object):
  def __init__(self, state, piece, iterations, last_node=None):
    if last_node is not None:
      self.root = last_node
    else:
      self.root = Node(state.make_copy(), piece)
    self.original_state = state
    self.iterations = iterations

  def get_move(self):
    for _ in range(self.iterations):
      node = self.root
      state = self.original_state.make_copy()
      node = self.select(node, state)
      node = self.expand(node, state)
      state = self.rollout(state)
      self.backpropagate(node, state)
    return self.root, sorted(self.root.children, key=lambda c: c.wins/c.visits)[-1].column

  def select(self, node, state):
    while len(node.untried_moves) == 0 and len(node.children) != 0:
      node = node.uct_select_child()
      state.add_piece(node.current_player_piece, node.column)
    return node

  def expand(self, node, state):
    if len(node.untried_moves) != 0:
      col = random.choice(node.untried_moves)
      #play(state.board, node.current_player_piece, col)
      state.add_piece(node.current_player_piece, col)
      node = node.add_child(col, state)
    return node

  def rollout(self, state):
    valid = valid_moves(state.board)
    while len(valid_moves(state.board)) != 0:           ##valid move?!?!?'''state.possible_moves()'''
      column = random.choice(valid_moves(state.board))
      piece = self.__get_next_piece(state.last_added_piece)
      state.add_piece(piece, column)
      #play(state,piece,column)
    return state

  def backpropagate(self, node, state):
    while node is not None:
      node.update(state.get_result_for_player(node.current_player_piece))
      node = node.parentNode

  def __get_next_piece(self, piece):
    if piece == Player1:
      return Player2
    return Player1

In [117]:
BoardObject = Board()
play(BoardObject.board,0,Player1)
play(BoardObject.board,1,Player2)
print(BoardObject.board)
print( )
'''best_ply, eval = minmax(board,Player1,Player2, ITERATIONS)
play(board,best_ply,Player1)
print(board)
print( )

best_ply, eval = minmax(board,Player2,Player1, ITERATIONS)
play(board,best_ply,2)
print(board)
print( )'''
node, column = MonteCarlo(BoardObject.make_copy(), Player1, ITERATIONS, last_node=node).get_move()
print(board)
print( )
node, column = MonteCarlo(BoardObject.make_copy(), Player2, ITERATIONS, last_node=node).get_move()
print(board)
print( )

[[1 0 0 0 0 0]
 [2 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]
 [0 0 0 0 0 0]]



IndexError: index 6 is out of bounds for axis 0 with size 6