In [93]:
import numpy as np
import time

In [94]:
# Magnitude of the score assigned to a decided game; terminal scores are
# derived from it so that they dominate any heuristic estimate.
VERY_HIGH = 1e6
# Mirror image of VERY_HIGH for the opposing side.
VERY_LOW = -VERY_HIGH

In [95]:
class CeroMataCero():
  """State of a 3x3 board game won by filling a row, column or diagonal.

  Cells hold 0 (empty), 1 (drawn as "X") or -1 (drawn as "O"). `turn` is the
  mark that will be placed by the next move and `depth` is the number of
  moves between this state and the state the search started from.
  """

  def __init__(self, state=None, turn=1.0, depth=0):
    """Create a game state.

    Args:
      state: 3x3 numpy array to use as the board. It is stored as-is (not
        copied), so `play` mutates the caller's array. An empty board is
        created when omitted.
      turn: mark of the player to move, 1.0 or -1.0.
      depth: distance from the search root; used by `terminal_test`.
    """
    if state is not None: self.matrix = state
    else: self.matrix = np.zeros((3,3))
    self.turn = turn
    self.depth = depth
    self.mappings = {0: "-", 1: "X", -1: "O"}

  def play(self, pos):
    """Place the current player's mark at `pos` (a (row, col) tuple).

    Returns:
      False (leaving the state untouched) when the cell is occupied,
      True after placing the mark and passing the turn otherwise.
    """
    if self.matrix[pos] != 0: return False
    self.matrix[pos] = self.turn
    self.turn = self.whos_next()
    return True

  def __str__(self):
    """Render the board as three lines of space-separated "-", "X", "O"."""
    rows = (" ".join(self.mappings[val] for val in row) for row in self.matrix)
    return "\n".join(rows)

  def __repr__(self):
    return str(self)

  def whos_next(self):
    """Return the mark of the player who moves after the current one."""
    return self.turn * -1

  def terminal_test(self):
    """Score the state if the game is over.

    Returns:
      The winner's mark times (VERY_HIGH - depth) when a row, column or
      diagonal is filled with one mark (so wins reached in fewer moves
      have a larger magnitude), 0.0 when the board is full without a
      winner, and None when the game is still open.
    """
    base_score = (VERY_HIGH - self.depth)
    board = self.matrix

    # Same inspection order as a column/row sweep followed by both diagonals.
    lines = []
    for i in range(3):
      lines.append(board[:, i])
      lines.append(board[i, :])
    lines.append(board.diagonal())
    lines.append(np.fliplr(board).diagonal())

    for line in lines:
      if line[0] != 0.0 and (line == line[0]).all():
        return line[0] * base_score

    if (board != 0).all(): return 0.0
    return None

  def children(self):
    """Return the states reachable by one move of the current player.

    Successors are listed in row-major order of the empty cell that gets
    filled; each one owns a copy of the board, has the other player to
    move and a depth one larger than this state.
    """
    children = []
    for i, j in np.argwhere(self.matrix == 0):
      child_state = self.matrix.copy()
      child_state[i, j] = self.turn
      children.append(CeroMataCero(state=child_state, turn=self.whos_next(), depth=self.depth+1))
    return children

  def heuristic(self):
    """Estimate the position from adjacent pairs of equal marks.

    Every cell adds its own value once for each horizontally or vertically
    adjacent cell holding the same value, so each adjacent pair of equal
    marks contributes twice its mark (+2 for value 1, -2 for value -1) and
    empty cells contribute nothing. All pairs are weighted the same
    regardless of where they sit on the board.
    """
    board = self.matrix
    # Sum the value of each cell that equals its upper / left neighbour ...
    vertical = board[1:, :][board[1:, :] == board[:-1, :]].sum()
    horizontal = board[:, 1:][board[:, 1:] == board[:, :-1]].sum()
    # ... and double it, because each pair is seen from both of its cells.
    return 2 * (vertical + horizontal)



In [96]:
class MinimaxSolver():
  """Depth-limited minimax search with alpha-beta cut-offs.

  States handed to the solver must provide `terminal_test()` (a score, or
  None while the game is open), `heuristic()` and `children()`.
  """

  def __init__(self, max_depth=10, ts=None, max_time=None, timeit=False):
    """Store the search limits.

    Args:
      max_depth: depth at which states are scored with `heuristic()`.
      ts: reference timestamp (as from `time.time()`) for the time limit.
      max_time: seconds allowed after `ts`; only read when `timeit` is set.
      timeit: enable the time limit check at every visited node.
    """
    self.timeit = timeit
    self.max_time = max_time
    self.ts = ts
    self.max_depth = max_depth

  def __maximize(self, state, alpha, beta, depth):
    """Return (best child, value) of `state` for the maximizing player.

    The child is None whenever the state is not expanded (time is up, the
    game is over, or the depth limit is reached).
    """
    out_of_time = self.timeit and time.time() - self.ts >= self.max_time
    if out_of_time:
      return (None, -np.inf)

    outcome = state.terminal_test()
    if outcome is not None:
      return (None, outcome)

    if depth >= self.max_depth:
      return (None, state.heuristic())

    best_child = None
    best_value = -np.inf

    for candidate in state.children():
      value = self.__minimize(candidate, alpha, beta, depth + 1)[1]

      if value > best_value:
        best_child = candidate
        best_value = value

      # The minimizing ancestor already has an option at least this good.
      if best_value >= beta:
        break

      alpha = max(alpha, best_value)

    return best_child, best_value

  def __minimize(self, state, alpha, beta, depth):
    """Return (best child, value) of `state` for the minimizing player.

    The child is None whenever the state is not expanded (time is up, the
    game is over, or the depth limit is reached).
    """
    out_of_time = self.timeit and time.time() - self.ts >= self.max_time
    if out_of_time:
      return (None, -np.inf)

    outcome = state.terminal_test()
    if outcome is not None:
      return (None, outcome)

    if depth >= self.max_depth:
      return (None, state.heuristic())

    worst_child = None
    worst_value = np.inf

    for candidate in state.children():
      value = self.__maximize(candidate, alpha, beta, depth + 1)[1]

      if value < worst_value:
        worst_child = candidate
        worst_value = value

      # The maximizing ancestor already has an option at least this good.
      if worst_value <= alpha:
        break

      beta = min(beta, worst_value)

    return worst_child, worst_value

  def solve(self, state):
    """Return the child of `state` chosen for the maximizing player.

    The result is None when `state` itself is not expanded.
    """
    chosen, _value = self.__maximize(state, -np.inf, np.inf, 0)
    return chosen


In [97]:
# Example run: interactive game loop. The AI moves first with "X" (turn 1.0)
# and the human answers with "O".

init_state = CeroMataCero()

max_time = 0.5

minimax_solver = MinimaxSolver(max_time=max_time, ts=time.time())

while True:

  ## AI turn (iterative-deepening minimax)
  best_state = None
  ts = time.time()
  for depth in range(1, 9):
    minimax_solver.max_depth = depth
    best_state = minimax_solver.solve(init_state)
    # The time budget is only checked between iterations: a started depth
    # always runs to completion because the solver's `timeit` flag is off.
    if time.time() - ts >= max_time:
      break

  # Restart from the chosen successor with depth 0 for the next search.
  init_state = CeroMataCero(state=best_state.matrix, turn=best_state.turn)
  if init_state.terminal_test() is not None: break

  # Human turn: keep asking until a legal "row,col" move has been played.
  # Without this, a rejected move would leave the turn with the human's
  # mark and the AI would then place that mark itself.
  print(init_state)
  while True:
    try:
      x, y = map(int, input().split(","))
    except ValueError:
      # Not two comma-separated integers.
      print("Invalid input, expected row,col (e.g. 0,2)")
      continue
    # Reject out-of-range indices explicitly; negative values would
    # otherwise wrap around to the opposite side of the board.
    if 0 <= x < 3 and 0 <= y < 3 and init_state.play((x, y)):
      break
    print("Invalid move, choose an empty cell with row and col in 0..2")

  if init_state.terminal_test() is not None: break

final_score = init_state.terminal_test()

print(init_state)
if final_score == 0:
  print("AI TIES!")
elif final_score > 0:
  print("AI WINS!")
else:
  print("AI LOSES!")

- - -
- X -
- - -
O X -
- X -
- - -
O X O
- X -
- X -
AI WINS!
