In [70]:
import numpy as np
from textwrap import dedent
import time

MARKS = {0: 'X', 1: 'O'}
GRID_NUM = 3 # マス目の数。3x3なら3。4x4にしたいなら4を指定する。

def win_condition():
    condition = []
    grid = np.arange(GRID_NUM*GRID_NUM).reshape(GRID_NUM,GRID_NUM)
    for x in range(GRID_NUM):
        condition.append(grid[x])
        condition.append(grid[:,x])

    condition.append(np.diag(grid))
    condition.append(np.diag(grid[:,::-1]))
    return condition

    
# ボードクラス。盤面に関するロジックを担当するクラス
class Board:
    def __init__(self):
        self.state = np.full(GRID_NUM*GRID_NUM, None)
        self.counter = 0
        self.win_condition = win_condition()


    # 視覚的にわかりやすく表示する
    def render(self):
        text = """
            0|1|2
            3|4|5
            6|7|8
        """

        for idx, x in enumerate(self.state):
            if x is not None:
                text = text.replace(str(idx), MARKS[x])
        print(dedent(text).strip())


    def move(self, idx):
        if self.state[idx] is not None:
            return False

        player = self.counter % 2
        self.state[idx] = player

        self.counter += 1
     
        return True


    def unmove(self, idx):
        self.counter -= 1
        self.state[idx] = None


    def is_win(self, player):
        s = self.state
        if(
            s[0] == s[1] == s[2] == player or
            s[3] == s[4] == s[5] == player or
            s[6] == s[7] == s[8] == player or
            s[0] == s[3] == s[6] == player or
            s[1] == s[4] == s[7] == player or
            s[2] == s[5] == s[8] == player or
            s[0] == s[4] == s[8] == player or
            s[2] == s[4] == s[6] == player
        ):
            return True
        # 毎回ループが入って遅くなるので一旦不採用。4x4とかに拡張できるようにしたいため
        # grid = self.state.reshape(GRID_NUM,GRID_NUM)
        # for idx in range(len(self.win_condition)):
            # if all(board.state[board.win_condition[idx]] == player):
                # return True
        return False


    def is_end(self):
        return False if None in self.state else True


    def valid_moves(self):
        return [idx for idx, s in enumerate(self.state) if s is None]


class AIPlayer:
    def __init__(self, player):
        self.player = player


    def play(self, board):
        score, idx = minimax(board, self.player)
        print("AIプレイヤー", idx)
        moves = board.move(idx)


def minimax(board, player):
    maximize_player = 0
    minimize_player = 1

    if board.is_win(maximize_player):
        return (1, None)
    elif board.is_win(minimize_player):
        return (-1, None)
    elif board.is_end():
        return (0, None)
    
    opp = 1 if player == 0 else 0

    if player == maximize_player:
        max_score = -np.inf
        max_idx = None

        for idx in board.valid_moves():
            board.move(idx)
            score, next_idx = minimax(board, opp)
            if max_score < score:
                max_score = score
                max_idx = idx
            board.unmove(idx)
        return max_score, max_idx

    elif player == minimize_player:
        min_score = np.inf
        min_idx = None

        for idx in board.valid_moves():
            board.move(idx)
            score, next_idx = minimax(board, opp)
            if min_score > score:
                min_score = score
                min_idx = idx
            board.unmove(idx)
        return min_score, min_idx


board = Board()
players = [AIPlayer(0),AIPlayer(1)]
player = 0

while True:
    p = players[player]
    p.play(board)
    board.render()

    if board.is_win(player):
        print(MARKS[player] + 'の勝ち')
        break
    elif board.is_end():
        print('引き分け')
        break
    player = 1 if player == 0 else 0

AIプレイヤー 0
X|1|2
3|4|5
6|7|8
AIプレイヤー 4
X|1|2
3|O|5
6|7|8
AIプレイヤー 1
X|X|2
3|O|5
6|7|8
AIプレイヤー 2
X|X|O
3|O|5
6|7|8
AIプレイヤー 6
X|X|O
3|O|5
X|7|8
AIプレイヤー 3
X|X|O
O|O|5
X|7|8
AIプレイヤー 5
X|X|O
O|O|X
X|7|8
AIプレイヤー 7
X|X|O
O|O|X
X|O|8
AIプレイヤー 8
X|X|O
O|O|X
X|O|X
引き分け


In [21]:
grid = board.state.reshape(GRID_NUM,GRID_NUM)
for idx in range(len(board.win_condition)):
# if all(board.state[board.win_condition[idx]] == player):

In [30]:
all(board.state[board.win_condition] == 0)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [27]:
board.state

array([0, 0, 0, 1, 1, 0, 0, 1, 0], dtype=object)

In [40]:
np.any(board.state[board.win_condition] == 0)

True

In [37]:
board.state

array([0, 0, 1, 1, 1, 0, 0, 1, 0], dtype=object)

In [36]:
board.state[2] = 1

In [45]:
result = board.state[board.win_condition] == 0

In [46]:
result

array([[ True,  True, False],
       [ True, False,  True],
       [False, False,  True],
       [ True, False, False],
       [ True, False,  True],
       [False,  True,  True],
       [ True, False,  True],
       [False, False,  True]])

In [47]:
result.shape

(8, 3)

In [48]:
board.win_condition

[array([0, 1, 2]),
 array([0, 3, 6]),
 array([3, 4, 5]),
 array([1, 4, 7]),
 array([6, 7, 8]),
 array([2, 5, 8]),
 array([0, 4, 8]),
 array([2, 4, 6])]

In [51]:
grid = board.state[board.win_condition]

In [50]:
player

0

In [60]:
grid[0]

array([0, 0, 1], dtype=object)

In [61]:
np.isclose(grid[0],np.array([0,0,0]))

TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [66]:
board.state[board.win_condition] == 0

array([[ True,  True, False],
       [ True, False,  True],
       [False, False,  True],
       [ True, False, False],
       [ True, False,  True],
       [False,  True,  True],
       [ True, False,  True],
       [False, False,  True]])

In [69]:
board.win_condition[0] == 0

array([ True, False, False])