In [None]:
import numpy as np
import pickle
import os

In [None]:
class State:
    """Board state and match driver for a k-in-a-row game on a rectangular grid.

    The board is a NumPy array of shape (BOARD_ROWS, BOARD_COLS) in which 0 is
    an empty cell, 1 is a stone of ``p1`` and -1 is a stone of ``p2``. The
    board size and the winning line length are read from the module-level
    names BOARD_ROWS, BOARD_COLS and NUMBERTOWIN, which must be assigned
    before a State is created.

    ``p1`` / ``p2`` are player objects providing ``chooseAction``,
    ``addState``, ``feedReward`` and ``reset`` (plus ``name`` for the
    interactive modes).
    """

    def __init__(self, p1, p2):
        self.board = np.zeros((BOARD_ROWS, BOARD_COLS))
        self.p1 = p1
        self.p2 = p2
        self.isEnd = False
        self.boardHash = None
        # p1 always moves first and plays symbol 1
        self.playerSymbol = 1

    def getHash(self):
        """Return a string key for the current board and cache it in ``boardHash``."""
        # reshape(-1) flattens the board without needing the size constants;
        # the resulting string is the same as for an explicit rows*cols reshape.
        self.boardHash = str(self.board.reshape(-1))
        return self.boardHash

    def _lines(self):
        """Yield every NUMBERTOWIN-long run of cells that could be a winning line.

        Scan order: horizontal runs, vertical runs, down-right diagonals,
        down-left diagonals. Runs that would leave the board are never
        generated (the ranges are simply empty when the line does not fit).
        """
        k = NUMBERTOWIN
        # horizontal: row by row, sliding the window along the columns
        for r in range(BOARD_ROWS):
            for c in range(BOARD_COLS - k + 1):
                yield self.board[r, c:c + k]
        # vertical: column by column, sliding the window down the rows
        for c in range(BOARD_COLS):
            for r in range(BOARD_ROWS - k + 1):
                yield self.board[r:r + k, c]
        steps = np.arange(k)
        # diagonal going down and to the right from (r, c)
        for r in range(BOARD_ROWS - k + 1):
            for c in range(BOARD_COLS - k + 1):
                yield self.board[r + steps, c + steps]
        # diagonal going down and to the left from (r, c)
        for r in range(BOARD_ROWS - k + 1):
            for c in range(k - 1, BOARD_COLS):
                yield self.board[r + steps, c - steps]

    def winner(self):
        """Evaluate the board and update ``isEnd``.

        Returns:
            1 if p1 has a full line, -1 if p2 has one, 0 for a tie (no empty
            cell left and no line), or None if the game is still running.
        """
        for line in self._lines():
            total = line.sum()
            # a line counts only if all of its cells carry the same symbol
            if total == NUMBERTOWIN:
                self.isEnd = True
                return 1
            if total == -NUMBERTOWIN:
                self.isEnd = True
                return -1

        # tie: nobody won and there is no empty cell left
        if len(self.availablePositions()) == 0:
            self.isEnd = True
            return 0
        # game still in progress
        self.isEnd = False
        return None

    def availablePositions(self):
        """Return the empty cells as a list of (row, col) tuples in row-major order."""
        # tuples are required because they are used directly as board indices
        return [
            (i, j)
            for i in range(BOARD_ROWS)
            for j in range(BOARD_COLS)
            if self.board[i, j] == 0
        ]

    def updateState(self, position):
        """Place the current player's symbol at ``position`` and pass the turn."""
        self.board[position] = self.playerSymbol
        # switch to the other player
        self.playerSymbol = -1 if self.playerSymbol == 1 else 1

    def giveReward(self):
        """Feed the end-of-game reward to both players (call only when the game ended)."""
        result = self.winner()
        if result == 1:
            self.p1.feedReward(1)
            self.p2.feedReward(0)
        elif result == -1:
            self.p1.feedReward(0)
            self.p2.feedReward(1)
        else:
            # tie: the second player is rewarded more than the first
            self.p1.feedReward(0.1)
            self.p2.feedReward(0.5)

    def reset(self):
        """Clear the board and restore the initial turn/end flags."""
        self.board = np.zeros((BOARD_ROWS, BOARD_COLS))
        self.boardHash = None
        self.isEnd = False
        self.playerSymbol = 1

    def _playTrainingGame(self):
        """Play one self-play game to completion, reward both players and reset."""
        while not self.isEnd:
            for player in (self.p1, self.p2):
                positions = self.availablePositions()
                action = player.chooseAction(positions, self.board, self.playerSymbol)
                # apply the move, then let the mover record the resulting state
                self.updateState(action)
                player.addState(self.getHash())

                if self.winner() is not None:
                    # game over (win or tie): propagate rewards and start clean
                    self.giveReward()
                    self.p1.reset()
                    self.p2.reset()
                    self.reset()
                    return

    def play(self, rounds=100):
        """Train p1 and p2 against each other for ``rounds`` self-play games."""
        for i in range(rounds):
            if i % 1000 == 0:
                print("Rounds {}".format(i))
            self._playTrainingGame()

    def _interactiveTurn(self, player, symbol, is_human):
        """Run one announced turn of an interactive game.

        Args:
            player: the player object that moves.
            symbol: the symbol (1 or -1) that counts as a win for ``player``.
            is_human: True if ``player`` only takes the list of positions
                (human input); False if it also needs the board and symbol.

        Returns:
            True if the game ended with this move (the state has then already
            been reset), False otherwise.
        """
        print()
        print("Your Turn" if is_human else "Computer's Turn")
        print()
        positions = self.availablePositions()
        if is_human:
            action = player.chooseAction(positions)
        else:
            action = player.chooseAction(positions, self.board, self.playerSymbol)
        print(action)

        # take action, show the result, then check whether the game ended
        self.updateState(action)
        self.showBoard()
        win = self.winner()
        if win is None:
            return False
        if win == symbol:
            print(player.name, "wins!")
        else:
            print("tie!")
        # always reset once the game is over, whoever won
        self.reset()
        return True

    # play with human (human first)
    def play1(self):
        """Interactive game in which p1 is the human and p2 the computer."""
        while not self.isEnd:
            if self._interactiveTurn(self.p1, 1, True):
                break
            if self._interactiveTurn(self.p2, -1, False):
                break

    # play with human
    def play2(self):
        """Interactive game in which p1 is the computer and p2 the human."""
        while not self.isEnd:
            if self._interactiveTurn(self.p1, 1, False):
                break
            if self._interactiveTurn(self.p2, -1, True):
                break

    def showBoard(self):
        """Print the board as an ASCII grid; p1 is drawn as 'x', p2 as 'o'."""
        tokens = {1: 'x', -1: 'o', 0: ' '}
        # each cell is 4 characters wide ("| t ") plus one closing "|"
        border = '-' * (4 * BOARD_COLS + 1)
        for i in range(BOARD_ROWS):
            print(border)
            cells = [tokens.get(self.board[i, j], '') for j in range(BOARD_COLS)]
            print('| ' + ' | '.join(cells) + ' |')
        print(border)

In [None]:
class Player:
    """Computer player that learns a value for every board state it has reached.

    Values are stored in ``states_value`` keyed by the string produced by
    ``getHash``; ``feedReward`` updates them at the end of a game.
    """

    def __init__(self, name, exp_rate=0.3):
        self.name = name
        self.states = []  # hashes of the boards reached by this player's moves in the current game
        self.lr = 0.3  # step size used by feedReward
        self.exp_rate = exp_rate  # a random draw must exceed this for the greedy branch to run
        self.decay_gamma = 0.9  # discount applied to the reward while propagating backwards
        self.states_value = {}  # state hash -> value

    @staticmethod
    def getHash(board):
        """Return the string key of ``board`` (the flattened array rendered with ``str``)."""
        # reshape(-1) flattens any board shape, so no size constants are needed
        return str(board.reshape(-1))

    def chooseAction(self, positions, current_board, symbol):
        """Pick a move from ``positions``.

        With probability governed by ``exp_rate`` the move is random;
        otherwise the position whose resulting board has the highest known
        value is taken (the last one on ties). If none of the resulting
        boards has a stored value, the move is random as well.

        Args:
            positions: list of free (row, col) tuples; must not be empty.
            current_board: the board array before the move (not modified).
            symbol: the symbol this player places (1 or -1).

        Returns:
            The chosen (row, col) tuple.
        """
        unknown = -999  # sentinel for "no stored value"
        action = None
        value_max = unknown

        if np.random.uniform(0, 1) > self.exp_rate:
            # greedy: evaluate the board that each candidate move would produce
            for p in positions:
                next_board = current_board.copy()
                next_board[p] = symbol
                value = self.states_value.get(self.getHash(next_board))
                if value is None:
                    value = unknown
                if value >= value_max:
                    value_max = value
                    action = p

        if value_max == unknown:
            # exploring, or nothing is known about any candidate: move randomly
            idx = np.random.choice(len(positions))
            action = positions[idx]

        return action

    def addState(self, state):
        """Append a board hash to the history of the current game."""
        self.states.append(state)

    def feedReward(self, reward):
        """Propagate the end-of-game ``reward`` backwards through the visited states.

        Each state's value moves by ``lr`` towards ``decay_gamma * reward``;
        the updated value then becomes the reward for the state before it.
        """
        for st in reversed(self.states):
            if self.states_value.get(st) is None:
                self.states_value[st] = 0
            self.states_value[st] += self.lr * (self.decay_gamma * reward - self.states_value[st])
            reward = self.states_value[st]

    def reset(self):
        """Forget the state history of the finished game (learned values are kept)."""
        self.states = []

    def savePolicy(self, n, m, x):
        """Pickle ``states_value`` to ``/content/policy_<n>_<m>_<x>``."""
        # the context manager closes the file even if pickling fails
        with open('/content/policy_' + str(n) + '_' + str(m) + '_' + str(x), 'wb') as fw:
            pickle.dump(self.states_value, fw)

    def loadPolicy(self, file):
        """Replace ``states_value`` with the dictionary pickled in ``file``."""
        # NOTE(review): pickle.load can execute arbitrary code; only load
        # policy files that were written by savePolicy on a trusted machine.
        with open(file, 'rb') as fr:
            self.states_value = pickle.load(fr)


class HumanPlayer:
    """Player whose moves are typed in on the console.

    ``addState``, ``feedReward`` and ``reset`` are no-ops so that a human can
    take either player slot of a game.
    """

    def __init__(self, name):
        self.name = name

    @staticmethod
    def chooseAction(positions):
        """Prompt for a row and a column until a free cell is entered.

        Args:
            positions: list of free (row, col) tuples.

        Returns:
            The entered (row, col) tuple, which is guaranteed to be in
            ``positions``.
        """
        while True:
            try:
                row = int(input('Input your action row ( 0 -- > ' + str(BOARD_ROWS - 1) + ' ): '))
                col = int(input('Input your action col ( 0 -- > ' + str(BOARD_COLS - 1) + ' ): '))
            except ValueError:
                # non-numeric input is treated like any other invalid move
                # instead of aborting the game with a traceback
                action_move = None
            else:
                action_move = (row, col)

            if action_move in positions:
                return action_move

            print()
            print("Invalid Move !!! Please Make Another Move !!!")
            print()

    def addState(self, state):
        """No-op: a human keeps no state history."""
        pass

    def feedReward(self, reward):
        """No-op: a human does not learn from rewards."""
        pass

    def reset(self):
        """No-op: there is nothing to clear between games."""
        pass

In [None]:
def Tic_Tac_Toe(n, m, x, p, rounds=100):
    """Play an interactive n-by-m game in which ``x`` in a row wins.

    If no policy file ``/content/policy_<n>_<m>_<x>`` exists yet, two
    computer players are first trained against each other and the first
    player's value table is saved under that name. The saved table is then
    loaded for the computer opponent.

    Args:
        n: number of board rows (at least 1).
        m: number of board columns (at least 1).
        x: line length needed to win; must be at least 1 and must not exceed
            both board dimensions.
        p: 1 if the computer moves first, anything else if the human does.
        rounds: number of self-play games used when a policy has to be trained.

    Side effects:
        Assigns the module-level names BOARD_ROWS, BOARD_COLS and NUMBERTOWIN
        that the State, Player and HumanPlayer classes read.
    """
    global BOARD_ROWS
    global BOARD_COLS
    global NUMBERTOWIN

    if n < 1 or m < 1:
        print()
        print("Invalid board size !!! Please Enter Again !!!")
        print()
        return

    # a line fits if it is no longer than at least one of the dimensions
    if x < 1 or (x > m and x > n):
        print()
        print("Invalid x !!! Please Enter Again !!!")
        print()
        return

    BOARD_ROWS = n
    BOARD_COLS = m
    NUMBERTOWIN = x

    policy_path = "/content/policy_" + str(n) + "_" + str(m) + "_" + str(x)

    if not os.path.exists(policy_path):
        # no saved policy for this configuration yet: train one by self-play
        trainer_first = Player("p1")
        trainer_second = Player("p2")
        training = State(trainer_first, trainer_second)
        print("training...")
        training.play(rounds)
        # NOTE(review): only the first player's table is saved, yet it is also
        # loaded when the computer moves second - confirm this is intended.
        trainer_first.savePolicy(n, m, x)

    # exp_rate=0 makes the computer always take its best known move
    computer = Player("computer", exp_rate=0)
    computer.loadPolicy(policy_path)
    human = HumanPlayer("human")

    if p == 1:
        # computer moves first
        st = State(computer, human)
        print()
        print("Game Start !!!")
        print()
        st.play2()
    else:
        # human moves first: show the empty board before the first prompt
        st = State(human, computer)
        print()
        print("Game Start !!!")
        print()
        st.showBoard()
        st.play1()

In [None]:
if __name__ == "__main__":
    # 3x3 board, three in a row wins, computer makes the first move (p=1)
    Tic_Tac_Toe(n=3, m=3, x=3, p=1)


Game Start !!!


Computer's Turn

(2, 1)
-------------
|   |   |   |
-------------
|   |   |   |
-------------
|   | x |   |
-------------

Your Turn

Input your action row ( 0 -- > 2 ): 0
Input your action col ( 0 -- > 2 ): 1
(0, 1)
-------------
|   | o |   |
-------------
|   |   |   |
-------------
|   | x |   |
-------------

Computer's Turn

(2, 2)
-------------
|   | o |   |
-------------
|   |   |   |
-------------
|   | x | x |
-------------

Your Turn

Input your action row ( 0 -- > 2 ): 2
Input your action col ( 0 -- > 2 ): 0
(2, 0)
-------------
|   | o |   |
-------------
|   |   |   |
-------------
| o | x | x |
-------------

Computer's Turn

(1, 0)
-------------
|   | o |   |
-------------
| x |   |   |
-------------
| o | x | x |
-------------

Your Turn

Input your action row ( 0 -- > 2 ): 1
Input your action col ( 0 -- > 2 ): 1
(1, 1)
-------------
|   | o |   |
-------------
| x | o |   |
-------------
| o | x | x |
-------------

Computer's Turn

(0, 2)
----------