In [1]:
import numpy as np
import pickle

In [2]:
# Board dimensions (rows x columns) used when the game board is created.
# NOTE: State.winner() compares line sums against a literal 3, so changing
# these values also requires revisiting that check.
Board_Rows = 3
Board_Cols = 3

In [3]:
class State:
    """Tic-tac-toe game state plus the play loops that drive two players.

    The board is a ``Board_Rows`` x ``Board_Cols`` numpy array in which 1 marks
    player 1 ('x'), -1 marks player 2 ('o') and 0 marks an empty cell.
    """

    def __init__(self, p1, p2):
        """Create an empty board; player 1 (symbol 1) moves first.

        p1, p2 -- player objects providing ``name`` and ``chooseAction``.
        """
        self.board = np.zeros((Board_Rows, Board_Cols))
        self.p1 = p1
        self.p2 = p2
        self.isEnd = False
        self.boardHash = None
        self.playerSymbol = 1  # makes player 1 play first.

    def getHash(self):
        """Store and return the string form of the flattened board."""
        self.boardHash = str(self.board.reshape(Board_Cols * Board_Rows))
        return self.boardHash

    def winner(self):
        """Evaluate the board and update ``self.isEnd``.

        Returns 1 if player 1 has a full line, -1 if player 2 has one, 0 for a
        tie (no empty cells left), or None when the game can continue.
        """
        # A full line of identical symbols sums to +3 (player 1) or -3 (player 2).
        line_sums = [self.board[i, :].sum() for i in range(Board_Rows)]
        line_sums += [self.board[:, j].sum() for j in range(Board_Cols)]
        line_sums.append(np.trace(self.board))
        line_sums.append(np.fliplr(self.board).diagonal().sum())
        for total in line_sums:
            if total == 3:
                self.isEnd = True
                return 1
            if total == -3:
                self.isEnd = True
                return -1
        # No line completed: either a tie or the game can be continued.
        if not self.available_positions():
            self.isEnd = True
            return 0
        self.isEnd = False
        return None

    def available_positions(self):
        """Return the (row, col) tuples of all empty cells, in row-major order."""
        return [
            (i, j)
            for i in range(Board_Rows)
            for j in range(Board_Cols)
            if self.board[i, j] == 0
        ]

    def updateState(self, position):
        """Place the current player's symbol at ``position`` and switch player."""
        self.board[position] = self.playerSymbol
        self.playerSymbol = -1 if self.playerSymbol == 1 else 1

    def playWithHuman1(self):
        """Play a game where p1 is the computer and p2 is the human.

        The board is shown after every move.
        """
        while not self.isEnd:
            # Player 1 (computer): needs the board and its symbol to evaluate moves.
            positions = self.available_positions()
            p1_action = self.p1.chooseAction(positions, self.board, self.playerSymbol)
            self.updateState(p1_action)
            self.showBoard()
            win = self.winner()
            if win is not None:
                if win == 1:
                    print(self.p1.name, "wins!")
                else:
                    print("tie!")
                break

            # Player 2 (human)
            positions = self.available_positions()
            p2_action = self.p2.chooseAction(positions)
            self.updateState(p2_action)
            self.showBoard()
            win = self.winner()
            if win is not None:
                if win == -1:
                    print(self.p2.name, "wins!")
                else:
                    print("tie!")
                break

    def playWithHuman2(self):
        """Play a game where p1 is the human and p2 is the computer.

        The board is shown before each human move and once more when the game ends.
        """
        while not self.isEnd:
            # Player 1 (human)
            self.showBoard()
            positions = self.available_positions()
            p1_action = self.p1.chooseAction(positions)
            self.updateState(p1_action)
            win = self.winner()
            if win is not None:
                self.showBoard()
                if win == 1:
                    print(self.p1.name, "wins!")
                else:
                    print("tie!")
                break

            # Player 2 (computer)
            positions = self.available_positions()
            p2_action = self.p2.chooseAction(positions, self.board, self.playerSymbol)
            self.updateState(p2_action)
            win = self.winner()
            if win is not None:
                self.showBoard()
                if win == -1:
                    print(self.p2.name, "wins!")
                else:
                    print("tie!")
                break

    def playHvH(self):
        """Play a game between two human players, announcing whose turn it is."""
        while not self.isEnd:
            # Player 1
            print("Chance of player 1:", self.p1.name)
            self.showBoard()
            positions = self.available_positions()
            p1_action = self.p1.chooseAction(positions)
            self.updateState(p1_action)
            win = self.winner()
            if win is not None:
                self.showBoard()
                if win == 1:
                    print(self.p1.name, "wins!")
                else:
                    print("tie!")
                break

            # Player 2
            print("Chance of player 2:", self.p2.name)
            self.showBoard()
            positions = self.available_positions()
            p2_action = self.p2.chooseAction(positions)
            self.updateState(p2_action)
            # Re-evaluate the board after player 2's move; without this the
            # stale result from player 1's turn (always None here) is reused
            # and a player-2 win is never detected.
            win = self.winner()
            if win is not None:
                self.showBoard()
                if win == -1:
                    print(self.p2.name, "wins!")
                else:
                    print("tie!")
                break

    def showBoard(self):
        """Print the board using 'x' for 1, 'o' for -1 and a blank for 0."""
        tokens = {1: 'x', -1: 'o', 0: ' '}
        separator = '-' * (4 * Board_Cols + 1)  # 13 dashes for a 3-column board
        for i in range(Board_Rows):
            print(separator)
            ch = '| '
            for j in range(Board_Cols):
                ch += tokens.get(self.board[i, j], ' ') + ' | '
            print(ch)
        print(separator)

In [4]:
class Player:
    """Computer player that chooses moves from a table of board-state values."""

    def __init__(self, name, exp_rate=0.3):
        """Create a player.

        name     -- display name.
        exp_rate -- probability with which chooseAction picks a random move
                    instead of the best-valued one.
        """
        self.name = name
        self.states = []  # states recorded through addState
        self.lr = 0.2  # not used by the methods of this class
        self.exp_rate = exp_rate
        self.decay_gamma = 0.9  # not used by the methods of this class
        self.states_value = {}  # board hash -> value, filled by loadPolicy

    def getHash(self, board):
        """Return the string form of the flattened ``board``."""
        return str(board.reshape(Board_Cols * Board_Rows))

    def chooseAction(self, positions, current_board, symbol):
        """Pick one of ``positions`` for ``symbol`` on ``current_board``.

        With probability ``exp_rate`` a random position is returned; otherwise
        the position whose resulting board has the highest stored value is
        returned (unknown boards count as 0, later positions win ties).

        Raises ValueError if ``positions`` is empty.
        """
        if not positions:
            raise ValueError("positions must not be empty")

        if np.random.uniform(0, 1) <= self.exp_rate:  # take random action
            idx = np.random.choice(len(positions))
            return positions[idx]

        # Start from -inf (not a finite sentinel) so that arbitrarily low
        # stored values can still be selected; positions[0] is a fallback so a
        # result is always bound.
        action = positions[0]
        value_max = float("-inf")
        for p in positions:
            next_board = current_board.copy()
            next_board[p] = symbol
            value = self.states_value.get(self.getHash(next_board))
            if value is None:
                value = 0
            if value >= value_max:
                value_max = value
                action = p
        return action

    def addState(self, state):  # append the state
        """Record ``state`` in ``self.states``."""
        self.states.append(state)

    def loadPolicy(self, file):
        """Load the state-value table from the pickle file at path ``file``."""
        # NOTE(security): pickle.load can execute arbitrary code; only load
        # policy files from a trusted source.
        with open(file, 'rb') as fr:
            self.states_value = pickle.load(fr)

In [5]:
class HumanPlayer:
    """Player whose moves are typed in at the console."""

    def __init__(self, name):
        self.name = name

    def chooseAction(self, positions):
        """Prompt until the user enters a cell contained in ``positions``.

        The user types 1-based row and column numbers; the returned action is
        the corresponding 0-based ``(row, col)`` tuple. Non-numeric input is
        treated like any other invalid position and prompts again.
        """
        while True:
            try:
                row = int(input("Input your action row:"))
                col = int(input("Input your action col:"))
            except ValueError:
                # Non-integer text: ask again instead of crashing the game.
                print("Not a valid position, Enter again:\n")
                continue
            action = (row - 1, col - 1)
            if action in positions:
                return action
            print("Not a valid position, Enter again:\n")

    def addState(self, state):
        """No-op; present so this class offers the same method as Player."""
        pass

In [6]:
from IPython.display import clear_output

def clear():
    """Clear displayed output by calling IPython's ``clear_output``.

    ``wait=True`` is passed; per the IPython documentation this delays the
    clearing until new output is available to replace the old one.
    """
    clear_output(wait=True)

In [None]:
# Interactive entry point: show the main menu in a loop until the user exits
# or enters something invalid.
print("Welcome to tic-tac-toe game\n\n->Choose from the following options:")
while True:
    print("   ===========================")
    print("   |        MAIN MENU        |")
    print("   ===========================")
    print("   | 1 | Human vs Computer   |")
    print("   | 2 | Human vs Human      |")
    print("   | 3 | General Instructions|")
    print("   | 4 | Exit                |")
    print("   ===========================\n")
    choice = input("Enter your choice: ")

    # isdecimal() (unlike isdigit()) only accepts characters that int() can
    # parse, so e.g. a superscript digit is rejected here instead of crashing.
    if choice.isdecimal():
        choice = int(choice)

        if choice == 1:
            # Non-numeric answers follow the same invalid-input exit path as
            # the rest of the menu instead of raising ValueError.
            try:
                level = float(input("On the scale of 1-10, What should be the difficulty level (10 is most difficult):"))
                # Keep the level inside the advertised 1-10 range so the
                # exploration rate stays a valid probability.
                level = min(max(level, 1.0), 10.0)
                # Higher difficulty -> fewer random moves by the computer.
                exp_rate = (10 - level) / 10
                index = int(input("Want to play as player 1 or player 2? (player 1 gets the first chance)\n1 for Player 1 and 2 for player 2:"))
            except ValueError:
                print("Invalid input. \nSystem turning off...")
                break

            if index == 2:
                # Human plays second: the computer is player 1.
                p1 = Player("computer", exp_rate)
                p1.loadPolicy("policy_p1")
                name = input("Enter your name:")
                p2 = HumanPlayer(name)
                st = State(p1, p2)
                st.playWithHuman1()
                input("Press enter to return to main menu.")
                clear()

            elif index == 1:
                # Human plays first: the computer is player 2.
                name = input("Enter your name:")
                p1 = HumanPlayer(name)
                p2 = Player("computer", exp_rate)
                p2.loadPolicy("policy_p2")
                st = State(p1, p2)
                st.playWithHuman2()
                input("Press enter to return to main menu.")
                clear()

            else:
                print("Invalid input. \nSystem turning off...")
                break

        elif choice == 2:
            name1 = input("(Note: Player 1 gets the first chance.)\nEnter the name of Player 1:")
            p1 = HumanPlayer(name1)
            name2 = input("Enter the name of Player 2:")
            p2 = HumanPlayer(name2)
            st = State(p1, p2)
            st.playHvH()
            input("Press enter to return to main menu.")
            clear()

        elif choice == 3:
            print("-> You are required to choose the row and column where you wish to place a cross or circle to play the game.\n-> The coordinates of each box are specified in the format (row, column) below. The game will not proceed until a valid box coordinate is provided.\n")
            print("   -------------------------")
            print("   | (1,1) | (1,2) | (1,3) |")
            print("   -------------------------")
            print("   | (2,1) | (2,2) | (2,3) |")
            print("   -------------------------")
            print("   | (3,1) | (3,2) | (3,3) |")
            print("   -------------------------\n")
            input("Press enter to return to main menu.")
            clear()

        elif choice == 4:
            clear()
            print("System turning off...")
            break

        else:
            print("Invalid input. \nSystem turning off...")
            break

    else:
        print("Invalid input. \nSystem turning off...")
        break



Welcome to tic-tac-toe game

->Choose from the following options:
   |        MAIN MENU        |
   | 1 | Human vs Computer   |
   | 2 | Human vs Human      |
   | 3 | General Instructions|
   | 4 | Exit                |

