<a href="https://colab.research.google.com/github/HSGanesh/1CD22AI023/blob/main/TicTacToe/code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pickle

BOARD_ROWS = 3
BOARD_COLS = 3


# =========================
# Game State
# =========================
class State:
    def __init__(self, p1, p2):
        self.board = np.zeros((BOARD_ROWS, BOARD_COLS))
        self.p1 = p1
        self.p2 = p2
        self.isEnd = False
        self.boardHash = None
        self.playerSymbol = 1  # Player 1 starts

    def getHash(self):
        self.boardHash = str(self.board.reshape(BOARD_ROWS * BOARD_COLS))
        return self.boardHash

    def availablePositions(self):
        return [(i, j) for i in range(BOARD_ROWS)
                for j in range(BOARD_COLS) if self.board[i, j] == 0]

    def updateState(self, position):
        self.board[position] = self.playerSymbol
        self.playerSymbol = -1 if self.playerSymbol == 1 else 1

    def winner(self):
        # rows and columns
        for i in range(BOARD_ROWS):
            if abs(sum(self.board[i, :])) == 3:
                self.isEnd = True
                return int(np.sign(sum(self.board[i, :])))
            if abs(sum(self.board[:, i])) == 3:
                self.isEnd = True
                return int(np.sign(sum(self.board[:, i])))

        # diagonals
        diag1 = sum(self.board[i, i] for i in range(BOARD_COLS))
        diag2 = sum(self.board[i, BOARD_COLS - i - 1] for i in range(BOARD_COLS))
        if abs(diag1) == 3 or abs(diag2) == 3:
            self.isEnd = True
            return int(np.sign(diag1 if abs(diag1) == 3 else diag2))

        # draw
        if not self.availablePositions():
            self.isEnd = True
            return 0

        self.isEnd = False
        return None

    def giveReward(self):
        result = self.winner()
        if result == 1:
            self.p1.feedReward(1)
            self.p2.feedReward(0)
        elif result == -1:
            self.p1.feedReward(0)
            self.p2.feedReward(1)
        else:
            self.p1.feedReward(0.3)
            self.p2.feedReward(0.3)

    def reset(self):
        self.board = np.zeros((BOARD_ROWS, BOARD_COLS))
        self.boardHash = None
        self.isEnd = False
        self.playerSymbol = 1

    def play(self, rounds=10000):
        for _ in range(rounds):
            while not self.isEnd:
                # Player 1
                positions = self.availablePositions()
                action = self.p1.chooseAction(positions, self.board, self.playerSymbol)
                self.updateState(action)
                self.p1.addState(self.getHash())

                win = self.winner()
                if win is not None:
                    self.giveReward()
                    self.p1.reset()
                    self.p2.reset()
                    self.reset()
                    break

                # Player 2
                positions = self.availablePositions()
                action = self.p2.chooseAction(positions, self.board, self.playerSymbol)
                self.updateState(action)
                self.p2.addState(self.getHash())

                win = self.winner()
                if win is not None:
                    self.giveReward()
                    self.p1.reset()
                    self.p2.reset()
                    self.reset()
                    break

    def showBoard(self):
        print("\n-------------")
        for i in range(BOARD_ROWS):
            print("| ", end="")
            for j in range(BOARD_COLS):
                if self.board[i, j] == 1:
                    print("X", end=" | ")
                elif self.board[i, j] == -1:
                    print("O", end=" | ")
                else:
                    print(" ", end=" | ")
            print("\n-------------")


# =========================
# AI Player
# =========================
class Player:
    def __init__(self, name, exp_rate=0.3):
        self.name = name
        self.states = []
        self.lr = 0.2
        self.exp_rate = exp_rate
        self.decay_gamma = 0.9
        self.states_value = {}

    def getHash(self, board):
        return str(board.reshape(BOARD_ROWS * BOARD_COLS))

    def chooseAction(self, positions, board, symbol):
        if np.random.rand() <= self.exp_rate:
            return positions[np.random.choice(len(positions))]

        value_max = -999
        for p in positions:
            next_board = board.copy()
            next_board[p] = symbol
            value = self.states_value.get(self.getHash(next_board), 0)
            if value >= value_max:
                value_max = value
                action = p
        return action

    def addState(self, state):
        self.states.append(state)

    def feedReward(self, reward):
        for st in reversed(self.states):
            self.states_value[st] = self.states_value.get(st, 0)
            self.states_value[st] += self.lr * (self.decay_gamma * reward - self.states_value[st])
            reward = self.states_value[st]

    def reset(self):
        self.states = []

    def savePolicy(self):
        with open("policy_" + self.name, "wb") as fw:
            pickle.dump(self.states_value, fw)

    def loadPolicy(self, file):
        with open(file, "rb") as fr:
            self.states_value = pickle.load(fr)


# =========================
# Human Player (FIXED)
# =========================
class HumanPlayer:
    def __init__(self, name):
        self.name = name

    def chooseAction(self, positions, board=None, symbol=None):
        while True:
            try:
                row = int(input("Enter row (0-2): "))
                col = int(input("Enter col (0-2): "))
                if (row, col) in positions:
                    return (row, col)
                else:
                    print("Invalid move. Try again.")
            except ValueError:
                print("Please enter valid integers.")

    def addState(self, state):
        pass

    def feedReward(self, reward):
        pass

    def reset(self):
        pass


# =========================
# Main Execution
# =========================
if __name__ == "__main__":
    # Training phase
    p1 = Player("AI1")
    p2 = Player("AI2")

    state = State(p1, p2)
    print("Training AI agents...")
    state.play(50000)

    p1.savePolicy()

    # Play with human
    p1 = Player("Computer", exp_rate=0)
    p1.loadPolicy("policy_AI1")
    p2 = HumanPlayer("Human")

    state = State(p1, p2)

    while True:
        state.play(rounds=1)
        state.showBoard()
        choice = input("Play again? (y/n): ")
        if choice.lower() != 'y':
            break


Training AI agents...
Enter row (0-2): 2
Enter col (0-2): 2
Enter row (0-2): 1
Enter col (0-2): 1
Invalid move. Try again.
Enter row (0-2): 0
Enter col (0-2): 1

-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
|   |   |   | 
-------------
Play again? (y/n): n
