In [20]:
import numpy as np
import random
from tqdm import tqdm
import pickle


In [None]:
class TicTacToe:
    def __init__(self):
        self.reset()

    def reset(self):
        self.board = np.zeros((3, 3), dtype=int)
        self.available_positions = [(i, j) for i in range(3) for j in range(3)]
        self.used_numbers = []
        return self.board

    def get_available_numbers(self, player):
        full = [1, 3, 5, 7, 9] if player == 1 else [2, 4, 6, 8]
        return [n for n in full if n not in self.used_numbers]

    def check_win(self, player):
        player_numbers = [1, 3, 5, 7, 9] if player == 1 else [2, 4, 6, 8]
        filtered_board = np.where(np.isin(self.board, player_numbers), self.board, 0)

        lines = [
            filtered_board[0, :], filtered_board[1, :], filtered_board[2, :],
            filtered_board[:, 0], filtered_board[:, 1], filtered_board[:, 2],
            filtered_board.diagonal(), np.fliplr(filtered_board).diagonal()
        ]

        for line in lines:
            if np.sum(line) == 15 and np.count_nonzero(line) == 3:
                return True
        return False

    def step(self, pos, num):
        self.board[pos] = num
        self.used_numbers.append(num)
        self.available_positions.remove(pos)

    def get_state(self):
        return tuple(self.board.flatten())


In [None]:
class QLearningAgent:
    def __init__(self, alpha=0.3, gamma=0.9, epsilon=0.2):
        self.q_values = dict()
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon

    def get_q(self, state, action):
        return self.q_values.get((state, action), 0.0)

    def choose_action(self, env, player):
        state = env.get_state()
        moves = []
        for pos in env.available_positions:
            for num in env.get_available_numbers(player):
                moves.append((pos, num))

        if not moves:
            return None

        if random.random() < self.epsilon:
            return random.choice(moves)

        qs = [self.get_q(state, move) for move in moves]
        return moves[np.argmax(qs)]

    def learn(self, state, action, reward, next_state):
        old_q = self.q_values.get((state, action), 0.0)
        future_qs = [
            self.get_q(next_state, a)
            for a in self.q_values
            if a[0] == next_state
        ]
        max_future_q = max(future_qs, default=0)
        new_q = old_q + self.alpha * (reward + self.gamma * max_future_q - old_q)
        self.q_values[(state, action)] = new_q


In [None]:
agent = QLearningAgent()

for episode in tqdm(range(10000)):
    env = TicTacToe()
    state = env.get_state()
    player = 1

    while True:
        action = agent.choose_action(env, player)
        if action is None:
            break
        pos, num = action
        env.step(pos, num)
        win = env.check_win(player)
        draw = len(env.available_positions) == 0
        next_state = env.get_state()

        if win:
            reward = 1 if player == 1 else -1
            agent.learn(state, action, reward, next_state)
            break
        elif draw:
            reward = 0
            agent.learn(state, action, reward, next_state)
            break
        else:
            reward = 0
            agent.learn(state, action, reward, next_state)

        state = next_state
        player = 2 if player == 1 else 1


100%|██████████| 10000/10000 [02:32<00:00, 65.38it/s]


In [21]:
with open("q_agent.pkl", "wb") as f:
    pickle.dump(agent.q_values, f)


In [22]:
#Check Manually
'''def play_against_agent(agent):
    env = TicTacToe()
    player = 2  # You start
    print("🎮 Welcome to Numerical Tic Tac Toe!")
    print("You are Player 2 (Even Numbers: 2, 4, 6, 8)")
    print("Agent is Player 1 (Odd Numbers: 1, 3, 5, 7, 9)")

    while True:
        print("\nCurrent Board:\n", env.board)

        if player == 1:
            action = agent.choose_action(env, 1)
            if action is None:
                print("No valid moves left for Agent.")
                break
            pos, num = action
            env.step(pos, num)
            print(f"Agent (Player 1) played {num} at {pos}")
        else:
            print(f"\nYour Available Numbers: {env.get_available_numbers(2)}")
            print(f"Available Positions: {env.available_positions}")
            try:
                num = int(input("Enter an even number to play: "))
                if num not in env.get_available_numbers(2):
                    print("❌ Invalid number. Try again.")
                    continue

                pos_input = input("Enter position (row and col e.g., 0 1): ")
                i, j = map(int, pos_input.strip().split())
                pos = (i, j)

                if pos not in env.available_positions:
                    print("❌ Invalid position. Try again.")
                    continue

                env.step(pos, num)

            except Exception:
                print("❌ Invalid input. Please enter valid number and position.")
                continue

        print("\nBoard After Move:\n", env.board)
        win = env.check_win(player)
        draw = len(env.available_positions) == 0

        if win:
            print(f"🎉 {'Agent' if player == 1 else 'You'} (Player {player}) win!")
            break
        if draw:
            print("🤝 It's a draw!")
            break

        player = 2 if player == 1 else 1

# To play, uncomment below:
play_against_agent(agent)'''

'def play_against_agent(agent):\n    env = TicTacToe()\n    player = 2  # You start\n    print("🎮 Welcome to Numerical Tic Tac Toe!")\n    print("You are Player 2 (Even Numbers: 2, 4, 6, 8)")\n    print("Agent is Player 1 (Odd Numbers: 1, 3, 5, 7, 9)")\n\n    while True:\n        print("\nCurrent Board:\n", env.board)\n\n        if player == 1:\n            action = agent.choose_action(env, 1)\n            if action is None:\n                print("No valid moves left for Agent.")\n                break\n            pos, num = action\n            env.step(pos, num)\n            print(f"Agent (Player 1) played {num} at {pos}")\n        else:\n            print(f"\nYour Available Numbers: {env.get_available_numbers(2)}")\n            print(f"Available Positions: {env.available_positions}")\n            try:\n                num = int(input("Enter an even number to play: "))\n                if num not in env.get_available_numbers(2):\n                    print("❌ Invalid number. Try agai

In [24]:
def random_player_action(env, player):
    moves = []
    for pos in env.available_positions:
        for num in env.get_available_numbers(player):
            moves.append((pos, num))
    return random.choice(moves) if moves else None

def evaluate_agent(agent, games=1000):
    agent_wins = 0
    random_wins = 0
    draws = 0

    for _ in range(games):
        env = TicTacToe()
        player = 1
        while True:
            if player == 1:
                action = agent.choose_action(env, player)
            else:
                action = random_player_action(env, player)

            if not action:
                draws += 1
                break

            pos, num = action
            env.step(pos, num)

            if env.check_win(player):
                if player == 1:
                    agent_wins += 1
                else:
                    random_wins += 1
                break

            if len(env.available_positions) == 0:
                draws += 1
                break

            player = 2 if player == 1 else 1

    print(f"\n🧠 Agent Evaluation Over {games} Games:")
    print(f"✅ Agent Wins: {agent_wins}")
    print(f"❌ Random Player Wins: {random_wins}")
    print(f"🤝 Draws: {draws}")


In [25]:
evaluate_agent(agent, games=1000)



🧠 Agent Evaluation Over 1000 Games:
✅ Agent Wins: 139
❌ Random Player Wins: 0
🤝 Draws: 861
