<a href="https://colab.research.google.com/github/Sayandeep27/Reinforcement-Learning/blob/main/Tic_Tac_Toe_Agent_using_Q_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import numpy as np
import random


In [6]:
# Tic-Tac-Toe game class
class TicTacToe:
    def __init__(self):
        self.board = np.zeros((3, 3))  # 3x3 board, 0 represents empty
        self.current_player = 1  # Player 1 starts

    def reset(self):
        self.board = np.zeros((3, 3))
        self.current_player = 1

    def get_state(self):
        return tuple(map(tuple, self.board))

    def is_valid_move(self, move):
        row, col = move
        return self.board[row, col] == 0  # Check if the position is empty

    def make_move(self, move):
        if not self.is_valid_move(move):
            return False  # Invalid move

        row, col = move
        self.board[row, col] = self.current_player
        self.current_player = -self.current_player  # Switch player
        return True  # Valid move

    def is_winner(self, player):
        # Check rows, columns, and diagonals for win
        for i in range(3):
            if all(self.board[i, :] == player) or all(self.board[:, i] == player):
                return True
        if all(np.diag(self.board) == player) or all(np.diag(np.fliplr(self.board)) == player):
            return True
        return False

    def is_draw(self):
        return np.all(self.board != 0) and not self.is_winner(1) and not self.is_winner(-1)

    def game_over(self):
        return self.is_winner(1) or self.is_winner(-1) or self.is_draw()

    def available_moves(self):
        return list(zip(*np.where(self.board == 0)))

In [7]:
# Q-learning agent class
class QLearningAgent:
    def __init__(self, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor
        self.epsilon = epsilon  # Exploration-exploitation trade-off
        self.q_table = {}  # Q-table dictionary

    def get_q_value(self, state, action):
        if state not in self.q_table:
            self.q_table[state] = np.zeros((3, 3))
        return self.q_table[state][action]

    def choose_action(self, state, available_moves):
        if np.random.uniform(0, 1) < self.epsilon:
            return random.choice(available_moves)  # Exploration: choose random move
        else:
            q_values = self.q_table.get(state, np.zeros((3, 3)))
            best_moves = [move for move in available_moves if q_values[move] == np.max(q_values[available_moves])]
            return random.choice(best_moves)  # Exploitation: choose move with highest Q-value

    def update_q_table(self, state, action, reward, next_state):
        if state not in self.q_table:
            self.q_table[state] = np.zeros((3, 3))
        if next_state not in self.q_table:
            self.q_table[next_state] = np.zeros((3, 3))
        old_value = self.q_table[state][action]
        next_max = np.max(self.q_table[next_state])
        new_value = (1 - self.alpha) * old_value + self.alpha * (reward + self.gamma * next_max)
        self.q_table[state][action] = new_value

In [8]:
# Function to play Tic-Tac-Toe game
def play_game(agent, human_player_first=False):
    game = TicTacToe()
    current_player = 1 if human_player_first else -1
    agent_player = -current_player

    while not game.game_over():
        if current_player == 1:  # Human player's turn
            print("Current board:")
            print(game.board)
            print("Available moves:", game.available_moves())
            row = int(input("Enter row (0-2): "))
            col = int(input("Enter column (0-2): "))
            move = (row, col)
            while move not in game.available_moves():
                print("Invalid move. Please choose from available moves.")
                row = int(input("Enter row (0-2): "))
                col = int(input("Enter column (0-2): "))
                move = (row, col)
        else:  # Agent's turn
            state = game.get_state()
            available_moves = game.available_moves()
            action = agent.choose_action(state, available_moves)
            move = action

        game.make_move(move)
        next_state = game.get_state()

        if game.is_winner(agent_player):
            print("Agent wins!")
            reward = 1
        elif game.is_winner(-agent_player):
            print("Human wins!")
            reward = -1
        else:
            print("It's a draw!")
            reward = 0

        agent.update_q_table(state, move, reward, next_state)
        current_player = -current_player  # Switch player

    print("Final board:")
    print(game.board)

In [None]:
# Main function
def main():
    agent = QLearningAgent()

    episodes = 10000
    print_every = 1000

    for episode in range(1, episodes + 1):
        agent.epsilon = 0.1 + 0.9 * (episodes - episode) / episodes  # Decreasing epsilon
        play_game(agent)

        if episode % print_every == 0:
            print(f"Episode {episode}/{episodes} completed.")

    print("Training finished.")

    # Test against human player
    while True:
        play_game(agent, human_player_first=True)
        again = input("Play again? (y/n): ")
        if again.lower() != 'y':
            break

if __name__ == "__main__":
    main()

It's a draw!
Current board:
[[0. 0. 0.]
 [0. 0. 0.]
 [1. 0. 0.]]
Available moves: [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (1, 2), (2, 1), (2, 2)]
Enter row (0-2): 1
Enter column (0-2): 2
It's a draw!
It's a draw!
Current board:
[[ 1.  0.  0.]
 [ 0.  0. -1.]
 [ 1.  0.  0.]]
Available moves: [(0, 1), (0, 2), (1, 0), (1, 1), (2, 1), (2, 2)]
Enter row (0-2): 0
Enter column (0-2): 0
Invalid move. Please choose from available moves.
Enter row (0-2): 1
Enter column (0-2): 1
It's a draw!
It's a draw!
Current board:
[[ 1.  0.  1.]
 [ 0. -1. -1.]
 [ 1.  0.  0.]]
Available moves: [(0, 1), (1, 0), (2, 1), (2, 2)]
Enter row (0-2): 2
Enter column (0-2): 1
It's a draw!
Agent wins!
Final board:
[[ 1.  1.  1.]
 [ 0. -1. -1.]
 [ 1. -1.  0.]]
It's a draw!
Current board:
[[0. 0. 0.]
 [0. 0. 1.]
 [0. 0. 0.]]
Available moves: [(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (2, 0), (2, 1), (2, 2)]
