In [2]:
import numpy as np
import random
class TicTacToe:
    def __init__(self):
        self.board = [[' ' for _ in range(3)] for _ in range(3)]
        self.current_turn = 'X'  # X starts the game

    def print_board(self):
        for row in self.board:
            print('|' + '|'.join(row) + '|')
            print('-------')
    
        
    def toggle_turn(self):
        self.current_turn = 'O' if self.current_turn == 'X' else 'X'

    def make_move(self, row, col):
        if self.board[row][col] == ' ':
            self.board[row][col] = self.current_turn
            self.toggle_turn()
            return True
        else:
            print("This cell is already taken.")
            return False

    def check_win(self):
        # Check rows, columns, and diagonals for a win
        for i in range(3):
            if self.board[i][0] == self.board[i][1] == self.board[i][2] != ' ':
                return True
            if self.board[0][i] == self.board[1][i] == self.board[2][i] != ' ':
                return True
        
        if self.board[0][0] == self.board[1][1] == self.board[2][2] != ' ':
            return True
        if self.board[0][2] == self.board[1][1] == self.board[2][0] != ' ':
            return True

        return False

    def check_draw(self):
        for row in self.board:
            if ' ' in row:
                return False
        return True

    def reset_board(self):
        self.board = [[' ' for _ in range(3)] for _ in range(3)]
        self.current_turn = 'X'


# Tic Tac Toe Q-Learning Agent

## Overview

This repository contains the implementation of a Q-Learning agent designed to learn how to play Tic Tac Toe effectively. By engaging in repeated games against a random opponent, the agent utilizes the Q-Learning reinforcement learning algorithm to optimize its strategy over time.

## Purpose

The purpose of this project is to demonstrate the application of Q-Learning, a fundamental reinforcement learning technique, in a simple yet challenging environment. Tic Tac Toe serves as an ideal platform for understanding the dynamics of Q-Learning due to its straightforward rules and finite state space.

## How It Works

- **Q-Learning Agent**: Learns optimal moves by exploring the state space and adjusting its strategy based on the outcome of each game.
- **Training**: The agent improves through self-play, adjusting its Q-values (expected utility of actions) based on rewards received for winning, losing, or drawing games.

## Key Features

- **Flexible Training**: Easily adjust the number of episodes for training to observe how the agent's performance evolves.
- **State Management**: The agent learns to recognize different states of the Tic Tac Toe board and chooses actions accordingly.
- **Exploration vs. Exploitation**: Balances between exploring new moves and exploiting known strategies to enhance learning.


In [39]:
import random

class QLearningAgent:
    def __init__(self):
        self.q_table = {}
        self.learning_rate = 0.1
        self.discount_factor = 0.95
        self.exploration_rate = 1.0
        self.exploration_decay = 0.99
        self.min_exploration_rate = 0.01

    def state_to_key(self, state):
        # Simplify state representation
        return ''.join([''.join(row) for row in state])

    def choose_action(self, state, available_actions):
        if random.uniform(0, 1) < self.exploration_rate:
            return random.choice(available_actions)
        else:
            state_key = self.state_to_key(state)
            values = [self.q_table.get((state_key, action), -float('inf')) for action in available_actions]
            max_value = max(values)
            return available_actions[values.index(max_value)]

    def update_q_value(self, state, action, reward, next_state, done):
        state_key = self.state_to_key(state)
        next_state_key = self.state_to_key(next_state)
        old_value = self.q_table.get((state_key, action), 0)
        future_rewards = 0 if done else max(self.q_table.get((next_state_key, a), 0) for a in self.get_possible_actions(next_state))
        new_value = old_value + self.learning_rate * (reward + self.discount_factor * future_rewards - old_value)
        self.q_table[(state_key, action)] = new_value

    def update_exploration_rate(self):
        self.exploration_rate *= self.exploration_decay
        self.exploration_rate = max(self.min_exploration_rate, self.exploration_rate)

    def get_possible_actions(self, state):
        return [(i, j) for i in range(3) for j in range(3) if state[i][j] == ' ']


def train_two_agents(episodes=100000):
    agent1 = QLearningAgent()
    agent2 = QLearningAgent()
    for episode in range(episodes):
        game = TicTacToe()
        done = False
        turn = 0  # Alternating turns: 0 for agent1, 1 for agent2
        
        while not done:
            current_agent = agent1 if turn == 0 else agent2
            state = game.board.copy()
            available_actions = current_agent.get_possible_actions(state)
            
            action = current_agent.choose_action(state, available_actions)
            game.make_move(*action)
            
            # Adjust rewards to give more nuanced feedback
            if game.check_win():
                reward = 1 if turn == 0 else -1  # Win or lose depending on the agent
                done = True
            elif game.check_draw():
                reward = 0.5  # Encourage draws over losses
                done = True
            else:
                reward = -0.01  # Minor penalty to encourage shorter games

            next_state = game.board.copy()
            current_agent.update_q_value(state, action, reward, next_state, done)

            # Toggle turn
            turn = 1 - turn
            
            # Update exploration rates more gradually
            if episode % 1000 == 0:
                agent1.update_exploration_rate()
                agent2.update_exploration_rate()

    return agent1  # Return one of the trained agents

# Now you can train your agents
trained_agent = train_two_agents()

In [40]:
display(trained_agent.q_table)

{('     X   ', (1, 2)): -0.009999999999999992,
 ('  XO X   ', (0, 2)): -0.0056953279,
 ('X XO X O ', (0, 0)): -0.001,
 ('X XOXXOO ', (1, 1)): -0.001,
 ('XOXOXXOOX', (2, 2)): 0.9999999999999996,
 (' X       ', (0, 1)): -0.009999999999999992,
 (' X O  X  ', (2, 0)): -0.007712320754503899,
 ('OX O XX  ', (1, 2)): -0.003439,
 ('OX OOXX X', (2, 2)): -0.003439,
 ('OXXOOXXOX', (0, 2)): 0.6513215599000001,
 ('X        ', (0, 0)): -0.009999999999999992,
 ('X XO     ', (0, 2)): -0.009880274848174381,
 ('X XOO X  ', (2, 0)): -0.0019000000000000002,
 ('X O   X  ', (2, 0)): -0.009948462247926799,
 ('X O  XX O', (1, 2)): -0.001,
 ('X O OXXXO', (2, 1)): -0.0019000000000000002,
 ('XOOXOXXXO', (1, 0)): 0.6125795110000001,
 ('   X     ', (1, 0)): -0.009999999999999992,
 ('  XX    O', (0, 2)): -0.0040951,
 ('XOXX    O', (0, 0)): -0.00271,
 ('XOXXOX  O', (1, 2)): -0.009999999999999992,
 ('XOXXOXOXO', (2, 1)): 0.4999999999999998,
 ('      X  ', (2, 0)): -0.009999999999999992,
 ('   O  X X', (2, 2)): -0.005

# Two-Player Tic Tac Toe Game in the Terminal

This Python implementation of Tic Tac Toe allows two human players to play the classic game directly in the terminal. The game is played on a 3x3 grid, where players take turns to mark a space in an attempt to place three of their marks in a horizontal, vertical, or diagonal row.

## How to Play

- The game starts with an empty 3x3 grid.
- Player 1 uses the mark 'X', and Player 2 uses the mark 'O'.
- Players take turns entering their moves by specifying the row and column where they wish to place their mark. The top-left corner is (0, 0), and the bottom-right corner is (2, 2).
- The game checks after each move to see if there is a winner or if the game is a draw (i.e., the board is full and no player has won).
- If a player wins or the game is a draw, the game will end, announcing the outcome.
- Players can play multiple rounds by restarting the game after it concludes.


In [None]:
import numpy as np
from IPython.display import clear_output

def extract_row_column(input_str):
    # Removing the parentheses and spaces
    clean_str = input_str.strip("() ")
    # Splitting the string by comma
    parts = clean_str.split(",")
    # Converting the parts to integers
    row = int(parts[0].strip())
    column = int(parts[1].strip())
    return row, column

def is_valid_position(row, col):
    # Check if the row and column are within the valid range
    return 0 <= row <= 2 and 0 <= col <= 2

# Assuming you have a working TicTacToe class
game1 = TicTacToe()

while not game1.check_win() and not game1.check_draw():
    valid_move = False
    while not valid_move:
        player1 = input("Player 1's turn (X): (Row, Column) ")
        row_1, column_1 = extract_row_column(player1)
        if not is_valid_position(row_1, column_1):
            print("Invalid position. Please try again with a row and column between 0 and 2.")
            continue  # This skips the rest of the loop iteration and prompts for input again
        valid_move = game1.make_move(row_1, column_1)
        if valid_move:
            clear_output(wait=True)
            game1.print_board()
    
    if game1.check_win():
        print("Player 1 (X) wins!")
        game1.reset_board()
        break
    elif game1.check_draw():
        print("It's a draw!")
        game1.reset_board()
        break

    valid_move = False
    while not valid_move:
        player2 = input("Player 2's turn (O): (Row, Column) ")
        row_2, column_2 = extract_row_column(player2)
        if not is_valid_position(row_2, column_2):
            print("Invalid position. Please try again with a row and column between 0 and 2.")
            continue  # Same as above, repeat the loop for valid input
        valid_move = game1.make_move(row_2, column_2)
        if valid_move:
            clear_output(wait=True)
            game1.print_board()
    
    if game1.check_win():
        print("Player 2 (O) wins!")
        game1.reset_board()
        break
    elif game1.check_draw():
        print("It's a draw!")
        game1.reset_board()
        break

## Playing Tic Tac Toe Against a Q-Learning Agent

This script enables you to play Tic Tac Toe in your terminal against a computer-controlled opponent trained via Q-Learning. The game is designed for a single player (you) to challenge the computer in a classic match of Tic Tac Toe, where strategic moves determine the winner.

### How to Play:

- **Start the Game**: Run the script to initiate the game. The game board is displayed in the terminal.
- **Make Your Move**: You play as 'X'. When prompted, input your move by specifying the row and column where you wish to place your 'X', using a zero-based index. For example, entering `0, 0` places an 'X' in the top-left corner of the board.
- **Watch the Agent's Move**: After your move, the agent (playing as 'O') will make its move, automatically chosen based on its Q-Learning trained strategy.
- **Game Progress**: The board is updated after each move, showing the current state of play. This continues until one player wins by aligning three of their symbols vertically, horizontally, or diagonally, or until the board is full and the game is declared a draw.
- **Game End**: The game concludes with a message announcing the outcome: a win for you, a win for the computer, or a draw.

In [15]:
import numpy as np
from IPython.display import clear_output

def extract_row_column(input_str):
    # Removing the parentheses and spaces
    clean_str = input_str.strip("() ")
    # Splitting the string by comma
    parts = clean_str.split(",")
    # Converting the parts to integers
    row = int(parts[0].strip())
    column = int(parts[1].strip())
    return row, column

def is_valid_position(row, col):
    # Check if the row and column are within the valid range
    return 0 <= row <= 2 and 0 <= col <= 2

def play_against_agent(agent, game):
    while not game.check_win() and not game.check_draw():
        # Human's turn (Player 1)
        valid_move = False
        while not valid_move:
            player_input = input("Your turn (X): (Row, Column) ")
            row, column = extract_row_column(player_input)
            if not is_valid_position(row, column):
                print("Invalid position. Please try again with a row and column between 0 and 2.")
                continue
            valid_move = game.make_move(row, column)
            if valid_move:
                clear_output()
                game.print_board()

        if game.check_win():
            print("Congratulations! You've won!")
            break
        elif game.check_draw():
            print("It's a draw!")
            break

        # Agent's turn (Player 2)
        if not game.check_win() and not game.check_draw():
            state = game.board
            available_actions = agent.get_possible_actions(state)
            action = agent.choose_action(state, available_actions)
            game.make_move(*action)
            clear_output()  # Ensure this is before printing the agent's move
            print("Agent's move (O):")
            game.print_board()

        if game.check_win():
            print("The agent has won. Try again!")
            break
        elif game.check_draw():
            print("It's a draw!")
            break
# Initialize your game and the agent
game = TicTacToe()
# Assuming `trained_agent` is your trained QLearningAgent instance from before
play_against_agent(trained_agent, game)

|X|O|X|
-------
|O| |X|
-------
|O| |X|
-------
Congratulations! You've won!


# 2 player Tic Tac Toe Game with GUI

This Python program implements a classic Tic Tac Toe game with a graphical user interface (GUI) using Tkinter. It allows two players to play the game in a more interactive way compared to terminal-based versions.

## Features

- **Graphical User Interface**: Utilizes Tkinter for a simple and user-friendly interface.
- **Two-Player Game**: Designed for two human players to take turns making moves.
- **Win and Draw Detection**: Automatically detects and announces when a player wins or the game ends in a draw.
- **Game Reset**: Allows players to reset the game and start a new round immediately after a game concludes.

## How to Play

1. Run the program to open the game window.
2. Players take turns clicking on the grid to place their mark (Player 1 is 'X', and Player 2 is 'O').
3. The first player to align three of their marks vertically, horizontally, or diagonally wins the game.
4. If the grid is filled and no player has aligned three marks, the game is a draw.
5. Click the "Reset" button to start a new game at any time.

In [23]:
import numpy as np
import tkinter as tk
from tkinter import messagebox

class GameEndedException(Exception):
    pass

class TicTacToeGUI:
    def __init__(self, master):
        self.master = master
        self.master.title('Tic Tac Toe')
        self.game = TicTacToe()
        self.game_active = True  # Track if the game is active
        self.initialize_ui()

    def initialize_ui(self):
        self.buttons = [[None for _ in range(3)] for _ in range(3)]
        for row in range(3):
            for col in range(3):
                self.buttons[row][col] = tk.Button(self.master, text=' ', font=('normal', 40), height=2, width=5,
                                                   command=lambda r=row, c=col: self.on_button_click(r, c))
                self.buttons[row][col].grid(row=row, column=col)
        self.reset_button = tk.Button(self.master, text='Reset', command=self.reset_board)
        self.reset_button.grid(row=3, column=0, columnspan=3)

    def on_button_click(self, row, col):
        if self.game_active and self.board[row][col] == ' ':
            self.game.make_move(row, col)
            self.buttons[row][col]['text'] = self.board[row][col]
            if self.game.check_win():
                messagebox.showinfo("Game Over", f"{self.game.current_turn} wins!")
                self.game_active = False  # Game is no longer active
                self.disable_all_buttons()
            elif self.game.check_draw():
                messagebox.showinfo("Game Over", "It's a draw!")
                self.game_active = False  # Game is no longer active
                self.disable_all_buttons()
            self.game.toggle_turn()

    def reset_board(self):
        self.game.reset_board()
        self.game_active = True  # Reset the game status to active
        for row in range(3):
            for col in range(3):
                self.buttons[row][col]['text'] = ' '
                self.buttons[row][col]['state'] = tk.NORMAL  # Re-enable the button

    def disable_all_buttons(self):
        for row in range(3):
            for col in range(3):
                self.buttons[row][col]['state'] = tk.DISABLED

def main():
    root = tk.Tk()
    gui = TicTacToeGUI(root)
    root.mainloop()

if __name__ == "__main__":
    main()

Exception in Tkinter callback
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/tkinter/__init__.py", line 1892, in __call__
    return self.func(*args)
  File "/var/folders/b6/5z9z5ywd37z412b7t74r_84c0000gn/T/ipykernel_2831/2677095996.py", line 21, in <lambda>
    command=lambda r=row, c=col: self.on_button_click(r, c))
  File "/var/folders/b6/5z9z5ywd37z412b7t74r_84c0000gn/T/ipykernel_2831/2677095996.py", line 27, in on_button_click
    if self.game_active and self.board[row][col] == ' ':
AttributeError: 'TicTacToeGUI' object has no attribute 'board'
Exception in Tkinter callback
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/tkinter/__init__.py", line 1892, in __call__
    return self.func(*args)
  File "/var/folders/b6/5z9z5ywd37z412b7t74r_84c0000gn/T/ipykernel_2831/2677095996.py", line 21, in <lambda>
    command=lambda r=row, c=col: self.on_button_click(r, c))
  Fi

## Playing Tic Tac Toe Against a Q-Learning Agent with GUI 

In [41]:
import numpy as np
import tkinter as tk
from tkinter import messagebox

class TicTacToeGUI:
    def __init__(self, master, agent):
        self.master = master
        self.master.title('Tic Tac Toe')
        self.game = TicTacToe()
        self.agent = agent  # The trained QLearningAgent
        self.game_active = True  # Track if the game is active
        self.human_player = 'X'  # Human is 'X'
        self.agent_player = 'O'  # Agent is 'O'
        self.initialize_ui()

    def initialize_ui(self):
        self.buttons = [[None for _ in range(3)] for _ in range(3)]
        for row in range(3):
            for col in range(3):
                self.buttons[row][col] = tk.Button(self.master, text=' ', font=('normal', 40), height=2, width=5,
                                                   command=lambda r=row, c=col: self.on_button_click(r, c))
                self.buttons[row][col].grid(row=row, column=col)
        self.reset_button = tk.Button(self.master, text='Reset', command=self.reset_board)
        self.reset_button.grid(row=3, column=0, columnspan=3)

    def on_button_click(self, row, col):
        if self.game_active and self.game.board[row][col] == ' ' and self.game.current_turn == self.human_player:
            self.game.make_move(row, col)
            self.buttons[row][col]['text'] = self.game.board[row][col]
            if self.game.check_win():
                messagebox.showinfo("Game Over", "You win!")
                self.game_active = False
            elif self.game.check_draw():
                messagebox.showinfo("Game Over", "It's a draw!")
                self.game_active = False
            else:
                self.agent_move()  # Let the agent make its move

    def agent_move(self):
        self.master.after(500, self.make_agent_move)  # Schedule the agent's move after 1000 milliseconds (1 second)

    def make_agent_move(self):
        # Implement the logic for the agent to choose a move based on the current state
        available_actions = self.agent.get_possible_actions(self.game.board)
        action = self.agent.choose_action(self.game.board, available_actions)
        self.game.make_move(*action)
        self.buttons[action[0]][action[1]]['text'] = self.game.board[action[0]][action[1]]
        
        if self.game.check_win():
            messagebox.showinfo("Game Over", "The agent wins!")
            self.game_active = False
        elif self.game.check_draw():
            messagebox.showinfo("Game Over", "It's a draw!")
            self.game_active = False
        # Check if it's the agent's turn again (e.g., in cases where the game wasn't ended by the agent's move)
        if self.game_active and self.game.current_turn == self.agent_player:
            self.agent_move()

    def reset_board(self):
        self.game.reset_board()
        self.game_active = True  # Reset the game status to active
        for row in range(3):
            for col in range(3):
                self.buttons[row][col]['text'] = ' '
                self.buttons[row][col]['state'] = tk.NORMAL  # Re-enable the button

def main():
    root = tk.Tk()
    agent = trained_agent  # Assuming this is your trained agent
    gui = TicTacToeGUI(root, agent)
    root.mainloop()

if __name__ == "__main__":
    main()


