In [1]:
import random
import numpy as np

In [2]:
# --- Display the Tic-Tac-Toe board ---
def display_board(board):
    """Print the 3x3 board to stdout.

    Args:
        board: Sequence of nine one-character strings ('X', 'O' or ' '),
            stored row by row (indices 0-2 are the top row).

    The grid is preceded by two blank lines and followed by one; rows are
    separated by a ``--+---+--`` rule.
    """
    print("\n")
    for row_start in (0, 3, 6):
        row_cells = board[row_start:row_start + 3]
        print(" | ".join(row_cells))
        # A horizontal rule goes between rows, not after the last one.
        if row_start != 6:
            print("--+---+--")
    print()

In [3]:
# --- Check if there's a winner ---
def check_winner(board, player):
    """Return True if ``player`` occupies any complete row, column or diagonal.

    Args:
        board: Sequence of nine cell marks, indexed row by row.
        player: The mark to look for (e.g. 'X' or 'O').

    Returns:
        bool: True when at least one winning line consists solely of
        ``player`` marks, otherwise False.
    """
    rows = [(start, start + 1, start + 2) for start in (0, 3, 6)]
    columns = [(top, top + 3, top + 6) for top in range(3)]
    diagonals = [(0, 4, 8), (2, 4, 6)]

    for a, b, c in rows + columns + diagonals:
        if board[a] == player and board[b] == player and board[c] == player:
            return True
    return False


In [4]:
# --- Q-learning setup ---
# Module-level Q-table: maps a state key (see get_state) to a dict of
# {action index: Q-value} for the actions tried from that state.
q_table = {}


def get_state(board):
    """Serialize the board into a string usable as a dictionary key.

    Args:
        board: Iterable of one-character cell marks.

    Returns:
        str: The cell marks concatenated in order.
    """
    no_separator = ""
    return no_separator.join(board)

In [5]:
def choose_action(board, epsilon=0.1):
    """Pick a cell for the agent using an epsilon-greedy policy.

    Args:
        board: List of nine cell marks; empty cells hold ' '.
        epsilon: Probability of picking a random empty cell instead of the
            best-known one.

    Returns:
        int: Index of the chosen empty cell. A random empty cell is returned
        when exploring or when the current state has no Q-table entry;
        otherwise the empty cell with the highest stored Q-value (cells
        without a stored value count as 0, ties go to the lowest index).
    """
    def empty_cells():
        return [idx for idx, mark in enumerate(board) if mark == ' ']

    state_key = get_state(board)

    exploring = random.random() < epsilon
    if exploring or state_key not in q_table:
        return random.choice(empty_cells())

    known_values = q_table[state_key]

    def score(cell):
        return known_values.get(cell, 0)

    return max(empty_cells(), key=score)

In [6]:
def update_q(state, action, reward, next_state, alpha=0.5, gamma=0.9):
    """Apply one Q-learning update to the module-level ``q_table``.

    Args:
        state: Key of the state the action was taken from.
        action: The action (cell index) that was taken.
        reward: Reward observed for the transition.
        next_state: Key of the resulting state; if it has no entry (or an
            empty entry) in ``q_table`` its best value is taken as 0.
        alpha: Learning rate.
        gamma: Discount factor applied to the best next-state value.
    """
    action_values = q_table.setdefault(state, {})
    current = action_values.get(action, 0)

    future_values = q_table.get(next_state, {}).values()
    best_future = max(future_values, default=0)

    td_target = reward + gamma * best_future
    action_values[action] = current + alpha * (td_target - current)

In [7]:
# --- Train the AI using reinforcement learning ---
def train_ai(episodes=5000, epsilon=0.1, alpha=0.5, gamma=0.9):
    """Train the 'X' agent with Q-learning against a uniformly random 'O'.

    Each episode starts from an empty board with 'X' moving first. After
    every agent move (and the opponent's reply, if the game is still going)
    the Q-value of the agent's (state, action) pair is updated.

    Args:
        episodes: Number of games to play.
        epsilon: Exploration rate forwarded to ``choose_action``.
        alpha: Learning rate forwarded to ``update_q``.
        gamma: Discount factor forwarded to ``update_q``.

    Rewards: +1 when the agent wins, 0.5 for a draw, -1 when the opponent
    wins, 0 for a non-terminal turn. The defaults reproduce the previous
    hard-coded behaviour.
    """
    for _ in range(episodes):
        board = [' '] * 9
        while True:
            state = get_state(board)
            action = choose_action(board, epsilon)
            board[action] = 'X'

            if check_winner(board, 'X'):
                # Agent completed a line.
                reward, game_over = 1, True
            elif ' ' not in board:
                # Board filled by the agent's move without a winner: draw.
                reward, game_over = 0.5, True
            else:
                # Opponent replies with a random empty cell.
                open_cells = [i for i, mark in enumerate(board) if mark == ' ']
                board[random.choice(open_cells)] = 'O'
                if check_winner(board, 'O'):
                    reward, game_over = -1, True
                else:
                    reward, game_over = 0, False

            # The next state is the board the agent will see on its next turn
            # (or the terminal board, which has no Q-table entry).
            update_q(state, action, reward, get_state(board),
                     alpha=alpha, gamma=gamma)
            if game_over:
                break


In [8]:
# --- Play a game against the trained AI ---
def _read_player_move(board):
    """Prompt on stdin until the human enters the index of an empty cell."""
    while True:
        try:
            move = int(input("Enter your move (0-8): "))
        except ValueError:
            print("Please enter a number between 0 and 8.")
            continue
        # Validate the range explicitly: an index past the end would raise an
        # uncaught IndexError, and a negative one would silently wrap around
        # and select a cell counted from the end of the board.
        if not 0 <= move < len(board):
            print("Please enter a number between 0 and 8.")
            continue
        if board[move] != ' ':
            print("Invalid move, try again.")
            continue
        return move


def play_game():
    """Play one interactive game: the trained agent is 'X', the human is 'O'.

    The agent moves first and always plays greedily (``epsilon=0``). The
    human is prompted for a cell index 0-8; non-numeric, out-of-range and
    occupied-cell inputs are rejected and the prompt is repeated. The game
    ends when either side completes a line or the board is full.
    """
    board = [' '] * 9
    print("Game start! You are 'O'. AI is 'X'. Positions (0-8):")
    print(np.arange(9).reshape(3, 3))

    while True:
        # AI move
        ai_action = choose_action(board, epsilon=0)  # No exploration during play
        board[ai_action] = 'X'
        display_board(board)

        if check_winner(board, 'X'):
            print("AI wins!")
            break
        if ' ' not in board:
            print("It's a draw!")
            break

        # Player move
        board[_read_player_move(board)] = 'O'

        if check_winner(board, 'O'):
            display_board(board)
            print("You win!")
            break
        # Defensive check: with 'X' moving first on nine cells the board is
        # filled by an 'X' move, so this is not expected to trigger.
        if ' ' not in board:
            print("It's a draw!")
            break


In [9]:
# --- Main ---
# Train first: train_ai() fills the module-level q_table through self-play
# (5000 episodes by default) before any interactive game is started.
print("Training AI...")
train_ai()
print("Training complete!\n")
# Start one interactive game; play_game() reads the human's moves via input()
# and returns once someone has won or the board is full.
print("Game starts! You are 'O', AI is 'X'")
play_game()


Training AI...
Training complete!

Game starts! You are 'O', AI is 'X'
Game start! You are 'O'. AI is 'X'. Positions (0-8):
[[0 1 2]
 [3 4 5]
 [6 7 8]]


X |   |  
--+---+--
  |   |  
--+---+--
  |   |  



Enter your move (0-8):  3




X |   | X
--+---+--
O |   |  
--+---+--
  |   |  



Enter your move (0-8):  


Please enter a number between 0 and 8.


Enter your move (0-8):  1




X | O | X
--+---+--
O | X |  
--+---+--
  |   |  



Enter your move (0-8):  8




X | O | X
--+---+--
O | X |  
--+---+--
X |   | O

AI wins!
