# In [None]:
import random
import numpy as np

# Display the Tic-Tac-Toe board
def display_board(board):
    """Print the board as three rows of cells followed by a divider line.

    Args:
        board: Sequence of string cells; indices 0-8 are shown three per
            row, separated by " | ".
    """
    rows = []
    for start in range(0, 9, 3):
        rows.append(" | ".join(board[start:start + 3]))
    print("\n".join(rows))
    print("-" * 9)

# Check if there's a winner
def check_winner(board, player):
    """Return True if `player` holds all three cells of any winning line.

    Args:
        board: Indexable board with cells 0-8.
        player: Mark to look for (compared with ``==`` against each cell).
    """
    winning_lines = (
        (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
        (0, 3, 6), (1, 4, 7), (2, 5, 8),  # columns
        (0, 4, 8), (2, 4, 6),             # diagonals
    )
    for a, b, c in winning_lines:
        if board[a] == player and board[b] == player and board[c] == player:
            return True
    return False

# Q-learning agent functions
# Module-level table shared by choose_action and update_q: maps a board-state
# string (as produced by get_state) to a dict of {action index: Q-value}.
q_table = {} # Stores Q-values for state-action pairs

def get_state(board):
    """Concatenate the board's cells into one string, used as a q_table key."""
    separator = ""
    return separator.join(board)

def choose_action(board, epsilon=0.1):
    """Pick a free cell using an epsilon-greedy policy over q_table.

    Args:
        board: Current board; cells equal to ' ' are considered free.
        epsilon: Threshold compared against ``random.random()``; when the
            draw is below it, a random free cell is returned (exploration).

    Returns:
        Index of the chosen free cell. A random free cell is also returned
        when the current state has no entry in q_table. Otherwise the free
        cell with the highest stored Q-value is returned; actions without a
        stored value count as 0 and ties go to the lowest index.
    """
    state = get_state(board)
    free_cells = [idx for idx, cell in enumerate(board) if cell == ' ']

    explore = random.random() < epsilon
    if explore or state not in q_table:
        return random.choice(free_cells)

    # Exploit: rank the free cells by their known Q-value for this state.
    known_values = q_table[state]
    return max(free_cells, key=lambda cell: known_values.get(cell, 0))

def update_q(state, action, reward, next_state, alpha=0.5, gamma=0.9):
    """Apply one Q-learning update to q_table for the pair (state, action).

    Q(s, a) <- Q(s, a) + alpha * (reward + gamma * max Q(s', .) - Q(s, a))

    Args:
        state: Key of the state in which the action was taken.
        action: Action taken in `state`.
        reward: Reward received for the transition.
        next_state: Key of the resulting state; if it has no stored values,
            its best future value is taken as 0.
        alpha: Learning rate.
        gamma: Discount factor applied to the best future value.
    """
    action_values = q_table.setdefault(state, {})
    current = action_values.get(action, 0)

    future_values = q_table.get(next_state, {}).values()
    best_future = max(future_values, default=0)

    td_target = reward + gamma * best_future
    action_values[action] = current + alpha * (td_target - current)

# Train the AI through reinforcement learning.
def train_ai(episodes=5000):
    """Fill q_table by playing 'X' against a randomly moving '0' opponent.

    Each episode starts from an empty board. 'X' moves via choose_action
    (default epsilon), then the opponent picks a random free cell. After
    every 'X' move exactly one update_q call is made with reward:
    1 for an 'X' win, 0.5 for a full board (draw), -1 if the opponent's
    reply wins, and 0 when the game continues.

    Args:
        episodes: Number of training games to play.
    """
    for _episode in range(episodes):
        cells = [' ' for _ in range(9)]
        finished = False
        while not finished:
            state = get_state(cells)
            action = choose_action(cells)
            cells[action] = 'X'

            if check_winner(cells, 'X'):
                reward, finished = 1, True
            elif ' ' not in cells:
                reward, finished = 0.5, True
            else:
                # Opponent replies before the transition is scored, so the
                # next state passed to update_q includes its move.
                free_cells = [idx for idx, cell in enumerate(cells) if cell == ' ']
                cells[random.choice(free_cells)] = '0'
                if check_winner(cells, '0'):
                    reward, finished = -1, True
                else:
                    reward = 0

            update_q(state, action, reward, get_state(cells))

# Play a game against the trained AI
def play_game():
    """Play one interactive game: the AI ('X') moves first, the human is '0'.

    The AI picks its moves with choose_action(board, epsilon=0). The human's
    moves are read with input() as a cell index from 0 to 8 and re-prompted
    until a free, in-range cell is given. The game ends when either side
    completes a line or the board is full; the result is printed.
    """
    board = [' '] * 9
    while True:
        display_board(board)
        ai_action = choose_action(board, epsilon=0)  # No exploration in test
        board[ai_action] = 'X'
        print("\nAI moved:")
        display_board(board)

        if check_winner(board, 'X'):
            print("AI wins!")
            break

        if ' ' not in board:
            print("It's a draw!")
            break

        while True:
            try:
                player_action = int(input("\nEnter your move (0-8): "))
            except ValueError:
                print("Invalid input, try again.")
                continue
            # Check the range explicitly: a negative index would otherwise
            # wrap around (board[-1] is cell 8) and be accepted as a move.
            if not 0 <= player_action < len(board):
                print("Invalid input, try again.")
                continue
            if board[player_action] != ' ':
                print("Invalid move, try again.")
                continue
            board[player_action] = '0'
            break

        if check_winner(board, '0'):
            display_board(board)
            print("You win!")
            break

        # With the AI moving first the board cannot fill up on the human's
        # turn; kept as a safeguard in case the move order changes.
        if ' ' not in board:
            display_board(board)
            print("It's a draw!")
            break

# Main: train the agent, show how the cells are numbered, then start a game.
print("Training AI...")
train_ai()
print("Training complete!\n")
print("Game starts! You are '0', AI is 'X'", "Positions (0-8):", sep="\n")
print(np.arange(9).reshape((3, 3)))

play_game()