In [2]:
import numpy as np

# Section 1: Initialization
# Q-table: one row per board encoding (3 possible values in each of the
# 9 cells) and one column per cell index; every entry starts at zero.
Q = np.zeros((3 ** 9, 9))

# Learning parameters
lr = 0.2              # step size applied to each Q-table update
y = 0.95              # discount applied to the next state's value estimate
num_episodes = 20000  # number of training games to play

In [4]:
# Section 2: Helper Functions
# Function to convert state to index
def state_to_index(state):
    """Encode a board as a base-3 number.

    The cells are read in flattened (row-major) order; the k-th cell
    contributes ``3**k * cell`` to the result.
    """
    index = 0
    weight = 1  # 3**k for the k-th flattened cell
    for cell in np.asarray(state).ravel():
        index += weight * cell
        weight *= 3
    return index

# Function to print the game board
def print_board(state):
    """Print the board, one text line per row.

    Cell values 0, 1 and 2 are shown as a blank, 'X' and 'O'; cells within
    a row are separated by ' | '.
    """
    marks = {0: ' ', 1: 'X', 2: 'O'}
    lines = []
    for row in state:
        lines.append(' | '.join(marks[cell] for cell in row))
    print('\n'.join(lines))

# Function to choose an action
def choose_action(state, Q, epsilon=0.1):
    """Pick an empty cell with an epsilon-greedy rule.

    Cell ``i`` refers to ``state[i // 3, i % 3]``. With probability
    ``epsilon`` a random empty cell is returned; otherwise the empty cell
    with the largest Q-value in the row for ``state`` is returned.
    """
    open_cells = []
    for cell in range(9):
        row, col = divmod(cell, 3)
        if state[row, col] == 0:
            open_cells.append(cell)

    # Explore: uniform draw among the empty cells
    if np.random.uniform(0, 1) < epsilon:
        return np.random.choice(open_cells)

    # Exploit: highest-valued empty cell (first one wins on ties)
    q_values = Q[state_to_index(state), open_cells]
    return open_cells[np.argmax(q_values)]

# Function to check the reward
def check_reward(state):
    """Score a board from player 1's point of view.

    Returns 1 if player 1 has three in a line, -1 if player 2 does, 0 if
    the board is full without a winner, and None while the game is still
    in progress.
    """
    # Full rows or columns, player 1 checked first
    payoff = {1: 1, 2: -1}
    for mark, value in payoff.items():
        owned = state == mark
        if owned.all(axis=0).any() or owned.all(axis=1).any():
            return value

    # Both diagonals pass through the centre, so its mark decides the winner
    centre = state[1, 1]
    if centre != 0:
        on_main = state[0, 0] == centre and centre == state[2, 2]
        on_anti = state[0, 2] == centre and centre == state[2, 0]
        if on_main or on_anti:
            return 1 if centre == 1 else -1

    # No empty cell left: draw
    if (state != 0).all():
        return 0

    # Game is not over
    return None

# Function to make a move
def make_move(state, action):
    """Return the board after the player to move marks cell ``action``.

    Cells are numbered 0-8 in row-major order (cell ``i`` is
    ``state[i // 3, i % 3]``), the same convention ``choose_action`` uses
    when it lists the empty cells. The cell is expected to be empty; an
    occupied cell is overwritten without complaint.

    The player to move is derived from the board: player 1 (X) moves when
    both players have placed the same number of marks, otherwise player 2
    (O) moves.

    Returns:
        (new_state, reward): a copy of ``state`` with the new mark, and the
        result of ``check_reward(new_state)``. ``state`` is not modified.
    """
    new_state = state.copy()
    # X always starts, so equal mark counts mean it is X's turn again
    x_to_move = (state == 1).sum() == (state == 2).sum()
    new_state[action // 3, action % 3] = 1 if x_to_move else 2
    reward = check_reward(new_state)
    return new_state, reward

# Function to reset the game
def reset_game():
    """Return a new 3x3 integer board with every cell set to 0 (empty)."""
    empty_board = np.zeros(shape=(3, 3), dtype=int)
    return empty_board

# Function to check if the game is over
def game_over(state):
    """Return True when the board has a winner or no empty cell, else False."""
    # Rows, then columns: a line filled entirely by player 1 or by player 2
    for line in (*state, *state.T):
        if line[0] in (1, 2) and (line == line[0]).all():
            return True

    # Diagonals: three equal, non-empty cells
    diagonals = (
        [state[k, k] for k in range(3)],
        [state[k, 2 - k] for k in range(3)],
    )
    for first, middle, last in diagonals:
        if first == middle == last != 0:
            return True

    # Otherwise the game ends only when no empty cell is left (draw)
    return not (state == 0).any()

In [5]:
# Section 3: Training the Model
# Self-play Q-learning. Both sides pick moves with choose_action, which takes
# the argmax of a Q-table row, so Q[s, a] is learned from the point of view of
# the player who is about to move in s:
#   * terminal move     -> target is the mover's own result (1 = win, 0 = draw)
#   * non-terminal move -> target is minus the discounted best value the
#                          opponent can reach from the next state
# Rows of Q are addressed with state_to_index(board); indexing Q with the
# board array itself would select rows 0-2 by fancy indexing instead.
for i in range(num_episodes):
    # Reset the game and get the first state
    s = reset_game()
    for _ in range(9):
        # Choose an action
        a = choose_action(s, Q)

        # Perform the action and get the new state and reward
        s1, r = make_move(s, a)
        s_idx = state_to_index(s)
        done = game_over(s1)

        if done:
            # check_reward reports +1 / -1 from player 1's side. A move can
            # only end the game with the mover winning or with a draw, so
            # the mover's own reward is the magnitude of r.
            target = abs(r)
        else:
            # The opponent moves next: their best outcome is our worst.
            # Only empty cells are considered; the other columns are never
            # updated and would otherwise pin the maximum at 0.
            open_cells = [c for c in range(9) if s1[c // 3, c % 3] == 0]
            target = -y * np.max(Q[state_to_index(s1), open_cells])

        Q[s_idx, a] += lr * (target - Q[s_idx, a])

        # Update state
        s = s1

        # Break if the game is over
        if done:
            break

In [8]:
# Section 4: Playing the Game
def play_game(Q):
    """Play one interactive game: the human is X (1), the AI is O (2).

    The human moves first and enters a move as "row col" (each 0-2). The AI
    answers with the greedy action from ``Q`` (``epsilon=0``). After every
    move the board is scored with ``check_reward`` so that a win and a draw
    are reported separately.

    Args:
        Q: Q-table passed through to ``choose_action``.
    """
    # Reset the game
    state = reset_game()

    # Game loop: ends as soon as check_reward reports a result
    while True:
        # Human player's turn
        print("Your turn!")
        print_board(state)
        while True:
            move = input("Enter your move (row and column separated by a space): ")
            try:
                row, col = map(int, move.split())
            except ValueError:
                # Non-numeric text or not exactly two numbers
                print("Invalid move. Please try again.")
                continue
            if row in [0, 1, 2] and col in [0, 1, 2] and state[row, col] == 0:
                break
            else:
                print("Invalid move. Please try again.")
        state[row, col] = 1

        # The human places the last mark on a full board, so a finished
        # game here is either a win for X or a draw.
        result = check_reward(state)
        if result is not None:
            print("You won!" if result == 1 else "It's a draw!")
            return

        # AI player's turn
        print("AI's turn!")
        action = choose_action(state, Q, epsilon=0)
        state[action // 3, action % 3] = 2
        print_board(state)

        # Check if the game is over
        result = check_reward(state)
        if result is not None:
            print("AI won!" if result == -1 else "It's a draw!")
            return
# Start an interactive game against the Q-table; this waits for keyboard input.
play_game(Q)


Your turn!
  |   |  
  |   |  
  |   |  
AI's turn!
O |   |  
  | X |  
  |   |  
Your turn!
O |   |  
  | X |  
  |   |  
AI's turn!
O | O |  
  | X |  
  |   | X
Your turn!
O | O |  
  | X |  
  |   | X
AI's turn!
O | O | X
O | X |  
  |   | X
Your turn!
O | O | X
O | X |  
  |   | X
You won!
