In [2]:
import numpy as np

class TicTacToe:
    def __init__(self):
        self.board = np.zeros((3, 3))
        self.current_player = 1
        self.game_over = False

    def get_valid_moves(self):
        return np.argwhere(self.board == 0)

    def make_move(self, row, col):
        self.board[row, col] = self.current_player
        self.current_player = -self.current_player

    def check_winner(self):
        # Check rows
        for row in range(3):
            if np.all(self.board[row, :] == 1) or np.all(self.board[row, :] == -1):
                return self.board[row, 0]

        # Check columns
        for col in range(3):
            if np.all(self.board[:, col] == 1) or np.all(self.board[:, col] == -1):
                return self.board[0, col]

        # Check diagonals
        if np.all(np.diag(self.board) == 1) or np.all(np.diag(self.board) == -1):
            return self.board[0, 0]

        if np.all(np.diag(np.fliplr(self.board)) == 1) or np.all(np.diag(np.fliplr(self.board)) == -1):
            return self.board[0, 2]

        # No winner
        return 0

    def check_game_over(self):
        if self.check_winner() != 0 or np.all(self.board != 0):
            self.game_over = True


In [19]:
import numpy as np
import random

class QLearningAgent:
    def __init__(self, alpha, gamma, epsilon):
        self.q_table = {}
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon

    def get_q_value(self, state, action):
        state_str = state.tostring()  # Convert numpy array to string
        if state_str not in self.q_table:
            self.q_table[state_str] = {}
        if action not in self.q_table[state_str]:
            self.q_table[state_str][action] = 0
        return self.q_table[state_str][action]

    def update_q_table(self, state, action, reward, next_state):
        state_str = state.tostring()  # Convert numpy array to string
        next_state_str = next_state.tostring()  # Convert numpy array to string
        if state_str not in self.q_table:
            self.q_table[state_str] = {}
        if next_state_str not in self.q_table:
            self.q_table[next_state_str] = {}

        max_q_value = max(self.q_table[next_state_str].values()) if self.q_table[next_state_str] else 0
        new_q_value = (1 - self.alpha) * self.get_q_value(state, action) + self.alpha * (reward + self.gamma * max_q_value)
        self.q_table[state_str][action] = new_q_value

    def get_action(self, state):
        if random.uniform(0, 1) < self.epsilon:
            # Choose a random action
            row = random.randint(0, 2)
            col = random.randint(0, 2)
        else:
            # Choose the best action based on Q-values
            actions = [(i, j) for i in range(3) for j in range(3)]
            q_values = [self.get_q_value(state, action) for action in actions]
            max_q_value = max(q_values)
            best_actions = [action for action, q_value in zip(actions, q_values) if q_value == max_q_value]
            row, col = random.choice(best_actions)

        return row, col


In [20]:
def train_agent(agent, episodes):
    for episode in range(episodes):
        game = TicTacToe()

        while not game.game_over:
            # Get current state and choose action
            state = game.board.copy()
            action = agent.get_action(state)

            # Make the chosen move
            game.make_move(action[0], action[1])

            # Check if the game is over
            game.check_game_over()
            winner = game.check_winner()

            if winner == 1:
                reward = 1  # Agent won
            elif winner == -1:
                reward = -1  # Agent lost
            else:
                reward = 0  # Draw

            # Update Q-table
            next_state = game.board.copy()
            agent.update_q_table(state, tuple(action), reward, next_state)

In [23]:
def play_game(agent):
    game = TicTacToe()

    while not game.game_over:
        # Display the board
        print(game.board)

        if game.current_player == 1:
            # Agent's turn
            state = game.board.copy()
            action = agent.get_action(state)
            game.make_move(action[0], action[1])
            print("Agent's move:")
            print(game.board)
        else:
            # Human's turn
            row = int(input("Enter row (0-2): "))
            col = int(input("Enter column (0-2): "))
            game.make_move(row, col)
            print("Your move:")
            print(game.board)

        game.check_game_over()

    winner = game.check_winner()

    if winner == 1:
        print("Agent wins!")
    elif winner == -1:
        print("You win!")
    else:
        print("It's a draw!")

# Create an instance of the agent
agent = QLearningAgent(alpha=0.5, gamma=0.9, epsilon=0.1)

# Train the agent
train_agent(agent, episodes=10000)

# Play against the trained agent
play_game(agent)


  state_str = state.tostring()  # Convert numpy array to string
  state_str = state.tostring()  # Convert numpy array to string
  next_state_str = next_state.tostring()  # Convert numpy array to string


[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
Agent's move:
[[0. 0. 0.]
 [0. 1. 0.]
 [0. 0. 0.]]
[[0. 0. 0.]
 [0. 1. 0.]
 [0. 0. 0.]]
Enter row (0-2): 0
Enter column (0-2): 0
Your move:
[[-1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  0.]]
[[-1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  0.]]
Agent's move:
[[-1.  0.  0.]
 [ 0.  1.  1.]
 [ 0.  0.  0.]]
[[-1.  0.  0.]
 [ 0.  1.  1.]
 [ 0.  0.  0.]]
Enter row (0-2): 0
Enter column (0-2): 1
Your move:
[[-1. -1.  0.]
 [ 0.  1.  1.]
 [ 0.  0.  0.]]
[[-1. -1.  0.]
 [ 0.  1.  1.]
 [ 0.  0.  0.]]
Agent's move:
[[-1. -1.  0.]
 [ 0.  1.  1.]
 [ 0.  1.  0.]]
[[-1. -1.  0.]
 [ 0.  1.  1.]
 [ 0.  1.  0.]]
Enter row (0-2): 0
Enter column (0-2): 2
Your move:
[[-1. -1. -1.]
 [ 0.  1.  1.]
 [ 0.  1.  0.]]
You win!
