In [1]:
class TicTacToe:
    def __init__(self):
        self.board = [' ' for _ in range(9)]  # Use a single list to represent 3x3 board
        self.current_winner = None  # Keep track of winner

    def print_board(self):
        for row in [self.board[i*3:(i+1)*3] for i in range(3)]:
            print('| ' + ' | '.join(row) + ' |')

    def available_moves(self):
        return [i for i, spot in enumerate(self.board) if spot == ' ']

    def empty_squares(self):
        return ' ' in self.board

    def num_empty_squares(self):
        return self.board.count(' ')

    def make_move(self, square, letter):
        if self.board[square] == ' ':
            self.board[square] = letter
            if self.winner(square, letter):
                self.current_winner = letter
            return True
        return False

    def winner(self, square, letter):
        # Check row, column, and diagonal
        row_ind = square // 3
        row = self.board[row_ind*3 : (row_ind + 1) * 3]
        if all([s == letter for s in row]):
            return True

        col_ind = square % 3
        column = [self.board[col_ind+i*3] for i in range(3)]
        if all([s == letter for s in column]):
            return True

        if square % 2 == 0:  # Check diagonals
            diagonal1 = [self.board[i] for i in [0, 4, 8]]
            if all([s == letter for s in diagonal1]):
                return True
            diagonal2 = [self.board[i] for i in [2, 4, 6]]
            if all([s == letter for s in diagonal2]):
                return True

        return False

In [2]:
import numpy as np
import random

class QAgent:
    def __init__(self, learning_rate=0.1, discount_factor=0.9, epsilon=0.1):
        self.q_table = {}  # Q(s,a) table
        self.lr = learning_rate
        self.gamma = discount_factor
        self.epsilon = epsilon

    def get_state_key(self, board):
        return ''.join(board)

    def choose_action(self, state, available_moves):
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(available_moves)  # Explore
        else:
            q_values = [self.get_q_value(state, move) for move in available_moves]
            max_q = max(q_values)
            if q_values.count(max_q) > 1:
                best_moves = [i for i in range(len(available_moves)) if q_values[i] == max_q]
                i = random.choice(best_moves)
            else:
                i = q_values.index(max_q)
            return available_moves[i]

    def get_q_value(self, state, action):
        return self.q_table.get((state, action), 0.0)

    def learn(self, state, action, reward, next_state):
        current_q = self.get_q_value(state, action)
        max_next_q = max([self.get_q_value(next_state, a) for a in TicTacToe().available_moves()]) if TicTacToe().available_moves() else 0
        new_q = current_q + self.lr * (reward + self.gamma * max_next_q - current_q)
        self.q_table[(state, action)] = new_q

In [3]:
def play_game(p1, p2, env, draw=False):
    current_player = None
    while env.empty_squares():
        current_player = p1 if current_player != p1 else p2
        state_key = p1.get_state_key(env.board)
        action = current_player.choose_action(state_key, env.available_moves())
        env.make_move(action, current_player_letter(current_player))
        if draw:
            env.print_board()
        if env.current_winner:
            if draw:
                print(f"Player {env.current_winner} wins!")
            return env.current_winner
    if draw:
        print("It's a tie!")
    return 'Tie'

def current_player_letter(player):
    return 'X' if isinstance(player, QAgent) else 'O'

# Training the agent
def train(episodes=10000):
    player1 = QAgent()
    player2 = QAgent()
    env = TicTacToe()
    for episode in range(episodes):
        if episode % 1000 == 0:
            print(f"Episode: {episode}")
        play_game(player1, player2, env)
        env = TicTacToe()  # Reset environment after each game
    return player1

# Play against human
def play_with_human(agent):
    env = TicTacToe()
    human_player = 'O'
    while env.empty_squares():
        if human_player == 'O':
            move = int(input("Enter your move (0-8): "))
            if move not in env.available_moves():
                print("Invalid move. Try again.")
                continue
        else:
            move = agent.choose_action(agent.get_state_key(env.board), env.available_moves())
        env.make_move(move, human_player)
        env.print_board()
        winner = play_game(agent, 'human', env)
        if winner != 'Tie':
            print(f"{winner} won!")
            break
        human_player = 'X' if human_player == 'O' else 'O'

# Train the agent
trained_agent = train()

# Test against human
play_with_human(trained_agent)

Episode: 0
Episode: 1000
Episode: 2000
Episode: 3000
Episode: 4000
Episode: 5000
Episode: 6000
Episode: 7000
Episode: 8000
Episode: 9000
| O |   |   |
|   |   |   |
|   |   |   |


AttributeError: 'str' object has no attribute 'choose_action'