In [3]:
import numpy as np

def print_board(board):
    for row in board:
        print(" | ".join(row))
        print("-" * 9)

def check_win(board, player):
    for i in range(3):
        if all(board[i] == player) or all(board[:, i] == player):
            return True
    if all(np.diag(board) == player) or all(np.diag(np.fliplr(board)) == player):
        return True
    return False

def check_draw(board):
    return " " not in board

def main():
    board = np.array([[" ", " ", " "], [" ", " ", " "], [" ", " ", " "]])
    players = ["X", "O"]
    player_index = 0
    game_over = False

    print("Tic-Tac-Toe Game")
    print_board(board)

    while not game_over:
        player = players[player_index]
        row, col = map(int, input(f"Player {player}: Enter row and column (e.g., 0 0): ").split())
        if board[row, col] == " ":
            board[row, col] = player
        else:
            print("Invalid move. Try again.")
            continue

        print_board(board)

        if check_win(board, player):
            print(f"Player {player} wins!")
            game_over = True
        elif check_draw(board):
            print("It's a draw!")
            game_over = True
        else:
            player_index = 1 - player_index

if __name__ == "__main__":
    main()


Tic-Tac-Toe Game
  |   |  
---------
  |   |  
---------
  |   |  
---------
Player X: Enter row and column (e.g., 0 0): g


ValueError: invalid literal for int() with base 10: 'g'

In [None]:
import numpy as np

def print_board(board):
    for row in board:
        print(" | ".join(row))
        print("-" * 9)

def check_win(board, player):
    for i in range(3):
        if all(board[i] == player) or all(board[:, i] == player):
            return True
    if all(np.diag(board) == player) or all(np.diag(np.fliplr(board)) == player):
        return True
    return False

def check_draw(board):
    return " " not in board

def main():
    board = np.array([[" ", " ", " "], [" ", " ", " "], [" ", " ", " "]])
    players = ["X", "O"]
    player_index = 0
    game_over = False

    print("Tic-Tac-Toe Game")
    print_board(board)

    while not game_over:
        player = players[player_index]
        row, col = map(int, input(f"Player {player}: Enter row and column (e.g., 0 0): ").split())
        if board[row, col] == " ":
            board[row, col] = player
        else:
            print("Invalid move. Try again.")
            continue

        print_board(board)

        if check_win(board, player):
            print(f"Player {player} wins!")
            game_over = True
        elif check_draw(board):
            print("It's a draw!")
            game_over = True
        else:
            player_index = 1 - player_index

if __name__ == "__main__":
    main()


In [1]:
import numpy as np

# Tic-Tac-Toe game environment
class TicTacToe:
    def __init__(self):
        self.board = np.zeros((3, 3), dtype=int)
        self.current_player = 1

    def is_valid_move(self, row, col):
        return self.board[row, col] == 0

    def make_move(self, row, col):
        if self.is_valid_move(row, col):
            self.board[row, col] = self.current_player
            self.current_player = 3 - self.current_player

    def check_win(self, player):
        for i in range(3):
            if all(self.board[i] == player) or all(self.board[:, i] == player):
                return True
        if all(np.diag(self.board) == player) or all(np.diag(np.fliplr(self.board)) == player):
            return True
        return False

    def is_draw(self):
        return not any(0 in row for row in self.board)

    def is_game_over(self):
        return self.check_win(1) or self.check_win(2) or self.is_draw()

    def get_state(self):
        return tuple(map(tuple, self.board))

    def print_board(self):
        for row in self.board:
            print(" | ".join(["XO "[cell] for cell in row]))
            print("-" * 9)

# Q-learning agent
class QLearningAgent:
    def __init__(self, learning_rate=0.1, discount_factor=0.9, exploration_prob=0.3):
        self.q_table = {}
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_prob = exploration_prob

    def get_q_value(self, state, action):
        if (state, action) in self.q_table:
            return self.q_table[(state, action)]
        return 0

    def update_q_value(self, state, action, value):
        self.q_table[(state, action)] = value

    def select_action(self, state, legal_actions):
        if np.random.rand() < self.exploration_prob:
            return legal_actions[np.random.choice(len(legal_actions))]
        else:
            q_values = [self.get_q_value(state, action) for action in legal_actions]
            max_q = max(q_values)
            max_q_indices = [i for i in range(len(q_values)) if q_values[i] == max_q]
            return legal_actions[np.random.choice(max_q_indices)]

# Training the agent
def train_q_learning_agent():
    episodes = 10000
    learning_rate = 0.1
    discount_factor = 0.9
    exploration_prob = 0.3

    agent = QLearningAgent(learning_rate, discount_factor, exploration_prob)
    env = TicTacToe()

    for episode in range(episodes):
        env = TicTacToe()
        state = env.get_state()

        while not env.is_game_over():
            legal_actions = [(i, j) for i in range(3) for j in range(3) if env.is_valid_move(i, j)]
            action = agent.select_action(state, legal_actions)
            env.make_move(action[0], action[1])
            next_state = env.get_state()
            if env.check_win(1):
                reward = 1
            elif env.check_win(2):
                reward = -1
            else:
                reward = 0
            q_value = agent.get_q_value(state, action)
            max_next_q = max(agent.get_q_value(next_state, a) for a in legal_actions)
            new_q_value = (1 - learning_rate) * q_value + learning_rate * (reward + discount_factor * max_next_q)
            agent.update_q_value(state, action, new_q_value)
            state = next_state

    return agent

# Testing the trained agent
def test_q_learning_agent(agent):
    env = TicTacToe()
    state = env.get_state()

    while not env.is_game_over():
        legal_actions = [(i, j) for i in range(3) for j in range(3) if env.is_valid_move(i, j)]
        action = agent.select_action(state, legal_actions)
        env.make_move(action[0], action[1])
        state = env.get_state()
        env.print_board()

        if env.check_win(1):
            print("Agent (X) wins!")
        elif env.check_win(2):
            print("Opponent (O) wins!")
        elif env.is_draw():
            print("It's a draw!")

if __name__ == "__main__":
    trained_agent = train_q_learning_agent()
    test_q_learning_agent(trained_agent)


X | X | O
---------
X | X | X
---------
X | X | X
---------
X | X | O
---------
X | X | X
---------
  | X | X
---------
X | X | O
---------
X | O | X
---------
  | X | X
---------
X | X | O
---------
X | O |  
---------
  | X | X
---------
X | X | O
---------
X | O |  
---------
  | X | O
---------
X | X | O
---------
X | O |  
---------
  |   | O
---------
O | X | O
---------
X | O |  
---------
  |   | O
---------
Agent (X) wins!
