# **Import necessary libraries**

In [1]:
import numpy as np
import csv
import random
from itertools import groupby

# Define TicTacToeGame class

In [2]:
class TicTacToeGame():
    """Tic Tac Toe game whose board is a 9-character string.

    Cells are stored row by row; each holds 'X', 'O' or ' ' (empty).
    ``player`` is the mark to move next (``None`` once somebody has won) and
    ``winner`` is the winning mark, or ``None`` while there is none.
    """

    def __init__(self):
        """Start with an empty board and 'X' to move."""
        self.state = ' ' * 9
        self.player = 'X'
        self.winner = None

    def allowed_moves(self):
        """Return every board string reachable by one move of the current player."""
        board, mark = self.state, self.player
        return [
            board[:pos] + mark + board[pos + 1:]
            for pos, cell in enumerate(board)
            if cell == ' '
        ]

    def make_move(self, next_state):
        """Advance the game to ``next_state`` and hand the turn over.

        Raises:
            Exception: if the game already has a winner, or if ``next_state``
                is not one of the states returned by ``allowed_moves()``.
        """
        if self.winner:
            raise Exception("Game already completed, cannot make another move!")
        if not self.__valid_move(next_state):
            raise Exception("Cannot make move {} to {} for player {}".format(
                self.state, next_state, self.player))

        self.state = next_state
        self.winner = self.predict_winner(next_state)
        if self.winner:
            # Nobody moves after a win.
            self.player = None
        else:
            self.player = 'O' if self.player == 'X' else 'X'

    def playable(self):
        """Return True while there is no winner and at least one empty cell."""
        if self.winner:
            return False
        return any(self.allowed_moves())

    def predict_winner(self, state):
        """Return 'X' or 'O' if ``state`` contains a completed line, else None."""
        triples = (
            (0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows
            (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns
            (0, 4, 8), (2, 4, 6),              # diagonals
        )
        outcome = None
        for a, b, c in triples:
            trio = state[a] + state[b] + state[c]
            # Every line is checked; the last completed one decides.
            if trio in ('XXX', 'OOO'):
                outcome = trio[0]
        return outcome

    def __valid_move(self, next_state):
        """Return True if ``next_state`` is one of the currently allowed moves."""
        return next_state in self.allowed_moves()

    def print_board(self):
        """Print the board as three rows separated by dashed lines."""
        cells = self.state
        for top in (0, 3, 6):
            if top:
                print('    -----------')
            print('     {} | {} | {} '.format(cells[top], cells[top + 1], cells[top + 2]))

class Agent():
    """Tabular self-play learner for a two-player, turn-based board game.

    ``V`` maps a board-state string to its estimated value from the point of
    view of ``value_player``; states missing from the table count as 0.0.
    When choosing among successor states, ``value_player`` takes the highest
    valued one and the other player the lowest valued one.
    """

    def __init__(self, game_class, epsilon=0.1, alpha=0.5, value_player='X'):
        """Create an untrained agent.

        Args:
            game_class: Zero-argument callable returning a fresh game object.
                The agent uses its ``allowed_moves()``, ``make_move()``,
                ``playable()``, ``print_board()``, ``state``, ``player`` and
                ``winner`` members.
            epsilon: Probability, during learning, of replacing the greedy
                move with a uniformly random allowed move.
            alpha: Step size of the value update.
            value_player: Mark whose win is rewarded with +1.0.
        """
        self.V = dict()
        self.NewGame = game_class
        self.epsilon = epsilon
        self.alpha = alpha
        self.value_player = value_player

    def state_value(self, game_state):
        """Return the stored value of ``game_state`` (0.0 if never updated)."""
        return self.V.get(game_state, 0.0)

    def learn_game(self, num_episodes=1000,
                   report_episodes=(500, 1000, 5000, 10000, 20000, 30000)):
        """Run ``num_episodes`` self-play learning episodes.

        After each episode whose 1-based number appears in
        ``report_episodes``, print greedy self-play statistics via the
        module-level ``demo_game_stats``.
        """
        for episode in range(1, num_episodes + 1):
            self.learn_from_episode()
            if episode in report_episodes:
                print(f"After {episode} games:")
                demo_game_stats(self)

    def learn_from_episode(self):
        """Play one self-play game, updating ``V`` after every move."""
        game = self.NewGame()
        _, move = self.learn_select_move(game)
        while move:
            move = self.learn_from_move(game, move)

    def learn_from_move(self, game, move):
        """Play ``move``, update its value, and return the next move to play.

        The value of ``move`` (the state reached) is pulled towards
        ``reward + V(best next state)``; the bootstrap uses the greedy next
        state even when an exploratory one is selected to be played.

        Returns:
            The state selected as the next move, or ``None`` when the game
            is over.
        """
        game.make_move(move)
        r = self.__reward(game)
        next_state_value = 0.0
        selected_next_move = None
        if game.playable():
            best_next_move, selected_next_move = self.learn_select_move(game)
            next_state_value = self.state_value(best_next_move)
        current_state_value = self.state_value(move)
        td_target = r + next_state_value
        self.V[move] = current_state_value + self.alpha * (td_target - current_state_value)
        return selected_next_move

    def learn_select_move(self, game):
        """Return ``(best_move, selected_move)`` for the player to move.

        ``best_move`` is the greedy choice; ``selected_move`` equals it
        except with probability ``epsilon``, when it is a random allowed move.
        """
        allowed_state_values = self.__state_values(game.allowed_moves())
        if game.player == self.value_player:
            best_move = self.__argmax_V(allowed_state_values)
        else:
            best_move = self.__argmin_V(allowed_state_values)

        selected_move = best_move
        if random.random() < self.epsilon:
            selected_move = self.__random_V(allowed_state_values)

        return best_move, selected_move

    def play_select_move(self, game):
        """Return the greedy move for the player to move (no exploration)."""
        allowed_state_values = self.__state_values(game.allowed_moves())
        if game.player == self.value_player:
            return self.__argmax_V(allowed_state_values)
        return self.__argmin_V(allowed_state_values)

    def demo_game(self, verbose=False):
        """Play one greedy self-play game.

        Args:
            verbose: When true, print the board before every turn and the
                final result.

        Returns:
            The winner's mark, or '-' for a draw.
        """
        game = self.NewGame()
        t = 0
        while game.playable():
            if verbose:
                print(f"\nTurn {t}\n")
                game.print_board()
            move = self.play_select_move(game)
            game.make_move(move)
            t += 1
        if verbose:
            print(f"\nTurn {t}\n")
            game.print_board()
        if game.winner:
            if verbose:
                print(f"\n{game.winner} is the winner!")
            return game.winner
        if verbose:
            print("\nIt's a draw!")
        return '-'

    def interactive_game(self):
        """Play games against a human on the console until they decline.

        Before each game the user is asked whether to play and which mark to
        take; the agent plays the other mark using its greedy policy.
        """
        while True:
            play_game = input("Do you want to play a game of Tic Tac Toe? (yes/no): ").strip().lower()
            if play_game != 'yes':
                print("Maybe next time!")
                return

            while True:
                human_player = input("Do you want to play as 'X' or 'O'? ").strip().upper()
                if human_player in ('X', 'O'):
                    break
                print("Invalid choice. Please choose 'X' or 'O'.")
            agent_player = 'O' if human_player == 'X' else 'X'

            game = self.NewGame()
            t = 0
            while game.playable():
                print(f"\nTurn {t}\n")
                game.print_board()
                if game.player == agent_player:
                    move = self.play_select_move(game)
                else:
                    move = self.__request_human_move(game)
                game.make_move(move)
                t += 1

            print(f"\nTurn {t}\n")
            game.print_board()

            if game.winner:
                print(f"\n{game.winner} is the winner!")
            else:
                print("\nIt's a draw!")

    def round_V(self):
        """Round every stored value to one decimal place, in place."""
        for state, value in self.V.items():
            # Only values are replaced, so the dict size never changes here.
            self.V[state] = round(value, 1)

    def save_v_table(self, filename='state_values.csv'):
        """Write the value table to ``filename`` as CSV, sorted by state."""
        with open(filename, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['State', 'Value'])
            for state in sorted(self.V):
                writer.writerow([state, self.V[state]])

    def __state_values(self, game_states):
        """Map each state in ``game_states`` to its current value."""
        return {state: self.state_value(state) for state in game_states}

    def __argmax_V(self, state_values):
        """Return a highest-valued state, breaking ties at random."""
        max_V = max(state_values.values())
        return random.choice([state for state, v in state_values.items() if v == max_V])

    def __argmin_V(self, state_values):
        """Return a lowest-valued state, breaking ties at random."""
        min_V = min(state_values.values())
        return random.choice([state for state, v in state_values.items() if v == min_V])

    def __random_V(self, state_values):
        """Return a uniformly random state from ``state_values``."""
        return random.choice(list(state_values))

    def __reward(self, game):
        """Return +1.0 if ``value_player`` has won, -1.0 if the other player has, else 0.0."""
        if game.winner == self.value_player:
            return 1.0
        if game.winner:
            return -1.0
        return 0.0

    def __request_human_move(self, game):
        """Prompt until the user enters the 1-based number of an empty cell.

        Non-numeric input and numbers of occupied or non-existent cells are
        rejected and the prompt is repeated.

        Returns:
            The board string after the human's mark is placed.
        """
        allowed_moves = [i + 1 for i, cell in enumerate(game.state) if cell == ' ']
        while True:
            answer = input(f'Choose move for {game.player}, from {allowed_moves}: ')
            try:
                idx = int(answer)
            except ValueError:
                # Not a number at all: ask again instead of crashing.
                continue
            if idx in allowed_moves:
                return game.state[:idx - 1] + game.player + game.state[idx:]

def demo_game_stats(agent, num_games=10000):
    """Play greedy self-play games with ``agent`` and print outcome percentages.

    Args:
        agent: Object whose ``demo_game()`` returns 'X', 'O' or '-' (draw).
        num_games: Number of games to play; must be positive.

    Raises:
        ValueError: if ``num_games`` is not positive.
    """
    if num_games <= 0:
        raise ValueError("num_games must be positive")
    results = [agent.demo_game() for _ in range(num_games)]
    # Scale by the real game count so the figures stay percentages for any num_games.
    game_stats = {k: 100 * results.count(k) / num_games for k in ['X', 'O', '-']}
    print(f"    Percentage results: {game_stats}")

# Define Agent class

In [3]:
class Agent():
    """Tabular self-play learner for a two-player, turn-based board game.

    ``V`` maps a board-state string to its estimated value from the point of
    view of ``value_player``; states missing from the table count as 0.0.
    When choosing among successor states, ``value_player`` takes the highest
    valued one and the other player the lowest valued one.
    """

    def __init__(self, game_class, epsilon=0.1, alpha=0.5, value_player='X'):
        """Create an untrained agent.

        Args:
            game_class: Zero-argument callable returning a fresh game object.
                The agent uses its ``allowed_moves()``, ``make_move()``,
                ``playable()``, ``print_board()``, ``state``, ``player`` and
                ``winner`` members.
            epsilon: Probability, during learning, of replacing the greedy
                move with a uniformly random allowed move.
            alpha: Step size of the value update.
            value_player: Mark whose win is rewarded with +1.0.
        """
        self.V = dict()
        self.NewGame = game_class
        self.epsilon = epsilon
        self.alpha = alpha
        self.value_player = value_player

    def state_value(self, game_state):
        """Return the stored value of ``game_state`` (0.0 if never updated)."""
        return self.V.get(game_state, 0.0)

    def learn_game(self, num_episodes=1000,
                   report_episodes=(500, 1000, 5000, 10000, 20000, 30000)):
        """Run ``num_episodes`` self-play learning episodes.

        After each episode whose 1-based number appears in
        ``report_episodes``, print greedy self-play statistics via the
        module-level ``demo_game_stats``.
        """
        for episode in range(1, num_episodes + 1):
            self.learn_from_episode()
            if episode in report_episodes:
                print(f"After {episode} games:")
                demo_game_stats(self)

    def learn_from_episode(self):
        """Play one self-play game, updating ``V`` after every move."""
        game = self.NewGame()
        _, move = self.learn_select_move(game)
        while move:
            move = self.learn_from_move(game, move)

    def learn_from_move(self, game, move):
        """Play ``move``, update its value, and return the next move to play.

        The value of ``move`` (the state reached) is pulled towards
        ``reward + V(best next state)``; the bootstrap uses the greedy next
        state even when an exploratory one is selected to be played.

        Returns:
            The state selected as the next move, or ``None`` when the game
            is over.
        """
        game.make_move(move)
        r = self.__reward(game)
        next_state_value = 0.0
        selected_next_move = None
        if game.playable():
            best_next_move, selected_next_move = self.learn_select_move(game)
            next_state_value = self.state_value(best_next_move)
        current_state_value = self.state_value(move)
        td_target = r + next_state_value
        self.V[move] = current_state_value + self.alpha * (td_target - current_state_value)
        return selected_next_move

    def learn_select_move(self, game):
        """Return ``(best_move, selected_move)`` for the player to move.

        ``best_move`` is the greedy choice; ``selected_move`` equals it
        except with probability ``epsilon``, when it is a random allowed move.
        """
        allowed_state_values = self.__state_values(game.allowed_moves())
        if game.player == self.value_player:
            best_move = self.__argmax_V(allowed_state_values)
        else:
            best_move = self.__argmin_V(allowed_state_values)

        selected_move = best_move
        if random.random() < self.epsilon:
            selected_move = self.__random_V(allowed_state_values)

        return best_move, selected_move

    def play_select_move(self, game):
        """Return the greedy move for the player to move (no exploration)."""
        allowed_state_values = self.__state_values(game.allowed_moves())
        if game.player == self.value_player:
            return self.__argmax_V(allowed_state_values)
        return self.__argmin_V(allowed_state_values)

    def demo_game(self, verbose=False):
        """Play one greedy self-play game.

        Args:
            verbose: When true, print the board before every turn and the
                final result.

        Returns:
            The winner's mark, or '-' for a draw.
        """
        game = self.NewGame()
        t = 0
        while game.playable():
            if verbose:
                print(f"\nTurn {t}\n")
                game.print_board()
            move = self.play_select_move(game)
            game.make_move(move)
            t += 1
        if verbose:
            print(f"\nTurn {t}\n")
            game.print_board()
        if game.winner:
            if verbose:
                print(f"\n{game.winner} is the winner!")
            return game.winner
        if verbose:
            print("\nIt's a draw!")
        return '-'

    def interactive_game(self):
        """Play games against a human on the console until they decline.

        Before each game the user is asked whether to play and which mark to
        take; the agent plays the other mark using its greedy policy.
        """
        while True:
            play_game = input("Do you want to play a game of Tic Tac Toe? (yes/no): ").strip().lower()
            if play_game != 'yes':
                print("Maybe next time!")
                return

            while True:
                human_player = input("Do you want to play as 'X' or 'O'? ").strip().upper()
                if human_player in ('X', 'O'):
                    break
                print("Invalid choice. Please choose 'X' or 'O'.")
            agent_player = 'O' if human_player == 'X' else 'X'

            game = self.NewGame()
            t = 0
            while game.playable():
                print(f"\nTurn {t}\n")
                game.print_board()
                if game.player == agent_player:
                    move = self.play_select_move(game)
                else:
                    move = self.__request_human_move(game)
                game.make_move(move)
                t += 1

            print(f"\nTurn {t}\n")
            game.print_board()

            if game.winner:
                print(f"\n{game.winner} is the winner!")
            else:
                print("\nIt's a draw!")

    def round_V(self):
        """Round every stored value to one decimal place, in place."""
        for state, value in self.V.items():
            # Only values are replaced, so the dict size never changes here.
            self.V[state] = round(value, 1)

    def save_v_table(self, filename='state_values.csv'):
        """Write the value table to ``filename`` as CSV, sorted by state."""
        with open(filename, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['State', 'Value'])
            for state in sorted(self.V):
                writer.writerow([state, self.V[state]])

    def __state_values(self, game_states):
        """Map each state in ``game_states`` to its current value."""
        return {state: self.state_value(state) for state in game_states}

    def __argmax_V(self, state_values):
        """Return a highest-valued state, breaking ties at random."""
        max_V = max(state_values.values())
        return random.choice([state for state, v in state_values.items() if v == max_V])

    def __argmin_V(self, state_values):
        """Return a lowest-valued state, breaking ties at random."""
        min_V = min(state_values.values())
        return random.choice([state for state, v in state_values.items() if v == min_V])

    def __random_V(self, state_values):
        """Return a uniformly random state from ``state_values``."""
        return random.choice(list(state_values))

    def __reward(self, game):
        """Return +1.0 if ``value_player`` has won, -1.0 if the other player has, else 0.0."""
        if game.winner == self.value_player:
            return 1.0
        if game.winner:
            return -1.0
        return 0.0

    def __request_human_move(self, game):
        """Prompt until the user enters the 1-based number of an empty cell.

        Non-numeric input and numbers of occupied or non-existent cells are
        rejected and the prompt is repeated.

        Returns:
            The board string after the human's mark is placed.
        """
        allowed_moves = [i + 1 for i, cell in enumerate(game.state) if cell == ' ']
        while True:
            answer = input(f'Choose move for {game.player}, from {allowed_moves}: ')
            try:
                idx = int(answer)
            except ValueError:
                # Not a number at all: ask again instead of crashing.
                continue
            if idx in allowed_moves:
                return game.state[:idx - 1] + game.player + game.state[idx:]

def demo_game_stats(agent, num_games=10000):
    """Play greedy self-play games with ``agent`` and print outcome percentages.

    Args:
        agent: Object whose ``demo_game()`` returns 'X', 'O' or '-' (draw).
        num_games: Number of games to play; must be positive.

    Raises:
        ValueError: if ``num_games`` is not positive.
    """
    if num_games <= 0:
        raise ValueError("num_games must be positive")
    results = [agent.demo_game() for _ in range(num_games)]
    # Scale by the real game count so the figures stay percentages for any num_games.
    game_stats = {k: 100 * results.count(k) / num_games for k in ['X', 'O', '-']}
    print(f"    Percentage results: {game_stats}")

# Main training and execution block

In [4]:
if __name__ == '__main__':
    # Train by self-play; alpha=1.0 makes each update overwrite the old estimate.
    agent = Agent(TicTacToeGame, alpha=1.0, epsilon=0.1)
    print("Training the agent...")
    agent.learn_game(30000)
    print("Training completed.")

    # interactive_game() keeps offering games and returns once the user declines.
    agent.interactive_game()
    print("Thanks for playing")

Training the agent...
After 500 games:
    Percentage results: {'X': 58.42, 'O': 30.76, '-': 10.82}
After 1000 games:
    Percentage results: {'X': 56.21, 'O': 32.52, '-': 11.27}
After 5000 games:
    Percentage results: {'X': 18.69, 'O': 6.88, '-': 74.43}
After 10000 games:
    Percentage results: {'X': 1.8, 'O': 0.9, '-': 97.3}
After 20000 games:
    Percentage results: {'X': 0.05, 'O': 0.01, '-': 99.94}
After 30000 games:
    Percentage results: {'X': 0.0, 'O': 0.0, '-': 100.0}
Training completed.


Do you want to play a game of Tic Tac Toe? (yes/no):  yes
Do you want to play as 'X' or 'O'?  X



Turn 0

       |   |   
    -----------
       |   |   
    -----------
       |   |   


Choose move for X, from [1, 2, 3, 4, 5, 6, 7, 8, 9]:  3



Turn 1

       |   | X 
    -----------
       |   |   
    -----------
       |   |   

Turn 2

       |   | X 
    -----------
       | O |   
    -----------
       |   |   


Choose move for X, from [1, 2, 4, 6, 7, 8, 9]:  9



Turn 3

       |   | X 
    -----------
       | O |   
    -----------
       |   | X 

Turn 4

       |   | X 
    -----------
       | O | O 
    -----------
       |   | X 


Choose move for X, from [1, 2, 4, 7, 8]:  4



Turn 5

       |   | X 
    -----------
     X | O | O 
    -----------
       |   | X 

Turn 6

     O |   | X 
    -----------
     X | O | O 
    -----------
       |   | X 


Choose move for X, from [2, 7, 8]:  7



Turn 7

     O |   | X 
    -----------
     X | O | O 
    -----------
     X |   | X 

Turn 8

     O |   | X 
    -----------
     X | O | O 
    -----------
     X | O | X 


Choose move for X, from [2]:  2



Turn 9

     O | X | X 
    -----------
     X | O | O 
    -----------
     X | O | X 

It's a draw!


Do you want to play a game of Tic Tac Toe? (yes/no):  yes
Do you want to play as 'X' or 'O'?  O



Turn 0

       |   |   
    -----------
       |   |   
    -----------
       |   |   

Turn 1

       |   |   
    -----------
     X |   |   
    -----------
       |   |   


Choose move for O, from [1, 2, 3, 5, 6, 7, 8, 9]:  7



Turn 2

       |   |   
    -----------
     X |   |   
    -----------
     O |   |   

Turn 3

       |   |   
    -----------
     X | X |   
    -----------
     O |   |   


Choose move for O, from [1, 2, 3, 6, 8, 9]:  6



Turn 4

       |   |   
    -----------
     X | X | O 
    -----------
     O |   |   

Turn 5

       |   | X 
    -----------
     X | X | O 
    -----------
     O |   |   


Choose move for O, from [1, 2, 8, 9]:  1



Turn 6

     O |   | X 
    -----------
     X | X | O 
    -----------
     O |   |   

Turn 7

     O | X | X 
    -----------
     X | X | O 
    -----------
     O |   |   


Choose move for O, from [8, 9]:  8



Turn 8

     O | X | X 
    -----------
     X | X | O 
    -----------
     O | O |   

Turn 9

     O | X | X 
    -----------
     X | X | O 
    -----------
     O | O | X 

It's a draw!


Do you want to play a game of Tic Tac Toe? (yes/no):  yes
Do you want to play as 'X' or 'O'?  X



Turn 0

       |   |   
    -----------
       |   |   
    -----------
       |   |   


Choose move for X, from [1, 2, 3, 4, 5, 6, 7, 8, 9]:  3



Turn 1

       |   | X 
    -----------
       |   |   
    -----------
       |   |   

Turn 2

       |   | X 
    -----------
       | O |   
    -----------
       |   |   


Choose move for X, from [1, 2, 4, 6, 7, 8, 9]:  1



Turn 3

     X |   | X 
    -----------
       | O |   
    -----------
       |   |   

Turn 4

     X | O | X 
    -----------
       | O |   
    -----------
       |   |   


Choose move for X, from [4, 6, 7, 8, 9]:  8



Turn 5

     X | O | X 
    -----------
       | O |   
    -----------
       | X |   

Turn 6

     X | O | X 
    -----------
       | O |   
    -----------
     O | X |   


Choose move for X, from [4, 6, 9]:  4



Turn 7

     X | O | X 
    -----------
     X | O |   
    -----------
     O | X |   

Turn 8

     X | O | X 
    -----------
     X | O | O 
    -----------
     O | X |   


Choose move for X, from [9]:  9



Turn 9

     X | O | X 
    -----------
     X | O | O 
    -----------
     O | X | X 

It's a draw!


Do you want to play a game of Tic Tac Toe? (yes/no):  yes
Do you want to play as 'X' or 'O'?  O



Turn 0

       |   |   
    -----------
       |   |   
    -----------
       |   |   

Turn 1

       |   |   
    -----------
       |   |   
    -----------
     X |   |   


Choose move for O, from [1, 2, 3, 4, 5, 6, 8, 9]:  1



Turn 2

     O |   |   
    -----------
       |   |   
    -----------
     X |   |   

Turn 3

     O |   |   
    -----------
       |   |   
    -----------
     X | X |   


Choose move for O, from [2, 3, 4, 5, 6, 9]:  2



Turn 4

     O | O |   
    -----------
       |   |   
    -----------
     X | X |   

Turn 5

     O | O |   
    -----------
       |   |   
    -----------
     X | X | X 

X is the winner!


Do you want to play a game of Tic Tac Toe? (yes/no):  no


Maybe next time!
Thanks for playing


# **Summary of Tic Tac Toe Game and Reinforcement Learning Agent**

#### 1. 'TicTacToeGame' Class
- **Initialization**: Initializes a Tic Tac Toe game with an empty board ("         "), sets the starting player as 'X', and initializes 'winner' as 'None'.
- **Allowed Moves**: Returns a list of all possible board states after the current player makes a move.
- **Make Move**: Updates the game state to the next state provided it's a valid move.
- **Playable**: Checks if the game is still ongoing (no winner yet and there are allowed moves).
- **Predict Winner**: Determines if there is a winner based on the current board state.
- **Print Board**: Prints the current state of the board in a formatted manner.

#### 2. 'Agent' Class
- **Initialization**: Initializes the reinforcement learning agent with parameters like 'epsilon' (exploration rate), 'alpha' (learning rate), and the player it will optimize for ('value_player').
- **State Value**: Returns the estimated value of a given board state from the value table 'V' (0.0 for a state that has not been updated yet).
- **Learn Game**: Trains the agent by playing multiple episodes of Tic Tac Toe against itself.
- **Learn From Episode**: Plays a single episode of Tic Tac Toe and learns from it.
- **Learn From Move**: Plays a move and updates the value of the resulting state towards the reward received plus the value of the best next state.
- **Learn Select Move**: Returns both the greedy move and the move actually played during training, which is a random allowed move with probability 'epsilon' (exploration) and the greedy move otherwise (exploitation).
- **Play Select Move**: Selects the next move during gameplay greedily from the learned state values.
- **Demo Game**: Plays a complete game of Tic Tac Toe either silently or with verbose output.
- **Interactive Game**: Allows the user to interactively play Tic Tac Toe against the trained agent.
- **Round V**: Rounds all state values to one decimal place.
- **Save V Table**: Saves the learned state values to a CSV file.
- **Private Helper Methods**:
  - '__state_values': Looks up the state value of every state reachable by an allowed move.
  - '__argmax_V', '__argmin_V', '__random_V': Helper methods for selecting the highest-valued, lowest-valued, or a random state (ties are broken at random).
  - '__reward': Computes the reward for the agent based on the game outcome (+1 when 'value_player' wins, -1 when the opponent wins, 0 otherwise).
  - '__request_human_move': Prompts the user for a valid move during interactive gameplay.

#### 3. 'demo_game_stats' Function
- **Demo Game Stats**: Runs multiple Tic Tac Toe games to gather statistics on win/draw percentages for 'X', 'O', and draws.

#### 4. Main Execution Block
- Initializes an instance of 'Agent' with 'TicTacToeGame'.
- Trains the agent ('learn_game') over 30,000 episodes.
- Offers the user the option to play interactive games against the trained agent.

### Functionality Overview
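- **Training**: The agent learns to play Tic Tac Toe by playing against itself using temporal-difference reinforcement learning on a table of board-state values (each update bootstraps from the best next state, in the style of Q-learning).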
- **Interactive Gameplay**: Users can choose to play Tic Tac Toe against the trained agent, selecting 'X' or 'O'.
- **Statistics**: During training, after 500, 1,000, 5,000, 10,000, 20,000, and 30,000 episodes, the code plays 10,000 greedy self-play games and prints the percentage of 'X' wins, 'O' wins, and draws.

### Usage
- Copy the entire code into a Jupyter Notebook or Python script.
- Run the script to train the agent and interactively play Tic Tac Toe.
- Modify parameters such as 'epsilon', 'alpha', or the number of training episodes to experiment with different learning behaviors.

This setup encapsulates a complete implementation of Tic Tac Toe with a reinforcement learning agent, demonstrating both training and interactive gameplay capabilities. Adjustments can be made for further customization or integration into larger projects.

# **Here is the full code in a single snippet**

In [5]:
import numpy as np
import csv
import random
from itertools import groupby

class TicTacToeGame():
    """Tic Tac Toe game whose board is a 9-character string.

    Cells are stored row by row; each holds 'X', 'O' or ' ' (empty).
    ``player`` is the mark to move next (``None`` once somebody has won) and
    ``winner`` is the winning mark, or ``None`` while there is none.
    """

    def __init__(self):
        """Start with an empty board and 'X' to move."""
        self.state = ' ' * 9
        self.player = 'X'
        self.winner = None

    def allowed_moves(self):
        """Return every board string reachable by one move of the current player."""
        board, mark = self.state, self.player
        return [
            board[:pos] + mark + board[pos + 1:]
            for pos, cell in enumerate(board)
            if cell == ' '
        ]

    def make_move(self, next_state):
        """Advance the game to ``next_state`` and hand the turn over.

        Raises:
            Exception: if the game already has a winner, or if ``next_state``
                is not one of the states returned by ``allowed_moves()``.
        """
        if self.winner:
            raise Exception("Game already completed, cannot make another move!")
        if not self.__valid_move(next_state):
            raise Exception("Cannot make move {} to {} for player {}".format(
                self.state, next_state, self.player))

        self.state = next_state
        self.winner = self.predict_winner(next_state)
        if self.winner:
            # Nobody moves after a win.
            self.player = None
        else:
            self.player = 'O' if self.player == 'X' else 'X'

    def playable(self):
        """Return True while there is no winner and at least one empty cell."""
        if self.winner:
            return False
        return any(self.allowed_moves())

    def predict_winner(self, state):
        """Return 'X' or 'O' if ``state`` contains a completed line, else None."""
        triples = (
            (0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows
            (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns
            (0, 4, 8), (2, 4, 6),              # diagonals
        )
        outcome = None
        for a, b, c in triples:
            trio = state[a] + state[b] + state[c]
            # Every line is checked; the last completed one decides.
            if trio in ('XXX', 'OOO'):
                outcome = trio[0]
        return outcome

    def __valid_move(self, next_state):
        """Return True if ``next_state`` is one of the currently allowed moves."""
        return next_state in self.allowed_moves()

    def print_board(self):
        """Print the board as three rows separated by dashed lines."""
        cells = self.state
        for top in (0, 3, 6):
            if top:
                print('    -----------')
            print('     {} | {} | {} '.format(cells[top], cells[top + 1], cells[top + 2]))


class Agent():
    """Tabular self-play learner for a two-player, turn-based board game.

    ``V`` maps a board-state string to its estimated value from the point of
    view of ``value_player``; states missing from the table count as 0.0.
    When choosing among successor states, ``value_player`` takes the highest
    valued one and the other player the lowest valued one.
    """

    def __init__(self, game_class, epsilon=0.1, alpha=0.5, value_player='X'):
        """Create an untrained agent.

        Args:
            game_class: Zero-argument callable returning a fresh game object.
                The agent uses its ``allowed_moves()``, ``make_move()``,
                ``playable()``, ``print_board()``, ``state``, ``player`` and
                ``winner`` members.
            epsilon: Probability, during learning, of replacing the greedy
                move with a uniformly random allowed move.
            alpha: Step size of the value update.
            value_player: Mark whose win is rewarded with +1.0.
        """
        self.V = dict()
        self.NewGame = game_class
        self.epsilon = epsilon
        self.alpha = alpha
        self.value_player = value_player

    def state_value(self, game_state):
        """Return the stored value of ``game_state`` (0.0 if never updated)."""
        return self.V.get(game_state, 0.0)

    def learn_game(self, num_episodes=1000,
                   report_episodes=(500, 1000, 5000, 10000, 20000, 30000)):
        """Run ``num_episodes`` self-play learning episodes.

        After each episode whose 1-based number appears in
        ``report_episodes``, print greedy self-play statistics via the
        module-level ``demo_game_stats``.
        """
        for episode in range(1, num_episodes + 1):
            self.learn_from_episode()
            if episode in report_episodes:
                print(f"After {episode} games:")
                demo_game_stats(self)

    def learn_from_episode(self):
        """Play one self-play game, updating ``V`` after every move."""
        game = self.NewGame()
        _, move = self.learn_select_move(game)
        while move:
            move = self.learn_from_move(game, move)

    def learn_from_move(self, game, move):
        """Play ``move``, update its value, and return the next move to play.

        The value of ``move`` (the state reached) is pulled towards
        ``reward + V(best next state)``; the bootstrap uses the greedy next
        state even when an exploratory one is selected to be played.

        Returns:
            The state selected as the next move, or ``None`` when the game
            is over.
        """
        game.make_move(move)
        r = self.__reward(game)
        next_state_value = 0.0
        selected_next_move = None
        if game.playable():
            best_next_move, selected_next_move = self.learn_select_move(game)
            next_state_value = self.state_value(best_next_move)
        current_state_value = self.state_value(move)
        td_target = r + next_state_value
        self.V[move] = current_state_value + self.alpha * (td_target - current_state_value)
        return selected_next_move

    def learn_select_move(self, game):
        """Return ``(best_move, selected_move)`` for the player to move.

        ``best_move`` is the greedy choice; ``selected_move`` equals it
        except with probability ``epsilon``, when it is a random allowed move.
        """
        allowed_state_values = self.__state_values(game.allowed_moves())
        if game.player == self.value_player:
            best_move = self.__argmax_V(allowed_state_values)
        else:
            best_move = self.__argmin_V(allowed_state_values)

        selected_move = best_move
        if random.random() < self.epsilon:
            selected_move = self.__random_V(allowed_state_values)

        return best_move, selected_move

    def play_select_move(self, game):
        """Return the greedy move for the player to move (no exploration)."""
        allowed_state_values = self.__state_values(game.allowed_moves())
        if game.player == self.value_player:
            return self.__argmax_V(allowed_state_values)
        return self.__argmin_V(allowed_state_values)

    def demo_game(self, verbose=False):
        """Play one greedy self-play game.

        Args:
            verbose: When true, print the board before every turn and the
                final result.

        Returns:
            The winner's mark, or '-' for a draw.
        """
        game = self.NewGame()
        t = 0
        while game.playable():
            if verbose:
                print(f"\nTurn {t}\n")
                game.print_board()
            move = self.play_select_move(game)
            game.make_move(move)
            t += 1
        if verbose:
            print(f"\nTurn {t}\n")
            game.print_board()
        if game.winner:
            if verbose:
                print(f"\n{game.winner} is the winner!")
            return game.winner
        if verbose:
            print("\nIt's a draw!")
        return '-'

    def interactive_game(self):
        """Play games against a human on the console until they decline.

        Before each game the user is asked whether to play and which mark to
        take; the agent plays the other mark using its greedy policy.
        """
        while True:
            play_game = input("Do you want to play a game of Tic Tac Toe? (yes/no): ").strip().lower()
            if play_game != 'yes':
                print("Maybe next time!")
                return

            while True:
                human_player = input("Do you want to play as 'X' or 'O'? ").strip().upper()
                if human_player in ('X', 'O'):
                    break
                print("Invalid choice. Please choose 'X' or 'O'.")
            agent_player = 'O' if human_player == 'X' else 'X'

            game = self.NewGame()
            t = 0
            while game.playable():
                print(f"\nTurn {t}\n")
                game.print_board()
                if game.player == agent_player:
                    move = self.play_select_move(game)
                else:
                    move = self.__request_human_move(game)
                game.make_move(move)
                t += 1

            print(f"\nTurn {t}\n")
            game.print_board()

            if game.winner:
                print(f"\n{game.winner} is the winner!")
            else:
                print("\nIt's a draw!")

    def round_V(self):
        """Round every stored value to one decimal place, in place."""
        for state, value in self.V.items():
            # Only values are replaced, so the dict size never changes here.
            self.V[state] = round(value, 1)

    def save_v_table(self, filename='state_values.csv'):
        """Write the value table to ``filename`` as CSV, sorted by state."""
        with open(filename, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['State', 'Value'])
            for state in sorted(self.V):
                writer.writerow([state, self.V[state]])

    def __state_values(self, game_states):
        """Map each state in ``game_states`` to its current value."""
        return {state: self.state_value(state) for state in game_states}

    def __argmax_V(self, state_values):
        """Return a highest-valued state, breaking ties at random."""
        max_V = max(state_values.values())
        return random.choice([state for state, v in state_values.items() if v == max_V])

    def __argmin_V(self, state_values):
        """Return a lowest-valued state, breaking ties at random."""
        min_V = min(state_values.values())
        return random.choice([state for state, v in state_values.items() if v == min_V])

    def __random_V(self, state_values):
        """Return a uniformly random state from ``state_values``."""
        return random.choice(list(state_values))

    def __reward(self, game):
        """Return +1.0 if ``value_player`` has won, -1.0 if the other player has, else 0.0."""
        if game.winner == self.value_player:
            return 1.0
        if game.winner:
            return -1.0
        return 0.0

    def __request_human_move(self, game):
        """Prompt until the user enters the 1-based number of an empty cell.

        Non-numeric input and numbers of occupied or non-existent cells are
        rejected and the prompt is repeated.

        Returns:
            The board string after the human's mark is placed.
        """
        allowed_moves = [i + 1 for i, cell in enumerate(game.state) if cell == ' ']
        while True:
            answer = input(f'Choose move for {game.player}, from {allowed_moves}: ')
            try:
                idx = int(answer)
            except ValueError:
                # Not a number at all: ask again instead of crashing.
                continue
            if idx in allowed_moves:
                return game.state[:idx - 1] + game.player + game.state[idx:]

def demo_game_stats(agent, num_games=10000):
    """Play greedy self-play games with ``agent`` and print outcome percentages.

    Args:
        agent: Object whose ``demo_game()`` returns 'X', 'O' or '-' (draw).
        num_games: Number of games to play; must be positive.

    Raises:
        ValueError: if ``num_games`` is not positive.
    """
    if num_games <= 0:
        raise ValueError("num_games must be positive")
    results = [agent.demo_game() for _ in range(num_games)]
    # Scale by the real game count so the figures stay percentages for any num_games.
    game_stats = {k: 100 * results.count(k) / num_games for k in ['X', 'O', '-']}
    print(f"    Percentage results: {game_stats}")

if __name__ == '__main__':
    # Train by self-play; alpha=1.0 makes each update overwrite the old estimate.
    agent = Agent(TicTacToeGame, alpha=1.0, epsilon=0.1)
    print("Training the agent...")
    agent.learn_game(30000)
    print("Training completed.")

    # interactive_game() keeps offering games and returns once the user declines.
    agent.interactive_game()
    print("Thanks for playing")

Training the agent...
After 500 games:
    Percentage results: {'X': 59.08, 'O': 30.15, '-': 10.77}
After 1000 games:
    Percentage results: {'X': 57.17, 'O': 32.0, '-': 10.83}
After 5000 games:
    Percentage results: {'X': 18.24, 'O': 7.74, '-': 74.02}
After 10000 games:
    Percentage results: {'X': 1.99, 'O': 0.47, '-': 97.54}
After 20000 games:
    Percentage results: {'X': 0.0, 'O': 0.0, '-': 100.0}
After 30000 games:
    Percentage results: {'X': 0.0, 'O': 0.0, '-': 100.0}
Training completed.


Do you want to play a game of Tic Tac Toe? (yes/no):  no


Maybe next time!
Thanks for playing
