## TicTacToe

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

In [3]:
#Génération de labyrinthe
#0 = vide
#1 = X
#2 = O

labyrinth = np.array([
  [0, 0, 0],
  [0, 0, 0],
  [0, 0, 0],
])

In [32]:
import numpy as np
import random

class TicTacToeQLearningAgent:
    def __init__(self, alpha=0.8, gamma=0.95, epsilon=1.0, epsilon_min=0.1, epsilon_decay=0.99, n_episodes=50000):
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.n_episodes = n_episodes
        self.q_table = {}

    def reset(self):
        self.board = [' '] * 9
        self.current_winner = None
        return tuple(self.board)

    def available_moves(self):
        return [i for i, x in enumerate(self.board) if x == ' ']

    def make_move(self, square, player):
        if self.board[square] == ' ':
            self.board[square] = player
            if self.winner(square, player):
                self.current_winner = player
            return True
        return False

    def winner(self, square, player):
        row_ind = square // 3
        row = self.board[row_ind*3:(row_ind+1)*3]
        if all([s == player for s in row]):
            return True
        col_ind = square % 3
        column = [self.board[col_ind+i*3] for i in range(3)]
        if all([s == player for s in column]):
            return True
        if square % 2 == 0:
            diagonal1 = [self.board[i] for i in [0, 4, 8]]
            if all([s == player for s in diagonal1]):
                return True
            diagonal2 = [self.board[i] for i in [2, 4, 6]]
            if all([s == player for s in diagonal2]):
                return True
        return False

    def is_draw(self):
        return ' ' not in self.board and self.current_winner is None

    def get_q(self, state, action):
        if (state, action) not in self.q_table:
            self.q_table[(state, action)] = 0.0
        return self.q_table[(state, action)]

    def choose_action(self, state, available_moves):
        if np.random.rand() < self.epsilon:
            return random.choice(available_moves)
        else:
            qs = [self.get_q(state, a) for a in available_moves]
            max_q = max(qs)
            best_actions = [a for a, q in zip(available_moves, qs) if q == max_q]
            return random.choice(best_actions)

    def update_q(self, state, action, reward, next_state, next_available_moves):
        best_next_q = max([self.get_q(next_state, a) for a in next_available_moves], default=0)
        current_q = self.get_q(state, action)
        self.q_table[(state, action)] = current_q + self.alpha * (reward + self.gamma * best_next_q - current_q)

    def simulate_opponent_move(self):
        """Opponent strategy to either block, win, or pick the center or a random move"""
        for move in self.available_moves():
            # Check if opponent can win
            self.board[move] = 'O'
            if self.winner(move, 'O'):
                return move
            self.board[move] = ' '

        for move in self.available_moves():
            # Check if need to block agent
            self.board[move] = 'X'
            if self.winner(move, 'X'):
                self.board[move] = 'O'
                return move
            self.board[move] = ' '

        # Prefer the center if available
        if 4 in self.available_moves():
            return 4

        # Otherwise, pick randomly
        return random.choice(self.available_moves())

    def train(self):
        for episode in range(self.n_episodes):
            state = self.reset()
            done = False
            player = 'X'  # Agent is 'X'

            while not done:
                available_moves = self.available_moves()
                
                # Agent's turn
                if player == 'X':
                    action = self.choose_action(state, available_moves)
                    self.make_move(action, player)
                    next_state = tuple(self.board)
                    reward = 0

                    if self.current_winner == player:
                        reward = 10  # Large reward for winning
                        done = True
                    elif self.is_draw():
                        reward = 5  # Small reward for a draw
                        done = True
                    
                    next_available_moves = self.available_moves()
                    self.update_q(state, action, reward, next_state, next_available_moves)

                    state = next_state
                    player = 'O'
                else:
                    # Simulated opponent move
                    action = self.simulate_opponent_move()
                    self.make_move(action, 'O')
                    if self.current_winner == 'O':
                        reward = -10  # Large penalty for losing
                        done = True
                    elif self.is_draw():
                        done = True
                    player = 'X'
            
            # Decay epsilon
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay

            # Optional: Print progress every 10,000 episodes for tracking
            if (episode + 1) % 10000 == 0:
                print(f"Episode {episode + 1}/{self.n_episodes} completed")

        print("Training complete!")

    def display_board(self):
        for i in range(3):
            print("| " + " | ".join(self.board[i*3:(i+1)*3]) + " |")
        print()

    def play_against_agent(self):
        state = self.reset()
        player = 'X'

        print("Welcome to Tic Tac Toe! You are 'O'")
        self.display_board()

        while True:
            if ' ' not in self.board:
                print("Draw!")
                break

            if player == 'O':
                human_move = int(input("Choose a square (0-8): "))
                if human_move not in self.available_moves():
                    print("Invalid move. Try again.")
                    continue

                self.make_move(human_move, 'O')
                self.display_board()

                if self.current_winner == 'O':
                    print("Congratulations! You won!")
                    break
                elif self.is_draw():
                    print("It's a draw!")
                    break
                player = 'X'
            else:
                agent_move = self.choose_action(tuple(self.board), self.available_moves())
                self.make_move(agent_move, 'X')
                print(f"The agent played at position {agent_move}:")
                self.display_board()

                if self.current_winner == 'X':
                    print("The agent won! Better luck next time.")
                    break
                elif self.is_draw():
                    print("It's a draw!")
                    break
                player = 'O'


# Initialize and train the agent
agent = TicTacToeQLearningAgent(n_episodes=50000)
agent.train()
agent.play_against_agent()


Episode 10000/50000 completed
Episode 20000/50000 completed
Episode 30000/50000 completed
Episode 40000/50000 completed
Episode 50000/50000 completed
Training complete!
Welcome to Tic Tac Toe! You are 'O'
|   |   |   |
|   |   |   |
|   |   |   |

The agent played at position 6:
|   |   |   |
|   |   |   |
| X |   |   |

|   |   |   |
|   | O |   |
| X |   |   |

The agent played at position 0:
| X |   |   |
|   | O |   |
| X |   |   |

| X |   |   |
| O | O |   |
| X |   |   |

The agent played at position 7:
| X |   |   |
| O | O |   |
| X | X |   |

| X |   |   |
| O | O |   |
| X | X | O |

The agent played at position 1:
| X | X |   |
| O | O |   |
| X | X | O |



KeyboardInterrupt: Interrupted by user

In [47]:
import numpy as np
import random

class TicTacToeQLearningAgent:
    def __init__(self, alpha=0.8, gamma=0.95, epsilon=1.0, epsilon_min=0.01, epsilon_decay=0.995, n_episodes=50000):
        self.board = [' ' for _ in range(9)]
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.n_episodes = n_episodes
        self.q_table = {}
        self.current_winner = None

    def reset(self):
        self.board = [' ' for _ in range(9)]
        self.current_winner = None
        return tuple(self.board)

    def available_moves(self):
        return [i for i, spot in enumerate(self.board) if spot == ' ']

    def make_move(self, position, player):
        if self.board[position] == ' ':
            self.board[position] = player
            if self.check_winner(player):
                self.current_winner = player
            return True
        return False

    def check_winner(self, player):
        win_conditions = [(0, 1, 2), (3, 4, 5), (6, 7, 8),
                          (0, 3, 6), (1, 4, 7), (2, 5, 8),
                          (0, 4, 8), (2, 4, 6)]
        for condition in win_conditions:
            if all(self.board[i] == player for i in condition):
                return True
        return False

    def is_draw(self):
        return ' ' not in self.board

    def get_reward(self, player):
        if self.current_winner == player:
            return 100
        elif self.current_winner is not None:
            return -100
        elif self.is_draw():
            return 0
        else:
            return -1

    def choose_action(self, state, available_moves):
        if not available_moves:
            return None  # Si aucune action disponible, retourner None
        if random.uniform(0, 1) < self.epsilon:
            return random.choice(available_moves)
        elif state in self.q_table:
            return np.argmax(self.q_table[state])
        else:
            return random.choice(available_moves)

    def update_q_value(self, state, action, reward, next_state):
        if state not in self.q_table:
            self.q_table[state] = np.zeros(9)
        old_value = self.q_table[state][action]
        future_reward = np.max(self.q_table.get(next_state, np.zeros(9)))
        new_value = (1 - self.alpha) * old_value + self.alpha * (reward + self.gamma * future_reward)
        self.q_table[state][action] = new_value

    def train(self):
        for episode in range(self.n_episodes):
            state = self.reset()
            done = False
            player = 'X'

            while not done:
                if player == 'X':
                    available_moves = self.available_moves()
                    action = self.choose_action(state, available_moves)
                    if action is None:  # Si aucune action n'est disponible, sortir
                        done = True
                        break
                    self.make_move(action, player)
                    next_state = tuple(self.board)
                    reward = self.get_reward(player)
                    self.update_q_value(state, action, reward, next_state)
                    state = next_state
                    if reward == 100 or reward == -100 or self.is_draw():
                        done = True
                    player = 'O'
                else:
                    available_moves = self.available_moves()
                    opponent_action = self.choose_best_move('O', 'X')
                    if opponent_action is None:
                        done = True
                        break
                    self.make_move(opponent_action, player)
                    if self.check_winner('O'):
                        self.current_winner = 'O'
                    player = 'X'

            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay

            if (episode + 1) % 10000 == 0:
                print(f"Episode {episode + 1}/{self.n_episodes} completed.")

    def choose_best_move(self, player, opponent):
        available_moves = self.available_moves()
        for move in available_moves:
            self.make_move(move, player)
            if self.check_winner(player):
                self.board[move] = ' '
                return move
            self.board[move] = ' '
        for move in available_moves:
            self.make_move(move, opponent)
            if self.check_winner(opponent):
                self.board[move] = ' '
                return move
            self.board[move] = ' '
        if available_moves:
            return random.choice(available_moves)
        return None

    def play_against_agent(self):
        state = self.reset()
        player = 'X'
        print("You are playing against the Q-learning agent!\n")
        self.display_board()

        while True:
            if self.is_draw():
                print("It's a draw!")
                break

            if player == 'O':
                try:
                    human_move = int(input("Choose a square (0-8): "))
                    if human_move not in self.available_moves():
                        print("Invalid move. Try again.")
                        continue
                    self.make_move(human_move, player)
                    if self.check_winner(player):
                        self.display_board()
                        print("You won!")
                        break
                    player = 'X'
                except ValueError:
                    print("Please enter a valid number between 0 and 8.")
                    continue
            else:
                agent_move = self.choose_action(tuple(self.board), self.available_moves())
                if agent_move is None:
                    print("No valid moves available. Ending game.")
                    break
                self.make_move(agent_move, 'X')
                print(f"Agent chose position {agent_move}")
                self.display_board()
                if self.check_winner('X'):
                    print("The agent won!")
                    break
                player = 'O'

    def display_board(self):
        for row in range(3):
            print(" | ".join(self.board[row * 3:(row + 1) * 3]))
            if row < 2:
                print("---------")

# Utilisation de l'agent
agent = TicTacToeQLearningAgent()
agent.train()  # Entraînement avec 50 000 épisodes
agent.play_against_agent()  # Partie contre l'agent


Episode 10000/50000 completed.
Episode 20000/50000 completed.
Episode 30000/50000 completed.
Episode 40000/50000 completed.
Episode 50000/50000 completed.
You are playing against the Q-learning agent!

  |   |  
---------
  |   |  
---------
  |   |  
Agent chose position 4
  |   |  
---------
  | X |  
---------
  |   |  
Invalid move. Try again.
Agent chose position 4
  |   |  
---------
  | X |  
---------
  |   | O
Agent chose position 6
O |   |  
---------
  | X |  
---------
X |   | O
Agent chose position 7
O |   | O
---------
  | X |  
---------
X | X | O


KeyboardInterrupt: Interrupted by user