# In [None]:  (Jupyter notebook cell marker, kept as a comment so the file parses)
import gym
import numpy as np

# Créez un environnement Puissance 4 personnalisé à l'aide de Gym
class ConnectFourEnv(gym.Env):
    """Two-player Connect Four on a 6x7 board with the classic Gym API.

    Board cells hold 0 (empty), 1 (player 1) or -1 (player 2). Row 0 is the
    top of the board; tokens fall to the lowest free row of a column.
    Rewards returned by ``step`` are expressed from player 1's point of view.
    """

    ROWS = 6
    COLS = 7
    CONNECT = 4  # Number of aligned tokens required to win.

    def __init__(self):
        super().__init__()
        self.board = np.zeros((self.ROWS, self.COLS), dtype=int)
        self.current_player = 1  # Player 1 always opens the game.
        self.action_space = gym.spaces.Discrete(self.COLS)  # One action per column.
        # Cells are encoded as -1 / 0 / 1, so the bounds must include -1.
        self.observation_space = gym.spaces.Box(
            low=-1, high=1, shape=(self.ROWS, self.COLS), dtype=int
        )
        self.winner = None  # 1 or -1 once a player has won, otherwise None.

    def reset(self):
        """Clear the board, give the move to player 1 and return the board."""
        self.board = np.zeros((self.ROWS, self.COLS), dtype=int)
        self.current_player = 1
        self.winner = None
        return self.board.copy()

    def step(self, action):
        """Drop the current player's token into column ``action``.

        Returns:
            ``(observation, reward, done, info)`` where ``observation`` is a
            copy of the board (so earlier observations are not altered by
            later moves). ``reward`` is 1 if player 1 just won, -1 if
            player 2 just won or if the move was invalid (full or
            out-of-range column), and 0 otherwise. An invalid move leaves
            the board and the player to move unchanged.
        """
        # The game is over after a win *or* a draw (full board).
        if self.winner is not None or self._is_full():
            return self.board.copy(), 0, True, {}

        # Reject out-of-range columns before indexing: a negative index would
        # otherwise silently wrap around to a different column.
        in_range = 0 <= action < self.COLS
        if not in_range or self.board[0, action] != 0:
            return self.board.copy(), -1, False, {}

        # Scan from the bottom row upwards for the first free cell.
        for row in range(self.ROWS - 1, -1, -1):
            if self.board[row, action] == 0:
                self.board[row, action] = self.current_player
                break

        self.winner = self.check_winner()

        reward = 0
        done = False
        if self.winner is not None:
            reward = 1 if self.winner == 1 else -1
            done = True
        elif self._is_full():
            done = True  # Draw.

        # Hand the move to the other player.
        self.current_player *= -1

        return self.board.copy(), reward, done, {}

    def render(self, mode="human"):
        """Print the board: ``X`` for player 1, ``O`` for player 2, ``.`` for empty."""
        symbols = {0: ".", 1: "X", -1: "O"}
        lines = [" ".join(symbols[int(cell)] for cell in row) for row in self.board]
        lines.append(" ".join(str(col) for col in range(self.COLS)))
        print("\n".join(lines))

    def check_winner(self):
        """Return 1 or -1 if that player has a winning line, otherwise None."""
        # Down, right, and the two diagonals heading right: starting from
        # every cell, these four directions cover every possible line.
        directions = ((1, 0), (0, 1), (1, 1), (-1, 1))
        for player in (1, -1):
            for row in range(self.ROWS):
                for col in range(self.COLS):
                    if self.board[row, col] != player:
                        continue
                    if any(
                        self.check_direction(row, col, player, dr, dc)
                        for dr, dc in directions
                    ):
                        return player
        return None

    def check_direction(self, row, col, player, dr, dc):
        """Return True if ``CONNECT`` cells starting at (row, col) along (dr, dc) belong to ``player``."""
        for i in range(self.CONNECT):
            r = row + i * dr
            c = col + i * dc
            if not (0 <= r < self.ROWS and 0 <= c < self.COLS):
                return False
            if self.board[r, c] != player:
                return False
        return True

    def _is_full(self):
        """Return True when no empty cell is left on the board."""
        return not (self.board == 0).any()

# Définissez une politique simple pour votre agent (Q-Learning)
def q_learning_agent(env, num_episodes=10000, learning_rate=0.1, discount_factor=0.99, epsilon=0.1):
    """Train a tabular Q-learning agent by self-play and return its Q-table.

    Args:
        env: Environment exposing ``reset()``, ``step(action)`` returning
            ``(state, reward, done, info)`` and an ``action_space`` with
            ``n`` and ``sample()``. If it has a ``current_player`` attribute
            (1 or -1) the game is treated as two-player and zero-sum.
        num_episodes: Number of games to play.
        learning_rate: Step size of each Q-value update.
        discount_factor: Discount applied to the bootstrapped future value.
        epsilon: Probability of playing a random action (exploration).

    Returns:
        A table such that ``Q[state]`` is the 1-D array of action values for
        that position, expressed from the point of view of the player to move.
    """
    Q = _QTable(env.action_space.n)
    for _ in range(num_episodes):
        state = env.reset()
        done = False
        while not done:
            mover = getattr(env, "current_player", 1)
            # Look the row up *before* stepping: the environment may return
            # the very array it mutates in place, in which case ``state``
            # would already describe the next position after ``step``.
            q_row = Q[state]
            if np.random.rand() < epsilon:
                action = env.action_space.sample()  # Random exploration.
            else:
                action = int(np.argmax(q_row))
            next_state, reward, done, _ = env.step(action)

            if done:
                # Terminal rewards are reported from player 1's side; flip
                # the sign so the table stores the mover's own outcome. No
                # bootstrapping: a finished game has no future value.
                target = reward * mover
            else:
                # If the turn passed to the opponent, the best value of the
                # next position is good for *them*, hence the minus sign.
                # After a rejected move the same player moves again.
                same_player = getattr(env, "current_player", 1) == mover
                sign = 1 if same_player else -1
                target = reward + sign * discount_factor * np.max(Q[next_state])

            q_row[action] += learning_rate * (target - q_row[action])
            state = next_state
    return Q


class _QTable:
    """Sparse Q-table mapping a board position to its vector of action values."""

    def __init__(self, num_actions):
        self._num_actions = num_actions
        self._rows = {}

    def __getitem__(self, state):
        # Arrays are not hashable, and indexing a dense array with a board
        # would trigger fancy indexing; use the raw bytes as the key instead.
        key = np.asarray(state).tobytes()
        row = self._rows.get(key)
        if row is None:
            row = self._rows[key] = np.zeros(self._num_actions)
        return row

    def __len__(self):
        return len(self._rows)

# Train the agent. This runs at module level, and `env` stays a module-level
# global that play_vs_agent() reads directly.
env = ConnectFourEnv()
Q = q_learning_agent(env)  # Default hyper-parameters (10000 episodes).

# Fonction pour jouer contre l'IA
def play_vs_agent(Q):
    """Play an interactive game on the module-level ``env`` against the agent.

    The human is player 1 and types a column number; the agent is player -1
    and plays the highest-valued column that is not full.

    Args:
        Q: Table such that ``Q[state]`` yields the action values of a board.
    """
    state = env.reset()
    done = False
    while not done:
        _show_board()
        legal = env.board[0] == 0  # A column is playable while its top cell is empty.
        if env.current_player == 1:
            action = _ask_human_move(legal)
        else:
            action = _pick_agent_move(Q, state, legal)
            print(f"L'IA joue {action}")
        next_state, _, done, _ = env.step(action)
        state = next_state
    _show_board()
    if env.winner == 1:
        print("Vous avez gagné !")
    elif env.winner == -1:
        print("L'IA a gagné !")
    else:
        print("Match nul !")


def _show_board():
    """Display the board, falling back to a raw print if render() is missing."""
    try:
        env.render()
    except NotImplementedError:
        print(env.board)


def _ask_human_move(legal):
    """Prompt until the human enters the index of a playable column."""
    while True:
        raw = input("Votre tour (0-6) : ")
        try:
            column = int(raw)
        except ValueError:
            print("Coup invalide, réessayez.")
            continue
        # Reject negatives explicitly: they would wrap around as array indices.
        if 0 <= column < len(legal) and legal[column]:
            return column
        print("Coup invalide, réessayez.")


def _pick_agent_move(Q, state, legal):
    """Return the playable column with the highest Q-value."""
    scores = np.asarray(Q[state], dtype=float)
    # Collapse any leading axes so there is exactly one score per column; a
    # dense array indexed with a board returns extra dimensions.
    scores = scores.reshape(-1, scores.shape[-1]).max(axis=0)
    # Masking full columns prevents the agent from repeating a rejected
    # move forever (a rejected move changes neither the board nor the turn).
    scores = np.where(legal, scores, -np.inf)
    return int(np.argmax(scores))

# Play against the trained agent. This starts an interactive game at module
# level and waits on input() for each of the human's moves.
play_vs_agent(Q)
