In [2]:
!pip install prettytable




[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import numpy as np
import pandas as pd
import random
from prettytable import PrettyTable
from tqdm import tqdm
import os
import time

In [4]:
class Player:
    """A participant in the game, identified only by the symbol it plays."""

    def __init__(self, symbol):
        # The symbol is what gets written into the board cells for this player.
        self.symbol = symbol

    def __str__(self):
        """Return a human-readable label such as ``Player (X)``."""
        return 'Player ({})'.format(self.symbol)

    def __repr__(self):
        """Use the same text as ``__str__`` for the debugging representation."""
        return str(self)


In [5]:
class Board:
    """Connect-Four-style board with a tabular Q-learning agent attached.

    The grid is stored in ``self.table`` as a list of rows (row 0 is the top),
    where each cell holds a player's symbol or ``''`` when empty.  The Q-table
    is a pandas ``DataFrame`` indexed by the string form of the grid
    (``str(self.table)``) with one column per board column, named ``'col 0'``,
    ``'col 1'``, ...

    Both players share the same Q-table during self-play training.
    """

    # Rewards used by ``train_ai``.
    WIN_REWARD = 5        # move that completes four in a row
    DRAW_REWARD = 0       # move that fills the board without a winner
    STEP_REWARD = -0.01   # any other (non-terminal) move
    LOSS_REWARD = -1      # opponent's last move before the winning move

    def __init__(self, player1, player2, line_number=6, columns_number=7, q_table=None):
        """Create an empty board.

        Args:
            player1: Object exposing a ``symbol`` attribute; moves first.
            player2: Object exposing a ``symbol`` attribute; moves second.
            line_number: Number of rows in the grid.
            columns_number: Number of columns in the grid.
            q_table: Optional existing Q-table ``DataFrame`` to continue from.
                When ``None`` an empty table with one column per board column
                is created.
        """
        self.columns_number = columns_number
        self.line_number = line_number
        self.table = [['' for _ in range(columns_number)] for _ in range(line_number)]
        self.player1 = player1
        self.player2 = player2
        self.learning_rate = 0.1
        self.discount_factor = 0.9

        if q_table is None:
            headers = [f'col {x}' for x in range(self.columns_number)]
            # Float dtype from the start so Q-values are never stored as objects.
            self.q_table = pd.DataFrame(columns=headers, dtype=float)
        else:
            self.q_table = q_table

    def is_draw(self):
        """Return True when every cell of the top row is occupied."""
        return all(self.table[0][i] != '' for i in range(self.columns_number))

    def play_in_column(self, player, column):
        """Drop ``player``'s symbol into the lowest empty cell of ``column``.

        Returns:
            The string representation of the board after the move.

        Raises:
            ValueError: If ``column`` is out of range or already full.
        """
        if column < 0 or column >= self.columns_number:
            raise ValueError("Invalid column number")

        if self.check_if_column_is_full(column):
            raise ValueError("Column is full")

        # Scan from the bottom row upwards and fill the first empty cell.
        for line in range(self.line_number - 1, -1, -1):
            if self.table[line][column] == '':
                self.table[line][column] = player.symbol
                break
        return self.get_string_representation()

    def get_q_table(self, column_number):
        """Return the Q-table row of the current board state.

        Returns ``None`` when the current state has no row yet or when
        ``column_number`` is not a valid Q-table column position.
        """
        state = self.get_string_representation()
        if state in self.q_table.index and 0 <= column_number < len(self.q_table.columns):
            return self.q_table.loc[state]
        return None

    def get_q_value(self, state, column_number):
        """Return the Q-value for playing ``column_number`` in ``state``.

        Returns ``None`` when ``state`` has no row yet or when
        ``column_number`` is not a valid Q-table column position (negative
        values included).
        """
        if state in self.q_table.index and 0 <= column_number < len(self.q_table.columns):
            return self.q_table.loc[state, f'col {column_number}']
        return None

    def get_possible_actions(self):
        """Return the indices of all columns that are not full, in ascending order."""
        return [col for col in range(self.columns_number) if not self.check_if_column_is_full(col)]

    def get_max_action(self, state):
        """Return the playable column with the highest Q-value for ``state``.

        The row for ``state`` is created (all zeros) if it does not exist yet.
        Playable columns are taken from the *current* board, not parsed from
        ``state``.  Ties resolve to the lowest column index.

        Raises:
            ValueError: If no column is playable.
        """
        if state not in self.q_table.index:
            self.initialize_state(state)
        possible_actions = self.get_possible_actions()
        if not possible_actions:
            raise ValueError("No possible actions: the board is full")
        action_values = {action: self.q_table.loc[state, f'col {action}'] for action in possible_actions}
        return max(action_values, key=action_values.get)

    def check_if_column_is_full(self, column_number):
        """Return True when the top cell of ``column_number`` is occupied.

        Raises:
            ValueError: If ``column_number`` is out of range.
        """
        if column_number < 0 or column_number >= self.columns_number:
            raise ValueError("Invalid column number")
        return self.table[0][column_number] != ''

    def print_board(self):
        """Print the grid as a header-less table, showing empty cells as '.'."""
        tab = PrettyTable()
        tab.header = False
        for line in self.table:
            tab.add_row([x if x != '' else '.' for x in line])
        print(tab)

    def print_q_table(self):
        """Print the first rows of the Q-table."""
        print(self.q_table.head())

    def initialize_state(self, state=None):
        """Add an all-zero Q-table row for ``state`` if it has none.

        When ``state`` is ``None`` the current board state is used.
        """
        if state is None:
            state = self.get_string_representation()
        if state not in self.q_table.index:
            self.q_table.loc[state] = [0.0] * self.columns_number

    def update_q_table(self, old_state, action, reward, new_state):
        """Apply one Q-learning update to ``Q(old_state, action)``.

        Q <- Q + learning_rate * (reward + discount_factor * max Q(new_state) - Q)

        Missing rows for either state are created with zeros first.
        """
        if old_state not in self.q_table.index:
            self.initialize_state(old_state)
        if new_state not in self.q_table.index:
            self.initialize_state(new_state)

        old_q_value = self.q_table.loc[old_state, f'col {action}']
        # NOTE(review): the max spans every column of the row, including
        # columns that may be full in ``new_state``.
        max_new_q_value = self.q_table.loc[new_state].max()

        new_q_value = old_q_value + self.learning_rate * (
            reward + self.discount_factor * max_new_q_value - old_q_value
        )

        self.q_table.loc[old_state, f'col {action}'] = new_q_value

    def get_string_representation(self):
        """Return ``str(self.table)``, used as the Q-table row key."""
        return str(self.table)

    def is_winning_move(self, column):
        """Return True if the topmost piece of ``column`` is part of a line of four.

        Raises:
            ValueError: If ``column`` contains no piece.
        """
        # Locate the topmost occupied cell of the column.
        for line in range(self.line_number):
            if self.table[line][column] != '':
                break
        else:
            raise ValueError("La colonne est vide")

        piece = self.table[line][column]
        # Horizontal, vertical and the two diagonals.
        directions = [(0, 1), (1, 0), (1, 1), (1, -1)]
        for dr, dc in directions:
            count = 1
            # Walk both ways along the axis, counting contiguous matching pieces.
            for sign in (1, -1):
                for i in range(1, 4):
                    r, c = line + sign * dr * i, column + sign * dc * i
                    if 0 <= r < self.line_number and 0 <= c < self.columns_number and self.table[r][c] == piece:
                        count += 1
                    else:
                        break
            if count >= 4:
                return True
        return False

    def train_ai(self, epochs, epsilon=1, min_epsilon=0.01, epsilon_decay=0.0001):
        """Train the shared Q-table through epsilon-greedy self-play.

        Args:
            epochs: Number of games to play.
            epsilon: Initial probability of choosing a random column.
            min_epsilon: Lower bound for ``epsilon``.
            epsilon_decay: Amount subtracted from ``epsilon`` after every
                non-terminal move; the decayed value carries over between games.

        When a move wins the game, the winner's move receives ``WIN_REWARD``
        and the opponent's preceding move receives ``LOSS_REWARD``.
        """
        for _ in tqdm(range(epochs), desc="Training"):
            self.reset()
            current_player = self.player1
            # (state, action) of the most recent non-terminal move, i.e. the
            # opponent's last move from the point of view of the current player.
            previous_move = None

            while True:
                state = self.get_string_representation()
                possible_actions = self.get_possible_actions()

                if random.uniform(0, 1) < epsilon:
                    action = random.choice(possible_actions)
                else:
                    action = self.get_max_action(state)

                next_state = self.play_in_column(current_player, action)

                if self.is_winning_move(action):
                    self.update_q_table(state, action, self.WIN_REWARD, next_state)
                    # Penalise the move that allowed this win.
                    if previous_move is not None:
                        lost_state, lost_action = previous_move
                        self.update_q_table(lost_state, lost_action, self.LOSS_REWARD, next_state)
                    break
                if self.is_draw():
                    self.update_q_table(state, action, self.DRAW_REWARD, next_state)
                    break

                self.update_q_table(state, action, self.STEP_REWARD, next_state)
                previous_move = (state, action)

                current_player = self.player1 if current_player == self.player2 else self.player2
                epsilon = max(min_epsilon, epsilon - epsilon_decay)

    def reset(self):
        """Clear the grid; the Q-table is left untouched."""
        self.table = [['' for _ in range(self.columns_number)] for _ in range(self.line_number)]

    def export_q_table_to_csv(self, filename):
        """Write the Q-table to ``filename`` as CSV."""
        self.q_table.to_csv(filename)

    @staticmethod
    def _clear_screen():
        """Run the platform's terminal clear command."""
        os.system('cls' if os.name == 'nt' else 'clear')

    def play_human_vs_ai(self):
        """Run an interactive game: ``player1`` is prompted, the other player is greedy.

        Typing ``Escape`` at the prompt (or pressing Ctrl-C) ends the game.
        """
        try:
            self.reset()
            current_player = self.player1

            while True:
                self._clear_screen()

                self.print_board()
                if current_player == self.player1:
                    # Keep asking until a playable column is entered.
                    while True:
                        try:
                            input_player = input(f"Player {current_player.symbol}, choose a column (0-{self.columns_number-1}): ")
                            if input_player == 'Escape':
                                raise KeyboardInterrupt
                            column = int(input_player)
                            if column not in self.get_possible_actions():
                                raise ValueError("Column is full or out of range")
                            break
                        except ValueError as e:
                            print(e)
                else:
                    state = self.get_string_representation()
                    column = self.get_max_action(state)
                    print(f"AI {current_player.symbol} chooses column {column}")
                    time.sleep(1)

                self.play_in_column(current_player, column)

                if self.is_winning_move(column):
                    self._clear_screen()
                    self.print_board()
                    print(f"Player {current_player.symbol} wins!")
                    break
                elif self.is_draw():
                    self._clear_screen()
                    self.print_board()
                    print("It's a draw!")
                    break

                current_player = self.player1 if current_player == self.player2 else self.player2

        except KeyboardInterrupt:
            print("\nGame interrupted. Exiting...")



Entraîner l'IA sans q_table en entrée, mais en produisant une q_table en sortie

In [6]:
# Build both players and a board with a fresh, empty Q-table, then write
# that table to 'q_table.csv'.
player1, player2 = Player('X'), Player('O')
board = Board(player1=player1, player2=player2)
board.export_q_table_to_csv(filename='q_table.csv')



Entraîner l'IA avec une q_table en entrée

In [10]:
# Resume from the Q-table stored on disk (row labels are in the first CSV column).
player1, player2 = Player('X'), Player('O')
q_table = pd.read_csv('q_table.csv', index_col=0)
board = Board(player1=player1, player2=player2, q_table=q_table)

# Play 100 self-play games, show the first rows of the table, then save it back.
board.train_ai(epochs=100)
board.print_q_table()
board.export_q_table_to_csv(filename='q_table.csv')

Training: 100%|██████████| 100/100 [00:56<00:00,  1.78it/s]


                                                       col 0     col 1  \
[['', '', '', '', '', '', ''], ['', '', '', '',... -0.027214  2.610349   
[['', '', '', '', '', '', ''], ['', '', '', '',...  0.917919 -0.017453   
[['', '', '', '', '', '', ''], ['', '', '', '',... -0.009653 -0.010151   
[['', '', '', '', '', '', ''], ['', '', '', '',... -0.006005 -0.006935   
[['', '', '', '', '', '', ''], ['', '', '', '',... -0.001000 -0.001000   

                                                       col 2     col 3  \
[['', '', '', '', '', '', ''], ['', '', '', '',...  2.573227  2.583195   
[['', '', '', '', '', '', ''], ['', '', '', '',... -0.017828 -0.017912   
[['', '', '', '', '', '', ''], ['', '', '', '',... -0.009712 -0.009336   
[['', '', '', '', '', '', ''], ['', '', '', '',... -0.006575 -0.006126   
[['', '', '', '', '', '', ''], ['', '', '', '',... -0.001900 -0.001000   

                                                       col 4     col 5  \
[['', '', '', '', '', '', ''], ['', 

Jouer contre l'IA

In [9]:
# Load the saved Q-table and start an interactive game:
# player1 ('X') is prompted for input, player2 ('O') is played by the AI.
player1, player2 = Player('X'), Player('O')
q_table = pd.read_csv('q_table.csv', index_col=0)
board = Board(player1=player1, player2=player2, q_table=q_table)
board.play_human_vs_ai()

+---+---+---+---+---+---+---+
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
+---+---+---+---+---+---+---+
+---+---+---+---+---+---+---+
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | X | . | . | . |
+---+---+---+---+---+---+---+
AI O chooses column 6
+---+---+---+---+---+---+---+
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | X | . | . | O |
+---+---+---+---+---+---+---+
+---+---+---+---+---+---+---+
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| . | . | . | . | . | . | . |
| X | . | . | X | . | . | O |
+---+---+---+---+---+---+---+
AI O chooses colum