CLASSIFICATION

iris

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from transformers import AutoTokenizer

def cla_gen(lignes=10000, colonnes=10, nb_classes=30):
    """Generate a synthetic classification dataset as a DataFrame.

    The features come from ``make_classification``; each feature column is
    then multiplied by its own random integer factor drawn from [50, 150).

    Args:
        lignes: Number of rows (samples) to generate.
        colonnes: Number of feature columns.
        nb_classes: Number of distinct target classes.

    Returns:
        A DataFrame with feature columns ``Col1`` .. ``Col<colonnes>`` and
        the class label in column ``y``.
    """
    # At most 10 informative features; every remaining feature is redundant.
    informative = min(colonnes, 10)
    clusters = max(1, (colonnes // nb_classes) // 2)

    features, labels = make_classification(
        n_samples=lignes,
        n_features=colonnes,
        n_informative=informative,
        n_redundant=colonnes - informative,
        n_clusters_per_class=clusters,
        n_classes=nb_classes,
    )

    # Give every column its own scale.
    for col in range(colonnes):
        features[:, col] *= np.random.randint(50, 150)

    names = [f"Col{idx + 1}" for idx in range(features.shape[1])]
    frame = pd.DataFrame(features, columns=names)
    frame["y"] = pd.Series(labels, name="y")
    return frame

# DataFrame generator for regression
def reg_gen(lignes=10000,colonnes=6):
    """Generate a synthetic linear-regression dataset as a DataFrame.

    Each feature ``X<k>`` is uniform noise in [0, 1) scaled by a random
    integer in [1, 100). The target ``y`` is a weighted sum of the features
    (one random integer weight in [1, 100) per feature) plus standard
    normal noise.

    Args:
        lignes: Number of rows to generate.
        colonnes: Number of feature columns.

    Returns:
        A DataFrame with columns ``X1`` .. ``X<colonnes>`` followed by ``y``.
    """
    features = {
        f"X{k}": np.random.rand(lignes) * np.random.randint(1, 100)
        for k in range(1, colonnes + 1)
    }

    # Weighted sum of the features, one random coefficient per column.
    target = 0
    for column in features.values():
        target = target + column * np.random.randint(1, 100)

    target = target + np.random.randn(lignes)

    frame = pd.DataFrame(features)
    frame["y"] = pd.Series(target, name="y")
    return frame


def tokenizator(serie):
    """Tokenize texts with the ``bert-base-multilingual-cased`` tokenizer.

    Args:
        serie: Either a pandas Series of texts (what ``run_genetic_algorithm``
            passes, one column at a time) or an object whose ``'review'``
            entry holds the texts (e.g. a DataFrame with a ``review`` column).

    Returns:
        A DataFrame with one column per key of the tokenizer output; each
        column holds that key's tensor flattened to one dimension.
        NOTE(review): because of the flattening, the frame's row count is
        presumably n_texts * padded_length rather than n_texts - confirm
        this is what callers expect.
    """
    tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")
    # A plain Series has no 'review' label to look up; use it directly.
    texts = serie if isinstance(serie, pd.Series) else serie['review']
    batch_encoding = tokenizer(texts.tolist(), padding=True, truncation=True, return_tensors="pt")
    series_dict = {key: pd.Series(value.numpy().flatten()) for key, value in batch_encoding.items()}
    return pd.DataFrame(series_dict)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from keras.utils import to_categorical
from deap import base, creator, tools
import tensorflow as tf

# Load the dataset from a CSV file
def load_data(filepath):
    """Read a CSV file and split it into features and target.

    Args:
        filepath: Anything ``pd.read_csv`` accepts (path or file-like object).

    Returns:
        A tuple ``(X, y)``: ``X`` holds the values of every column except
        the last, ``y`` the values of the last column.
    """
    frame = pd.read_csv(filepath)
    feature_part = frame.iloc[:, :-1]  # every column but the last
    target_part = frame.iloc[:, -1]  # the last column is the target
    return feature_part.values, target_part.values

# Preprocess the data
def preprocess_data(X, y):
    """Encode the labels, split the data and standardize the features.

    Args:
        X: Feature table (2-D array or DataFrame).
        y: Target labels; numeric labels are used as class indices directly,
            any other kind of label is first mapped to integers.

    Returns:
        A tuple ``(X_train_scaled, X_test_scaled, y_train, y_test,
        input_dim, output_dim)`` where the targets are one-hot encoded,
        ``input_dim`` is the number of feature columns and ``output_dim``
        the width of the one-hot encoding.
    """
    # Comparing the dtype with 'object'/'str' misses NumPy unicode arrays
    # ('<U...'), pandas string and categorical labels; test for "not numeric"
    # so every kind of textual label gets integer-encoded.
    if not pd.api.types.is_numeric_dtype(y):
        y = LabelEncoder().fit_transform(y)
    y = to_categorical(y)  # one-hot encoding

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the scaler on the training split only, then apply it to both splits.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled, y_train, y_test, X.shape[1], y.shape[1]

# Build the DEAP toolbox that creates individuals
def create_individual_toolbox(input_dim, output_dim):
    """Create a DEAP toolbox able to generate individuals and populations.

    An individual is a flat list: the hidden-layer sizes (ints) followed by
    the kernel weights (floats) of those hidden layers.

    Args:
        input_dim: Number of input features; sizes the first layer's weights.
        output_dim: Accepted for interface symmetry; not used in this body.

    Returns:
        A ``base.Toolbox`` with ``attr_int``, ``attr_float``, ``individual``
        and ``population`` registered.
    """
    # creator.create registers classes on the creator module; do it once.
    if not hasattr(creator, 'FitnessMin'):
        creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    if not hasattr(creator, 'Individual'):
        creator.create("Individual", list, fitness=creator.FitnessMin)

    toolbox = base.Toolbox()
    toolbox.register("attr_int", np.random.randint, 5, 12)
    toolbox.register("attr_float", np.random.uniform, -1, 1)

    def generate_individual(num_layers=None):
        # Between 1 and 4 hidden layers when the caller does not choose.
        layer_count = np.random.randint(1, 5) if num_layers is None else num_layers
        structure = [np.random.randint(5, 12) for _ in range(layer_count)]
        # Each layer needs fan_in * fan_out weights; the first fan_in is input_dim.
        fan_ins = [input_dim] + structure[:-1]
        total_weights = sum(fan_in * fan_out for fan_in, fan_out in zip(fan_ins, structure))
        genes = list(structure)
        genes.extend(toolbox.attr_float() for _ in range(total_weights))
        return creator.Individual(genes)

    def generate_diverse_population(n):
        # Keep drawing until n individuals with pairwise distinct structures exist.
        population = []
        seen_structures = set()
        while len(population) < n:
            candidate = generate_individual(num_layers=np.random.randint(1, 5))
            key = tuple(decode_individual(candidate)[0])
            if key in seen_structures:
                continue
            seen_structures.add(key)
            population.append(candidate)
        return population

    toolbox.register("individual", generate_individual)
    toolbox.register("population", generate_diverse_population)

    return toolbox

# Split an individual into its structure and its weights
def decode_individual(individual):
    """Split a genome into hidden-layer sizes and weight genes.

    The structure is the leading run of integer genes; everything from the
    first non-integer gene onwards is treated as weights. Counting only the
    leading run (rather than every int in the list) keeps the boundary
    correct when an integer value shows up among the weights, e.g. a gene
    clamped to an int bound by mutation or moved there by crossover.

    Args:
        individual: Flat list of genes (layer sizes first, then weights).

    Returns:
        A tuple ``(structure, weights)`` of two lists.
    """
    num_layers = 0
    for gene in individual:
        if not isinstance(gene, (int, np.integer)):
            break
        num_layers += 1
    structure = individual[:num_layers]
    weights = individual[num_layers:]
    return structure, weights

# Fitness evaluation: build, seed and train the network encoded by an individual
def eval_nn(individual, input_dim, output_dim, X_train, y_train, X_test, y_test):
    """Train the network described by an individual and score it.

    Args:
        individual: Genome (hidden-layer sizes followed by weight genes).
        input_dim: Number of input features of the network.
        output_dim: Number of output classes (softmax units).
        X_train, y_train: Training data (targets one-hot encoded).
        X_test, y_test: Data used for validation and for the final score.

    Returns:
        A 7-tuple ``((loss,), accuracy, loss, train_accuracy_history,
        val_accuracy_history, train_loss_history, val_loss_history)``.
        When the genome cannot describe a network (a non-positive layer
        size or too few weight genes) a penalty tuple with infinite loss,
        zero accuracy and empty histories is returned instead.
    """
    structure, weights = decode_individual(individual)
    penalty = (float('inf'),), 0, float('inf'), [], [], [], []

    # Validation pass: reject the genome before any Keras layer is built.
    layer_sizes = []
    genes_needed = 0
    fan_in = input_dim
    for neurons in structure:
        neurons = int(neurons)
        if neurons <= 0:
            # Dense() refuses non-positive unit counts; treat as unusable.
            return penalty
        genes_needed += fan_in * neurons
        if genes_needed > len(weights):
            return penalty  # not enough weight genes for this layer
        layer_sizes.append(neurons)
        fan_in = neurons

    model = Sequential()
    model.add(Input(shape=(input_dim,)))
    for neurons in layer_sizes:
        model.add(Dense(neurons, activation='relu'))
    model.add(Dense(output_dim, activation='softmax'))

    # Seed each Dense kernel from the genome while genes remain; biases start
    # at zero. A layer without enough genes keeps its default initialization.
    weight_idx = 0
    fan_in = X_train.shape[1]
    for layer in model.layers:
        if isinstance(layer, Dense):
            units = layer.units
            gene_count = fan_in * units
            if weight_idx + gene_count <= len(weights):
                kernel = np.array(weights[weight_idx:weight_idx + gene_count]).reshape(fan_in, units)
                weight_idx += gene_count
                layer.set_weights([kernel, np.zeros(units)])
                fan_in = units

    model.compile(loss='categorical_crossentropy', optimizer=Adam(0.01), metrics=['accuracy'])
    history = model.fit(X_train, y_train, epochs=20, verbose=0, validation_data=(X_test, y_test))
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    curves = history.history
    return (loss,), accuracy, loss, curves['accuracy'], curves['val_accuracy'], curves['loss'], curves['val_loss']



# Mutation of individuals
def mutate_individual(individual):
    """Mutate an individual in place.

    With probability 0.5 each layer size is redrawn (per-layer probability
    0.2) from [5, 12); independently, with probability 0.5 the weight genes
    go through bounded polynomial mutation in [-1, 1].

    Args:
        individual: Genome to mutate (modified in place).

    Returns:
        A one-element tuple containing the mutated individual, as DEAP
        mutation operators do.
    """
    structure, weights = decode_individual(individual)
    if np.random.random() < 0.5:
        for i, _ in enumerate(structure):
            if np.random.random() < 0.2:
                structure[i] = np.random.randint(5, 12)
    if np.random.random() < 0.5:
        # Bounds must be floats: the operator clamps genes to the bound
        # objects themselves, and an int bound would turn a weight gene into
        # an int that decode_individual then mistakes for a layer size.
        weights = tools.mutPolynomialBounded(weights, low=-1.0, up=1.0, eta=0.1, indpb=0.2)[0]
    individual[:] = structure + weights
    return individual,

def _store_evaluation(ind, fit):
    """Copy one eval_nn result tuple onto an individual."""
    ind.fitness.values = fit[0]  # only the first element is the DEAP fitness
    ind.accuracy = fit[1]
    ind.raw_loss = fit[2]
    ind.train_accuracy = fit[3]
    ind.val_accuracy = fit[4]
    ind.train_loss = fit[5]
    ind.val_loss = fit[6]


def _evaluate_invalid(toolbox, individuals):
    """Evaluate every individual without a valid fitness; return how many."""
    pending = [ind for ind in individuals if not ind.fitness.valid]
    for ind in pending:
        _store_evaluation(ind, toolbox.evaluate(ind))
    return len(pending)


def _plot_panel(position, curves, xlabel, ylabel, title):
    """Draw labelled curves into one panel of a 1x2 subplot grid."""
    plt.subplot(1, 2, position)
    for values, label in curves:
        plt.plot(values, label=label)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()


# Run the genetic algorithm
def run_genetic_algorithm(filepath, n_population=5, n_generations=10):
    """Evolve small dense-network architectures and report the best one.

    Args:
        filepath: Path to a CSV file, or a DataFrame; in both cases the last
            column is the target. A DataFrame argument is left unmodified.
        n_population: Size of the initial population.
        n_generations: Maximum number of generations to run.

    Returns:
        None. Progress and results are printed and plotted.
    """
    if isinstance(filepath, str):
        X, y = load_data(filepath)
    else:
        # Work on a copy so the caller's frame keeps its target column.
        y = filepath.iloc[:, -1]
        X = filepath.iloc[:, :-1].copy()
        for column in X.columns:
            if X[column].dtype == 'object' or X[column].dtype == 'str':
                # NOTE(review): tokenizator returns a DataFrame; confirm that
                # assigning it to a single column is what is intended here.
                X[column] = tokenizator(X[column])
    X_train, X_test, y_train, y_test, input_dim, output_dim = preprocess_data(X, y)

    toolbox = create_individual_toolbox(input_dim, output_dim)
    toolbox.register("evaluate", eval_nn, input_dim=input_dim, output_dim=output_dim, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", mutate_individual)
    toolbox.register("select", tools.selTournament, tournsize=3)

    population = toolbox.population(n=n_population)

    # Evaluate every individual of the initial population.
    _evaluate_invalid(toolbox, population)

    hall_of_fame = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("min", lambda values: np.min([val[0] for val in values]))
    logbook = tools.Logbook()
    logbook.header = ["gen", "evals"] + stats.fields

    best_accuracy_per_gen = []
    best_loss_per_gen = []

    for gen in range(n_generations):
        # Carry over the best individual found so far.
        if hall_of_fame:
            population.append(toolbox.clone(hall_of_fame[0]))

        # Parents: the top 10 %, the bottom 10 %, the rest by tournament.
        share = int(0.1 * len(population))
        best_individuals = tools.selBest(population, k=share)
        worst_individuals = tools.selWorst(population, k=share)
        remaining_individuals = toolbox.select(population, k=len(population) - len(best_individuals) - len(worst_individuals))

        offspring = [toolbox.clone(ind) for ind in best_individuals + worst_individuals + remaining_individuals]

        # Crossover on consecutive pairs, then mutation; both invalidate fitness.
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if np.random.random() < 0.5:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        for mutant in offspring:
            if np.random.random() < 0.2:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Re-evaluate whatever crossover and mutation touched.
        evals = _evaluate_invalid(toolbox, offspring)

        # Inject a few fresh random individuals for diversity.
        new_random_individuals = [toolbox.individual(num_layers=np.random.randint(1, 5)) for _ in range(int(0.1 * len(population)))]
        population[:] = offspring + new_random_individuals

        # Make sure 1-layer and 2-layer networks are always represented.
        layer_counts = {len(decode_individual(ind)[0]) for ind in population}
        if 1 not in layer_counts:
            population.append(toolbox.individual(num_layers=1))
        if 2 not in layer_counts:
            population.append(toolbox.individual(num_layers=2))

        # Newly injected individuals still need a fitness; count them too so
        # the logged 'evals' covers every evaluation of this generation.
        evals += _evaluate_invalid(toolbox, population)

        # Record the statistics.
        record = stats.compile(population)
        logbook.record(gen=gen, evals=evals, **record)
        print(logbook.stream)

        hall_of_fame.update(population)

        best_ind = tools.selBest(population, 1)[0]
        best_accuracy_per_gen.append(best_ind.accuracy)
        best_loss_per_gen.append(best_ind.fitness.values[0])

        # Show the best individual of each generation.
        print(f"Generation {gen}: Best Individual = {best_ind}")

        # Stop once a single-layer model reaches 98 % accuracy.
        if best_ind.accuracy >= 0.98 and len(decode_individual(best_ind)[0]) == 1:
            print("Stopping early as a single layer model reached 98% accuracy.")
            break

    # With zero generations the hall of fame was never filled; fall back to
    # the evaluated initial population so a best individual always exists.
    if not hall_of_fame:
        hall_of_fame.update(population)

    # Report the best individual.
    best_individual = hall_of_fame[0]
    display_best_individual(best_individual, X_train.shape[1])

    best_structure, _ = decode_individual(best_individual)
    print('Best Individual Structure:', best_structure)
    print('Fitness (Loss, Num Layers, Total Neurons):', best_individual.fitness.values)
    print('Accuracy of the Best Model:', best_individual.accuracy)
    print('Raw Loss:', best_individual.raw_loss)

    # Retrain a fresh model with the best structure to get per-epoch curves.
    best_model = Sequential()
    best_model.add(Input(shape=(X_train.shape[1],)))
    for neurons in best_structure:
        best_model.add(Dense(neurons, activation='relu'))
    best_model.add(Dense(output_dim, activation='softmax'))
    best_model.compile(loss='categorical_crossentropy', optimizer=Adam(0.01), metrics=['accuracy'])
    history = best_model.fit(X_train, y_train, epochs=20, verbose=0, validation_data=(X_test, y_test))
    _, accuracy = best_model.evaluate(X_test, y_test, verbose=0)
    print('Final Accuracy of the Best Model:', accuracy)

    # Per-epoch curves of the retrained best model.
    curves = history.history
    plt.figure(figsize=(12, 5))
    _plot_panel(1, [(curves['loss'], 'Train Loss'), (curves['val_loss'], 'Validation Loss')],
                'Epoch', 'Loss', 'Loss over Epochs for Best Model')
    _plot_panel(2, [(curves['accuracy'], 'Train Accuracy'), (curves['val_accuracy'], 'Validation Accuracy')],
                'Epoch', 'Accuracy', 'Accuracy over Epochs for Best Model')
    plt.tight_layout()
    plt.show()

    # Per-generation progress of the search.
    plt.figure(figsize=(12, 5))
    _plot_panel(1, [(best_loss_per_gen, 'Best Loss per Generation')],
                'Generation', 'Loss', 'Best Loss per Generation')
    _plot_panel(2, [(best_accuracy_per_gen, 'Best Accuracy per Generation')],
                'Generation', 'Accuracy', 'Best Accuracy per Generation')
    plt.tight_layout()
    plt.show()

# Print the structure and the initial weights of the best individual
def display_best_individual(best_individual, input_dim):
    """Print the structure, kernel weights and biases encoded by an individual.

    The genome only carries kernel weights for the hidden layers; eval_nn
    starts every bias vector at zero. The bias section therefore prints zero
    vectors instead of reading genes past the kernels (which would show
    leftover weight genes, or nothing, under a "Biases" label).

    Args:
        best_individual: Genome to display.
        input_dim: Number of input features (fan-in of the first layer).
    """
    structure, weights = decode_individual(best_individual)

    print("Best Individual Structure:")
    print(structure)

    print("\nBest Individual Weights:")
    weight_idx = 0
    fan_in = input_dim
    for i, neurons in enumerate(structure):
        gene_count = fan_in * neurons
        layer_weights = np.array(weights[weight_idx:weight_idx + gene_count]).reshape(fan_in, neurons)
        print(f"Layer {i + 1} - Weights:\n{layer_weights}")
        weight_idx += gene_count
        fan_in = neurons  # this layer's outputs feed the next layer

    print("\nBest Individual Biases:")
    for i, neurons in enumerate(structure):
        biases = np.zeros(neurons)
        print(f"Layer {i + 1} - Biases:\n{biases}")

# Example usage
# Generate a synthetic classification DataFrame (no CSV file is written here)
# and run the genetic search on it; despite its name, `filepath` holds a DataFrame.
filepath = cla_gen(lignes = 10000, colonnes = 10, nb_classes = 30)
run_genetic_algorithm(filepath)


gen	evals	min      
0  	1    	0.0733469
Generation 0: Best Individual = [10, 11, 0.9046171798997118, 0.07799573597025078, -0.4702711924999887, -0.6061063755437708, 0.27451379073873716, 0.8821002911229638, 0.7327237089466161, 0.782724001406528, -0.5640881312484347, 0.28337004313816494, 0.7596713380767055, 0.3975391891647746, -0.4961427468252737, 0.8152809479301126, -0.24125815275380869, 0.31825951215201975, 0.4977161402987891, 0.006218410042655442, -0.30128010310240616, 0.9791315363919884, 0.9796640438361728, -0.5898725559676765, -0.0958591226022465, -0.005227085397501607, -0.7285406791808369, 0.4504553802834663, 0.030667321094760558, -0.9061395573613995, 0.46311077026127445, 0.671044793370893, -0.3727026690194628, -0.014208869884899666, -0.01682317700722291, -0.21850378799843195, 0.04821496313118234, 0.22670105574091415, -0.14030592319945767, 0.30006280517783046, -0.44971217955916853, -0.6765905452681225, -0.4306242460751659, -0.3957092976855878, 0.3873348650159434, -0.0523437963505917

wine

In [None]:
from sklearn.datasets import load_wine
import pandas as pd
import numpy as np

# Fetch the wine dataset and save it as a CSV file
def download_wine_data(filepath):
    """Write the scikit-learn wine dataset to a CSV file.

    The file contains one column per feature followed by a final ``target``
    column, without the DataFrame index.

    Args:
        filepath: Destination path of the CSV file.
    """
    wine = load_wine()
    table = np.column_stack((wine['data'], wine['target']))
    header = wine['feature_names'] + ['target']
    pd.DataFrame(data=table, columns=header).to_csv(filepath, index=False)

# Use the function above to save the wine data to a CSV file
filepath = 'wine.csv'
download_wine_data(filepath)



    
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from keras.utils import to_categorical
from deap import base, creator, tools, algorithms

# Load the dataset from a CSV file
def load_data(filepath):
    """Read a CSV file and split it into features and target.

    Args:
        filepath: Anything ``pd.read_csv`` accepts (path or file-like object).

    Returns:
        A tuple ``(X, y)``: ``X`` holds the values of every column except
        the last, ``y`` the values of the last column.
    """
    frame = pd.read_csv(filepath)
    feature_part = frame.iloc[:, :-1]  # every column but the last
    target_part = frame.iloc[:, -1]  # the last column is the target
    return feature_part.values, target_part.values

# Preprocess the data
def preprocess_data(X, y):
    """Encode the labels, split the data and standardize the features.

    Args:
        X: Feature table (2-D array or DataFrame).
        y: Target labels; numeric labels are used as class indices directly,
            any other kind of label is first mapped to integers.

    Returns:
        A tuple ``(X_train_scaled, X_test_scaled, y_train, y_test,
        input_dim, output_dim)`` where the targets are one-hot encoded,
        ``input_dim`` is the number of feature columns and ``output_dim``
        the width of the one-hot encoding.
    """
    # Comparing the dtype with 'object'/'str' misses NumPy unicode arrays
    # ('<U...'), pandas string and categorical labels; test for "not numeric"
    # so every kind of textual label gets integer-encoded.
    if not pd.api.types.is_numeric_dtype(y):
        y = LabelEncoder().fit_transform(y)
    y = to_categorical(y)  # one-hot encoding

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the scaler on the training split only, then apply it to both splits.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled, y_train, y_test, X.shape[1], y.shape[1]

# Build the DEAP toolbox that creates individuals
def create_individual_toolbox(input_dim, output_dim):
    """Create a DEAP toolbox able to generate individuals and populations.

    An individual is a flat list: the hidden-layer sizes (ints) followed by
    the kernel weights (floats) of those hidden layers.

    Args:
        input_dim: Number of input features; sizes the first layer's weights.
        output_dim: Accepted for interface symmetry; not used in this body.

    Returns:
        A ``base.Toolbox`` with ``attr_int``, ``attr_float``, ``individual``
        and ``population`` registered.
    """
    # creator.create registers classes on the creator module; do it once.
    if not hasattr(creator, 'FitnessMin'):
        creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    if not hasattr(creator, 'Individual'):
        creator.create("Individual", list, fitness=creator.FitnessMin)

    toolbox = base.Toolbox()
    toolbox.register("attr_int", np.random.randint, 5, 12)
    toolbox.register("attr_float", np.random.uniform, -1, 1)

    def generate_individual(num_layers=None):
        # Between 1 and 4 hidden layers when the caller does not choose.
        layer_count = np.random.randint(1, 5) if num_layers is None else num_layers
        structure = [np.random.randint(5, 12) for _ in range(layer_count)]
        # Each layer needs fan_in * fan_out weights; the first fan_in is input_dim.
        fan_ins = [input_dim] + structure[:-1]
        total_weights = sum(fan_in * fan_out for fan_in, fan_out in zip(fan_ins, structure))
        genes = list(structure)
        genes.extend(toolbox.attr_float() for _ in range(total_weights))
        return creator.Individual(genes)

    def generate_diverse_population(n):
        # Keep drawing until n individuals with pairwise distinct structures exist.
        population = []
        seen_structures = set()
        while len(population) < n:
            candidate = generate_individual(num_layers=np.random.randint(1, 5))
            key = tuple(decode_individual(candidate)[0])
            if key in seen_structures:
                continue
            seen_structures.add(key)
            population.append(candidate)
        return population

    toolbox.register("individual", generate_individual)
    toolbox.register("population", generate_diverse_population)

    return toolbox

# Split an individual into its structure and its weights
def decode_individual(individual):
    """Split a genome into hidden-layer sizes and weight genes.

    The structure is the leading run of integer genes; everything from the
    first non-integer gene onwards is treated as weights. Counting only the
    leading run (rather than every int in the list) keeps the boundary
    correct when an integer value shows up among the weights, e.g. a gene
    clamped to an int bound by mutation or moved there by crossover.

    Args:
        individual: Flat list of genes (layer sizes first, then weights).

    Returns:
        A tuple ``(structure, weights)`` of two lists.
    """
    num_layers = 0
    for gene in individual:
        if not isinstance(gene, (int, np.integer)):
            break
        num_layers += 1
    structure = individual[:num_layers]
    weights = individual[num_layers:]
    return structure, weights

# Fitness evaluation: build, seed and train the network encoded by an individual
def eval_nn(individual, input_dim, output_dim, X_train, y_train, X_test, y_test):
    """Train the network described by an individual and score it.

    Args:
        individual: Genome (hidden-layer sizes followed by weight genes).
        input_dim: Number of input features of the network.
        output_dim: Number of output classes (softmax units).
        X_train, y_train: Training data (targets one-hot encoded).
        X_test, y_test: Data used for validation and for the final score.

    Returns:
        A 7-tuple ``((loss,), accuracy, loss, train_accuracy_history,
        val_accuracy_history, train_loss_history, val_loss_history)``.
        When the genome cannot describe a network (a non-positive layer
        size or too few weight genes) a penalty tuple with infinite loss,
        zero accuracy and empty histories is returned instead.
    """
    structure, weights = decode_individual(individual)
    penalty = (float('inf'),), 0, float('inf'), [], [], [], []

    # Validation pass: reject the genome before any Keras layer is built.
    layer_sizes = []
    genes_needed = 0
    fan_in = input_dim
    for neurons in structure:
        neurons = int(neurons)
        if neurons <= 0:
            # Dense() refuses non-positive unit counts; treat as unusable.
            return penalty
        genes_needed += fan_in * neurons
        if genes_needed > len(weights):
            return penalty  # not enough weight genes for this layer
        layer_sizes.append(neurons)
        fan_in = neurons

    model = Sequential()
    model.add(Input(shape=(input_dim,)))
    for neurons in layer_sizes:
        model.add(Dense(neurons, activation='relu'))
    model.add(Dense(output_dim, activation='softmax'))

    # Seed each Dense kernel from the genome while genes remain; biases start
    # at zero. A layer without enough genes keeps its default initialization.
    weight_idx = 0
    fan_in = X_train.shape[1]
    for layer in model.layers:
        if isinstance(layer, Dense):
            units = layer.units
            gene_count = fan_in * units
            if weight_idx + gene_count <= len(weights):
                kernel = np.array(weights[weight_idx:weight_idx + gene_count]).reshape(fan_in, units)
                weight_idx += gene_count
                layer.set_weights([kernel, np.zeros(units)])
                fan_in = units

    model.compile(loss='categorical_crossentropy', optimizer=Adam(0.01), metrics=['accuracy'])
    history = model.fit(X_train, y_train, epochs=20, verbose=0, validation_data=(X_test, y_test))
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    curves = history.history
    return (loss,), accuracy, loss, curves['accuracy'], curves['val_accuracy'], curves['loss'], curves['val_loss']



# Mutation of individuals
def mutate_individual(individual):
    """Mutate an individual in place.

    With probability 0.5 each layer size is redrawn (per-layer probability
    0.2) from [5, 12); independently, with probability 0.5 the weight genes
    go through bounded polynomial mutation in [-1, 1].

    Args:
        individual: Genome to mutate (modified in place).

    Returns:
        A one-element tuple containing the mutated individual, as DEAP
        mutation operators do.
    """
    structure, weights = decode_individual(individual)
    if np.random.random() < 0.5:
        for i, _ in enumerate(structure):
            if np.random.random() < 0.2:
                structure[i] = np.random.randint(5, 12)
    if np.random.random() < 0.5:
        # Bounds must be floats: the operator clamps genes to the bound
        # objects themselves, and an int bound would turn a weight gene into
        # an int that decode_individual then mistakes for a layer size.
        weights = tools.mutPolynomialBounded(weights, low=-1.0, up=1.0, eta=0.1, indpb=0.2)[0]
    individual[:] = structure + weights
    return individual,

def _store_evaluation(ind, fit):
    """Copy one eval_nn result tuple onto an individual."""
    ind.fitness.values = fit[0]  # only the first element is the DEAP fitness
    ind.accuracy = fit[1]
    ind.raw_loss = fit[2]
    ind.train_accuracy = fit[3]
    ind.val_accuracy = fit[4]
    ind.train_loss = fit[5]
    ind.val_loss = fit[6]


def _evaluate_invalid(toolbox, individuals):
    """Evaluate every individual without a valid fitness; return how many."""
    pending = [ind for ind in individuals if not ind.fitness.valid]
    for ind in pending:
        _store_evaluation(ind, toolbox.evaluate(ind))
    return len(pending)


def _plot_panel(position, curves, xlabel, ylabel, title):
    """Draw labelled curves into one panel of a 1x2 subplot grid."""
    plt.subplot(1, 2, position)
    for values, label in curves:
        plt.plot(values, label=label)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()


# Run the genetic algorithm
def run_genetic_algorithm(filepath, n_population=5, n_generations=10):
    """Evolve small dense-network architectures and report the best one.

    Args:
        filepath: Path to a CSV file, or a DataFrame; in both cases the last
            column is the target. A DataFrame argument is left unmodified.
        n_population: Size of the initial population.
        n_generations: Maximum number of generations to run.

    Returns:
        None. Progress and results are printed and plotted.
    """
    if isinstance(filepath, str):
        X, y = load_data(filepath)
    else:
        # Work on a copy so the caller's frame keeps its target column.
        y = filepath.iloc[:, -1]
        X = filepath.iloc[:, :-1].copy()
        for column in X.columns:
            if X[column].dtype == 'object' or X[column].dtype == 'str':
                # NOTE(review): tokenizator returns a DataFrame; confirm that
                # assigning it to a single column is what is intended here.
                X[column] = tokenizator(X[column])
    X_train, X_test, y_train, y_test, input_dim, output_dim = preprocess_data(X, y)

    toolbox = create_individual_toolbox(input_dim, output_dim)
    toolbox.register("evaluate", eval_nn, input_dim=input_dim, output_dim=output_dim, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", mutate_individual)
    toolbox.register("select", tools.selTournament, tournsize=3)

    population = toolbox.population(n=n_population)

    # Evaluate every individual of the initial population.
    _evaluate_invalid(toolbox, population)

    hall_of_fame = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("min", lambda values: np.min([val[0] for val in values]))
    logbook = tools.Logbook()
    logbook.header = ["gen", "evals"] + stats.fields

    best_accuracy_per_gen = []
    best_loss_per_gen = []

    for gen in range(n_generations):
        # Carry over the best individual found so far.
        if hall_of_fame:
            population.append(toolbox.clone(hall_of_fame[0]))

        # Parents: the top 10 %, the bottom 10 %, the rest by tournament.
        share = int(0.1 * len(population))
        best_individuals = tools.selBest(population, k=share)
        worst_individuals = tools.selWorst(population, k=share)
        remaining_individuals = toolbox.select(population, k=len(population) - len(best_individuals) - len(worst_individuals))

        offspring = [toolbox.clone(ind) for ind in best_individuals + worst_individuals + remaining_individuals]

        # Crossover on consecutive pairs, then mutation; both invalidate fitness.
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if np.random.random() < 0.5:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        for mutant in offspring:
            if np.random.random() < 0.2:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Re-evaluate whatever crossover and mutation touched.
        evals = _evaluate_invalid(toolbox, offspring)

        # Inject a few fresh random individuals for diversity.
        new_random_individuals = [toolbox.individual(num_layers=np.random.randint(1, 5)) for _ in range(int(0.1 * len(population)))]
        population[:] = offspring + new_random_individuals

        # Make sure 1-layer and 2-layer networks are always represented.
        layer_counts = {len(decode_individual(ind)[0]) for ind in population}
        if 1 not in layer_counts:
            population.append(toolbox.individual(num_layers=1))
        if 2 not in layer_counts:
            population.append(toolbox.individual(num_layers=2))

        # Newly injected individuals still need a fitness; count them too so
        # the logged 'evals' covers every evaluation of this generation.
        evals += _evaluate_invalid(toolbox, population)

        # Record the statistics.
        record = stats.compile(population)
        logbook.record(gen=gen, evals=evals, **record)
        print(logbook.stream)

        hall_of_fame.update(population)

        best_ind = tools.selBest(population, 1)[0]
        best_accuracy_per_gen.append(best_ind.accuracy)
        best_loss_per_gen.append(best_ind.fitness.values[0])

        # Show the best individual of each generation.
        print(f"Generation {gen}: Best Individual = {best_ind}")

        # Stop once a single-layer model reaches 98 % accuracy.
        if best_ind.accuracy >= 0.98 and len(decode_individual(best_ind)[0]) == 1:
            print("Stopping early as a single layer model reached 98% accuracy.")
            break

    # With zero generations the hall of fame was never filled; fall back to
    # the evaluated initial population so a best individual always exists.
    if not hall_of_fame:
        hall_of_fame.update(population)

    # Report the best individual.
    best_individual = hall_of_fame[0]
    display_best_individual(best_individual, X_train.shape[1])

    best_structure, _ = decode_individual(best_individual)
    print('Best Individual Structure:', best_structure)
    print('Fitness (Loss, Num Layers, Total Neurons):', best_individual.fitness.values)
    print('Accuracy of the Best Model:', best_individual.accuracy)
    print('Raw Loss:', best_individual.raw_loss)

    # Retrain a fresh model with the best structure to get per-epoch curves.
    best_model = Sequential()
    best_model.add(Input(shape=(X_train.shape[1],)))
    for neurons in best_structure:
        best_model.add(Dense(neurons, activation='relu'))
    best_model.add(Dense(output_dim, activation='softmax'))
    best_model.compile(loss='categorical_crossentropy', optimizer=Adam(0.01), metrics=['accuracy'])
    history = best_model.fit(X_train, y_train, epochs=20, verbose=0, validation_data=(X_test, y_test))
    _, accuracy = best_model.evaluate(X_test, y_test, verbose=0)
    print('Final Accuracy of the Best Model:', accuracy)

    # Per-epoch curves of the retrained best model.
    curves = history.history
    plt.figure(figsize=(12, 5))
    _plot_panel(1, [(curves['loss'], 'Train Loss'), (curves['val_loss'], 'Validation Loss')],
                'Epoch', 'Loss', 'Loss over Epochs for Best Model')
    _plot_panel(2, [(curves['accuracy'], 'Train Accuracy'), (curves['val_accuracy'], 'Validation Accuracy')],
                'Epoch', 'Accuracy', 'Accuracy over Epochs for Best Model')
    plt.tight_layout()
    plt.show()

    # Per-generation progress of the search.
    plt.figure(figsize=(12, 5))
    _plot_panel(1, [(best_loss_per_gen, 'Best Loss per Generation')],
                'Generation', 'Loss', 'Best Loss per Generation')
    _plot_panel(2, [(best_accuracy_per_gen, 'Best Accuracy per Generation')],
                'Generation', 'Accuracy', 'Best Accuracy per Generation')
    plt.tight_layout()
    plt.show()

# Print the structure and the initial weights of the best individual
def display_best_individual(best_individual, input_dim):
    """Print the structure, kernel weights and biases encoded by an individual.

    The genome only carries kernel weights for the hidden layers; eval_nn
    starts every bias vector at zero. The bias section therefore prints zero
    vectors instead of reading genes past the kernels (which would show
    leftover weight genes, or nothing, under a "Biases" label).

    Args:
        best_individual: Genome to display.
        input_dim: Number of input features (fan-in of the first layer).
    """
    structure, weights = decode_individual(best_individual)

    print("Best Individual Structure:")
    print(structure)

    print("\nBest Individual Weights:")
    weight_idx = 0
    fan_in = input_dim
    for i, neurons in enumerate(structure):
        gene_count = fan_in * neurons
        layer_weights = np.array(weights[weight_idx:weight_idx + gene_count]).reshape(fan_in, neurons)
        print(f"Layer {i + 1} - Weights:\n{layer_weights}")
        weight_idx += gene_count
        fan_in = neurons  # this layer's outputs feed the next layer

    print("\nBest Individual Biases:")
    for i, neurons in enumerate(structure):
        biases = np.zeros(neurons)
        print(f"Layer {i + 1} - Biases:\n{biases}")

# Example usage: despite its name, `filepath` holds the DataFrame returned by
# cla_gen here, not a path on disk.
filepath = cla_gen(lignes = 10000, colonnes = 10, nb_classes = 30)
run_genetic_algorithm(filepath)


REGRESSION

housing

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from deap import base, creator, tools, algorithms

# Load the data from a CSV file
def load_data(filepath):
    """Read a CSV file and split it into features and target.

    Args:
        filepath: Location of the CSV file, passed to ``pd.read_csv``.

    Returns:
        ``(X, y)`` as NumPy arrays: ``X`` holds every column except the
        last one, ``y`` holds the last column.
    """
    frame = pd.read_csv(filepath)
    feature_part = frame.iloc[:, :-1]  # everything but the last column
    target_part = frame.iloc[:, -1]  # the last column is the target
    return feature_part.values, target_part.values

# Preprocess the data
def preprocess_data(X, y):
    """Split the data 80/20 and standardise the features.

    The scaler is fitted on the training split only and then applied to
    the test split.

    Args:
        X: Feature table with the features along axis 1.
        y: Target values aligned with the rows of ``X``.

    Returns:
        ``(X_train_scaled, X_test_scaled, y_train, y_test, input_dim,
        output_dim)`` where ``input_dim`` is ``X.shape[1]`` and
        ``output_dim`` is always 1.
    """
    train_features, test_features, train_target, test_target = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    scaler = StandardScaler()
    scaled_train = scaler.fit_transform(train_features)
    scaled_test = scaler.transform(test_features)

    n_inputs = X.shape[1]
    n_outputs = 1
    return scaled_train, scaled_test, train_target, test_target, n_inputs, n_outputs

# Build the toolbox that creates individuals
def create_individual_toolbox(input_dim, output_dim):
    """Create a DEAP toolbox able to generate individuals and populations.

    An individual is a list made of 1 to 4 hidden-layer sizes (each drawn
    from 5..11) followed by one weight in [-1, 1) per connection between
    consecutive layers, the first layer being fed by ``input_dim`` inputs.

    Args:
        input_dim: Number of input features of the network.
        output_dim: Accepted for symmetry with the other helpers; not used
            when generating individuals.

    Returns:
        A ``base.Toolbox`` with ``attr_int``, ``attr_float``, ``individual``
        and ``population`` registered.
    """
    # The creator classes live in the deap.creator module, so only create
    # them when they are not there yet.
    registered = vars(creator)
    if 'FitnessMin' not in registered:
        creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    if 'Individual' not in registered:
        creator.create("Individual", list, fitness=creator.FitnessMin)

    toolbox = base.Toolbox()
    toolbox.register("attr_int", np.random.randint, 5, 12)
    toolbox.register("attr_float", np.random.uniform, -1, 1)

    def generate_individual(num_layers=None):
        if num_layers is None:
            num_layers = np.random.randint(1, 5)  # between 1 and 4 layers
        layer_sizes = []
        for _ in range(num_layers):
            layer_sizes.append(np.random.randint(5, 12))
        # Each layer is fed by the previous one; the first by the inputs.
        fan_ins = [input_dim] + layer_sizes[:-1]
        n_weights = sum(fan_in * width for fan_in, width in zip(fan_ins, layer_sizes))
        genes = list(layer_sizes)
        genes.extend(toolbox.attr_float() for _ in range(n_weights))
        return creator.Individual(genes)

    def generate_diverse_population(n):
        members = []
        used_structures = set()
        # Keep drawing until n individuals with pairwise distinct structures exist.
        while len(members) < n:
            depth = np.random.randint(1, 5)
            candidate = generate_individual(num_layers=depth)
            signature = tuple(decode_individual(candidate)[0])
            if signature in used_structures:
                continue
            used_structures.add(signature)
            members.append(candidate)
        return members

    toolbox.register("individual", generate_individual)
    toolbox.register("population", generate_diverse_population)

    return toolbox

# Split an individual into its structure and its weights
def decode_individual(individual):
    """Return ``(structure, weights)`` for a genome.

    The structure is the leading run of ``int`` genes (the hidden-layer
    sizes); everything after it is treated as weights. Counting only the
    leading run matters because crossover swaps arbitrary slices between
    genomes, so an ``int`` can end up among the weights; counting every
    ``int`` in the genome would then pull float genes into the structure.

    Args:
        individual: List-like genome of layer sizes followed by weights.

    Returns:
        Two slices of ``individual``: the layer sizes and the remaining genes.
    """
    num_layers = 0
    for gene in individual:
        if not isinstance(gene, int):
            break
        num_layers += 1
    return individual[:num_layers], individual[num_layers:]

# Evaluate the error of the model encoded by an individual
def eval_nn(individual, input_dim, output_dim, X_train, y_train, X_test, y_test):
    """Build, seed, train and score the network described by ``individual``.

    Args:
        individual: Genome split by ``decode_individual`` into hidden-layer
            sizes and weight values.
        input_dim: Number of input features.
        output_dim: Number of units of the final linear layer.
        X_train: Training features passed to ``model.fit``.
        y_train: Training targets passed to ``model.fit``.
        X_test: Features used as validation data and for the final score.
        y_test: Targets used as validation data and for the final score.

    Returns:
        ``(fitness, loss, train_losses, val_losses)`` where ``fitness`` is
        the one-element tuple ``(loss,)``, ``loss`` is the value returned by
        ``model.evaluate`` (mean squared error) and the two lists are the
        per-epoch histories. When the genome holds too few weights for its
        hidden layers the result is ``((inf,), inf, [], [])``.
    """
    structure, weights = decode_individual(individual)

    # Reject genomes that cannot fill every hidden-layer kernel before any
    # model is built.
    needed = 0
    fan_in = input_dim
    for neurons in structure:
        needed += fan_in * neurons
        fan_in = neurons
    if needed > len(weights):
        return (float('inf'),), float('inf'), [], []

    model = Sequential()
    model.add(Input(shape=(input_dim,)))
    for neurons in structure:
        model.add(Dense(neurons, activation='relu'))
    model.add(Dense(output_dim))

    # Seed each Dense kernel from the genome with zero biases. A layer for
    # which not enough genes are left keeps its default initialisation.
    weight_idx = 0
    fan_in = X_train.shape[1]
    for layer in model.layers:
        if isinstance(layer, Dense):
            units = layer.units
            n_values = fan_in * units
            if weight_idx + n_values <= len(weights):
                kernel = np.array(weights[weight_idx:weight_idx + n_values]).reshape(fan_in, units)
                weight_idx += n_values
                layer.set_weights([kernel, np.zeros(units)])
                fan_in = units

    model.compile(loss='mean_squared_error', optimizer=Adam(0.01))
    history = model.fit(X_train, y_train, epochs=20, verbose=0, validation_data=(X_test, y_test))
    loss = model.evaluate(X_test, y_test, verbose=0)
    return (loss,), loss, history.history['loss'], history.history['val_loss']

# Mutate an individual
def mutate_individual(individual):
    """Mutate the layer sizes and/or the weight genes of ``individual`` in place.

    With probability 0.5 each layer size is redrawn from 5..11 with
    probability 0.2; independently, with probability 0.5 the weights go
    through DEAP's bounded polynomial mutation. The number of weight genes
    is not adjusted when a layer size changes.

    Args:
        individual: Genome understood by ``decode_individual``.

    Returns:
        A one-element tuple holding the mutated individual, as DEAP expects
        from mutation operators.
    """
    structure, weights = decode_individual(individual)
    if np.random.random() < 0.5:
        for i, _ in enumerate(structure):
            if np.random.random() < 0.2:
                structure[i] = np.random.randint(5, 12)
    if np.random.random() < 0.5:
        # Float bounds on purpose: mutated genes are clamped to the bounds,
        # and an integer bound could leave an int among the weights, which
        # decode_individual would then count as a layer size.
        weights = tools.mutPolynomialBounded(weights, low=-1.0, up=1.0, eta=0.1, indpb=0.2)[0]
    individual[:] = structure + weights
    return individual,

def _evaluate_individuals(individuals, evaluate):
    """Evaluate each individual and store fitness and loss histories on it."""
    for ind in individuals:
        fit = evaluate(ind)
        ind.fitness.values = fit[0]  # the fitness tuple itself
        ind.raw_loss = fit[1]
        ind.train_loss = fit[2]
        ind.val_loss = fit[3]


# Run the genetic algorithm
def run_genetic_algorithm(filepath, n_population=4, n_generations=10):
    """Evolve network architectures for a regression task and plot the results.

    Args:
        filepath: Path to a CSV file, or a pandas DataFrame. In both cases
            the last column is the target and the others are features. A
            DataFrame argument is not modified.
        n_population: Number of individuals in the initial population.
        n_generations: Number of generations to run.

    Returns:
        None. Progress and the best individual are printed, the best
        architecture is retrained from scratch for 20 epochs, and two
        matplotlib figures are shown.

    Note:
        The population is not held at a fixed size: every generation adds a
        clone of the best individual so far, random newcomers, and
        one- and two-layer individuals when none are present.
    """
    if isinstance(filepath, str):
        X, y = load_data(filepath)
    else:
        # Work on copies instead of dropping the target in place, so the
        # caller's DataFrame still has all its columns afterwards.
        X = filepath.iloc[:, :-1].copy()
        y = filepath.iloc[:, -1]
        for column in X.columns:
            if X[column].dtype == 'object' or X[column].dtype == 'str':
                # NOTE(review): tokenizator indexes its argument with
                # ['review']; confirm it accepts a single column here.
                X[column] = tokenizator(X[column])
    X_train, X_test, y_train, y_test, input_dim, output_dim = preprocess_data(X, y)

    toolbox = create_individual_toolbox(input_dim, output_dim)
    toolbox.register("evaluate", eval_nn, input_dim=input_dim, output_dim=output_dim, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", mutate_individual)
    toolbox.register("select", tools.selTournament, tournsize=3)

    population = toolbox.population(n=n_population)

    # Evaluate every individual of the initial population
    _evaluate_individuals(population, toolbox.evaluate)

    hall_of_fame = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("min", lambda values: np.min([val[0] for val in values]))
    logbook = tools.Logbook()
    logbook.header = ["gen", "evals"] + stats.fields

    best_loss_per_gen = []
    train_losses = []
    val_losses = []

    for gen in range(n_generations):
        # Carry the best individual found so far into this generation
        if hall_of_fame:
            population.append(toolbox.clone(hall_of_fame[0]))

        # 10% best, 10% worst (kept for diversity), tournament for the rest
        tenth = int(0.1 * len(population))
        best_individuals = tools.selBest(population, k=tenth)
        worst_individuals = tools.selWorst(population, k=tenth)
        remaining_individuals = tools.selTournament(population, k=len(population) - len(best_individuals) - len(worst_individuals), tournsize=3)

        offspring = best_individuals + worst_individuals + remaining_individuals
        offspring = list(map(toolbox.clone, offspring))

        # Crossover on consecutive pairs, then mutation; both invalidate fitness
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if np.random.random() < 0.5:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        for mutant in offspring:
            if np.random.random() < 0.2:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Evaluate the offspring changed by crossover or mutation
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        _evaluate_individuals(invalid_ind, toolbox.evaluate)

        # Add a few random newcomers for diversity
        new_random_individuals = [toolbox.individual(num_layers=np.random.randint(1, 5)) for _ in range(tenth)]
        population[:] = offspring + new_random_individuals

        # Make sure one-layer and two-layer individuals are represented
        has_one_layer = any(len(decode_individual(ind)[0]) == 1 for ind in population)
        has_two_layers = any(len(decode_individual(ind)[0]) == 2 for ind in population)
        if not has_one_layer:
            population.append(toolbox.individual(num_layers=1))
        if not has_two_layers:
            population.append(toolbox.individual(num_layers=2))

        # Evaluate the individuals that were added after the variation step
        late_invalid = [ind for ind in population if not ind.fitness.valid]
        _evaluate_individuals(late_invalid, toolbox.evaluate)

        # Sort from best to worst now that every fitness is valid
        population.sort(key=lambda ind: ind.fitness.values[0])

        # Record the statistics; evals counts both evaluation batches
        record = stats.compile(population)
        logbook.record(gen=gen, evals=len(invalid_ind) + len(late_invalid), **record)
        print(logbook.stream)

        hall_of_fame.update(population)

        best_ind = tools.selBest(population, 1)[0]
        best_loss_per_gen.append(best_ind.fitness.values[0])

        train_losses.append(best_ind.train_loss)
        val_losses.append(best_ind.val_loss)

        # Show the best individual of each generation
        print(f"Generation {gen}: Best Individual = {best_ind}")

    # Display the best individual overall
    best_individual = hall_of_fame[0]
    display_best_individual(best_individual, X_train.shape[1])

    best_structure, best_weights = decode_individual(best_individual)
    print('Best Individual Structure:', best_structure)
    # The fitness tuple holds a single value, the loss.
    print('Fitness (Loss, Num Layers, Total Neurons):', best_individual.fitness.values)
    print('Raw Loss:', best_individual.raw_loss)

    # Retrain the best architecture from scratch to get its loss per epoch
    best_model_layers = best_structure
    best_model = Sequential()
    best_model.add(Input(shape=(X_train.shape[1],)))
    for neurons in best_model_layers:
        best_model.add(Dense(neurons, activation='relu'))
    best_model.add(Dense(output_dim))
    best_model.compile(loss='mean_squared_error', optimizer=Adam(0.01))
    history = best_model.fit(X_train, y_train, epochs=20, verbose=0, validation_data=(X_test, y_test))
    loss = best_model.evaluate(X_test, y_test, verbose=0)
    print('Final Loss of the Best Model:', loss)

    # Plot the results
    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Loss over Epochs for Best Model')
    plt.legend()

    plt.tight_layout()
    plt.show()

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(best_loss_per_gen, label='Best Loss per Generation')
    plt.xlabel('Generation')
    plt.ylabel('Loss')
    plt.title('Best Loss per Generation')
    plt.legend()

    plt.tight_layout()
    plt.show()

# Display the structure and the weights of the best individual
def display_best_individual(best_individual, input_dim):
    """Print the layer sizes and per-layer weight matrices of an individual.

    Args:
        best_individual: Genome understood by ``decode_individual`` (layer
            sizes followed by weight values).
        input_dim: Number of inputs feeding the first layer.

    The weight genes are consumed layer by layer as ``(inputs, neurons)``
    matrices. If the genome runs out of values for a layer, a short notice
    is printed and the function stops instead of failing inside ``reshape``.
    """
    structure, weights = decode_individual(best_individual)

    print("Best Individual Structure:")
    print(structure)

    print("\nBest Individual Weights:")
    weight_idx = 0
    input_dim_layer = input_dim
    for i, neurons in enumerate(structure):
        n_values = input_dim_layer * neurons
        layer_genes = weights[weight_idx:weight_idx + n_values]
        if len(layer_genes) < n_values:
            # The genome is too short for its own structure; reshape would raise.
            print(f"Layer {i + 1} - Weights: incomplete ({len(layer_genes)} of {n_values} values)")
            return
        layer_weights = np.array(layer_genes).reshape(input_dim_layer, neurons)
        print(f"Layer {i + 1} - Weights:\n{layer_weights}")
        weight_idx += n_values
        input_dim_layer = neurons

    # Freshly generated genomes carry kernel weights only, so these slices
    # show whatever genes are left over and are normally empty.
    print("\nBest Individual Biases:")
    for i, neurons in enumerate(structure):
        biases = weights[weight_idx:weight_idx + neurons]
        print(f"Layer {i + 1} - Biases:\n{biases}")
        weight_idx += neurons

# Example usage
filepath = 'housing.csv'  # Replace with the path to your CSV file
run_genetic_algorithm(filepath)


Concrete

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from deap import base, creator, tools, algorithms

# Load the data from a CSV file
def load_data(filepath):
    """Read a CSV file and split it into features and target.

    Args:
        filepath: Location of the CSV file, passed to ``pd.read_csv``.

    Returns:
        ``(X, y)`` as NumPy arrays: ``X`` holds every column except the
        last one, ``y`` holds the last column.
    """
    frame = pd.read_csv(filepath)
    feature_part = frame.iloc[:, :-1]  # everything but the last column
    target_part = frame.iloc[:, -1]  # the last column is the target
    return feature_part.values, target_part.values

# Preprocess the data
def preprocess_data(X, y):
    """Split the data 80/20 and standardise the features.

    The scaler is fitted on the training split only and then applied to
    the test split.

    Args:
        X: Feature table with the features along axis 1.
        y: Target values aligned with the rows of ``X``.

    Returns:
        ``(X_train_scaled, X_test_scaled, y_train, y_test, input_dim,
        output_dim)`` where ``input_dim`` is ``X.shape[1]`` and
        ``output_dim`` is always 1.
    """
    train_features, test_features, train_target, test_target = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    scaler = StandardScaler()
    scaled_train = scaler.fit_transform(train_features)
    scaled_test = scaler.transform(test_features)

    n_inputs = X.shape[1]
    n_outputs = 1
    return scaled_train, scaled_test, train_target, test_target, n_inputs, n_outputs

# Build the toolbox that creates individuals
def create_individual_toolbox(input_dim, output_dim):
    """Create a DEAP toolbox able to generate individuals and populations.

    An individual is a list made of 1 to 4 hidden-layer sizes (each drawn
    from 5..11) followed by one weight in [-1, 1) per connection between
    consecutive layers, the first layer being fed by ``input_dim`` inputs.

    Args:
        input_dim: Number of input features of the network.
        output_dim: Accepted for symmetry with the other helpers; not used
            when generating individuals.

    Returns:
        A ``base.Toolbox`` with ``attr_int``, ``attr_float``, ``individual``
        and ``population`` registered.
    """
    # The creator classes live in the deap.creator module, so only create
    # them when they are not there yet.
    registered = vars(creator)
    if 'FitnessMin' not in registered:
        creator.create("FitnessMin", base.Fitness, weights=(-1.0,))
    if 'Individual' not in registered:
        creator.create("Individual", list, fitness=creator.FitnessMin)

    toolbox = base.Toolbox()
    toolbox.register("attr_int", np.random.randint, 5, 12)
    toolbox.register("attr_float", np.random.uniform, -1, 1)

    def generate_individual(num_layers=None):
        if num_layers is None:
            num_layers = np.random.randint(1, 5)  # between 1 and 4 layers
        layer_sizes = []
        for _ in range(num_layers):
            layer_sizes.append(np.random.randint(5, 12))
        # Each layer is fed by the previous one; the first by the inputs.
        fan_ins = [input_dim] + layer_sizes[:-1]
        n_weights = sum(fan_in * width for fan_in, width in zip(fan_ins, layer_sizes))
        genes = list(layer_sizes)
        genes.extend(toolbox.attr_float() for _ in range(n_weights))
        return creator.Individual(genes)

    def generate_diverse_population(n):
        members = []
        used_structures = set()
        # Keep drawing until n individuals with pairwise distinct structures exist.
        while len(members) < n:
            depth = np.random.randint(1, 5)
            candidate = generate_individual(num_layers=depth)
            signature = tuple(decode_individual(candidate)[0])
            if signature in used_structures:
                continue
            used_structures.add(signature)
            members.append(candidate)
        return members

    toolbox.register("individual", generate_individual)
    toolbox.register("population", generate_diverse_population)

    return toolbox

# Split an individual into its structure and its weights
def decode_individual(individual):
    """Return ``(structure, weights)`` for a genome.

    The structure is the leading run of ``int`` genes (the hidden-layer
    sizes); everything after it is treated as weights. Counting only the
    leading run matters because crossover swaps arbitrary slices between
    genomes, so an ``int`` can end up among the weights; counting every
    ``int`` in the genome would then pull float genes into the structure.

    Args:
        individual: List-like genome of layer sizes followed by weights.

    Returns:
        Two slices of ``individual``: the layer sizes and the remaining genes.
    """
    num_layers = 0
    for gene in individual:
        if not isinstance(gene, int):
            break
        num_layers += 1
    return individual[:num_layers], individual[num_layers:]

# Evaluate the error of the model encoded by an individual
def eval_nn(individual, input_dim, output_dim, X_train, y_train, X_test, y_test):
    """Build, seed, train and score the network described by ``individual``.

    Args:
        individual: Genome split by ``decode_individual`` into hidden-layer
            sizes and weight values.
        input_dim: Number of input features.
        output_dim: Number of units of the final linear layer.
        X_train: Training features passed to ``model.fit``.
        y_train: Training targets passed to ``model.fit``.
        X_test: Features used as validation data and for the final score.
        y_test: Targets used as validation data and for the final score.

    Returns:
        ``(fitness, loss, train_losses, val_losses)`` where ``fitness`` is
        the one-element tuple ``(loss,)``, ``loss`` is the value returned by
        ``model.evaluate`` (mean squared error) and the two lists are the
        per-epoch histories. When the genome holds too few weights for its
        hidden layers the result is ``((inf,), inf, [], [])``.
    """
    structure, weights = decode_individual(individual)

    # Reject genomes that cannot fill every hidden-layer kernel before any
    # model is built.
    needed = 0
    fan_in = input_dim
    for neurons in structure:
        needed += fan_in * neurons
        fan_in = neurons
    if needed > len(weights):
        return (float('inf'),), float('inf'), [], []

    model = Sequential()
    model.add(Input(shape=(input_dim,)))
    for neurons in structure:
        model.add(Dense(neurons, activation='relu'))
    model.add(Dense(output_dim))

    # Seed each Dense kernel from the genome with zero biases. A layer for
    # which not enough genes are left keeps its default initialisation.
    weight_idx = 0
    fan_in = X_train.shape[1]
    for layer in model.layers:
        if isinstance(layer, Dense):
            units = layer.units
            n_values = fan_in * units
            if weight_idx + n_values <= len(weights):
                kernel = np.array(weights[weight_idx:weight_idx + n_values]).reshape(fan_in, units)
                weight_idx += n_values
                layer.set_weights([kernel, np.zeros(units)])
                fan_in = units

    model.compile(loss='mean_squared_error', optimizer=Adam(0.01))
    history = model.fit(X_train, y_train, epochs=20, verbose=0, validation_data=(X_test, y_test))
    loss = model.evaluate(X_test, y_test, verbose=0)
    return (loss,), loss, history.history['loss'], history.history['val_loss']

# Mutate an individual
def mutate_individual(individual):
    """Mutate the layer sizes and/or the weight genes of ``individual`` in place.

    With probability 0.5 each layer size is redrawn from 5..11 with
    probability 0.2; independently, with probability 0.5 the weights go
    through DEAP's bounded polynomial mutation. The number of weight genes
    is not adjusted when a layer size changes.

    Args:
        individual: Genome understood by ``decode_individual``.

    Returns:
        A one-element tuple holding the mutated individual, as DEAP expects
        from mutation operators.
    """
    structure, weights = decode_individual(individual)
    if np.random.random() < 0.5:
        for i, _ in enumerate(structure):
            if np.random.random() < 0.2:
                structure[i] = np.random.randint(5, 12)
    if np.random.random() < 0.5:
        # Float bounds on purpose: mutated genes are clamped to the bounds,
        # and an integer bound could leave an int among the weights, which
        # decode_individual would then count as a layer size.
        weights = tools.mutPolynomialBounded(weights, low=-1.0, up=1.0, eta=0.1, indpb=0.2)[0]
    individual[:] = structure + weights
    return individual,

def _evaluate_individuals(individuals, evaluate):
    """Evaluate each individual and store fitness and loss histories on it."""
    for ind in individuals:
        fit = evaluate(ind)
        ind.fitness.values = fit[0]  # the fitness tuple itself
        ind.raw_loss = fit[1]
        ind.train_loss = fit[2]
        ind.val_loss = fit[3]


# Run the genetic algorithm
def run_genetic_algorithm(filepath, n_population=4, n_generations=10):
    """Evolve network architectures for a regression task and plot the results.

    Args:
        filepath: Path to a CSV file, or a pandas DataFrame. In both cases
            the last column is the target and the others are features. A
            DataFrame argument is not modified.
        n_population: Number of individuals in the initial population.
        n_generations: Number of generations to run.

    Returns:
        None. Progress and the best individual are printed, the best
        architecture is retrained from scratch for 20 epochs, and two
        matplotlib figures are shown.

    Note:
        The population is not held at a fixed size: every generation adds a
        clone of the best individual so far, random newcomers, and
        one- and two-layer individuals when none are present.
    """
    if isinstance(filepath, str):
        X, y = load_data(filepath)
    else:
        # Work on copies instead of dropping the target in place, so the
        # caller's DataFrame still has all its columns afterwards.
        X = filepath.iloc[:, :-1].copy()
        y = filepath.iloc[:, -1]
        for column in X.columns:
            if X[column].dtype == 'object' or X[column].dtype == 'str':
                # NOTE(review): tokenizator indexes its argument with
                # ['review']; confirm it accepts a single column here.
                X[column] = tokenizator(X[column])
    X_train, X_test, y_train, y_test, input_dim, output_dim = preprocess_data(X, y)

    toolbox = create_individual_toolbox(input_dim, output_dim)
    toolbox.register("evaluate", eval_nn, input_dim=input_dim, output_dim=output_dim, X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)
    toolbox.register("mate", tools.cxTwoPoint)
    toolbox.register("mutate", mutate_individual)
    toolbox.register("select", tools.selTournament, tournsize=3)

    population = toolbox.population(n=n_population)

    # Evaluate every individual of the initial population
    _evaluate_individuals(population, toolbox.evaluate)

    hall_of_fame = tools.HallOfFame(1)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("min", lambda values: np.min([val[0] for val in values]))
    logbook = tools.Logbook()
    logbook.header = ["gen", "evals"] + stats.fields

    best_loss_per_gen = []
    train_losses = []
    val_losses = []

    for gen in range(n_generations):
        # Carry the best individual found so far into this generation
        if hall_of_fame:
            population.append(toolbox.clone(hall_of_fame[0]))

        # 10% best, 10% worst (kept for diversity), tournament for the rest
        tenth = int(0.1 * len(population))
        best_individuals = tools.selBest(population, k=tenth)
        worst_individuals = tools.selWorst(population, k=tenth)
        remaining_individuals = tools.selTournament(population, k=len(population) - len(best_individuals) - len(worst_individuals), tournsize=3)

        offspring = best_individuals + worst_individuals + remaining_individuals
        offspring = list(map(toolbox.clone, offspring))

        # Crossover on consecutive pairs, then mutation; both invalidate fitness
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if np.random.random() < 0.5:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values
        for mutant in offspring:
            if np.random.random() < 0.2:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Evaluate the offspring changed by crossover or mutation
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        _evaluate_individuals(invalid_ind, toolbox.evaluate)

        # Add a few random newcomers for diversity
        new_random_individuals = [toolbox.individual(num_layers=np.random.randint(1, 5)) for _ in range(tenth)]
        population[:] = offspring + new_random_individuals

        # Make sure one-layer and two-layer individuals are represented
        has_one_layer = any(len(decode_individual(ind)[0]) == 1 for ind in population)
        has_two_layers = any(len(decode_individual(ind)[0]) == 2 for ind in population)
        if not has_one_layer:
            population.append(toolbox.individual(num_layers=1))
        if not has_two_layers:
            population.append(toolbox.individual(num_layers=2))

        # Evaluate the individuals that were added after the variation step
        late_invalid = [ind for ind in population if not ind.fitness.valid]
        _evaluate_individuals(late_invalid, toolbox.evaluate)

        # Sort from best to worst now that every fitness is valid
        population.sort(key=lambda ind: ind.fitness.values[0])

        # Record the statistics; evals counts both evaluation batches
        record = stats.compile(population)
        logbook.record(gen=gen, evals=len(invalid_ind) + len(late_invalid), **record)
        print(logbook.stream)

        hall_of_fame.update(population)

        best_ind = tools.selBest(population, 1)[0]
        best_loss_per_gen.append(best_ind.fitness.values[0])

        train_losses.append(best_ind.train_loss)
        val_losses.append(best_ind.val_loss)

        # Show the best individual of each generation
        print(f"Generation {gen}: Best Individual = {best_ind}")

    # Display the best individual overall
    best_individual = hall_of_fame[0]
    display_best_individual(best_individual, X_train.shape[1])

    best_structure, best_weights = decode_individual(best_individual)
    print('Best Individual Structure:', best_structure)
    # The fitness tuple holds a single value, the loss.
    print('Fitness (Loss, Num Layers, Total Neurons):', best_individual.fitness.values)
    print('Raw Loss:', best_individual.raw_loss)

    # Retrain the best architecture from scratch to get its loss per epoch
    best_model_layers = best_structure
    best_model = Sequential()
    best_model.add(Input(shape=(X_train.shape[1],)))
    for neurons in best_model_layers:
        best_model.add(Dense(neurons, activation='relu'))
    best_model.add(Dense(output_dim))
    best_model.compile(loss='mean_squared_error', optimizer=Adam(0.01))
    history = best_model.fit(X_train, y_train, epochs=20, verbose=0, validation_data=(X_test, y_test))
    loss = best_model.evaluate(X_test, y_test, verbose=0)
    print('Final Loss of the Best Model:', loss)

    # Plot the results
    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Train Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Loss over Epochs for Best Model')
    plt.legend()

    plt.tight_layout()
    plt.show()

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(best_loss_per_gen, label='Best Loss per Generation')
    plt.xlabel('Generation')
    plt.ylabel('Loss')
    plt.title('Best Loss per Generation')
    plt.legend()

    plt.tight_layout()
    plt.show()

# Display the structure and the weights of the best individual
def display_best_individual(best_individual, input_dim):
    """Print the layer sizes and per-layer weight matrices of an individual.

    Args:
        best_individual: Genome understood by ``decode_individual`` (layer
            sizes followed by weight values).
        input_dim: Number of inputs feeding the first layer.

    The weight genes are consumed layer by layer as ``(inputs, neurons)``
    matrices. If the genome runs out of values for a layer, a short notice
    is printed and the function stops instead of failing inside ``reshape``.
    """
    structure, weights = decode_individual(best_individual)

    print("Best Individual Structure:")
    print(structure)

    print("\nBest Individual Weights:")
    weight_idx = 0
    input_dim_layer = input_dim
    for i, neurons in enumerate(structure):
        n_values = input_dim_layer * neurons
        layer_genes = weights[weight_idx:weight_idx + n_values]
        if len(layer_genes) < n_values:
            # The genome is too short for its own structure; reshape would raise.
            print(f"Layer {i + 1} - Weights: incomplete ({len(layer_genes)} of {n_values} values)")
            return
        layer_weights = np.array(layer_genes).reshape(input_dim_layer, neurons)
        print(f"Layer {i + 1} - Weights:\n{layer_weights}")
        weight_idx += n_values
        input_dim_layer = neurons

    # Freshly generated genomes carry kernel weights only, so these slices
    # show whatever genes are left over and are normally empty.
    print("\nBest Individual Biases:")
    for i, neurons in enumerate(structure):
        biases = weights[weight_idx:weight_idx + neurons]
        print(f"Layer {i + 1} - Biases:\n{biases}")
        weight_idx += neurons

# Example usage
filepath = reg_gen(lignes = 10000, colonnes = 10)  # Despite the name, this is the DataFrame returned by reg_gen, not a CSV path
run_genetic_algorithm(filepath)
