# Présentation des données

In [None]:
# Exercise 1 - Data preparation

# Libraries used throughout the notebook
import pandas as pd 
import numpy as np 
# Load the dataset; the path is relative, so "synthetic.csv" must be in the
# current working directory
data = pd.read_csv("synthetic.csv")

# Preview the first rows of the DataFrame
print(data.head())


In [None]:
# 1 - Number of columns in the DataFrame (second entry of data.shape).
# This counts every column of the CSV, including the 'Class' label column
# that later cells read.
num_attributes = data.shape[1]

# Display the number of attributes
print(f"Le nombre d'attributs dans le fichier est : {num_attributes}")


In [None]:
# Column dtypes and non-null counts (i.e. missing values).
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() additionally emits a trailing "None" line.
print(data.info())

In [None]:
# List the column names to see which attributes are available
print(data.columns)
# Author's note: 14 attributes in the model
# NOTE(review): confirm whether that count includes the 'Class' label column

In [None]:
# Distinct label values found in the 'Class' column
classes_uniques = data['Class'].unique()

# Number of different classes
num_classes = len(classes_uniques)

# Display the number of different classes
print(f"Le nombre de classes différentes dans les données est : {num_classes}")

In [None]:
# How many instances does each class contain?
nbr_instances = data['Class'].value_counts()
print(nbr_instances)

# Output recorded by the author:
# Class
# 1    908
# 0    674
# 2    472
# 3    244
# Name: count, dtype: int64

# Les données sont-elles linéairement séparables ?
# Non : si l'on observe le schéma 1, on voit que les données ne le sont pas.
# De plus, si l'on choisit de les ranger par classe, on peut s'apercevoir que… (phrase à compléter)


In [None]:
import matplotlib.pyplot as plt # plotting library (matplotlib)
# Scatter plot of Attr_A (x axis) against Attr_B (y axis), one point per row,
# coloured by the value of 'Class'.
plt.figure(figsize=(10, 6))
plt.scatter(data['Attr_A'], data['Attr_B'], c=data['Class'], alpha=0.5, cmap='viridis')
# NOTE(review): the axis labels read 'Attribut 1' / 'Attribut 2' while the
# plotted columns are 'Attr_A' / 'Attr_B'.
plt.xlabel('Attribut 1')
plt.ylabel('Attribut 2')
plt.title('Scatter Plot des attributs par classe')
plt.colorbar(label='Classe')
plt.show()

# On voit clairement que les données ne sont pas linéairement séparables à l'état brut.
# Je pense qu'utiliser une image est vraiment mieux.


## 5 et 6 (voir compte-rendu.md) 

# 2 Mise en oeuvre des modèles

In [None]:
# Pick the attribute to analyse, e.g. 'Attr_A'
attribute = 'Attr_A'


# Quartiles of the chosen attribute; the result is a Series indexed by the
# quantile levels 0.25, 0.5 and 0.75
quartiles = data[attribute].quantile([0.25, 0.5, 0.75])

# Sort the attribute values and print them
sorted_attribute = data[attribute].sort_values()
print(sorted_attribute)
print(quartiles)
# Display each quartile, looked up by its quantile level
print(f"Quartile 1 (Q1) de l'attribut '{attribute}': {quartiles[0.25]}")
print(f"Médiane (Q2) de l'attribut '{attribute}': {quartiles[0.5]}")
print(f"Quartile 3 (Q3) de l'attribut '{attribute}': {quartiles[0.75]}")


# 6 : 

# Partie 2 : Mise en oeuvre des modèles.

# Arbre de décision 


In [None]:
# Arbre de décision

# Calcul de l'entropie

"""
L'entropie est une mesure de l'incertitude associée à une variable aléatoire.
"""

def entropie(dataframe, attribut_cible):
    """
    Compute the Shannon entropy, in bits, of one column of a DataFrame.

    Parameters:
    dataframe (pd.DataFrame): The data.
    attribut_cible (str): Name of the column whose value distribution is measured.

    Returns:
    float: -sum(p * log2(p)) over the values observed in the column.
        0.0 for an empty column or a column holding a single distinct value.
    """
    counts = dataframe[attribut_cible].value_counts()
    # Keep only values that actually occur. p * log2(p) tends to 0 as p -> 0,
    # so dropping zero counts is exact, whereas adding an epsilon inside the
    # logarithm makes a pure column come out slightly negative.
    counts = counts[counts > 0]
    if counts.empty:
        return 0.0

    proba = counts / counts.sum()
    # "+ 0.0" turns the -0.0 obtained for a pure column into a plain 0.0.
    return float(-(proba * np.log2(proba)).sum()) + 0.0

# Quick test of the function.
# NOTE(review): 'Attr_A' is a feature column, not the label; the entropy of
# the class distribution would be entropie(data, 'Class').
print(entropie(data, 'Attr_A'))


11.166163082646115
11.166163082645376

11.166163082646115
11.166163082645376

11.166163082646115
11.166163082645376

1.8608867211835993
1.860886721183598

In [None]:
import pandas as pd
import numpy as np

# Fonction pour calculer tous les quartiles d'un attribut donné
def calculate_quartiles(data, attribute):
    """
    Return the first quartile, median and third quartile of one column.

    Parameters:
    data (pd.DataFrame): The data.
    attribute (str): Name of the column to summarise.

    Returns:
    pd.Series: The three quantiles, indexed by their levels 0.25, 0.5 and 0.75.
    """
    quartile_levels = [0.25, 0.5, 0.75]
    column = data[attribute]
    return column.quantile(quartile_levels)

# Test de la fonction sur le DataFrame chargé

print(calculate_quartiles(data, 'Attr_A'))


In [None]:
data

In [None]:
data.sort_values(by="Attr_C")

In [None]:
# data.head()
# NOTE(review): the name `sorted` shadows Python's built-in sorted() for every
# statement run after this one in the same namespace; consider renaming it.
sorted = data.sort_values(by="Attr_A")
print(len(sorted))

In [None]:
def gain_information(dataframe, attribut_cible, attribut_test):
    """
    Find the best binary split of a numeric attribute by information gain.

    Candidate thresholds are the minimum, the three quartiles and the maximum
    of ``attribut_test``. For a threshold t the rows are divided into a lower
    partition (value < t) and an upper partition (value >= t); the gain is the
    entropy of ``attribut_cible`` before the split minus the size-weighted
    entropy of the two partitions.

    Parameters:
    dataframe (pd.DataFrame): The DataFrame containing the data to partition.
    attribut_cible (str): The target attribute we want to predict.
    attribut_test (str): The attribute whose gain we want to calculate.

    Returns:
    tuple or None: None when ``attribut_test`` has at most one distinct value.
    Otherwise a tuple containing:
        - attribut_test (str): The test attribute.
        - max_gain (float): The highest gain found (0 if no split improves).
        - best_split_value (float or None): The threshold giving that gain.
        - best_partitions (tuple or None): (lower, upper) DataFrames for that
          threshold, with rows sorted by ``attribut_test``.
    """
    # An attribute with a single distinct value cannot separate anything.
    if len(dataframe[attribut_test].unique()) <= 1:
        return None

    # Entropy of the target before any split.
    entropie_initiale = entropie(dataframe, attribut_cible)

    sorted_data = dataframe.sort_values(by=attribut_test)
    sorted_column = sorted_data[attribut_test]
    # Loop-invariant: the weights are always relative to the full row count.
    total_instances = len(sorted_data)

    # Quartiles plus min and max; duplicates are dropped (first occurrence
    # kept) so that no threshold is evaluated twice.
    candidates = [sorted_column.min()] \
        + calculate_quartiles(sorted_data, attribut_test).to_list() \
        + [sorted_column.max()]
    candidates = list(dict.fromkeys(candidates))

    max_gain = 0
    best_split_value = None
    best_partitions = None

    for split_value in candidates:
        lower_partition = sorted_data[sorted_column < split_value]
        upper_partition = sorted_data[sorted_column >= split_value]

        # A split that leaves one side empty separates nothing. Skipping it
        # also prevents floating-point noise (or rows dropped by NaN
        # comparisons) from reporting a spurious positive gain for it, which
        # would hand an empty partition to the caller.
        if lower_partition.empty or upper_partition.empty:
            continue

        lower_weight = len(lower_partition) / total_instances
        upper_weight = len(upper_partition) / total_instances
        weighted_entropy = (lower_weight * entropie(lower_partition, attribut_cible)) + \
                           (upper_weight * entropie(upper_partition, attribut_cible))

        current_gain = entropie_initiale - weighted_entropy

        # Strict comparison: the first threshold reaching the best gain wins.
        if current_gain > max_gain:
            max_gain = current_gain
            best_split_value = split_value
            best_partitions = (lower_partition, upper_partition)

    return attribut_test, max_gain, best_split_value, best_partitions


# Testing the function with an example attribute
# Let's use 'Attr_H' as the attribute to test and 'Class' as the target
test_gain_info = gain_information(data, 'Class', 'Attr_H')
test_gain_info

In [None]:
def find_best_gain(dataframe, attribut_cible):
    """
    Pick the attribute and threshold giving the highest information gain.

    Every column except the target is scored with ``gain_information``;
    columns for which it returns None are ignored.

    Parameters:
    dataframe (pd.DataFrame): The input data as a pandas DataFrame.
    attribut_cible (str): The target attribute to predict (e.g. 'Class').

    Returns:
    tuple: (best_attribute, best_gain, best_split_value, best_partitions).
        When no column yields a strictly positive gain the tuple is
        (None, 0, None, None).
    """
    # Running winner, in the same layout as the returned tuple.
    winner = (None, 0, None, None)

    candidate_columns = [
        column for column in dataframe.columns if column != attribut_cible
    ]

    for column in candidate_columns:
        outcome = gain_information(dataframe, attribut_cible, column)
        if outcome is None:
            continue

        # The first element repeats the column name and is not needed here.
        gain, threshold, parts = outcome[1:]

        # Strict comparison: on ties the earliest column is kept.
        if gain > winner[1]:
            winner = (column, gain, threshold, parts)

    return winner


find_best_gain(data, 'Class')

In [None]:
from sklearn.model_selection import train_test_split

def split_data(dataframe, attribut_cible, test_size=0.2, random_state=42):
    """
    Separate features from the target and split both into train and test sets.

    Parameters:
    dataframe (pd.DataFrame): The DataFrame containing the data to split.
    attribut_cible (str): Name of the target column, e.g. "Class".
    test_size (float): Share of the rows placed in the test set. Default is 0.2.
    random_state (int): Seed forwarded to ``train_test_split``. Default is 42.

    Returns:
    tuple: (X_train, X_test, y_train, y_test), where the X parts hold every
        column except the target and the y parts hold the target column.
    """
    labels = dataframe[attribut_cible]
    features = dataframe.drop(columns=[attribut_cible])

    # train_test_split yields the four pieces in exactly the order returned.
    pieces = train_test_split(
        features, labels, test_size=test_size, random_state=random_state)
    X_train, X_test, y_train, y_test = pieces
    return X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test =  split_data(data, "Class")

In [None]:
class DecisionNode:
    """
    One node of a binary decision tree.

    An internal node stores the attribute and threshold it tests plus its two
    children; a leaf stores the value it predicts.
    """

    def __init__(self, is_leaf, attribute=None, split_value=None,
                 left=None, right=None, prediction=None):
        """
        Store the node's fields.

        Parameters:
        is_leaf (bool): Whether the node is a leaf.
        attribute (str, optional): Attribute tested by an internal node.
        split_value (float, optional): Threshold tested by an internal node.
        left (DecisionNode, optional): The left child node.
        right (DecisionNode, optional): The right child node.
        prediction (object, optional): Value predicted by a leaf node.
        """
        self.is_leaf, self.attribute, self.split_value = is_leaf, attribute, split_value
        self.left, self.right, self.prediction = left, right, prediction

    def _is_leaf(self):
        """Return the ``is_leaf`` flag."""
        return self.is_leaf

In [None]:
class DecisionTree:
    """
    Binary decision tree classifier grown greedily by information gain.

    Attributes:
    max_depth (int): Depth at which growth stops.
    tree (DecisionNode): Root node; None until ``fit``/``train`` has run.
    """

    def __init__(self, max_depth=8):
        """
        Create an untrained tree.

        Parameters:
        max_depth (int, optional): The maximum depth of the tree. Default is 8.
        """
        self.max_depth, self.tree = max_depth, None

    def _build_tree(self, data, target_attribute, depth=0):
        """
        Grow the subtree for ``data`` and return its root.

        Parameters:
        data (pd.DataFrame): Rows reaching this node, target column included.
        target_attribute (str): Name of the column to predict.
        depth (int): Depth of the node being built.

        Returns:
        DecisionNode: A leaf, or an internal node whose children were built
            recursively from the two partitions of the best split.
        """
        labels = data[target_attribute]

        def majority_leaf():
            # Leaf predicting the most frequent label among the rows here.
            return DecisionNode(is_leaf=True, prediction=labels.mode()[0])

        # Stop growing once the depth budget is used up.
        if depth >= self.max_depth:
            return majority_leaf()

        # Pure node: every row carries the same label.
        if labels.nunique() == 1:
            return DecisionNode(is_leaf=True, prediction=labels.iloc[0])

        chosen_attribute, gain, threshold, halves = find_best_gain(
            data, target_attribute)

        # No split improves on the current node.
        if chosen_attribute is None or gain <= 0:
            return majority_leaf()

        below, at_or_above = halves
        return DecisionNode(
            is_leaf=False,
            attribute=chosen_attribute,
            split_value=threshold,
            left=self._build_tree(below, target_attribute, depth + 1),
            right=self._build_tree(at_or_above, target_attribute, depth + 1),
        )

    def fit(self, data, target_attribute):
        """
        Build the tree from ``data`` and store its root in ``self.tree``.

        Parameters:
        data (pd.DataFrame): Training rows, target column included.
        target_attribute (str): Name of the column to predict, e.g. "Class".
        """
        root = self._build_tree(data, target_attribute)
        self.tree = root

    def train(self, dataframe, target_attribute):
        """
        Alias of ``fit``.

        Parameters:
        dataframe (pd.DataFrame): The DataFrame containing the training data.
        target_attribute (str): The target attribute we want to predict.
        """
        self.fit(dataframe, target_attribute)

    def predict(self, data):
        """
        Predict a label for every row of ``data``.

        Parameters:
        data (pd.DataFrame): Rows to classify.

        Returns:
        pd.Series: One prediction per row, carrying the index of ``data``.
        """
        return data.apply(self._predict_single, axis=1)

    def _predict_single(self, row):
        """
        Predict the label of a single row.

        Parameters:
        row (pd.Series): The data point, indexable by attribute name.

        Returns:
        object: The prediction stored in the leaf the row ends up in.
        """
        current = self.tree
        # Walk down: left when the value is below the threshold, right otherwise.
        while not current.is_leaf:
            goes_left = row[current.attribute] < current.split_value
            current = current.left if goes_left else current.right
        return current.prediction

    def print_tree(self, node=None, indent=""):
        """
        Print the tree in an indented, human-readable form.

        Parameters:
        node (DecisionNode, optional): Node to print; the root when omitted.
        indent (str): Prefix written before each printed line.
        """
        current = self.tree if node is None else node

        if current._is_leaf():
            print(f"{indent}Leaf: Predict {current.prediction}")
            return

        # Internal node: the test first, then both subtrees one level deeper.
        print(f"{indent}Node: {current.attribute} < {current.split_value}")
        deeper = indent + "    "

        print(f"{indent}Left:")
        self.print_tree(current.left, deeper)

        print(f"{indent}Right:")
        self.print_tree(current.right, deeper)

## Classe qui permet d'évaluer notre modèle.

In [None]:
class EvaluationMetrics:
    """
    Classification metrics computed by hand.

    Every method accepts any pair of equal-length 1-D sequences (lists, NumPy
    arrays, pandas Series) and compares them position by position.
    """

    @staticmethod
    def _as_arrays(y_true, y_pred):
        """Return both inputs as flat NumPy arrays, checking that their sizes match."""
        true_arr = np.asarray(y_true).ravel()
        pred_arr = np.asarray(y_pred).ravel()
        if true_arr.shape != pred_arr.shape:
            raise ValueError(
                f"y_true and y_pred must have the same length "
                f"({true_arr.size} != {pred_arr.size})")
        return true_arr, pred_arr

    @staticmethod
    def accuracy_score(y_true, y_pred):
        """
        Compute the fraction of predictions equal to the true value.

        Arguments:
        y_true -- The true (target) values.
        y_pred -- The values predicted by the model.

        Returns:
        float -- The accuracy; 0.0 when the inputs are empty.

        Raises:
        ValueError -- If the two inputs differ in length.
        """
        true_arr, pred_arr = EvaluationMetrics._as_arrays(y_true, y_pred)
        if true_arr.size == 0:
            # Nothing to score; mirror the 0.0 convention of the other metrics
            # instead of dividing by zero.
            return 0.0
        return int(np.count_nonzero(true_arr == pred_arr)) / true_arr.size

    @staticmethod
    def precision_score(y_true, y_pred, positive_label):
        """
        Compute the precision for one class treated as positive.

        Arguments:
        y_true -- The true (target) values.
        y_pred -- The values predicted by the model.
        positive_label -- The class for which precision is computed.

        Returns:
        float -- True positives / predicted positives; 0.0 when the class is
                 never predicted.

        Raises:
        ValueError -- If the two inputs differ in length.
        """
        true_arr, pred_arr = EvaluationMetrics._as_arrays(y_true, y_pred)
        predicted = pred_arr == positive_label
        predicted_positives = int(np.count_nonzero(predicted))
        if predicted_positives == 0:
            return 0.0
        true_positives = int(np.count_nonzero((true_arr == positive_label) & predicted))
        return true_positives / predicted_positives

    @staticmethod
    def recall_score(y_true, y_pred, positive_label):
        """
        Compute the recall for one class treated as positive.

        Arguments:
        y_true -- The true (target) values.
        y_pred -- The values predicted by the model.
        positive_label -- The class for which recall is computed.

        Returns:
        float -- True positives / actual positives; 0.0 when the class never
                 occurs in y_true.

        Raises:
        ValueError -- If the two inputs differ in length.
        """
        true_arr, pred_arr = EvaluationMetrics._as_arrays(y_true, y_pred)
        actual = true_arr == positive_label
        actual_positives = int(np.count_nonzero(actual))
        if actual_positives == 0:
            return 0.0
        true_positives = int(np.count_nonzero(actual & (pred_arr == positive_label)))
        return true_positives / actual_positives

    @staticmethod
    def f1_score(y_true, y_pred, positive_label):
        """
        Compute the F1 score for one class treated as positive.

        Arguments:
        y_true -- The true (target) values.
        y_pred -- The values predicted by the model.
        positive_label -- The class for which the F1 score is computed.

        Returns:
        float -- Harmonic mean of precision and recall; 0.0 when both are 0.

        Raises:
        ValueError -- If the two inputs differ in length.
        """
        precision = EvaluationMetrics.precision_score(y_true, y_pred, positive_label)
        recall = EvaluationMetrics.recall_score(y_true, y_pred, positive_label)

        # Avoid 0 / 0 when the class is neither predicted nor found correctly.
        if precision == 0 and recall == 0:
            return 0.0

        return 2 * (precision * recall) / (precision + recall)


In [None]:
def train_and_evaluate_decision_trees(X_train, y_train, X_test, y_test, depths):
    """
    Train one DecisionTree per maximum depth and score each on the test set.

    Args:
        X_train (pd.DataFrame): Training features.
        y_train (pd.Series): Training labels; its ``name`` is used as the
            target column name.
        X_test (pd.DataFrame): Testing features.
        y_test (pd.Series): Testing labels.
        depths (list): Maximum depths to try.

    Returns:
        list: (accuracy, model) tuples sorted by accuracy, best first. Runs
        with equal accuracy keep the order in which their depths were given.
    """
    # DecisionTree.train expects features and target in a single DataFrame.
    train_data = pd.concat([X_train, y_train], axis=1)

    models_and_scores = []

    for max_depth in depths:
        model = DecisionTree(max_depth=max_depth)
        model.train(train_data, y_train.name)

        # Predict from the features alone: the ground-truth labels are never
        # handed to the model at prediction time.
        y_pred = model.predict(X_test)

        accuracy = EvaluationMetrics.accuracy_score(y_test, y_pred)
        models_and_scores.append((accuracy, model))

        print(f"Max Depth: {max_depth}, Accuracy: {accuracy:.4f}")

    # Sort on the accuracy only; the models themselves are not comparable.
    models_and_scores.sort(key=lambda x: x[0], reverse=True)

    return models_and_scores

In [None]:
# Split the data, then train and score one tree per depth from 3 to 8.
# The printed list holds (accuracy, model) tuples, best accuracy first.
target_attribute  = "Class"
X_train, X_test, y_train, y_test = split_data(data, target_attribute)
models_and_scores = train_and_evaluate_decision_trees(X_train, y_train, X_test, y_test, [3, 4, 5, 6, 7, 8])
print(models_and_scores)

## Two best models

In [None]:
# models_and_scores is sorted by accuracy (descending), so its first two
# entries are the two best runs. Each entry is an (accuracy, model) tuple,
# not a bare model.
best_model = models_and_scores[0]
second_best_model = models_and_scores[1]

print("Best model", best_model)
print("Second Best model", second_best_model)

## Test of DecisionNode and DecisionTree class

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Define the target attribute
target_attribute = "Class"

# Split the data into training and testing sets using the split_data function
X_train, X_test, y_train, y_test = split_data(data, target_attribute)

# Instantiate the DecisionTree class
decision_tree = DecisionTree(max_depth=8)
evaluation_metric = EvaluationMetrics()

# Train the decision tree; it expects features and target in one DataFrame
decision_tree.train(pd.concat([X_train, y_train], axis=1), target_attribute)

# Predict the target attribute for the testing data.
# NOTE(review): the frame passed here also contains the y_test column;
# passing X_test alone would avoid handing the labels to predict().
predictions = decision_tree.predict(pd.concat([X_test, y_test], axis=1))

# Evaluate the model with scikit-learn's metrics and with the hand-written
# EvaluationMetrics class side by side.
# NOTE(review): apart from accuracy the pairs are not directly comparable:
# the scikit-learn calls use average='weighted' (all classes), whereas the
# EvaluationMetrics calls score class 1 only (positive_label=1).
accuracy = accuracy_score(y_test, predictions)
eval_accuracy = evaluation_metric.accuracy_score(y_test, predictions)

precision = precision_score(y_test, predictions, average='weighted')
eval_precision = evaluation_metric.precision_score(y_test, predictions, positive_label=1)

recall = recall_score(y_test, predictions, average='weighted')
eval_recall = evaluation_metric.recall_score(y_test, predictions, positive_label=1)

f1 = f1_score(y_test, predictions, average='weighted')
eval_f1 = evaluation_metric.f1_score(y_test, predictions, positive_label=1)

# Print the evaluation metrics
print(f"Accuracy: {accuracy:.2f}")
print(f"Eval Accuracy: {eval_accuracy:.2f}")

print(f"Precision: {precision:.2f}")
print(f"Eval Precision: {eval_precision:.2f}")

print(f"Recall: {recall:.2f}")
print(f"Eval Recall: {eval_recall:.2f}")

print(f"F1-score: {f1:.2f}")
print(f"Eval F1-score: {eval_f1:.2f}")

# Dump the learned tree structure
decision_tree.print_tree()


# 2.2 Réseaux de neurones artificiels

### 1 - Division des données.

In [29]:
from sklearn.model_selection import train_test_split


def neural_split_data(data, target_attribute, test_size=0.15, val_size=0.15,
                      random_state=42):
    """
    Split a DataFrame into training, validation and test sets.

    The test set is carved out first, then the validation set is taken from
    the remaining rows. Both sizes are fractions of the WHOLE data set, so
    the defaults give a 70 % / 15 % / 15 % split.

    Parameters:
    data (pd.DataFrame): The full data set, target column included.
    target_attribute (str): Name of the target column.
    test_size (float): Fraction of all rows placed in the test set.
    val_size (float): Fraction of all rows placed in the validation set.
    random_state (int): Seed forwarded to both ``train_test_split`` calls.

    Returns:
    tuple: (X_train, y_train, X_val, y_val, X_test, y_test). Note that the
        features and labels of each subset are adjacent, unlike the order
        returned by ``train_test_split``.

    Raises:
    ValueError: If the two fractions are not in (0, 1) or leave no rows for
        training.
    """
    if not (0 < test_size < 1 and 0 < val_size < 1 and test_size + val_size < 1):
        raise ValueError(
            "test_size and val_size must be fractions in (0, 1) whose sum is below 1")

    features = data.drop(columns=[target_attribute])
    labels = data[target_attribute]

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=test_size, random_state=random_state)

    # The second split only sees the rows left after removing the test set,
    # so the validation fraction is rescaled to that remainder.
    val_fraction = val_size / (1 - test_size)
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=val_fraction, random_state=random_state)

    return X_train, y_train, X_val, y_val, X_test, y_test

print (X_train)

         Attr_A     Attr_B       Attr_C      Attr_D      Attr_E       Attr_F  \
1131  11.845628  11.161878  1313.712979  111.581496   95.456558   972.046868   
351    8.499002  10.638715   776.313616  101.128788   98.194311  1101.968011   
944    7.378612  10.231545   864.338533   97.541152  116.114975  1177.874002   
1958   9.887848   9.274918   980.163706  105.475302  107.331859  1017.637187   
210   11.062933  11.315338   699.887951   90.718138   96.913945  1083.308853   
...         ...        ...          ...         ...         ...          ...   
1638  10.500580   8.488398  1030.144905  102.816959  135.236907   829.134392   
1095  10.075134   9.747951   749.562376   95.946030   87.677656  1180.735377   
1130  10.228703   9.811083   964.832208   94.333244  102.811652   973.889034   
1294   9.614717  11.321535   943.832125   92.273154  102.943806  1094.288656   
860   12.609813   9.373724  1055.615571   95.149818  106.666943   988.819215   

         Attr_G     Attr_H     Attr_I  

### 2 - Création de la classe "NeuralNetwork"

In [None]:
import numpy as np


class NeuralNetwork:
    """
    Fully connected feed-forward classifier trained by mini-batch gradient descent.

    Hidden layers use the configured activation ('tanh' or 'relu'); the output
    layer always uses softmax and the loss is categorical cross-entropy.
    Callers pass data with one sample per row; internally every matrix holds
    one sample per column, i.e. has shape (layer_size, n_samples).
    """

    def __init__(self, architecture, activation='relu'):
        """
        Initialize the neural network with a specific architecture and activation function.

        Parameters:
        - architecture (sequence of int): Number of units in each layer, from
          the input size to the number of classes.
        - activation (str): Hidden-layer activation, 'tanh' or 'relu'.
        """
        self.architecture = architecture
        self.activation = activation

        # Initialize weights and biases
        self.weights = []
        self.biases = []
        self.initialize_parameters()

    def initialize_parameters(self):
        """
        (Re)initialize weights with small Gaussian values and biases with zeros.

        Note: this reseeds NumPy's global random generator with 42, so every
        network starts from the same weights and the shuffles that follow are
        reproducible.
        """
        np.random.seed(42)

        # Start from empty lists so a second call replaces the parameters
        # instead of appending a second set after the first.
        self.weights = []
        self.biases = []

        layer_sizes = self.architecture
        for fan_in, fan_out in zip(layer_sizes[:-1], layer_sizes[1:]):
            self.weights.append(np.random.randn(fan_out, fan_in) * 0.01)
            self.biases.append(np.zeros((fan_out, 1)))

    def activation_function(self, x, layer_index=None):
        """
        Apply the activation of the given layer.

        Parameters:
        - x (np.array): Pre-activations, shape (layer_size, n_samples).
        - layer_index (int, optional): Index of the layer in ``self.weights``.
          The last index selects softmax; any other value selects the
          configured hidden activation.

        Returns:
        - np.array: Output array with activation applied.

        Raises:
        - ValueError: If the configured hidden activation is unknown.
        """
        if layer_index == len(self.architecture) - 2:
            # Output layer: softmax over the class axis. Subtracting the
            # column maximum keeps exp() from overflowing.
            exp_x = np.exp(x - np.max(x, axis=0, keepdims=True))
            return exp_x / np.sum(exp_x, axis=0, keepdims=True)
        elif self.activation == 'tanh':
            return np.tanh(x)
        elif self.activation == 'relu':
            return np.maximum(0, x)
        else:
            raise ValueError(
                "Invalid activation function. Use 'tanh' or 'relu'.")

    def activation_derivative(self, x):
        """
        Compute the derivative of the hidden activation at pre-activation ``x``.

        Parameters:
        - x (np.array): Pre-activations.

        Returns:
        - np.array: Element-wise derivative, same shape as ``x``.

        Raises:
        - ValueError: If the configured hidden activation is unknown.
        """
        if self.activation == 'tanh':
            return 1 - np.tanh(x)**2
        elif self.activation == 'relu':
            return (x > 0).astype(float)
        else:
            raise ValueError(
                "Invalid activation function. Use 'tanh' or 'relu'.")

    def forward_propagation(self, X):
        """
        Perform forward propagation.

        Parameters:
        - X (array-like): Input data, shape (n_samples, n_features).

        Returns:
        - (activations, zs): ``activations[0]`` is the transposed input and
          ``activations[k]`` the output of layer k; ``zs[k - 1]`` is the
          pre-activation of layer k. All have one column per sample.
        """
        # Accept DataFrames as well as arrays and switch to column-per-sample.
        activations = [np.asarray(X, dtype=float).T]
        zs = []

        for i in range(len(self.architecture) - 1):
            z = np.dot(self.weights[i], activations[-1]) + self.biases[i]
            zs.append(z)
            activations.append(self.activation_function(z, i))

        return activations, zs

    def backward_propagation(self, activations, zs, y):
        """
        Perform backward propagation and calculate gradients.

        Parameters:
        - activations (list of np.array): Output of ``forward_propagation``.
        - zs (list of np.array): Pre-activations from ``forward_propagation``.
        - y (np.array): One-hot labels, shape (n_classes, n_samples), i.e. the
          same layout as ``activations[-1]``.

        Returns:
        - (dw, db): Gradients of the weights and of the biases, in layer order.
        """
        m = y.shape[1]
        dw = []
        db = []

        # For softmax combined with cross-entropy the output-layer error is
        # simply prediction minus target.
        dz = activations[-1] - y

        # Walk from the last layer back to the first.
        for i in range(len(self.architecture) - 1, 0, -1):
            dw.append(np.dot(dz, activations[i - 1].T) / m)
            db.append(np.sum(dz, axis=1, keepdims=True) / m)

            if i > 1:
                # Push the error through the weights of layer i and the
                # activation derivative of layer i - 1.
                dz = np.dot(self.weights[i - 1].T, dz) * \
                    self.activation_derivative(zs[i - 2])

        # Gradients were collected last-to-first; return them first-to-last.
        return dw[::-1], db[::-1]

    def update_parameters(self, dw, db, learning_rate):
        """
        Apply one gradient-descent step to the weights and biases.

        Parameters:
        - dw (list of np.array): Gradients of weights.
        - db (list of np.array): Gradients of biases.
        - learning_rate (float): Learning rate for the update.
        """
        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * dw[i]
            self.biases[i] -= learning_rate * db[i]

    def _one_hot(self, labels):
        """Return integer class labels as a (n_classes, n_samples) one-hot matrix."""
        n_classes = self.architecture[-1]
        return np.eye(n_classes)[np.asarray(labels, dtype=int)].T

    def _shuffle_data(self, X_train, y_train):
        """Return X and y reordered by one shared random permutation of the rows."""
        shuffled_idx = np.random.permutation(len(X_train))

        def take(rows):
            # pandas objects need positional .iloc; arrays index directly.
            return rows.iloc[shuffled_idx] if hasattr(rows, "iloc") else rows[shuffled_idx]

        return take(X_train), take(y_train)

    def train(self, X_train, y_train, X_val, y_val, learning_rate=0.01, epochs=100, patience=4, batch_size=4):
        """
        Train the network with mini-batch gradient descent and early stopping.

        Parameters:
        - X_train (array-like): Training data, shape (n_samples, n_features).
        - y_train (array-like): Integer class labels of the training data.
        - X_val (array-like): Validation data.
        - y_val (array-like): Integer class labels of the validation data.
        - learning_rate (float): Learning rate for training.
        - epochs (int): Maximum number of epochs.
        - patience (int): Number of consecutive epochs without improvement of
          the validation loss after which training stops.
        - batch_size (int): Size of each mini-batch. Rows left over after the
          last full batch are not used in that epoch.
        """
        best_val_loss = float('inf')
        patience_counter = 0

        # Work on plain arrays so DataFrames/Series and arrays behave alike.
        X_all = np.asarray(X_train, dtype=float)
        y_all = np.asarray(y_train, dtype=int)
        num_batches = X_all.shape[0] // batch_size

        for epoch in range(epochs):
            X_shuffled, y_shuffled = self._shuffle_data(X_all, y_all)

            print(f"num_batches {num_batches}")
            for batch_idx in range(num_batches):
                start = batch_idx * batch_size
                end = start + batch_size

                X_batch = X_shuffled[start:end]
                # Targets must share the (n_classes, batch) layout of the
                # network output, otherwise the error term mixes samples and
                # classes.
                y_batch = self._one_hot(y_shuffled[start:end])

                activations, zs = self.forward_propagation(X_batch)
                dw, db = self.backward_propagation(activations, zs, y_batch)
                self.update_parameters(dw, db, learning_rate)

            # Calculate loss on validation data
            val_loss = self.calculate_loss(X_val, y_val)

            # Check for early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                patience_counter = 0
            else:
                patience_counter += 1

            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch + 1}")
                break

            # Display progress
            print(
                f"Epoch {epoch + 1}/{epochs}: Validation loss = {val_loss:.4f}")

    def calculate_loss(self, X, y):
        """
        Calculate the mean categorical cross-entropy on a data set.

        Parameters:
        - X (array-like): Input data, shape (n_samples, n_features).
        - y (array-like): Integer class labels.

        Returns:
        - float: Calculated loss.
        """
        activations, _ = self.forward_propagation(X)
        probabilities = activations[-1]
        targets = self._one_hot(y)

        m = targets.shape[1]  # Number of samples.
        epsilon = 1e-9  # Keeps log() away from log(0)
        loss = -np.sum(targets * np.log(probabilities + epsilon)) / m
        return loss

    def predict(self, X):
        """
        Predict the labels for the given input data.

        Parameters:
        - X (array-like): Input data, shape (n_samples, n_features).

        Returns:
        - np.array: Index of the most probable class for each sample.
        """
        activations, _ = self.forward_propagation(X)
        # Classes run along axis 0, samples along axis 1.
        return np.argmax(activations[-1], axis=0)

In [None]:
# Spécifiez le nom de la colonne cible
target_attribute = "Class"

# Divisez les données en ensembles d'entraînement, de validation et de test
X_train, y_train, X_val, y_val, X_test, y_test = neural_split_data(
    data, target_attribute)

### 3. Entraînement

In [None]:
def train_models(X_train, y_train, X_val, y_val, architectures, activations, learning_rate=0.01, epochs=100, patience=4, batch_size=4):
    """
    Build and train one neural network per (activation, architecture) pair.

    Parameters:
    - X_train (np.array): Training data.
    - y_train (np.array): Training labels.
    - X_val (np.array): Validation data.
    - y_val (np.array): Validation labels.
    - architectures (list of lists): Architectures (lists of integers) to train.
    - activations (list of str): Activation functions to use ('tanh' and 'relu').
    - learning_rate (float): Learning rate forwarded to each model's training.
    - epochs (int): Number of epochs forwarded to each model's training.
    - patience (int): Early-stopping patience forwarded to each model's training.
    - batch_size (int): Mini-batch size forwarded to each model's training.

    Returns:
    - dict: Trained models keyed by "<activation>_<architecture>".
    """
    # Activations vary slowest, architectures fastest
    combinations = (
        (act_name, layer_sizes)
        for act_name in activations
        for layer_sizes in architectures
    )

    trained = {}
    for act_name, layer_sizes in combinations:
        network = NeuralNetwork(layer_sizes, act_name)
        network.train(
            X_train,
            y_train,
            X_val,
            y_val,
            learning_rate=learning_rate,
            epochs=epochs,
            patience=patience,
            batch_size=batch_size,
        )
        # Keep the fitted network under a key naming its configuration
        trained[f"{act_name}_{layer_sizes}"] = network

    return trained

In [None]:
# Architectures (tuples of integers) to compare
architectures = [
    (14, 10, 8, 6, 4),
    (14, 10, 8, 4, 4),
    (14, 6, 4, 4),
]
# Activation functions to compare
activations = ["tanh", "relu"]
# One trained model per (activation, architecture) pair
models = train_models(
    X_train, y_train, X_val, y_val,
    architectures=architectures,
    activations=activations,
)

### 4. Évaluation

In [None]:
def evaluate_models(models, X_test, y_test):
    """
    Score every trained model on the test data.

    Parameters:
    - models (dict): Dictionary containing trained models.
    - X_test (np.array): Test data.
    - y_test (np.array): Test labels.

    Returns:
    - dict: For each model name, a dictionary with the keys "predictions",
      "loss" and "accuracy".
    """
    def _score(network):
        # Predictions first, then the loss, then the share of exact matches
        predicted = network.predict(X_test)
        test_loss = network.calculate_loss(X_test, y_test)
        return {
            "predictions": predicted,
            "loss": test_loss,
            "accuracy": np.mean(predicted == y_test),
        }

    return {name: _score(network) for name, network in models.items()}

In [None]:
# Evaluate every trained model on the held-out test set
results = evaluate_models(models=models, X_test=X_test, y_test=y_test)
# Bare expression so the notebook displays the results dictionary
results

In [None]:
# Name of the target column
target_attribute = "Class"

# Split the data into training, validation and test sets
X_train, y_train, X_val, y_val, X_test, y_test = neural_split_data(data, target_attribute)

# Debugging helpers kept for reference:
# print(np.arange(X_train.shape[0]))
# indices = np.arange(X_train.shape[0])
# np.random.shuffle(indices)
# print(indices)
# shuffled = X_train.iloc[indices]
# print(shuffled)
# print(f"X_train shape {X_train.shape}")
# print("X_train", X_train.head())
# print(f"y_train shape {y_train.shape}")
# print(f"y_train  {y_train.head()}")

# print(f"Unique values in y_train : {np.unique(y_train)}")

# Architectures to test
architectures = [(14, 10, 8, 6, 4)]

# Activation function to test: use 'tanh' or 'relu'
activation = 'tanh'

# Arguments are positional, in the same order as the
# NeuralNetwork(architecture, activation) call in train_models, so the
# activation chosen above is the one the network actually uses.
neural_network = NeuralNetwork(architectures[0], activation)
neural_network.train(X_train, y_train, X_val, y_val, learning_rate=0.01, epochs=50, patience=4, batch_size=4)


# # Boucle pour tester chaque architecture
# for architecture in architectures:
#     print(f"Testing architecture: {architecture} with activation: {activation}")

#     # Créez une instance de NeuralNetwork avec l'architecture et l'activation spécifiées
#     neural_network = NeuralNetwork(architecture, activation)

#     # Entraînez le réseau de neurones
#     neural_network.train(X_train, y_train, X_val, y_val, learning_rate=0.01, epochs=50, patience=4, batch_size=4)

#     # Effectuez les prédictions sur le jeu de test
#     predictions = neural_network.predict(X_test)

# #     # Évaluez les prédictions (par exemple, calcul de la précision)
#     accuracy = np.mean(predictions == y_test)
    
# #     # Affichez les résultats de la précision
#     print(f"Accuracy: {accuracy:.4f}\n")

