In [4]:
import os
import pandas as pd
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

# Fonctions d'activation complétées
def relu(x):
    """ReLU activation: max(0, x)"""
    assert isinstance(x, np.ndarray), "Input to ReLU must be a numpy array"
    result = np.maximum(0, x)
    assert np.all(result >= 0), "ReLU output must be non-negative"
    return result

def relu_derivative(x):
    """Derivative of ReLU: 1 if x > 0, else 0"""
    assert isinstance(x, np.ndarray), "Input to ReLU derivative must be a numpy array"
    result = (x > 0).astype(float)
    assert np.all((result == 0) | (result == 1)), "ReLU derivative must be 0 or 1"
    return result

def softmax(x):
    """Softmax activation: exp(x) / sum(exp(x))"""
    assert isinstance(x, np.ndarray), "Input to softmax must be a numpy array"
    # Stabilisation numérique en soustrayant le max
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    result = exp_x / np.sum(exp_x, axis=1, keepdims=True)
    assert np.all((result >= 0) & (result <= 1)), "Softmax output must be in [0, 1]"
    assert np.allclose(np.sum(result, axis=1), 1), "Softmax output must sum to 1 per sample"
    return result

class MultiClassNeuralNetwork:
    def __init__(self, layer_sizes, learning_rate=0.01, lambda_l2=0.01):
        """Initialise le réseau neuronal avec les tailles de couches et taux d'apprentissage"""
        assert isinstance(layer_sizes, list) and len(layer_sizes) >= 2, "layer_sizes must be a list with at least 2 elements"
        assert all(isinstance(size, int) and size > 0 for size in layer_sizes), "All layer sizes must be positive integers"
        assert isinstance(learning_rate, (int, float)) and learning_rate > 0, "Learning rate must be a positive number"
        
        self.layer_sizes = layer_sizes
        self.learning_rate = learning_rate
        self.lambda_l2 = lambda_l2  # Coefficient de régularisation L2
        self.weights = []
        self.biases = []
        # Initialisation ADAM
        self.timestep = 1
        self.beta1 = 0.9
        self.beta2 = 0.999
        self.epsilon = 1e-8
        self.m_weights = []  # Moments pour les poids
        self.v_weights = []  # Moments carrés pour les poids
        self.m_biases = []   # Moments pour les biais
        self.v_biases = []   # Moments carrés pour les biais
        np.random.seed(42)
        
        for i in range(len(layer_sizes)-1):
            #w = np.random.randn(layer_sizes[i], layer_sizes[i+1]) * 0.01
            w=np.random.randn(layer_sizes[i],layer_sizes[i+1])*np.sqrt(2/layer_sizes[i])
            b = np.zeros((1, layer_sizes[i+1]))
            assert w.shape == (layer_sizes[i], layer_sizes[i+1]), f"Weight matrix {i+1} has incorrect shape"
            assert b.shape == (1, layer_sizes[i+1]), f"Bias vector {i+1} has incorrect shape"
            self.weights.append(w)
            self.biases.append(b)

        # Initialisation des moments pour Adam (après création des poids/biais)
        self.m_weights = [np.zeros_like(w) for w in self.weights]
        self.v_weights = [np.zeros_like(w) for w in self.weights]
        self.m_biases = [np.zeros_like(b) for b in self.biases]
        self.v_biases = [np.zeros_like(b) for b in self.biases]

    def forward(self, X):
        """Forward propagation: Z^[l] = A^[l-1]W^[l] + b^[l], A^[l] = g(Z^[l])"""
        assert isinstance(X, np.ndarray), "Input X must be a numpy array"
        assert X.shape[1] == self.layer_sizes[0], f"Input dimension ({X.shape[1]}) must match input layer size ({self.layer_sizes[0]})"

        self.activations = [X]
        self.z_values = []

        # Propagation pour les couches cachées (ReLU)
        for i in range(len(self.weights)-1):
            z=np.dot(self.activations[-1], self.weights[i]) + self.biases[i]
            assert z.shape == (X.shape[0], self.layer_sizes[i+1]), f"Z^[{i+1}] has incorrect shape"
            self.z_values.append(z)
            self.activations.append(relu(z))

        # Propagation pour la couche de sortie (softmax)
        z=np.dot(self.activations[-1], self.weights[-1]) + self.biases[-1]
        assert z.shape == (X.shape[0], self.layer_sizes[-1]), "Output Z has incorrect shape"
        self.z_values.append(z)
        output = softmax(z)
        assert output.shape == (X.shape[0], self.layer_sizes[-1]), "Output A has incorrect shape"
        self.activations.append(output)

        return self.activations[-1]

    def compute_loss(self, y_true, y_pred):
        """Entropie croisée catégorielle avec régularisation L2"""
        assert isinstance(y_true, np.ndarray) and isinstance(y_pred, np.ndarray), "Inputs to loss must be numpy arrays"
        assert y_true.shape == y_pred.shape, "y_true and y_pred must have the same shape"

        # Éviter les log(0)
        y_pred = np.clip(y_pred, 1e-15, 1 - 1e-15)
        
        # Calcul de la perte
        m = y_true.shape[0]
        cross_entropy = -np.sum(y_true * np.log(y_pred)) / m
        
        # Ajout de la régularisation L2
        l2_penalty = 0
        for w in self.weights:
            l2_penalty += np.sum(np.square(w))
        l2_penalty = (self.lambda_l2 / (2 * m)) * l2_penalty
        
        total_loss = cross_entropy + l2_penalty
        assert not np.isnan(total_loss), "Loss computation resulted in NaN"
        return total_loss

    def compute_accuracy(self, y_true, y_pred):
        """Calcule la précision: proportion de prédictions correctes"""
        assert isinstance(y_true, np.ndarray) and isinstance(y_pred, np.ndarray), "Inputs to accuracy must be numpy arrays"
        assert y_true.shape == y_pred.shape, "y_true and y_pred must have the same shape"

        predictions = np.argmax(y_pred, axis=1)
        true_labels = np.argmax(y_true, axis=1)
        accuracy = np.mean(predictions == true_labels)
        assert 0 <= accuracy <= 1, "Accuracy must be between 0 and 1"
        return accuracy

    def backward(self, X, y, outputs):
        """Rétropropagation: calcule dW^[l], db^[l] pour chaque couche"""
        assert isinstance(X, np.ndarray) and isinstance(y, np.ndarray) and isinstance(outputs, np.ndarray), "Inputs to backward must be numpy arrays"
        assert X.shape[1] == self.layer_sizes[0], f"Input dimension ({X.shape[1]}) must match input layer size ({self.layer_sizes[0]})"
        assert y.shape == outputs.shape, "y and outputs must have the same shape"

        m = X.shape[0]
        self.d_weights = [None] * len(self.weights)
        self.d_biases = [None] * len(self.biases)

        # Gradient pour la couche de sortie (softmax + entropie croisée)
        dZ = outputs - y
        assert dZ.shape == outputs.shape, "dZ for output layer has incorrect shape"
        self.d_weights[-1] = (self.activations[-2].T @ dZ) / m + (self.lambda_l2 / m) * self.weights[-1]
        self.d_biases[-1] = np.sum(dZ, axis=0, keepdims=True) / m

        # Rétropropagation pour les couches cachées
        for i in range(len(self.weights)-2, -1, -1):
            dZ = (dZ @ self.weights[i+1].T) * relu_derivative(self.z_values[i])
            assert dZ.shape == (X.shape[0], self.layer_sizes[i+1]), f"dZ^[{i+1}] has incorrect shape"
            self.d_weights[i] = (self.activations[i].T @ dZ) / m + (self.lambda_l2 / m) * self.weights[i]
            self.d_biases[i] = np.sum(dZ, axis=0, keepdims=True) / m

        # Mise à jour des paramètres
        for i in range(len(self.weights)):
            # Moments pour les poids
            self.m_weights[i] = self.beta1 * self.m_weights[i] + (1 - self.beta1) * self.d_weights[i]
            self.v_weights[i] = self.beta2 * self.v_weights[i] + (1 - self.beta2) * (self.d_weights[i] ** 2)
            # Moments corrigés
            m_hat_w = self.m_weights[i] / (1 - self.beta1 ** self.timestep)
            v_hat_w = self.v_weights[i] / (1 - self.beta2 ** self.timestep)
            # Mise à jour des poids
            self.weights[i] -= self.learning_rate * m_hat_w / (np.sqrt(v_hat_w) + self.epsilon)
            # Moments pour les biais
            self.m_biases[i] = self.beta1 * self.m_biases[i] + (1 - self.beta1) * self.d_biases[i]
            self.v_biases[i] = self.beta2 * self.v_biases[i] + (1 - self.beta2) * (self.d_biases[i] ** 2)
            # Moments corrigés
            m_hat_b = self.m_biases[i] / (1 - self.beta1 ** self.timestep)
            v_hat_b = self.v_biases[i] / (1 - self.beta2 ** self.timestep)
            # Mise à jour des biais
            self.biases[i] -= self.learning_rate * m_hat_b / (np.sqrt(v_hat_b) + self.epsilon)
        # Incrémenter le temps pour Adam
        self.timestep += 1

    def train(self, X, y, X_val, y_val, epochs, batch_size):
        """Entraîne le réseau neuronal avec SGD par mini-lots, avec validation"""
        assert isinstance(X, np.ndarray) and isinstance(y, np.ndarray), "X and y must be numpy arrays"
        assert isinstance(X_val, np.ndarray) and isinstance(y_val, np.ndarray), "X_val and y_val must be numpy arrays"
        assert X.shape[1] == self.layer_sizes[0], f"Input dimension ({X.shape[1]}) must match input layer size ({self.layer_sizes[0]})"
        assert y.shape[1] == self.layer_sizes[-1], f"Output dimension ({y.shape[1]}) must match output layer size ({self.layer_sizes[-1]})"
        assert X_val.shape[1] == self.layer_sizes[0], f"Validation input dimension ({X_val.shape[1]}) must match input layer size ({self.layer_sizes[0]})"
        assert y_val.shape[1] == self.layer_sizes[-1], f"Validation output dimension ({y_val.shape[1]}) must match output layer size ({self.layer_sizes[-1]})"
        assert isinstance(epochs, int) and epochs > 0, "Epochs must be a positive integer"
        assert isinstance(batch_size, int) and batch_size > 0, "Batch size must be a positive integer"

        train_losses = []
        val_losses = []
        train_accuracies = []
        val_accuracies = []

        for epoch in range(epochs):
            # Mélange des données
            indices = np.random.permutation(X.shape[0])
            X_shuffled = X[indices]
            y_shuffled = y[indices]

            epoch_loss = 0
            # Entraînement par mini-lots
            for i in range(0, X.shape[0], batch_size):
                X_batch = X_shuffled[i:i+batch_size]
                y_batch = y_shuffled[i:i+batch_size]

                outputs = self.forward(X_batch)
                epoch_loss += self.compute_loss(y_batch, outputs)
                self.backward(X_batch, y_batch, outputs)

            # Calcul des métriques
            train_loss = epoch_loss / (X.shape[0] // batch_size)
            train_pred = self.forward(X)
            train_accuracy = self.compute_accuracy(y, train_pred)
            val_pred = self.forward(X_val)
            val_loss = self.compute_loss(y_val, val_pred)
            val_accuracy = self.compute_accuracy(y_val, val_pred)

            train_losses.append(train_loss)
            val_losses.append(val_loss)
            train_accuracies.append(train_accuracy)
            val_accuracies.append(val_accuracy)

            if epoch % 10 == 0:
                print(f"Epoch {epoch}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, "
                      f"Train Acc: {train_accuracy:.4f}, Val Acc: {val_accuracy:.4f}")

        return train_losses, val_losses, train_accuracies, val_accuracies

    def predict(self, X):
        """Prédit les étiquettes de classe"""
        assert isinstance(X, np.ndarray), "Input X must be a numpy array"
        assert X.shape[1] == self.layer_sizes[0], f"Input dimension ({X.shape[1]}) must match input layer size ({self.layer_sizes[0]})"

        outputs = self.forward(X)
        predictions = np.argmax(outputs, axis=1)
        assert predictions.shape == (X.shape[0],), "Predictions have incorrect shape"
        return predictions


# Chargement et prétraitement des données
def load_and_preprocess_data(data_dir):
    """Charge et prétraite les données Tifinagh"""
    # Charger les étiquettes
    try:
        labels_df = pd.read_csv(os.path.join(data_dir, 'labels-map.csv'))
        assert 'image_path' in labels_df.columns and 'label' in labels_df.columns, "CSV must contain 'image_path' and 'label' columns"
    except FileNotFoundError:
        print("labels-map.csv not found. Building DataFrame from directories...")
        image_paths = []
        labels = []
        for label_dir in os.listdir(data_dir):
            label_path = os.path.join(data_dir, label_dir)
            if os.path.isdir(label_path):
                for img_name in os.listdir(label_path):
                    image_paths.append(os.path.join(label_path, img_name))
                    labels.append(label_dir)
        labels_df = pd.DataFrame({'image_path': image_paths, 'label': labels})

    # Encoder les étiquettes
    label_encoder = LabelEncoder()
    labels_df['label_encoded'] = label_encoder.fit_transform(labels_df['label'])
    num_classes = len(label_encoder.classes_)

    # Fonction pour charger et prétraiter une image
    def load_and_preprocess_image(image_path, target_size=(32, 32)):
        """Charge et prétraite une image : conversion en niveaux de gris, redimensionnement, normalisation"""
        assert os.path.exists(image_path), f"Image not found: {image_path}"
        img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        assert img is not None, f"Failed to load image: {image_path}"
        img = cv2.resize(img, target_size)
        img = img.astype(np.float32) / 255.0  # Normalisation
        return img.flatten()  # Aplatir pour le réseau de neurones

    # Charger toutes les images
    X = np.array([load_and_preprocess_image(path) for path in labels_df['image_path']])
    y = labels_df['label_encoded'].values

    # Vérifier les dimensions
    assert X.shape[0] == y.shape[0], "Mismatch between number of images and labels"
    assert X.shape[1] == 32 * 32, f"Expected flattened image size of {32*32}, got {X.shape[1]}"

    # Diviser en ensembles d'entraînement, validation et test
    X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, stratify=y_temp, random_state=42)

    # Encoder les étiquettes en one-hot
    one_hot_encoder = OneHotEncoder(sparse_output=False)
    y_train_one_hot = one_hot_encoder.fit_transform(y_train.reshape(-1, 1))
    y_val_one_hot = one_hot_encoder.transform(y_val.reshape(-1, 1))
    y_test_one_hot = one_hot_encoder.transform(y_test.reshape(-1, 1))

    print(f"Train: {X_train.shape[0]} samples, Validation: {X_val.shape[0]} samples, Test: {X_test.shape[0]} samples")
    print(f"Number of classes: {num_classes}")

    return X_train, X_val, X_test, y_train_one_hot, y_val_one_hot, y_test_one_hot, y_test, label_encoder

if __name__ == "__main__":
    # Définir le chemin vers les données
    data_dir = 'amhcd-data-64/tifinagh-images/'
    # Charger et prétraiter les données
    X_train, X_val, X_test, y_train, y_val, y_test, y_test_labels, label_encoder = load_and_preprocess_data(data_dir)

labels-map.csv not found. Building DataFrame from directories...
Train: 16908 samples, Validation: 5637 samples, Test: 5637 samples
Number of classes: 33


In [10]:
# Créer et entraîner le modèle
layer_sizes = [X_train.shape[1], 64,32, y_train.shape[1]]  # 64 et 32 neurones cachés
nn = MultiClassNeuralNetwork(layer_sizes, learning_rate=0.0001, lambda_l2=0.001)

print("\nDébut de l'entraînement...")
train_losses, val_losses, train_accuracies, val_accuracies = nn.train(
    X_train, y_train, X_val, y_val, epochs=150, batch_size=32)

# Évaluation sur l'ensemble de test
y_pred = nn.predict(X_test)
print("\nRapport de classification (Test set) :")
print(classification_report(y_test_labels, y_pred, target_names=label_encoder.classes_))

# Matrice de confusion
cm = confusion_matrix(y_test_labels, y_pred)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Matrice de confusion (Test set)')
plt.xlabel('Prédit')
plt.ylabel('Réel')
plt.savefig('confusion_matrix.png')
plt.close()

# Courbes de perte et de précision
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Courbe de perte
ax1.plot(train_losses, label='Train Loss')
ax1.plot(val_losses, label='Validation Loss')
ax1.set_title('Courbe de perte')
ax1.set_xlabel('Époque')
ax1.set_ylabel('Perte')
ax1.legend()

# Courbe de précision
ax2.plot(train_accuracies, label='Train Accuracy')
ax2.plot(val_accuracies, label='Validation Accuracy')
ax2.set_title('Courbe de précision')
ax2.set_xlabel('Époque')
ax2.set_ylabel('Précision')
ax2.legend()

plt.tight_layout()
plt.savefig('loss_accuracy_plot.png')
plt.close()


Début de l'entraînement...
Epoch 0, Train Loss: 3.2389, Val Loss: 2.9064, Train Acc: 0.2066, Val Acc: 0.1994
Epoch 10, Train Loss: 1.1220, Val Loss: 1.1661, Train Acc: 0.7056, Val Acc: 0.6654
Epoch 20, Train Loss: 0.7813, Val Loss: 0.8600, Train Acc: 0.7968, Val Acc: 0.7499
Epoch 30, Train Loss: 0.6034, Val Loss: 0.6965, Train Acc: 0.8417, Val Acc: 0.7921
Epoch 40, Train Loss: 0.4862, Val Loss: 0.6009, Train Acc: 0.8762, Val Acc: 0.8112
Epoch 50, Train Loss: 0.4046, Val Loss: 0.5238, Train Acc: 0.8998, Val Acc: 0.8377
Epoch 60, Train Loss: 0.3443, Val Loss: 0.4718, Train Acc: 0.9204, Val Acc: 0.8536
Epoch 70, Train Loss: 0.2963, Val Loss: 0.4343, Train Acc: 0.9315, Val Acc: 0.8662
Epoch 80, Train Loss: 0.2581, Val Loss: 0.4065, Train Acc: 0.9431, Val Acc: 0.8705
Epoch 90, Train Loss: 0.2267, Val Loss: 0.3868, Train Acc: 0.9514, Val Acc: 0.8792
Epoch 100, Train Loss: 0.2005, Val Loss: 0.3748, Train Acc: 0.9596, Val Acc: 0.8819
Epoch 110, Train Loss: 0.1780, Val Loss: 0.3573, Train Acc: