# Perceptron Experiments

Ce notebook permet de lancer des expériences systématiques sur un MLP (perceptron multi‑couches)
pour comparer différentes architectures, fonctions d’activation et taux d’apprentissage.

Il enregistre chaque run dans un dossier `models/run_.../` avec :
- `metrics.json` (métriques et hyperparamètres)
- `confusion_matrix.npy` (matrice de confusion sur le test)

Le notebook `perceptron_report.ipynb` exploitera ces fichiers pour faire un résumé global.

##  Configuration & imports

In [None]:
from pathlib import Path
import os, json, time
from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# === Données ===
MNIST_TRAIN_CSV = Path("data/mnist_train.csv")
MNIST_TEST_CSV  = Path("data/mnist_test.csv")

IMG_DIR_TRAIN = Path("data/training")
IMG_DIR_TEST  = Path("data/testing")

#CURATED_DIR = Path("C:/Users/User/PycharmProjects/pythonProject6/.venv/curated_data")

BATCH_SIZE = 32
EPOCHS = 3
ACTIVATION = ["relu","sigmoid"] 

LR_LIST   = [0.005, 0.01, 0.03]
ARCH_LIST = [
    [256, 128],
    [256, 128, 64],
    [256, 128, 64, 32],
]

MODELS_DIR = Path("models")
SAVE_WEIGHTS = False

MODELS_DIR.mkdir(parents=True, exist_ok=True)
print("MODELS_DIR =", MODELS_DIR.resolve())


MODELS_DIR = C:\Users\User\mlp\models


##  Chargement des données

In [None]:

from PIL import Image

def load_from_mnist_csv(train_csv: Path, test_csv: Path):
    if not train_csv.exists() or not test_csv.exists():
        return None
    print(" Chargement depuis MNIST CSV")
    train = pd.read_csv(train_csv)
    test  = pd.read_csv(test_csv)
    X_train = (train.iloc[:, 1:].values.astype(np.float32) / 255.0)
    y_train = train.iloc[:, 0].values.astype(int)
    X_test  = (test.iloc[:, 1:].values.astype(np.float32)  / 255.0)
    y_test  = test.iloc[:, 0].values.astype(int)
    return (X_train, y_train), (X_test, y_test)


def load_from_image_dirs(train_dir: Path, test_dir: Path, size=(28, 28)):
    if not train_dir.exists() or not test_dir.exists():
        return None

    def scan_split(root: Path):
        X, y = [], []
        for label in sorted([d for d in os.listdir(root) if (root / d).is_dir()]):
            dpath = root / label
            for fname in os.listdir(dpath):
                f = dpath / fname
                if not f.is_file():
                    continue
                try:
                    with Image.open(f) as img:
                        img = img.convert("L")
                        img = img.resize(size)
                        arr = np.array(img, dtype=np.float32) / 255.0
                        X.append(arr.reshape(-1))
                        y.append(int(label))
                except Exception as e:
                    print("Image ignorée:", f, e)
        if not X:
            return None, None
        return np.stack(X), np.array(y, dtype=int)

    X_train, y_train = scan_split(train_dir)
    X_test,  y_test  = scan_split(test_dir)
    if X_train is None or X_test is None:
        return None
    print(" Chargement depuis dossiers images training/testing")
    return (X_train, y_train), (X_test, y_test)


def load_from_curated_data(curated_dir: Path, size=(28, 28)):
    import cv2
    if not curated_dir.exists():
        return None

    img_paths = []
    for root, _, files in os.walk(curated_dir):
        for f in files:
            if f.lower().endswith((".png", ".jpg", ".jpeg")):
                img_paths.append(Path(root) / f)
    if not img_paths:
        return None

    X, y_raw = [], []
    for p in img_paths:
        parent = p.parent.name
        if parent.isdigit():
            label = int(parent)
        else:
            label = ord(p.name[0])
        img = cv2.imread(str(p), cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue
        img = cv2.resize(img, size)
        X.append(img.reshape(-1).astype(np.float32) / 255.0)
        y_raw.append(label)

    X = np.stack(X)
    y_raw = np.array(y_raw, dtype=int)

    uniq = np.unique(y_raw)
    remap = {lab: i for i, lab in enumerate(sorted(uniq))}
    y = np.array([remap[v] for v in y_raw], dtype=int)

    remap_path = curated_dir / "label_remap.json"
    with open(remap_path, "w", encoding="utf-8") as f:
        json.dump({"raw_labels": list(map(int, uniq)),
                   "raw_to_idx": {int(k): int(v) for k, v in remap.items()}}, f, indent=2)
    print("Remap des labels sauvegardé dans", remap_path)

    rng = np.random.default_rng(123)
    idx = rng.permutation(len(X))
    split = int(0.8 * len(X))
    tr, te = idx[:split], idx[split:]
    X_train, y_train = X[tr], y[tr]
    X_test,  y_test  = X[te], y[te]

    print(f" curated_data/ : {len(X_train)} train / {len(X_test)} test, classes={len(uniq)}")
    return (X_train, y_train), (X_test, y_test)


def load_data():
    data = load_from_mnist_csv(MNIST_TRAIN_CSV, MNIST_TEST_CSV)
    if data is not None:
        return data

    data = load_from_image_dirs(IMG_DIR_TRAIN, IMG_DIR_TEST)
    if data is not None:
        return data

    data = load_from_curated_data(CURATED_DIR, size=(28, 28))
    if data is not None:
        return data

    raise FileNotFoundError("Aucune source de données trouvée).")


train_split, test_split = load_data()
X_train, y_train = train_split
X_test, y_test   = test_split

INPUT_SIZE = X_train.shape[1]
NB_CLASSES = int(max(y_train.max(), y_test.max()) + 1)

print("Shapes:", X_train.shape, X_test.shape)
print("NB_CLASSES =", NB_CLASSES)


→ Chargement depuis MNIST CSV
Shapes: (60000, 784) (10000, 784)
NB_CLASSES = 10


##  Modèle MLP

In [9]:

rng = np.random.default_rng(42)

class MLP:
    def __init__(self, input_size, nb_classes, hidden_layers,
                 lr=0.05, epochs=3, batch_size=32, activation="relu"):
        self.input_size = int(input_size)
        self.nb_classes = int(nb_classes)
        self.hidden = list(hidden_layers)
        self.lr = float(lr)
        self.epochs = int(epochs)
        self.batch_size = int(batch_size)
        assert activation in ("relu", "sigmoid")
        self.activation_name = activation

        self.W = []
        dims = [self.input_size] + self.hidden + [self.nb_classes]
        for i in range(len(dims) - 1):
            fan_in = dims[i]
            if activation == "relu" and i < len(dims) - 2:
                std = np.sqrt(2.0 / fan_in)
            else:
                std = np.sqrt(1.0 / fan_in)
            self.W.append(rng.normal(0.0, std, size=(dims[i+1], fan_in + 1)))

    @staticmethod
    def _add_bias(x):
        if x.ndim == 1:
            x = x.reshape(-1, 1)
        return np.vstack([x, np.ones((1, x.shape[1]), dtype=x.dtype)])

    @staticmethod
    def _sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def _relu(z):
        return np.maximum(0.0, z)

    @staticmethod
    def _softmax(z):
        z = z - np.max(z, axis=0, keepdims=True)
        e = np.exp(z)
        return e / np.sum(e, axis=0, keepdims=True)

    def forward(self, X):
        A = [self._add_bias(X)]
        Zs = []
        for i in range(len(self.hidden)):
            Z = self.W[i] @ A[-1]
            Zs.append(Z)
            if self.activation_name == "relu":
                H = self._relu(Z)
            else:
                H = self._sigmoid(Z)
            A.append(self._add_bias(H))
        Z = self.W[-1] @ A[-1]
        Zs.append(Z)
        Yhat = self._softmax(Z)
        A.append(Yhat)
        return Zs, A

    def backward(self, Y_true, Zs, A):
        grads = [None] * len(self.W)
        delta = A[-1] - Y_true
        grads[-1] = (delta @ A[-2].T) / Y_true.shape[1]
        for i in range(len(self.hidden) - 1, -1, -1):
            Wnext = self.W[i + 1][:, :-1]
            if self.activation_name == "relu":
                prime = (Zs[i] > 0).astype(Zs[i].dtype)
            else:
                H = A[i + 1][:-1, :]
                prime = H * (1.0 - H)
            delta = (Wnext.T @ delta) * prime
            grads[i] = (delta @ A[i].T) / Y_true.shape[1]
        return grads

    def fit(self, X, y):
        X = X.astype(np.float32)
        y = y.astype(int)
        n = X.shape[0]
        for epoch in range(self.epochs):
            idx = np.random.permutation(n)
            Xs = X[idx]
            ys = y[idx]
            for start in range(0, n, self.batch_size):
                end = min(start + self.batch_size, n)
                xb = Xs[start:end].T
                yb = ys[start:end]
                Y = np.zeros((self.nb_classes, len(yb)), dtype=np.float32)
                Y[yb, np.arange(len(yb))] = 1.0
                Zs, A = self.forward(xb)
                grads = self.backward(Y, Zs, A)
                for i in range(len(self.W)):
                    self.W[i] -= self.lr * grads[i]

    def predict(self, X):
        X = X.astype(np.float32)
        _, A = self.forward(X.T)
        return np.argmax(A[-1], axis=0)


##  Lancer les runs & journaliser

In [None]:

def evaluate_and_log(model, X_train, y_train, X_test, y_test, run_dir: Path, extra: dict | None = None):
    run_dir.mkdir(parents=True, exist_ok=True)

    yhat_train = model.predict(X_train)
    yhat_test  = model.predict(X_test)

    acc_train = float((yhat_train == y_train).mean())
    acc_test  = float((yhat_test  == y_test).mean())

    cm = confusion_matrix(y_test, yhat_test, labels=list(range(model.nb_classes)))
    np.save(run_dir / "confusion_matrix.npy", cm)

    metrics = {
        "accuracy": acc_test,
        "accuracy_train": acc_train,
        "nb_params": int(sum(W.size for W in model.W)),
        "hidden_layers": model.hidden,
        "activation": model.activation_name,
        "batch_size": model.batch_size,
        "epochs": model.epochs,
        "learning_rate": model.lr,
        "created_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    if extra:
        metrics.update(extra)

    with open(run_dir / "metrics.json", "w", encoding="utf-8") as f:
        json.dump(metrics, f, ensure_ascii=False, indent=2)

    if SAVE_WEIGHTS:
        for i, W in enumerate(model.W):
            np.save(run_dir / f"W{i}.npy", W)

    print("Run sauvegardé dans", run_dir)
    return metrics


all_metrics = []
for activation in ["relu", "sigmoid"]:
    for lr in LR_LIST:
        for arch in ARCH_LIST:
            tag = f"lr{lr}_layers{'-'.join(map(str, arch))}_{activation}"
            run_name = "run_" + datetime.now().strftime("%Y%m%d_%H%M%S") + "_" + tag
            run_dir = MODELS_DIR / run_name
            print("\n=== Run:", run_name, "===")
            
            model = MLP(
                INPUT_SIZE,
                NB_CLASSES,
                arch,
                lr=float(lr),
                epochs=EPOCHS,
                batch_size=BATCH_SIZE,
                activation=activation,  
            )

            t0 = time.time()
            model.fit(X_train, y_train)
            duration = time.time() - t0

            m = evaluate_and_log(
                model, X_train, y_train, X_test, y_test, run_dir,
                extra={"train_seconds": duration, "run_name": run_name}
            )
            all_metrics.append(m)

            
df_runs = pd.DataFrame(all_metrics)
df_runs.sort_values("accuracy", ascending=False, inplace=True)
df_runs.reset_index(drop=True, inplace=True)
df_runs



=== Run: run_20251103_232347_lr0.005_layers256-128_relu ===
Run sauvegardé dans models\run_20251103_232347_lr0.005_layers256-128_relu

=== Run: run_20251103_232409_lr0.005_layers256-128-64_relu ===
Run sauvegardé dans models\run_20251103_232409_lr0.005_layers256-128-64_relu

=== Run: run_20251103_232432_lr0.005_layers256-128-64-32_relu ===
Run sauvegardé dans models\run_20251103_232432_lr0.005_layers256-128-64-32_relu

=== Run: run_20251103_232455_lr0.01_layers256-128_relu ===
Run sauvegardé dans models\run_20251103_232455_lr0.01_layers256-128_relu

=== Run: run_20251103_232516_lr0.01_layers256-128-64_relu ===
Run sauvegardé dans models\run_20251103_232516_lr0.01_layers256-128-64_relu

=== Run: run_20251103_232539_lr0.01_layers256-128-64-32_relu ===
Run sauvegardé dans models\run_20251103_232539_lr0.01_layers256-128-64-32_relu

=== Run: run_20251103_232602_lr0.03_layers256-128_relu ===
Run sauvegardé dans models\run_20251103_232602_lr0.03_layers256-128_relu

=== Run: run_20251103_2326

Unnamed: 0,accuracy,accuracy_train,nb_params,hidden_layers,activation,batch_size,epochs,learning_rate,created_at,train_seconds,run_name
0,0.9697,0.976583,242762,"[256, 128, 64]",relu,32,3,0.03,2025-11-03 23:26:47,21.772865,run_20251103_232624_lr0.03_layers256-128-64_relu
1,0.9673,0.97835,244522,"[256, 128, 64, 32]",relu,32,3,0.03,2025-11-03 23:27:12,23.83941,run_20251103_232647_lr0.03_layers256-128-64-32...
2,0.9643,0.969733,235146,"[256, 128]",relu,32,3,0.03,2025-11-03 23:26:24,20.741346,run_20251103_232602_lr0.03_layers256-128_relu
3,0.9528,0.957117,244522,"[256, 128, 64, 32]",relu,32,3,0.01,2025-11-03 23:26:02,22.17259,run_20251103_232539_lr0.01_layers256-128-64-32...
4,0.9508,0.954067,242762,"[256, 128, 64]",relu,32,3,0.01,2025-11-03 23:25:39,21.944587,run_20251103_232516_lr0.01_layers256-128-64_relu
5,0.9424,0.943417,235146,"[256, 128]",relu,32,3,0.01,2025-11-03 23:25:16,20.42495,run_20251103_232455_lr0.01_layers256-128_relu
6,0.9405,0.943267,244522,"[256, 128, 64, 32]",relu,32,3,0.005,2025-11-03 23:24:55,22.31606,run_20251103_232432_lr0.005_layers256-128-64-3...
7,0.9311,0.9305,242762,"[256, 128, 64]",relu,32,3,0.005,2025-11-03 23:24:32,21.519894,run_20251103_232409_lr0.005_layers256-128-64_relu
8,0.9211,0.921433,235146,"[256, 128]",relu,32,3,0.005,2025-11-03 23:24:09,21.721941,run_20251103_232347_lr0.005_layers256-128_relu
9,0.89,0.886917,235146,"[256, 128]",sigmoid,32,3,0.03,2025-11-03 23:30:07,20.459629,run_20251103_232946_lr0.03_layers256-128_sigmoid


##  Visualisation rapide

In [None]:

if not df_runs.empty:

    plt.figure()
    plt.plot(df_runs["learning_rate"], df_runs["accuracy"], marker="o", linestyle="-")
    plt.xscale("log")
    plt.xlabel("Learning rate (log)")
    plt.ylabel("Accuracy test")
    plt.title("Accuracy vs Learning rate")
    plt.tight_layout()
    plt.show()
else:
    print("Aucun run à afficher.")


NameError: name 'df_runs' is not defined