In [1]:
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
import numpy as np

  if not hasattr(np, "object"):


In [2]:

from pathlib import Path

MNIST_TRAIN_CSV = Path("data/mnist_train.csv")
MNIST_TEST_CSV  = Path("data/mnist_test.csv")

IMG_DIR_TRAIN = Path("data/training")
IMG_DIR_TEST  = Path("data/testing")


In [3]:

from PIL import Image

def load_from_mnist_csv(train_csv: Path, test_csv: Path):
    if not train_csv.exists() or not test_csv.exists():
        return None
    print(" Chargement depuis MNIST CSV")
    train = pd.read_csv(train_csv)
    test  = pd.read_csv(test_csv)
    X_train = (train.iloc[:, 1:].values.astype(np.float32) / 255.0)
    y_train = train.iloc[:, 0].values.astype(int)
    X_test  = (test.iloc[:, 1:].values.astype(np.float32)  / 255.0)
    y_test  = test.iloc[:, 0].values.astype(int)
    return (X_train, y_train), (X_test, y_test)


def load_from_image_dirs(train_dir: Path, test_dir: Path, size=(28, 28)):
    if not train_dir.exists() or not test_dir.exists():
        return None

    def scan_split(root: Path):
        X, y = [], []
        for label in sorted([d for d in os.listdir(root) if (root / d).is_dir()]):
            dpath = root / label
            for fname in os.listdir(dpath):
                f = dpath / fname
                if not f.is_file():
                    continue
                try:
                    with Image.open(f) as img:
                        img = img.convert("L")
                        img = img.resize(size)
                        arr = np.array(img, dtype=np.float32) / 255.0
                        X.append(arr.reshape(-1))
                        y.append(int(label))
                except Exception as e:
                    print("Image ignorée:", f, e)
        if not X:
            return None, None
        return np.stack(X), np.array(y, dtype=int)

    X_train, y_train = scan_split(train_dir)
    X_test,  y_test  = scan_split(test_dir)
    if X_train is None or X_test is None:
        return None
    print(" Chargement depuis dossiers images training/testing")
    return (X_train, y_train), (X_test, y_test)


def load_from_curated_data(curated_dir: Path, size=(28, 28)):
    import cv2
    if not curated_dir.exists():
        return None

    img_paths = []
    for root, _, files in os.walk(curated_dir):
        for f in files:
            if f.lower().endswith((".png", ".jpg", ".jpeg")):
                img_paths.append(Path(root) / f)
    if not img_paths:
        return None

    X, y_raw = [], []
    for p in img_paths:
        parent = p.parent.name
        if parent.isdigit():
            label = int(parent)
        else:
            label = ord(p.name[0])
        img = cv2.imread(str(p), cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue
        img = cv2.resize(img, size)
        X.append(img.reshape(-1).astype(np.float32) / 255.0)
        y_raw.append(label)

    X = np.stack(X)
    y_raw = np.array(y_raw, dtype=int)

    uniq = np.unique(y_raw)
    remap = {lab: i for i, lab in enumerate(sorted(uniq))}
    y = np.array([remap[v] for v in y_raw], dtype=int)

    remap_path = curated_dir / "label_remap.json"
    with open(remap_path, "w", encoding="utf-8") as f:
        json.dump({"raw_labels": list(map(int, uniq)),
                   "raw_to_idx": {int(k): int(v) for k, v in remap.items()}}, f, indent=2)
    print("Remap des labels sauvegardé dans", remap_path)

    rng = np.random.default_rng(123)
    idx = rng.permutation(len(X))
    split = int(0.8 * len(X))
    tr, te = idx[:split], idx[split:]
    X_train, y_train = X[tr], y[tr]
    X_test,  y_test  = X[te], y[te]

    print(f" curated_data/ : {len(X_train)} train / {len(X_test)} test, classes={len(uniq)}")
    return (X_train, y_train), (X_test, y_test)


def load_data():
    data = load_from_mnist_csv(MNIST_TRAIN_CSV, MNIST_TEST_CSV)
    if data is not None:
        return data

    data = load_from_image_dirs(IMG_DIR_TRAIN, IMG_DIR_TEST)
    if data is not None:
        return data

    data = load_from_curated_data(CURATED_DIR, size=(28, 28))
    if data is not None:
        return data

    raise FileNotFoundError("Aucune source de données trouvée).")


train_split, test_split = load_data()
X_train, y_train = train_split
X_test, y_test   = test_split

INPUT_SIZE = X_train.shape[1]
NB_CLASSES = int(max(y_train.max(), y_test.max()) + 1)

input_shape = [list(X_train.shape)[-1]]
print("Shapes:", X_train.shape, X_test.shape)
print("NB_CLASSES =", NB_CLASSES)
print(input_shape)
print(X_train[0][1])

 Chargement depuis MNIST CSV
Shapes: (60000, 784) (10000, 784)
NB_CLASSES = 10
[784]
0.0


In [13]:
model = keras.Sequential([
    layers.Dense(units=256, activation='relu', input_shape=input_shape, kernel_initializer='he_normal'),
    layers.Dense(units=128, activation='relu', kernel_initializer='he_normal'),
    layers.Dense(units=64, activation='relu', kernel_initializer='he_normal'),
    layers.Dense(NB_CLASSES, activation='softmax', kernel_initializer='glorot_normal'),
])

# Compiler le modèle avec SGD (équivalent aux mises à jour W -= lr * grad)
model.compile(
    optimizer=keras.optimizers.SGD(learning_rate=0.030),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

test sans early stopping

In [14]:

model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test, y_test)
)
print("\n--- Évaluation sur le jeu de Test ---")
test_results = model.evaluate(X_test, y_test, batch_size=32)

print(f"Test Loss:     {test_results[0]:.4f}")
print(f"Test Accuracy: {test_results[1]*100:.2f}%")

Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5ms/step - accuracy: 0.9022 - loss: 0.3381 - val_accuracy: 0.9367 - val_loss: 0.2074
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9568 - loss: 0.1443 - val_accuracy: 0.9623 - val_loss: 0.1152
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9694 - loss: 0.1028 - val_accuracy: 0.9659 - val_loss: 0.1060
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9764 - loss: 0.0790 - val_accuracy: 0.9694 - val_loss: 0.0963
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9815 - loss: 0.0629 - val_accuracy: 0.9731 - val_loss: 0.0822
Epoch 6/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9844 - loss: 0.0523 - val_accuracy: 0.9754 - val_loss: 0.0746
Epoch 7/20
[1m

KeyboardInterrupt: 

Optimize the training to see the best accuracy we can get with keras

In [18]:
#Using early stopping to prevent overfitting
from tensorflow.keras import layers, callbacks
early_stopping = callbacks.EarlyStopping(
    min_delta=0.0001, # minimium amount of change to count as an improvement
    patience=5, # how many epochs to wait before stopping
    restore_best_weights=True,
)

In [19]:
#Using early stopping to prevent overfitting
from tensorflow.keras import layers, callbacks
early_stopping = callbacks.EarlyStopping(
    min_delta=0.0001, # minimium amount of change to count as an improvement
    patience=5, # how many epochs to wait before stopping
    restore_best_weights=True,
)
model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
)


Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9981 - loss: 0.0093 - val_accuracy: 0.9794 - val_loss: 0.0713
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9987 - loss: 0.0071 - val_accuracy: 0.9779 - val_loss: 0.0751
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9992 - loss: 0.0052 - val_accuracy: 0.9805 - val_loss: 0.0697
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9994 - loss: 0.0043 - val_accuracy: 0.9804 - val_loss: 0.0711
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9996 - loss: 0.0034 - val_accuracy: 0.9812 - val_loss: 0.0694
Epoch 6/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9998 - loss: 0.0024 - val_accuracy: 0.9802 - val_loss: 0.0724
Epoch 7/20
[1m1

<keras.src.callbacks.history.History at 0x22da7044ef0>

In [20]:
print("\n--- Évaluation sur le jeu de Test ---")
test_results = model.evaluate(X_test, y_test, batch_size=32)

print(f"Test Loss:     {test_results[0]:.4f}")
print(f"Test Accuracy: {test_results[1]*100:.2f}%")


--- Évaluation sur le jeu de Test ---
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9812 - loss: 0.0694
Test Loss:     0.0694
Test Accuracy: 98.12%


In [21]:
#Comparaison rapide pour LR de 0.05
model.compile(
    optimizer=keras.optimizers.SGD(learning_rate=0.050),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
)
print("\n--- Évaluation sur le jeu de Test ---")
test_results = model.evaluate(X_test, y_test, batch_size=32)

print(f"Test Loss:     {test_results[0]:.4f}")
print(f"Test Accuracy: {test_results[1]*100:.2f}%")

Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9926 - loss: 0.0236 - val_accuracy: 0.9756 - val_loss: 0.0899
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9903 - loss: 0.0301 - val_accuracy: 0.9774 - val_loss: 0.0798
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9929 - loss: 0.0215 - val_accuracy: 0.9785 - val_loss: 0.0779
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9958 - loss: 0.0129 - val_accuracy: 0.9791 - val_loss: 0.0864
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9984 - loss: 0.0062 - val_accuracy: 0.9796 - val_loss: 0.0782

--- Évaluation sur le jeu de Test ---
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9756 - loss: 0.0899
Test Loss:     0.0899
Test Accur

Changing optimizer and adding batchnormalisation and Adam opti

In [5]:
model_opti = keras.Sequential([
    # On sépare l'activation pour insérer la BN au milieu
    layers.Dense(units=256, kernel_initializer='he_normal', use_bias=False), 
    layers.BatchNormalization(),
    layers.Activation('relu'),
    #layers.Dropout(0.2), le droupout baisse la performance ici donc on le commente

    layers.Dense(units=128, kernel_initializer='he_normal', use_bias=False),
    layers.BatchNormalization(),
    layers.Activation('relu'),
    #layers.Dropout(0.2),

    layers.Dense(units=64, kernel_initializer='he_normal', use_bias=False),
    layers.BatchNormalization(),
    layers.Activation('relu'),

    # Pas de Batch juste avant le Softmax final
    layers.Dense(NB_CLASSES, activation='softmax', kernel_initializer='glorot_normal')
])

model_opti.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [6]:
model_opti.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
)

Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.9307 - loss: 0.2374 - val_accuracy: 0.9651 - val_loss: 0.1105
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step - accuracy: 0.9669 - loss: 0.1090 - val_accuracy: 0.9739 - val_loss: 0.0846
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9736 - loss: 0.0836 - val_accuracy: 0.9762 - val_loss: 0.0751
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9785 - loss: 0.0675 - val_accuracy: 0.9770 - val_loss: 0.0738
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9829 - loss: 0.0534 - val_accuracy: 0.9824 - val_loss: 0.0607
Epoch 6/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9847 - loss: 0.0480 - val_accuracy: 0.9827 - val_loss: 0.0551
Epoch 7/20
[1m

<keras.src.callbacks.history.History at 0x22da6342960>

In [7]:
print("\n--- Évaluation sur le jeu de Test ---")
test_results = model_opti.evaluate(X_test, y_test, batch_size=32)

print(f"Test Loss:     {test_results[0]:.4f}")
print(f"Test Accuracy: {test_results[1]*100:.2f}%")


--- Évaluation sur le jeu de Test ---
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9827 - loss: 0.0551
Test Loss:     0.0551
Test Accuracy: 98.27%


pareil mais sans adams mais avec normalisation

In [9]:
model_opti.compile(
    optimizer=keras.optimizers.SGD(learning_rate=0.030),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
model_opti.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stopping],
)
print("\n--- Évaluation sur le jeu de Test ---")
test_results = model_opti.evaluate(X_test, y_test, batch_size=32)

print(f"Test Loss:     {test_results[0]:.4f}")
print(f"Test Accuracy: {test_results[1]*100:.2f}%")

Epoch 1/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9919 - loss: 0.0257 - val_accuracy: 0.9853 - val_loss: 0.0473
Epoch 2/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9939 - loss: 0.0200 - val_accuracy: 0.9859 - val_loss: 0.0467
Epoch 3/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9947 - loss: 0.0180 - val_accuracy: 0.9861 - val_loss: 0.0464
Epoch 4/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9941 - loss: 0.0178 - val_accuracy: 0.9860 - val_loss: 0.0472
Epoch 5/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9956 - loss: 0.0153 - val_accuracy: 0.9858 - val_loss: 0.0470
Epoch 6/20
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 4ms/step - accuracy: 0.9960 - loss: 0.0136 - val_accuracy: 0.9859 - val_loss: 0.0463
Epoch 7/20
[1m1

Conclusion : 
En gardant la meme architecture de batch et epochs que le meilleur modele codé a la main avec numpy, la meilleur accuracy avec keras en utilisant la sgd plutot que admas avec le meme lr et la batch noramlisation on monte a environ 98.6 d'accuracy sur le test 