# Preprocessing:

In [1]:
import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import KFold
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam


In [2]:
root_dir = "./10000/"

X = []
y = []

In [3]:


for label in range(10):
    folder_path = os.path.join(root_dir, str(label))
    
    for filename in os.listdir(folder_path):
        if filename.endswith(".jpg") or filename.endswith(".jpeg"):
            img_path = os.path.join(folder_path, filename)

          
            img = Image.open(img_path)
            img = img.convert("L")
            img = img.resize((28, 28))
            img_array = np.array(img)
            img_flat = img_array.flatten()

            X.append(img_flat)
            y.append(label)

X = np.array(X)
y = np.array(y)

print("X shape:", X.shape)
print("y shape:", y.shape)

X shape: (10000, 784)
y shape: (10000,)


In [4]:
X = X / 255.0

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

print("Train size:", X_train.shape)
print("Test size:", X_test.shape)

Train size: (8000, 784)
Test size: (2000, 784)


# Deep learning model 1 :

In [10]:
model = models.Sequential([
    layers.Input(shape=(784,)),               
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')    
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy', 
    metrics=['accuracy']
)

history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.1
)


Epoch 1/20
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.2671 - loss: 2.0709 - val_accuracy: 0.4087 - val_loss: 1.7538
Epoch 2/20
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5119 - loss: 1.5154 - val_accuracy: 0.5863 - val_loss: 1.3162
Epoch 3/20
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6144 - loss: 1.2214 - val_accuracy: 0.5775 - val_loss: 1.2920
Epoch 4/20
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6600 - loss: 1.0802 - val_accuracy: 0.6175 - val_loss: 1.1344
Epoch 5/20
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6944 - loss: 0.9762 - val_accuracy: 0.6812 - val_loss: 1.0067
Epoch 6/20
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7046 - loss: 0.9464 - val_accuracy: 0.6850 - val_loss: 0.9928
Epoch 7/20
[1m225/225[0m 

In [14]:
y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)

print("Accuracy:", accuracy_score(y_test, y_pred_labels))
print("Precision:", precision_score(y_test, y_pred_labels, average='macro'))
print("Recall:", recall_score(y_test, y_pred_labels, average='macro'))
print("F1-score:", f1_score(y_test, y_pred_labels, average='macro'))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_labels))


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
Accuracy: 0.7655
Precision: 0.7737659604809325
Recall: 0.7655000000000001
F1-score: 0.7644220906966155
Confusion Matrix:
[[171   7   2   1   5   1   3   1   1   8]
 [  6 169   4   5   1   0   1  12   2   0]
 [  6   4 156   9   4   0   0   8   1  12]
 [  6   4  10 149   7   1   0   7   8   8]
 [  5   5   5   0 153   2   6  10   2  12]
 [ 10   6  10   3   4 132  12   8   8   7]
 [  1   9   1   0   2  10 175   1   1   0]
 [  1   9   6   2   7   0   0 153   2  20]
 [  7   7   9   6  13   5   5  24 114  10]
 [  5   2   2   8  10   0   0  13   1 159]]


# Deep learning model 2 :

In [59]:
model = models.Sequential([
    layers.Input(shape=(784,)),               
    layers.Dense(512, activation='relu'),

    layers.Dropout(0.2),  
    layers.Dense(512, activation='relu'),
    layers.Dropout(0.2),  
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.2),       
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.1), 
    layers.Dense(64, activation='relu'),

    layers.Dense(10, activation='softmax')    
])

optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy', 
    metrics=['accuracy']
)

history = model.fit(
    X_train, y_train,
    epochs=80,
    batch_size=16,
    validation_split=0.1
)


Epoch 1/80
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 10ms/step - accuracy: 0.1417 - loss: 2.2774 - val_accuracy: 0.2750 - val_loss: 2.1489
Epoch 2/80
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - accuracy: 0.2981 - loss: 1.9731 - val_accuracy: 0.4500 - val_loss: 1.6320
Epoch 3/80
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.4497 - loss: 1.5829 - val_accuracy: 0.5200 - val_loss: 1.3641
Epoch 4/80
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.5515 - loss: 1.3312 - val_accuracy: 0.5888 - val_loss: 1.2769
Epoch 5/80
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.6175 - loss: 1.1739 - val_accuracy: 0.6675 - val_loss: 1.0178
Epoch 6/80
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 10ms/step - accuracy: 0.6467 - loss: 1.0807 - val_accuracy: 0.6913 - val_loss: 0.9544
Epoch 7/80
[1m450/450

In [60]:
y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)

print("Accuracy:", accuracy_score(y_test, y_pred_labels))
print("Precision:", precision_score(y_test, y_pred_labels, average='macro'))
print("Recall:", recall_score(y_test, y_pred_labels, average='macro'))
print("F1-score:", f1_score(y_test, y_pred_labels, average='macro'))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_labels))


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Accuracy: 0.885
Precision: 0.8857699547877813
Recall: 0.885
F1-score: 0.8847912786700688
Confusion Matrix:
[[190   1   0   0   0   2   4   0   1   2]
 [  0 182   2   1   2   0   3   9   1   0]
 [  0   1 171   9   3   3   1   3   4   5]
 [  1   2   4 177   2   5   0   3   3   3]
 [  0   7   2   0 170   0   4   3   8   6]
 [  1   0   2   1   1 183   6   1   5   0]
 [  2   0   0   0   1  11 184   0   2   0]
 [  0   7   2   0   2   1   1 179   3   5]
 [  6   4   0   2   0   4   6   4 172   2]
 [  3   4   2   9   5   2   0   9   4 162]]


# Deep learning model 3 :

In [6]:
model = models.Sequential([
    layers.Input(shape=(784,)),  
    layers.Dense(256, activation='relu'),             
    layers.Dense(512, activation='relu'),
    layers.Dense(512, activation='relu'),
    layers.Dense(1024, activation='relu'),
    layers.Dense(512, activation='relu'),
    layers.Dense(10, activation='softmax')    
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy', 
    metrics=['accuracy']
)

history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.1
)

Epoch 1/20
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step - accuracy: 0.2457 - loss: 2.0108 - val_accuracy: 0.3913 - val_loss: 1.6457
Epoch 2/20
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.4654 - loss: 1.4974 - val_accuracy: 0.5238 - val_loss: 1.3860
Epoch 3/20
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5418 - loss: 1.3331 - val_accuracy: 0.5225 - val_loss: 1.3990
Epoch 4/20
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.5994 - loss: 1.1815 - val_accuracy: 0.6237 - val_loss: 1.1222
Epoch 5/20
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6288 - loss: 1.0991 - val_accuracy: 0.6500 - val_loss: 1.0909
Epoch 6/20
[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.6587 - loss: 1.0243 - val_accuracy: 0.6438 - val_loss: 1.0621
Epoch 7/20
[1m225/225

In [8]:
y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)

print("Accuracy:", accuracy_score(y_test, y_pred_labels))
print("Precision:", precision_score(y_test, y_pred_labels, average='macro'))
print("Recall:", recall_score(y_test, y_pred_labels, average='macro'))
print("F1-score:", f1_score(y_test, y_pred_labels, average='macro'))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_labels))


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
Accuracy: 0.761
Precision: 0.7864132492492276
Recall: 0.761
F1-score: 0.7591463706533685
Confusion Matrix:
[[152  12   1   1   8   8  10   1   4   3]
 [  0 191   1   0   4   0   0   4   0   0]
 [  1  14 150   2   6   1   0  21   4   1]
 [  3   6  19 125   3   2   0  15  25   2]
 [  0   8   2   0 173   0   5   3   8   1]
 [  1   6   3   1   3 164   7   2  13   0]
 [  2  15   0   0   1  16 162   0   4   0]
 [  0  23   2   1   9   1   0 160   2   2]
 [  3  10   1   1  16   3   4  14 148   0]
 [  7   6   3  12  25   2   0  43   5  97]]


# Model 2 tunning:

## with defualt hyper parameters for all 3 models:
training : accuracy: 0.8222
testing : Accuracy: 0.8155

## with learning_rate=0.0007 and 20 epoch and 32 batch size:
training : accuracy: 0.8349
testing : Accuracy: 0.794
شويه overfit

## with learning_rate=0.0001 and 20 epoch and 32 batch size:
training : accuracy: 0.8368
testing : Accuracy: 0.817

## with learning_rate=0.0001 and 70 epoch and 32 batch size:
training : accuracy: 0.9653
testing : Accuracy: 0.8515
overfit كبير

## with learning_rate=0.0001 and 70 epoch and 32 batch size and 3 dropout layers:
training : accuracy: 0.9228
testing : Accuracy: 0.872

## with learning_rate=0.0001 and 80 epoch and 32 batch size and 4 dropout layers:
training : accuracy: 0.9146
testing : Accuracy: 0.872

## with learning_rate=0.0001 and 80 epoch and 16 batch size and 4 dropout layers:
training : accuracy: 0.9139
testing : Accuracy: 0.885

# k_fold for Deep Learning model :

In [5]:
def create_model():
    model = models.Sequential([
        layers.Input(shape=(784,)),               
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.2),  

        layers.Dense(512, activation='relu'),
        layers.Dropout(0.2),  

        layers.Dense(256, activation='relu'),
        layers.Dropout(0.2),       

        layers.Dense(128, activation='relu'),
        layers.Dropout(0.1), 

        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax')    
    ])

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(
        optimizer=optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )
    return model


In [6]:
kf_nn = KFold(n_splits=5, shuffle=True, random_state=42)

cv_scores_nn = []

for train_idx, test_idx in kf_nn.split(X):
    X_train_fold, X_test_fold = X[train_idx], X[test_idx]
    y_train_fold, y_test_fold = y[train_idx], y[test_idx]

    model = create_model()

    model.fit(
        X_train_fold, y_train_fold,
        epochs=80,
        batch_size=16
    )

    loss, fold_acc = model.evaluate(
        X_test_fold, y_test_fold
    )

    print(f"Fold accuracy: {fold_acc:.4f}")
    cv_scores_nn.append(fold_acc)

print("Mean :", np.mean(cv_scores_nn))
print("Standard deviation :", np.std(cv_scores_nn))


Epoch 1/80
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - accuracy: 0.1579 - loss: 2.2549
Epoch 2/80
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.3481 - loss: 1.8469
Epoch 3/80
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 6ms/step - accuracy: 0.5054 - loss: 1.4619
Epoch 4/80
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.5851 - loss: 1.2623
Epoch 5/80
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.6295 - loss: 1.1318
Epoch 6/80
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.6644 - loss: 1.0405
Epoch 7/80
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.6826 - loss: 0.9851
Epoch 8/80
[1m500/500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.7005 - loss: 0.9325
Epoch 9/80
[1m500/500[0m [32m━━━━━━━━

In [7]:
print(cv_scores_nn)

[0.8615000247955322, 0.8600000143051147, 0.8504999876022339, 0.8834999799728394, 0.8740000128746033]
