## Importing libraries

In [1]:
import numpy as np
import keras.datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report



## Data preparation

In [2]:
# Data loading and spliting
(X_train, y_train), (X_test, y_test) = keras.datasets.mnist.load_data()
(X_train, y_train), (X_test, y_test)

((array([[[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         ...,
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]],
  
         [[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0

In [3]:
# Normalize pixel values
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

In [4]:
# Convert labels to encoding
num_classes = 10
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

### Now you have preprocessed the data and split it into training, validation, and test sets, which can be used for training and evaluating your deep learning models for handwritten digit recognition.

In [5]:
# Split the data into training and testing sets
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)
print("Training set shape:", X_train.shape)
print("Validation set shape:", X_val.shape)
print("Test set shape:", X_test.shape)

Training set shape: (48000, 28, 28)
Validation set shape: (12000, 28, 28)
Test set shape: (10000, 28, 28)


In [6]:
(X_train, y_train), (X_test, y_test)

((array([[[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.]],
  
         [[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.]],
  
         [[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.]],
  
         ...,
  
         [[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [0., 0., 0., ..., 0., 0., 0.],
         

## A baseline model using traditional machine learning algorithms


In [7]:
# Flatten the images
X_train_flat = X_train.reshape(X_train.shape[0], -1)
X_val_flat = X_val.reshape(X_val.shape[0], -1)
X_test_flat = X_test.reshape(X_test.shape[0], -1)

# Initialize the logistic regression model
model = LogisticRegression(max_iter=1000)

# Train the model
model.fit(X_train_flat, np.argmax(y_train, axis=1))

# Make predictions on the validation set
val_predictions = model.predict(X_val_flat)

# Calculate accuracy on the validation set
val_accuracy = accuracy_score(np.argmax(y_val, axis=1), val_predictions)
print("Validation Accuracy:", val_accuracy)

# Make predictions on the test set
test_predictions = model.predict(X_test_flat)

# Calculate accuracy on the test set
test_accuracy = accuracy_score(np.argmax(y_test, axis=1), test_predictions)
print("Test Accuracy:", test_accuracy)

Validation Accuracy: 0.9221666666666667
Test Accuracy: 0.924


### Evaluate the baseline model's performance.


In [8]:
# Calculate accuracy on the test set
test_accuracy = accuracy_score(np.argmax(y_test, axis=1), test_predictions)
print("Test Accuracy:", test_accuracy)

# Calculate the confusion matrix
confusion_mat = confusion_matrix(np.argmax(y_test, axis=1), test_predictions)
print("Confusion Matrix:")
print(confusion_mat)

# Generate the classification report
class_report = classification_report(np.argmax(y_test, axis=1), test_predictions)
print("Classification Report:")
print(class_report)

Test Accuracy: 0.924
Confusion Matrix:
[[ 961    0    1    3    1    4    6    3    1    0]
 [   0 1113    2    2    0    1    4    1   12    0]
 [   7   11  921   18    9    4   13   11   34    4]
 [   3    1   17  921    1   23    3   10   22    9]
 [   1    4    6    3  919    0   10    6    7   26]
 [   8    2    3   38   11  771   16    7   31    5]
 [   9    3    9    2    6   17  910    2    0    0]
 [   0    8   24    9    5    1    0  948    1   32]
 [   8   10    7   24    8   28    9    9  856   15]
 [   8    8    1    9   24    7    1   22    9  920]]
Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.98      0.97       980
           1       0.96      0.98      0.97      1135
           2       0.93      0.89      0.91      1032
           3       0.90      0.91      0.90      1010
           4       0.93      0.94      0.93       982
           5       0.90      0.86      0.88       892
           6       0.94     

##  Ensemble of Machine Learning Algorithms

In [9]:
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.svm import SVC
# from sklearn.ensemble import VotingClassifier
# from sklearn.tree import DecisionTreeClassifier

# # Shrinking traning data set
# X_train_small, _, y_train_small, _ = train_test_split(X_train_flat, y_train, test_size=0.1, random_state=42)

# # Initialize individual models
# decision_tree = DecisionTreeClassifier()
# random_forest = RandomForestClassifier(n_estimators=100)
# svm_model = SVC(kernel='linear', probability=True)

# # Initialize the voting classifier with the individual models
# voting_classifier = VotingClassifier(estimators=[('decision_tree', decision_tree),
#                                                  ('random_forest', random_forest),
#                                                  ('svm', svm_model)],voting='soft', n_jobs=-1) 

# # Fit the classifier using the smaller training set
# voting_classifier.fit(X_train_small, np.argmax(y_train_small, axis=1))

# # Predict te test data
# test_predictions_ensemble = voting_classifier.predict(X_test_flat)

In [10]:
# class_report_ensemble = classification_report(np.argmax(y_test, axis=1), test_predictions_ensemble)
# print("Classification Report (Ensemble): \n", class_report_ensemble)

<img src='class_report_ensemble.jpg' alt='classifier report img'>

## Neural Network Model Design
• Design and implement multiple neural network architectures for handwritten digit recognition.

In [11]:
from tensorflow import keras
from tensorflow.keras import layers

### Multi-Layer Perceptron (MLP)

In [None]:
#Creating the model MLP 
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation="relu"))
model.add(keras.layers.Dense(100, activation="relu"))
model.add(keras.layers.Dense(10, activation="softmax"))


### Deep Convolutional Neural Network (CNN)

In [None]:
##Creating the model deep CNN 
model_deep_cnn = keras.Sequential([
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=(28, 28, 1)),
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(10, activation="softmax")
])

In [14]:
# Compile the models
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model_deep_cnn.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [15]:
# Train the models
model.fit(X_train, y_train, batch_size=128, epochs=10, validation_data=(X_val, y_val))

Epoch 1/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 7ms/step - accuracy: 0.8462 - loss: 0.5461 - val_accuracy: 0.9557 - val_loss: 0.1495
Epoch 2/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.9641 - loss: 0.1256 - val_accuracy: 0.9707 - val_loss: 0.1005
Epoch 3/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9785 - loss: 0.0744 - val_accuracy: 0.9714 - val_loss: 0.0905
Epoch 4/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9843 - loss: 0.0542 - val_accuracy: 0.9739 - val_loss: 0.0815
Epoch 5/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9890 - loss: 0.0381 - val_accuracy: 0.9762 - val_loss: 0.0771
Epoch 6/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9930 - loss: 0.0246 - val_accuracy: 0.9753 - val_loss: 0.0825
Epoch 7/10
[1m375/375[0m 

<keras.src.callbacks.history.History at 0x1da267ff290>

In [16]:
model_deep_cnn.fit(X_train, y_train, batch_size=128, epochs=10, validation_data=(X_val, y_val))

Epoch 1/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 79ms/step - accuracy: 0.8259 - loss: 0.5446 - val_accuracy: 0.9807 - val_loss: 0.0625
Epoch 2/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 68ms/step - accuracy: 0.9707 - loss: 0.1052 - val_accuracy: 0.9844 - val_loss: 0.0508
Epoch 3/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 69ms/step - accuracy: 0.9763 - loss: 0.0779 - val_accuracy: 0.9882 - val_loss: 0.0406
Epoch 4/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 72ms/step - accuracy: 0.9829 - loss: 0.0558 - val_accuracy: 0.9886 - val_loss: 0.0385
Epoch 5/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 171ms/step - accuracy: 0.9851 - loss: 0.0495 - val_accuracy: 0.9898 - val_loss: 0.0361
Epoch 6/10
[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 86ms/step - accuracy: 0.9870 - loss: 0.0411 - val_accuracy: 0.9896 - val_loss: 0.0380
Epoch 7/10
[1m

<keras.src.callbacks.history.History at 0x1da265a3a40>

### regularization techniques.

In [17]:
from tensorflow.keras.regularizers import l2

# Creating the model MLP
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(300, activation="relu"))
model.add(keras.layers.Dropout(0.5))  # Dropout regularization with 50% dropout 
model.add(keras.layers.Dense(100, activation="relu"))
model.add(keras.layers.Dense(10, activation="softmax"))

## Hyperparameter Tuning and Overfitting Mitigation

In [18]:
# Creating the model deep CNN
model_deep_cnn = keras.Sequential([
    layers.Conv2D(32, kernel_size=(3, 3), activation="relu", input_shape=(28, 28, 1)),
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Dropout(0.25),  # Dropout regularization with 25% dropout 
    layers.Flatten(),
    layers.Dense(128, activation="relu", kernel_regularizer=keras.regularizers.l2(0.001)),  # L2 regularization with regularization of 0.001
    layers.Dropout(0.25),  # Dropout regularization with 50% dropout 
    layers.Dense(10, activation="softmax")
])

In [19]:
# Compile the models
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_deep_cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the models
model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 6ms/step - accuracy: 0.8352 - loss: 0.5376 - val_accuracy: 0.9589 - val_loss: 0.1342
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9433 - loss: 0.1857 - val_accuracy: 0.9675 - val_loss: 0.1102
Epoch 3/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9550 - loss: 0.1439 - val_accuracy: 0.9686 - val_loss: 0.1020
Epoch 4/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9602 - loss: 0.1226 - val_accuracy: 0.9738 - val_loss: 0.0853
Epoch 5/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - accuracy: 0.9639 - loss: 0.1140 - val_accuracy: 0.9748 - val_loss: 0.0852
Epoch 6/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 10ms/step - accuracy: 0.9673 - loss: 0.1053 - val_accuracy: 0.9762 - val_loss: 0.0790
Epoch 7/10
[

<keras.src.callbacks.history.History at 0x1da392b7770>

In [20]:
model_deep_cnn.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

Epoch 1/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 32ms/step - accuracy: 0.8973 - loss: 0.4912 - val_accuracy: 0.9783 - val_loss: 0.1985
Epoch 2/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 27ms/step - accuracy: 0.9699 - loss: 0.2260 - val_accuracy: 0.9805 - val_loss: 0.1840
Epoch 3/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 29ms/step - accuracy: 0.9746 - loss: 0.2063 - val_accuracy: 0.9818 - val_loss: 0.1829
Epoch 4/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 28ms/step - accuracy: 0.9776 - loss: 0.1854 - val_accuracy: 0.9844 - val_loss: 0.1685
Epoch 5/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 28ms/step - accuracy: 0.9777 - loss: 0.1876 - val_accuracy: 0.9847 - val_loss: 0.1593
Epoch 6/10
[1m1500/1500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 28ms/step - accuracy: 0.9781 - loss: 0.1801 - val_accuracy: 0.9847 - val_loss: 0.1628
Epoc

<keras.src.callbacks.history.History at 0x1da2667edb0>

In [21]:
# Evaluate the models 
mlp_test_loss, mlp_test_accuracy = model.evaluate(X_test, y_test)
deep_cnn_test_loss, deep_cnn_test_accuracy = model_deep_cnn.evaluate(X_test, y_test)

print("MLP Test Loss:", mlp_test_loss)
print("MLP Test Accuracy:", mlp_test_accuracy)
print("Deep CNN Test Loss:", deep_cnn_test_loss)
print("Deep CNN Test Accuracy:", deep_cnn_test_accuracy)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9754 - loss: 0.0834
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.9860 - loss: 0.1488
MLP Test Loss: 0.06859961897134781
MLP Test Accuracy: 0.979200005531311
Deep CNN Test Loss: 0.1402757614850998
Deep CNN Test Accuracy: 0.9890000224113464


• Apply Principal Component Analysis (PCA) to transform the original data into a lower-dimensional space.

In [57]:
from sklearn.decomposition import PCA

# Reshape X_train to have two dimensions
X_train_2d = np.reshape(X_train, (X_train.shape[0], -1))

# Apply PCA transformation
pca = PCA(n_components=50)  # Specify the desired number of components
X_train_pca = pca.fit_transform(X_train_2d)