In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,models
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Input, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
from keras.callbacks import ReduceLROnPlateau


In [None]:
# Load the training and test sets from CSV.
# NOTE(review): the directory below is a machine-specific absolute path;
# adjust DATA_DIR when running this notebook on another machine.
DATA_DIR = '\\Users\\malak\\Downloads\\test'

df_train = pd.read_csv(DATA_DIR + '\\train.csv')
df_test = pd.read_csv(DATA_DIR + '\\test.csv')

In [None]:
# Count the missing (NaN) entries in every column of the training frame
df_train.isna().sum()

In [None]:
# Target labels as a NumPy array (taken from the 'label' column)
y_train = df_train['label'].to_numpy()

# Pixel features: everything except the 'label' column.
# Each row is reshaped into a 4D batch (num_rows, height, width, channel)
# and divided by 255.0 to scale the pixel values into the range [0, 1].
X_train = (
    df_train
    .drop(columns=['label'])
    .to_numpy()
    .reshape(-1, 28, 28, 1)
    / 255.0
)

# The test frame is converted as-is (no column is dropped), same reshape and scaling
X_test = df_test.to_numpy().reshape(-1, 28, 28, 1) / 255.0

In [None]:
# One-hot encode the integer labels: each label becomes a binary vector
# whose length equals the number of classes (10), with a 1 at the index
# of the class and 0 everywhere else.
y_train_encoded = to_categorical(y_train, 10)

In [None]:
#visualize 10 random images from the training dataset with their labels

#2 rows, 5 columns, 12x5 (width x height) figure
fig, axes = plt.subplots(2, 5, figsize=(12, 5))

#flatten the 2x5 grid so the subplots can be walked with a single loop
axes = axes.flatten()

#one random row index per subplot; the upper bound is taken from the data
#itself instead of a hard-coded 42000, so the cell neither raises an
#IndexError on a smaller dataset nor ignores the tail of a larger one
idx = np.random.randint(0, len(X_train), size=len(axes))

for ax, sample_idx in zip(axes, idx):
    #drop the channel axis so imshow receives a 28x28 grayscale image
    ax.imshow(X_train[sample_idx].reshape(28, 28), cmap='gray')
    #hide the axes ticks
    ax.axis('off')
    #display the label as the subplot title
    ax.set_title(str(int(y_train[sample_idx])), color='black', fontsize=25)

plt.show()

In [None]:
#Convolutional Neural Network

def _conv_relu(filters, kernel_size=3, **extra):
    """Return a Conv2D layer with 'same' zero-padding and ReLU activation.

    filters: number of filters (feature detectors) the layer applies
    kernel_size: side length of the square filter (3 -> 3x3)
    extra: any further Conv2D keyword arguments (e.g. input_shape)
    """
    return layers.Conv2D(
        filters=filters,
        kernel_size=kernel_size,
        padding='same',
        activation='relu',
        **extra,
    )


#Sequential: a linear stack of layers, applied in order from input to output
model = models.Sequential([
    #Feature extraction, stage 1: 28x28 grayscale input (1 channel)
    _conv_relu(64, input_shape=(28, 28, 1)),
    _conv_relu(64),
    _conv_relu(128),
    #MaxPool2D downsamples the spatial dimensions of the feature maps,
    #cutting computation and adding tolerance to small translations (28 -> 14)
    layers.MaxPool2D(pool_size=2),

    #Feature extraction, stage 2 (14 -> 7 after pooling)
    _conv_relu(128),
    _conv_relu(192),
    layers.MaxPool2D(pool_size=2),

    #A larger 5x5 filter to capture more global patterns;
    #the pooling here uses 'same' padding (7 -> 4)
    _conv_relu(192, kernel_size=5),
    layers.MaxPool2D(pool_size=2, padding='same'),

    #Flatten turns the 2D feature maps into a 1D vector for the dense layers
    layers.Flatten(),
    #Fully connected layer: 256 neurons, ReLU activation
    layers.Dense(units=256, activation='relu'),
    #Output layer: 10 neurons, one per class; softmax yields a
    #probability distribution over the 10 classes
    layers.Dense(units=10, activation='softmax'),
])

In [None]:
#Print a summary of the network architecture built in the previous cell,
#as a quick check that the layer stack matches what was intended
model.summary()

In [None]:
#Compile the model
#
#loss='categorical_crossentropy': multi-class loss for one-hot encoded targets;
#  measures the gap between the predicted distribution and the true one-hot label
#optimizer='adam': Adam (Adaptive Moment Estimation) optimizer
#metrics=['accuracy']: report accuracy during training and validation
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

#Dynamic learning-rate adjustment
#
#ReduceLROnPlateau is a Keras callback that lowers the learning rate when the
#monitored quantity stops improving.
#monitor='loss': watch the training loss
#factor=0.3: multiply the learning rate by 0.3 on each reduction
#patience=2: wait 2 epochs without improvement before reducing
#min_lr=1e-8: lower limit for the learning rate
#verbose=1: print a message whenever the learning rate is reduced
reduce_lr = ReduceLROnPlateau(
    monitor='loss',
    factor=0.3,
    patience=2,
    min_lr=1e-8,
    verbose=1,
)

#Train the model
#
#epochs=25: 25 full passes through the training data
#validation_split=0.1: hold out 10% of the training data for validation
#callbacks: the learning-rate scheduler configured above
history = model.fit(
    x=X_train,
    y=y_train_encoded,
    epochs=25,
    validation_split=0.1,
    callbacks=[reduce_lr],
)

In [None]:
#Plot the training curves
#
#history.history is a dictionary of the metrics recorded by model.fit(),
#one value per epoch; wrapping it in a DataFrame makes it easy to plot
history_frame = pd.DataFrame(history.history)

#training loss vs. validation loss per epoch
history_frame[['loss', 'val_loss']].plot()

#training accuracy vs. validation accuracy per epoch
history_frame[['accuracy', 'val_accuracy']].plot()

In [None]:
#Run the trained model on the test images
predictions = model.predict(x=X_test)

#bare expression so the notebook displays the predicted array
predictions

In [None]:
# Turn the probability outputs into digit predictions: the index of the
# highest probability along the second axis is the predicted class label
predicted_labels = predictions.argmax(axis=1)

# 1-based identifier for each test image, in row order
image_ids = range(1, len(predicted_labels) + 1)

# Assemble the predictions in the required submission format
submission = pd.DataFrame({
    "ImageId": image_ids,
    "Label": predicted_labels,
})

# Write the submission file; index=False keeps pandas from adding the row index
submission.to_csv('submission.csv', index=False)