In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/digit-recognizer/sample_submission.csv
/kaggle/input/digit-recognizer/train.csv
/kaggle/input/digit-recognizer/test.csv


## 1️⃣ Introduction
Handwritten digit recognition is a classic problem in computer vision and machine learning. It involves classifying grayscale images of handwritten digits (0–9) into their respective categories. The MNIST dataset, which consists of 28x28 pixel images of digits, is widely used as a benchmark for this task.

In this project, we use Deep Learning with Convolutional Neural Networks (CNNs) to classify these images. CNNs are powerful models that excel at capturing spatial hierarchies in image data.



## 2️⃣ Understanding the Dataset
The dataset consists of:

Training Data: 42,000 images labeled from 0 to 9 (each image is 28×28 pixels).
Test Data: 28,000 images without labels (to be predicted).
Each image is represented as a grayscale matrix of shape (28,28,1) where each pixel has an intensity between 0 (black) and 255 (white).

## 3️⃣ Convolutional Neural Networks (CNNs) Overview
A CNN is a deep learning model specifically designed for image recognition. It consists of the following layers:

🔹 Convolutional Layer
Extracts patterns (edges, curves, textures) from the image using filters/kernels.
Captures spatial relationships by applying a sliding window operation over the image.
🔹 Activation Function (ReLU - Rectified Linear Unit)
Introduces non-linearity to the model.
Converts negative values to zero to maintain positive activations.
🔹 Pooling Layer (MaxPooling)
Reduces dimensionality while retaining important features.
Helps in making the model translation-invariant.
🔹 Fully Connected (Dense) Layers
Flattens the output and connects it to Dense layers.
The final layer contains 10 neurons (one for each digit 0-9) with a softmax activation function.
🔹 Dropout Layer
Prevents overfitting by randomly turning off some neurons during training.
## 4️⃣ Data Preprocessing & Augmentation
Normalization: Pixel values are scaled between 0 and 1 to speed up training.
Reshaping: Images are reshaped from (28,28) to (28,28,1) to fit CNN input requirements.
Data Augmentation:
Rotation (±10°): Helps the model generalize across different orientations.
Zooming (±10%): Simulates variations in digit sizes.
Shifting (±10%): Accounts for slight positional variations.
## 5️⃣ Model Architecture Used
Our CNN model follows this architecture:

Layer	Type	Filters/Units	Activation	Purpose
Conv2D	Convolution	32 filters (3x3)	ReLU	Extracts features (edges, patterns)
MaxPooling2D	Pooling	(2x2)	-	Reduces spatial dimensions
Conv2D	Convolution	64 filters (3x3)	ReLU	Learns deeper features
MaxPooling2D	Pooling	(2x2)	-	Further reduces dimensions
Flatten	Flatten	-	-	Converts 2D features into a 1D vector
Dense	Fully Connected	128	ReLU	Learns complex representations
Dropout	Regularization	30% neurons off	-	Reduces overfitting
Dense	Fully Connected	64	ReLU	More abstract feature learning
Dense	Output	10 neurons	Softmax	Predicts probability for each digit
## 6️⃣ Training the Model
Loss Function: Sparse Categorical Crossentropy
Optimizer: Adam (Adaptive Moment Estimation)
Metric: Accuracy
Batch Size: 64
Epochs: 20
Data Generator: ImageDataGenerator for augmentation
## 7️⃣ Model Evaluation
We use a train-validation split (90%-10%).
Monitor accuracy and loss curves.
If validation accuracy is low, tweak hyperparameters.
## 8️⃣ Making Predictions
The model predicts on test images.
Converts one-hot encoded outputs to class labels.
Generates a submission.csv file with predictions.
## 9️⃣ Key Takeaways
✅ CNNs excel in image classification tasks.
✅ Data Augmentation improves generalization.
✅ Dropout & Regularization reduce overfitting.
✅ Hyperparameter tuning can further boost accuracy.

🔹 Next Steps to Improve
✔️ Use Deeper CNNs (ResNet, EfficientNet).
✔️ Implement Learning Rate Schedulers.
✔️ Experiment with Ensemble Models.

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load dataset
train = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')

# Split features & labels
X = train.drop('label', axis=1).values.reshape(-1, 28, 28, 1) / 255.0  # Normalize
y = train['label'].values

# Train-validation split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42, stratify=y)

# Data Augmentation
datagen = ImageDataGenerator(rotation_range=10, zoom_range=0.1, width_shift_range=0.1, height_shift_range=0.1)
datagen.fit(X_train)

# CNN Model
model = Sequential([
    Conv2D(32, (3,3), activation='relu', input_shape=(28,28,1)),
    MaxPooling2D(2,2),
    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])

# Compile Model
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train Model
history = model.fit(datagen.flow(X_train, y_train, batch_size=64), validation_data=(X_val, y_val), epochs=20)

# Predict on test data
X_test = test.values.reshape(-1, 28, 28, 1) / 255.0
predictions = model.predict(X_test).argmax(axis=1)

# Create submission file
submission = pd.DataFrame({'ImageId': np.arange(1, len(predictions) + 1), 'Label': predictions})
submission.to_csv('submission.csv', index=False)

print(" Submission file saved")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20


  self._warn_if_super_not_called()


[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 41ms/step - accuracy: 0.6774 - loss: 0.9520 - val_accuracy: 0.9586 - val_loss: 0.1283
Epoch 2/20
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 41ms/step - accuracy: 0.9325 - loss: 0.2221 - val_accuracy: 0.9821 - val_loss: 0.0631
Epoch 3/20
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 40ms/step - accuracy: 0.9520 - loss: 0.1537 - val_accuracy: 0.9850 - val_loss: 0.0539
Epoch 4/20
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 41ms/step - accuracy: 0.9597 - loss: 0.1319 - val_accuracy: 0.9843 - val_loss: 0.0503
Epoch 5/20
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 39ms/step - accuracy: 0.9661 - loss: 0.1112 - val_accuracy: 0.9919 - val_loss: 0.0334
Epoch 6/20
[1m591/591[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 40ms/step - accuracy: 0.9706 - loss: 0.0971 - val_accuracy: 0.9910 - val_loss: 0.0326
Epoch 7/20
[1m591/591[0m 