In [1]:
#Import libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Input, ReLU, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator


In [2]:
# 1. Load Data from CSV
train_df = pd.read_csv("/kaggle/input/digit-recognizer/train.csv")

In [3]:
# 2. Preprocess the Data
X = train_df.iloc[:, 1:].values  # Features (all columns except the first one)
y = train_df.iloc[:, 0].values   # Target (first column)

X = X.reshape(-1, 28, 28, 1)
X = X/255.0


In [4]:
# 3. Data Augmentation
datagen = ImageDataGenerator(
    rotation_range=40,      # Randomly rotate images by up to 40 degrees
    width_shift_range=0.2,  # Randomly shift images horizontally by 20%
    height_shift_range=0.2, # Randomly shift images vertically by 20%
    zoom_range=0.2,         # Randomly zoom in or out
    fill_mode='nearest' ,   # How to fill in new pixels after transformations
    validation_split=0.2 
)

datagen.fit(X)
# Apply augmentation to the training data and validation split
train_generator = datagen.flow(X, y, batch_size=32, subset='training')
val_generator = datagen.flow(X, y, batch_size=32, subset='validation')

In [5]:
# 4. Define the Convolutionary Neural Network Model
model = Sequential([
    Input(shape=(28, 28, 1)),

    Conv2D(32, (3, 3), padding='same'), # First Convolutional Layer
    ReLU(),  # Apply ReLU after Conv2D
    
    Conv2D(64, (3, 3), padding='same'), # Second Convolutional Layer
    ReLU(),  # Apply ReLU after Conv2D
     
    Conv2D(128, (5, 5), padding='same'), # Third Convolutional Layer
    ReLU(),  # Apply ReLU after Conv2D
    
    Conv2D(128, (7, 7), padding='same'), # Fourth Convolutional Layer
    BatchNormalization(), #Batch normalization
    ReLU(),  # Apply ReLU after Conv2D
    
    MaxPooling2D(pool_size=(2, 2)),  # Max pooling to reduce the spatial dimensions

    # Flatten the 3D outputs to 1D
    Flatten(),
    
    # Dense Layers
    Dense(256),                          # First dense layer
    BatchNormalization(),                # Batch Normalization after the first dense layer
    ReLU(),                              # Activation function after BatchNorm
    
    Dense(128),                          # Second dense layer (256 nodes added)
    ReLU(),                              # Activation function after Dense
    
    Dense(64),                           # Third dense layer
    ReLU(),                              # Activation function after Dense

    Dense(10, activation='softmax')      # Output layer (classification)
])


#5: Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',  # For integer labels
              metrics=['accuracy'])


In [6]:
#6: Train the model
model.fit(train_generator, epochs=20, validation_data=val_generator)


Epoch 1/20


  self._warn_if_super_not_called()
I0000 00:00:1733624735.022317      96 service.cc:145] XLA service 0x7d00ec0086f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1733624735.022405      96 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0


[1m  10/1050[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m18s[0m 18ms/step - accuracy: 0.1459 - loss: 2.3883

I0000 00:00:1733624739.154770      96 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 21ms/step - accuracy: 0.7021 - loss: 0.8874 - val_accuracy: 0.9460 - val_loss: 0.1811
Epoch 2/20
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 20ms/step - accuracy: 0.9419 - loss: 0.1876 - val_accuracy: 0.9348 - val_loss: 0.2092
Epoch 3/20
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 20ms/step - accuracy: 0.9576 - loss: 0.1410 - val_accuracy: 0.9688 - val_loss: 0.0998
Epoch 4/20
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 20ms/step - accuracy: 0.9654 - loss: 0.1150 - val_accuracy: 0.9617 - val_loss: 0.1262
Epoch 5/20
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 20ms/step - accuracy: 0.9652 - loss: 0.1147 - val_accuracy: 0.9690 - val_loss: 0.1112
Epoch 6/20
[1m1050/1050[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 20ms/step - accuracy: 0.9695 - loss: 0.0993 - val_accuracy: 0.9773 - val_loss: 0.0728
Epoch 7/20
[1m

<keras.src.callbacks.history.History at 0x7d018851fdc0>

In [7]:
#7. Output the submission file
test_df= pd.read_csv("/kaggle/input/digit-recognizer/test.csv") #load the test data

test= test_df.to_numpy()
test= test.reshape(-1, 28 , 28,1)
test = test/255.0

predictions = model.predict(test)
predicted_labels = np.argmax(predictions, axis=1)

submissions= pd.DataFrame({'Imageid': range(1, 28001)})
submissions['Label']= predicted_labels
submissions.to_csv('submission.csv', index=None)

[1m875/875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step
