In [1]:
#Import libraries
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [14]:
# 1. Load Data from CSV
train_df = pd.read_csv("/kaggle/input/digit-recognizer/train.csv")

In [15]:
# 2. Preprocess the Data
X = train_df.iloc[:, 1:].values  # Features (all columns except the first one)
y = train_df.iloc[:, 0].values   # Target (first column)

# Optional: Normalize or Standardize the data (important for NN)
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [16]:
# 3. Split the Data into Training and Validation Sets
X_train, X_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.1, random_state=42)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)  # For classification (long tensor)
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

In [17]:
# 4. Create DataLoaders
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

trainloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
valloader = DataLoader(val_dataset, batch_size=64, shuffle=False)

In [22]:
# 5. Define the Convolutionary Neural Network Model
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        
        # First Convolutional Layer: input 1 channel, output 32 channels, kernel size 3x3
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Second Convolutional Layer: input 32 channels, output 64 channels, kernel size 3x3
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        
        # Fully connected layers
        self.fc1 = nn.Linear(64 * 7 * 7, 128)  # 64 channels, 7x7 feature map size after pooling
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(128, 10)  # Output 10 classes (digits 0-9)
        
    def forward(self, x):
        x = self.pool1(self.relu1(self.conv1(x)))  # Conv1 -> ReLU -> MaxPool1
        x = self.pool2(self.relu2(self.conv2(x)))  # Conv2 -> ReLU -> MaxPool2
        x = x.view(-1, 64 * 7 * 7)  # Flatten the tensor
        x = self.relu3(self.fc1(x))  # Fully connected layer 1 -> ReLU
        x = self.fc2(x)  # Fully connected layer 2 -> Output layer
        return x

In [23]:
# 6. Initialize Model, Loss Function, and Optimizer
input_dim = X_train.shape[1]  # Number of features in the dataset
output_dim = len(train_df.iloc[:, 0].unique())  # Number of classes for classification (unique labels)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleNN(input_dim, output_dim).to(device)  # Move model to GPU
criterion = nn.CrossEntropyLoss()  # Loss function for multi-class classification
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer


In [24]:
# 7. Training Loop
num_epochs = 15

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()  # Zero the gradients
        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Compute the loss

        loss.backward()  # Backpropagation
        optimizer.step()  # Update weights

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)  # Get predicted class
        total += labels.size(0)
        correct += (predicted == labels).sum().item()  # Count correct predictions

    # Print statistics for this epoch
    epoch_loss = running_loss / len(trainloader)
    epoch_accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")


Epoch [1/15], Loss: 0.2927, Accuracy: 91.53%
Epoch [2/15], Loss: 0.1137, Accuracy: 96.56%
Epoch [3/15], Loss: 0.0755, Accuracy: 97.60%
Epoch [4/15], Loss: 0.0526, Accuracy: 98.37%
Epoch [5/15], Loss: 0.0374, Accuracy: 98.80%
Epoch [6/15], Loss: 0.0294, Accuracy: 99.07%
Epoch [7/15], Loss: 0.0236, Accuracy: 99.24%
Epoch [8/15], Loss: 0.0228, Accuracy: 99.30%
Epoch [9/15], Loss: 0.0174, Accuracy: 99.46%
Epoch [10/15], Loss: 0.0239, Accuracy: 99.24%
Epoch [11/15], Loss: 0.0157, Accuracy: 99.53%
Epoch [12/15], Loss: 0.0134, Accuracy: 99.62%
Epoch [13/15], Loss: 0.0272, Accuracy: 99.30%
Epoch [14/15], Loss: 0.0163, Accuracy: 99.47%
Epoch [15/15], Loss: 0.0076, Accuracy: 99.76%


In [25]:
# 8. Evaluation on Test Set
model.eval()  # Set the model to evaluation mode
correct = 0
total = 0

with torch.no_grad():  # Disable gradient calculation during evaluation
    for inputs, labels in valloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = 100 * correct / total
print(f"Test Accuracy: {test_accuracy:.2f}%")

Test Accuracy: 97.05%


In [26]:
# 9. Output the submission file
test_df= pd.read_csv("/kaggle/input/digit-recognizer/test.csv")#load the test data
test_scaled=scaler.transform(test_df)#Scale the data
test_tensor= torch.tensor(test_scaled, dtype=torch.float32) #Convert the into tensor
test_tensor = test_tensor.to(device)

with torch.no_grad():  # Disable gradient calculations (no need for backprop during inference)
    outputs = model(test_tensor)  # Get raw model outputs (logits)
    _, predicted_classes = torch.max(outputs, 1)  # Get the predicted class labels
    

submissions= pd.DataFrame({'Imageid': range(1, 28001)})
predictions= predicted_classes = predicted_classes.cpu().numpy()
submissions['Label']= predictions
submissions.to_csv('submission.csv', index=None)


