In [1]:
#Import libraries
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# 1. Load Data from CSV
# Read the training table; later cells treat column 0 as the label and all
# remaining columns as features.
train_csv_path = "/kaggle/input/digit-recognizer/train.csv"
train_df = pd.read_csv(train_csv_path)

In [3]:
# 2. Preprocess the Data
# Column 0 is the target; every other column is a feature.
y = train_df.iloc[:, 0].to_numpy()
X = train_df.iloc[:, 1:].to_numpy()

# Standardize each feature column. The scaler is fitted here on every row of
# train.csv and kept around so the same transform can be applied to other data.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [4]:
# 3. Split the Data into Training and Validation Sets
# Split the *raw* features first so that no validation row contributes to the
# scaling statistics (fitting the scaler before splitting leaks validation data
# into preprocessing). The row partition depends only on the number of samples
# and random_state, so it is the same partition as splitting the scaled array.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Re-fit the scaler on the training rows only and rebind `scaler`, so any later
# cell that calls scaler.transform(...) uses exactly the statistics the model
# was trained with.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_val = scaler.transform(X_val_raw)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)  # class indices must be int64 for CrossEntropyLoss
y_val_tensor = torch.tensor(y_val, dtype=torch.long)

In [5]:
# 4. Create DataLoaders
# Pair each feature tensor with its label tensor.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)

# Both loaders use the same mini-batch size; only the training loader
# reshuffles its samples every epoch.
loader_batch_size = 64
trainloader = DataLoader(train_dataset, shuffle=True, batch_size=loader_batch_size)
valloader = DataLoader(val_dataset, shuffle=False, batch_size=loader_batch_size)

In [6]:
# 5. Define the Neural Network Model (Simple Feedforward NN)
class SimpleNN(nn.Module):
    """Fully connected classifier: input_dim -> 128 -> 64 -> output_dim.

    A ReLU follows each of the two hidden layers. The output layer has no
    activation, so ``forward`` returns raw scores (logits).

    Args:
        input_dim: Size of the last dimension of the input tensor.
        output_dim: Number of output scores produced per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        hidden_1, hidden_2 = 128, 64
        # Layers are created in fc1, fc2, fc3 order; attribute names are the
        # keys used in the module's state_dict.
        self.fc1 = nn.Linear(input_dim, hidden_1)
        self.fc2 = nn.Linear(hidden_1, hidden_2)
        self.fc3 = nn.Linear(hidden_2, output_dim)

    def forward(self, x):
        """Return the logits for ``x`` (last dimension must equal input_dim)."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)

In [9]:
# 6. Initialize Model, Loss Function, and Optimizer
# Seed PyTorch's global RNG before any weights are created. The train/val split
# is already seeded (random_state=42); without this, weight initialization and
# the DataLoader's shuffling (it has no explicit generator, so it draws from
# the global RNG) differ on every Restart & Run All.
torch.manual_seed(42)

input_dim = X_train.shape[1]  # Number of features in the dataset
output_dim = len(train_df.iloc[:, 0].unique())  # Number of distinct labels in the target column

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SimpleNN(input_dim, output_dim).to(device)  # Move model parameters to the chosen device
criterion = nn.CrossEntropyLoss()  # Expects raw logits and integer class indices
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer


In [10]:
# 7. Training Loop
num_epochs = 10

for epoch in range(num_epochs):
    model.train()  # training mode for this epoch
    loss_sum = 0.0  # sum of the per-batch mean losses
    n_correct = 0   # correctly classified training samples so far
    n_seen = 0      # training samples processed so far

    for batch_x, batch_y in trainloader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # One optimization step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        # Running statistics use the logits computed *before* this step's update.
        loss_sum += batch_loss.item()
        n_seen += batch_y.size(0)
        n_correct += (logits.argmax(dim=1) == batch_y).sum().item()

    # Print statistics for this epoch (loss is averaged over batches)
    epoch_loss = loss_sum / len(trainloader)
    epoch_accuracy = 100 * n_correct / n_seen
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")


Epoch [1/10], Loss: 0.3170, Accuracy: 90.73%
Epoch [2/10], Loss: 0.1146, Accuracy: 96.50%
Epoch [3/10], Loss: 0.0720, Accuracy: 97.77%
Epoch [4/10], Loss: 0.0543, Accuracy: 98.31%
Epoch [5/10], Loss: 0.0380, Accuracy: 98.74%
Epoch [6/10], Loss: 0.0304, Accuracy: 99.02%
Epoch [7/10], Loss: 0.0233, Accuracy: 99.21%
Epoch [8/10], Loss: 0.0232, Accuracy: 99.27%
Epoch [9/10], Loss: 0.0142, Accuracy: 99.52%
Epoch [10/10], Loss: 0.0181, Accuracy: 99.45%


In [11]:
# 8. Evaluation on the Held-Out Validation Set
# NOTE: `valloader` iterates over the validation split carved out of train.csv,
# so the figure printed below as "Test Accuracy" is validation accuracy.
model.eval()  # evaluation mode
hits = 0
seen = 0

with torch.no_grad():  # no gradients are needed for scoring
    for batch_x, batch_y in valloader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        batch_pred = model(batch_x).argmax(dim=1)  # highest-scoring class per sample
        seen += batch_y.size(0)
        hits += (batch_pred == batch_y).sum().item()

test_accuracy = 100 * hits / seen
print(f"Test Accuracy: {test_accuracy:.2f}%")

Test Accuracy: 96.36%


In [12]:
# 9. Output the submission file
test_df = pd.read_csv("/kaggle/input/digit-recognizer/test.csv")  # load the test data

# Apply the scaler that was fitted earlier. Pass a plain ndarray: the scaler was
# fitted on an array, and giving it a DataFrame makes scikit-learn warn about
# feature names it never saw during fit.
test_scaled = scaler.transform(test_df.values)
test_tensor = torch.tensor(test_scaled, dtype=torch.float32).to(device)  # convert to a tensor on the model's device

model.eval()  # set inference mode here rather than relying on an earlier cell
with torch.no_grad():  # no gradients needed for inference
    outputs = model(test_tensor)  # raw model outputs (logits)
    predicted_classes = outputs.argmax(dim=1)  # predicted class label per row

predictions = predicted_classes.cpu().numpy()

# Kaggle's Digit Recognizer submission header is "ImageId,Label" with 1-based
# ids; derive the id range from the number of predictions instead of
# hard-coding the test-set size.
submissions = pd.DataFrame({
    'ImageId': range(1, len(predictions) + 1),
    'Label': predictions,
})
submissions.to_csv('submission.csv', index=False)


