In [179]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [140]:
train_df = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test_df = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')

In [141]:
y = train_df['label'].values
X = train_df.drop('label', axis=1).values

## PyTorch implementation

### Prepare dataset

In [142]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [144]:
X_train = torch.tensor(X_train, dtype=torch.float32) / 255.0
X_val = torch.tensor(X_val, dtype=torch.float32) / 255.0
X_test = torch.tensor(test_df.values, dtype=torch.float32) / 255.0

  X_train = torch.tensor(X_train, dtype=torch.float32) / 255.0
  X_val = torch.tensor(X_val, dtype=torch.float32) / 255.0


In [145]:
y_train = torch.tensor(y_train, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

In [146]:
# Needed for PyTorch, would've been easier to use PyTorch's own MNIST dataset in other circumstances
class MNISTDataset(Dataset):
    def __init__(self, images, labels, transform=None):
        self.images = images
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [147]:
train_dataset = MNISTDataset(X_train, y_train)
val_dataset = MNISTDataset(X_val, y_val)

In [148]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

### Prepare model

In [149]:
class MLP(nn.Module):
    def __init__(self, input_size=28*28, hidden_size1=128, hidden_size2=64, num_classes=10):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [165]:
model = MLP()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.005)

### Train model

In [166]:
num_epochs = 40

In [167]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

Epoch [1/40], Loss: 1.0898
Epoch [2/40], Loss: 0.5674
Epoch [3/40], Loss: 0.4277
Epoch [4/40], Loss: 0.3743
Epoch [5/40], Loss: 0.3370
Epoch [6/40], Loss: 0.3026
Epoch [7/40], Loss: 0.2752
Epoch [8/40], Loss: 0.2513
Epoch [9/40], Loss: 0.2291
Epoch [10/40], Loss: 0.2098
Epoch [11/40], Loss: 0.1937
Epoch [12/40], Loss: 0.1787
Epoch [13/40], Loss: 0.1643
Epoch [14/40], Loss: 0.1528
Epoch [15/40], Loss: 0.1403
Epoch [16/40], Loss: 0.1322
Epoch [17/40], Loss: 0.1236
Epoch [18/40], Loss: 0.1154
Epoch [19/40], Loss: 0.1090
Epoch [20/40], Loss: 0.1018
Epoch [21/40], Loss: 0.0967
Epoch [22/40], Loss: 0.0923
Epoch [23/40], Loss: 0.0858
Epoch [24/40], Loss: 0.0809
Epoch [25/40], Loss: 0.0773
Epoch [26/40], Loss: 0.0750
Epoch [27/40], Loss: 0.0688
Epoch [28/40], Loss: 0.0672
Epoch [29/40], Loss: 0.0639
Epoch [30/40], Loss: 0.0616
Epoch [31/40], Loss: 0.0564
Epoch [32/40], Loss: 0.0557
Epoch [33/40], Loss: 0.0511
Epoch [34/40], Loss: 0.0506
Epoch [35/40], Loss: 0.0488
Epoch [36/40], Loss: 0.0449
E

### Evaluate model

In [168]:
model.eval()
correct = 0
total = 0

In [169]:
with torch.no_grad():
    for images, labels in val_loader:
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {100 * correct / total:.2f}%')

Accuracy: 96.54%


### Grab submission

In [175]:
test_dataset = TensorDataset(X_test)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [176]:
model.eval()

MLP(
  (fc1): Linear(in_features=784, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=10, bias=True)
  (relu): ReLU()
)

In [177]:
all_predictions = []

with torch.no_grad():
    for inputs in test_loader:
        inputs = inputs[0] 
        outputs = model(inputs)
        _, predictions = torch.max(outputs, 1)
        all_predictions.append(predictions.numpy())

In [180]:
all_predictions = np.concatenate(all_predictions)

predictions_list = all_predictions.tolist()

In [184]:
submission = pd.DataFrame(predictions_list)
submission.index.name='ImageId'
submission.index+=1
submission.columns=['Label']

In [183]:
submission.to_csv('submission.csv', index="ImageId")

### Update code to see overfitting + hyperparameter tuning

In [None]:
'''
model = MLP()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
'''

In [None]:
'''
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

num_epochs = 100

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    for images, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item() * images.size(0)
        _, predicted = torch.max(outputs, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    train_loss = running_loss / len(train_loader.dataset)
    train_accuracy = correct_train / total_train

    model.eval()
    val_loss = 0.0
    correct_val = 0
    total_val = 0

    with torch.no_grad():
        for images, labels in val_loader:
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            val_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            total_val += labels.size(0)
            correct_val += (predicted == labels).sum().item()

    val_loss /= len(val_loader.dataset)
    val_accuracy = correct_val / total_val

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accuracies.append(train_accuracy)
    val_accuracies.append(val_accuracy)

    print(f'Epoch {epoch+1}/{num_epochs}')
    print(f'Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}')
    print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}')
'''

In [None]:
'''
epochs = range(1, num_epochs + 1)

plt.figure(figsize=(12, 5))

# Plot loss
plt.subplot(1, 2, 1)
plt.plot(epochs, train_losses, 'bo-', label='Training Loss')
plt.plot(epochs, val_losses, 'ro-', label='Validation Loss')
plt.title('Loss vs Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Plot accuracy
plt.subplot(1, 2, 2)
plt.plot(epochs, train_accuracies, 'bo-', label='Training Accuracy')
plt.plot(epochs, val_accuracies, 'ro-', label='Validation Accuracy')
plt.title('Accuracy vs Epochs')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()
plt.show()
'''