In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

# Load the data
data = pd.read_csv('data/mnist.csv')

data

Using device: cuda


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,label
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2
69996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3
69997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
69998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5


In [38]:
class SimpleNN(torch.nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()
        self.fc1 = torch.nn.Linear(784, 30)
        self.fc2 = torch.nn.Linear(30, 10)
        
    def forward(self, x):
        x = torch.sigmoid(self.fc1(x))
        x = torch.sigmoid(self.fc2(x))
        return x
    
model = SimpleNN().to(device)

# Loss and optimizer
criterion = torch.nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1)

In [39]:
import tqdm

X = data.drop('label', axis=1)
y = data['label']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

X_train = torch.tensor(X_train.values, dtype=torch.float32).to(device)
X_test = torch.tensor(X_test.values, dtype=torch.float32).to(device)

y_train = pd.get_dummies(y_train).values
y_test = pd.get_dummies(y_test).values

y_train = torch.tensor(y_train, dtype=torch.float32).to(device)
y_test = torch.tensor(y_test, dtype=torch.float32).to(device)

# Training loop
n_epochs = 1000
batch_size = 1000

for epoch in range(n_epochs):
    for i in range(0, len(X_train), batch_size):
        X_batch = X_train[i:i+batch_size]
        y_batch = y_train[i:i+batch_size]
        
        # Forward pass
        outputs = model(X_batch)

        loss = criterion(outputs, y_batch)
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if epoch % 10 == 0:
        print(f'Epoch {epoch+1}/{n_epochs}, Loss: {loss.item()}')


Epoch 1/1000, Loss: 0.08947660028934479
Epoch 11/1000, Loss: 0.07332063466310501
Epoch 21/1000, Loss: 0.04989853501319885
Epoch 31/1000, Loss: 0.03738477826118469
Epoch 41/1000, Loss: 0.030177395790815353
Epoch 51/1000, Loss: 0.025737041607499123
Epoch 61/1000, Loss: 0.02285456843674183
Epoch 71/1000, Loss: 0.020883508026599884
Epoch 81/1000, Loss: 0.019444376230239868
Epoch 91/1000, Loss: 0.01832912303507328
Epoch 101/1000, Loss: 0.017424730584025383
Epoch 111/1000, Loss: 0.016667388379573822
Epoch 121/1000, Loss: 0.01601864956319332
Epoch 131/1000, Loss: 0.01545366458594799
Epoch 141/1000, Loss: 0.014955339953303337
Epoch 151/1000, Loss: 0.014511357992887497
Epoch 161/1000, Loss: 0.014112502336502075
Epoch 171/1000, Loss: 0.013751687481999397
Epoch 181/1000, Loss: 0.013423324562609196
Epoch 191/1000, Loss: 0.0131229218095541
Epoch 201/1000, Loss: 0.012846793979406357
Epoch 211/1000, Loss: 0.012591885402798653
Epoch 221/1000, Loss: 0.012355630286037922
Epoch 231/1000, Loss: 0.01213585

In [40]:
from sklearn.metrics import classification_report

outputs = model(X_test)
_, predicted = torch.max(outputs, 1)

print(classification_report(torch.argmax(y_test, 1).cpu().numpy(), predicted.cpu().numpy()))

              precision    recall  f1-score   support

           0       0.96      0.98      0.97      1343
           1       0.96      0.98      0.97      1600
           2       0.95      0.93      0.94      1380
           3       0.94      0.92      0.93      1433
           4       0.93      0.94      0.94      1295
           5       0.94      0.93      0.93      1273
           6       0.95      0.97      0.96      1396
           7       0.95      0.96      0.95      1503
           8       0.94      0.94      0.94      1357
           9       0.94      0.93      0.93      1420

    accuracy                           0.95     14000
   macro avg       0.95      0.95      0.95     14000
weighted avg       0.95      0.95      0.95     14000

