In [1]:
import numpy as np
import pandas as pd
import torch
from torch import nn
from torchsummary import summary
# Device the model is placed on via `.to(device)` further down; fixed to the CPU here.
device = "cpu"

In [2]:
# Read the training CSV, then split it into the 'label' column (targets) and
# every remaining column (features, converted to a float64 NumPy array).
data = pd.read_csv('train.csv')
ytrain = data['label'].copy()
xtrain = data.drop(columns='label').to_numpy(dtype=np.float64)

In [3]:
class NeuralNetwork(nn.Module):
    """Small convolutional classifier: two conv stacks followed by three linear layers.

    Each conv stack is Conv2d(kernel 5, no padding) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2, stride 2).  The first linear layer takes 256 features, which is
    what a single-channel 28x28 input produces after both stacks
    (28 -> 24 -> 12 -> 8 -> 4 per side, 16 channels * 4 * 4 = 256).
    The network returns 10 raw scores (logits) per sample; no softmax is applied.
    """

    def __init__(self, *args, **kwargs) -> None:
        """Build the layers; any arguments are forwarded unchanged to ``nn.Module``."""
        super().__init__(*args, **kwargs)
        # Collapses every dimension after the batch dimension; used in forward().
        self.flatten = nn.Flatten()

        # 1 input channel -> 6 feature maps, then spatial size halved by pooling.
        self.conv_stack_1 = nn.Sequential(
            nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(6),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2)
        )

        # 6 -> 16 feature maps, then spatial size halved again.
        self.conv_stack_2 = nn.Sequential(
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 2, stride = 2)
        )
        # Classifier head: 256 -> 120 -> 84 -> 10.
        self.linear_stack = nn.Sequential(
            nn.Linear(256, 120),
            nn.ReLU(),
            nn.Linear(120, 84),
            nn.ReLU(),
            nn.Linear(84, 10)
        )

    def forward(self, x):
        """Run a batch through the network.

        Args:
            x: input batch; the layer sizes require the conv stacks to yield
               256 values per sample (e.g. shape ``(N, 1, 28, 28)``).

        Returns:
            Tensor of shape ``(N, 10)`` holding unnormalised class scores.
        """
        x = self.conv_stack_1(x)
        x = self.conv_stack_2(x)
        # Use the module's own Flatten layer instead of a hand-written reshape;
        # both keep the batch dimension and merge the rest.
        x = self.flatten(x)
        x = self.linear_stack(x)
        return x

In [4]:
# Instantiate the network, place it on the configured device, and print its layers.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (conv_stack_1): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_stack_2): Sequential(
    (0): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (linear_stack): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): ReLU()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): ReLU()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)


In [5]:
# Convert features and labels to tensors.  The isinstance guards make this cell
# safe to re-run: once `xtrain` / `ytrain` are already tensors, calling
# torch.from_numpy / .to_numpy() on them again would raise.
if not isinstance(xtrain, torch.Tensor):
    xtrain = torch.from_numpy(xtrain)
# Shape (N, 1, 28, 28): one channel of 28x28 values per sample, as float32.
xtrain = xtrain.reshape((-1, 1, 28, 28)).float()

if not isinstance(ytrain, torch.Tensor):
    ytrain = torch.from_numpy(ytrain.to_numpy())
# Integer (int64) class labels.
ytrain = ytrain.long()

In [6]:
# Cross-entropy objective on the model's raw scores, optimised with Adam
# (learning rate 1e-3) over all model parameters.
loss_fn = nn.CrossEntropyLoss()
opt_fn = torch.optim.Adam(params=model.parameters(), lr=1e-3)

In [22]:
def acc(pred, true):
    """Return the fraction of rows of `pred` whose arg-max equals the label in `true`.

    Args:
        pred: 2-D tensor of per-class scores, one row per sample.
        true: 1-D tensor of integer class labels, one per row of `pred`.

    Returns:
        NumPy float: number of matching predictions divided by ``len(true)``.
    """
    # .cpu() is a no-op for CPU tensors; without it .numpy() raises for tensors
    # that live on another device.
    predicted = np.argmax(pred.detach().cpu().numpy(), axis=1)
    expected = true.detach().cpu().numpy()
    return np.sum(predicted == expected) / len(true)

# The model was moved to `device`, so the data must live there too; otherwise
# any non-CPU `device` fails with a device-mismatch error in the forward pass.
# For the CPU these calls return the tensors unchanged.
inputs = xtrain.to(device)
targets = ytrain.to(device)

# NOTE: every iteration feeds the whole training set through the model and takes
# a single optimizer step, i.e. one "epoch" here is one full-batch update.
for epoch in range(100):

    model.train(True)

    # Clear gradients accumulated by the previous step.
    opt_fn.zero_grad()

    outputs = model(inputs)
    loss = loss_fn(outputs, targets)
    loss.backward()
    opt_fn.step()

    model.train(False)

    # Accuracy is computed from `outputs`, i.e. the forward pass made in training
    # mode *before* this iteration's parameter update.  CPU copies are passed so
    # the NumPy conversion inside acc() works for any device.
    print(f"EPOCH {epoch + 1}:\tLOSS: {loss.item()}\t\t Training Accuracy: {acc(outputs.cpu(), targets.cpu())}")

EPOCH 1:	LOSS: 0.3345722556114197		 Training Accuracy: 0.9084285714285715
EPOCH 2:	LOSS: 0.31677189469337463		 Training Accuracy: 0.9114523809523809
EPOCH 3:	LOSS: 0.3008989691734314		 Training Accuracy: 0.9150714285714285
EPOCH 4:	LOSS: 0.28667858242988586		 Training Accuracy: 0.9185714285714286
EPOCH 5:	LOSS: 0.2738930284976959		 Training Accuracy: 0.9216904761904762


KeyboardInterrupt: 

In [20]:
# Count the samples whose arg-max prediction (from the most recent `outputs`)
# equals the corresponding label in `ytrain`.
predicted_labels = outputs.detach().numpy().argmax(axis=1)
np.sum(predicted_labels == ytrain.detach().numpy())

35677