In [26]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np



In [None]:
# Set training device
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

print(device)

In [5]:
# Load MNIST dataset
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz


6.3%

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100.0%


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100.0%
2.0%

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100.0%
100.0%


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [9]:
# Define data loaders
batch_size = 128
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [17]:
# Define LeNet-5 architecture
class LeNet(nn.Module):
    def __init__(self, lr=0.1, num_classes=10):
        super().__init__()
        self.net = nn.Sequential(
            nn.LazyConv2d(6, kernel_size=5, padding=2), nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.LazyConv2d(16, kernel_size=5), nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.LazyLinear(120), nn.Sigmoid(),
            nn.LazyLinear(84), nn.Sigmoid(),
            nn.LazyLinear(num_classes))
    

    def forward(self, x):
        x = self.net(x)

        return x

In [10]:
train_iter = iter(train_loader)

In [11]:
print(train_iter)

<torch.utils.data.dataloader._SingleProcessDataLoaderIter object at 0x1571b62f0>


In [21]:
# Training 
model = LeNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum =0.9)
Epochs = 100

for epoch in range(0,Epochs):
    model.train()
    train_iter = iter(train_loader)
    loss_acc = 0

    for idx, (images, labels) in enumerate(train_iter):
        labels = labels.to(device)
        images = images.to(device)
        output = model(images)

        loss = criterion(output, labels)
        loss_acc += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        
    loss_acc /= Epochs
    print('Epoch %d: Loss = %.2f'%(epoch+1, loss_acc))

Epoch 1: Loss = 10.83
Epoch 2: Loss = 10.80
Epoch 3: Loss = 10.80
Epoch 4: Loss = 10.80
Epoch 5: Loss = 10.80
Epoch 6: Loss = 10.80
Epoch 7: Loss = 10.80
Epoch 8: Loss = 10.79
Epoch 9: Loss = 9.95
Epoch 10: Loss = 4.49
Epoch 11: Loss = 1.43
Epoch 12: Loss = 0.85
Epoch 13: Loss = 0.63
Epoch 14: Loss = 0.51
Epoch 15: Loss = 0.43
Epoch 16: Loss = 0.38
Epoch 17: Loss = 0.34
Epoch 18: Loss = 0.31
Epoch 19: Loss = 0.28
Epoch 20: Loss = 0.26
Epoch 21: Loss = 0.24
Epoch 22: Loss = 0.23
Epoch 23: Loss = 0.22
Epoch 24: Loss = 0.21
Epoch 25: Loss = 0.20
Epoch 26: Loss = 0.19
Epoch 27: Loss = 0.17
Epoch 28: Loss = 0.16
Epoch 29: Loss = 0.16
Epoch 30: Loss = 0.16
Epoch 31: Loss = 0.14
Epoch 32: Loss = 0.14
Epoch 33: Loss = 0.13
Epoch 34: Loss = 0.13
Epoch 35: Loss = 0.13
Epoch 36: Loss = 0.13
Epoch 37: Loss = 0.12
Epoch 38: Loss = 0.11
Epoch 39: Loss = 0.10
Epoch 40: Loss = 0.10
Epoch 41: Loss = 0.09
Epoch 42: Loss = 0.09
Epoch 43: Loss = 0.09
Epoch 44: Loss = 0.09
Epoch 45: Loss = 0.08
Epoch 46: L

In [34]:
#Testing

test_epochs = 25
model.eval()
for epoch in range(test_epochs):
    acc_cum = 0
    test_size = 0

    for idx, (images, labels) in enumerate(test_loader):
        images = images.to(device)
        labels = labels.to(device)
        output = model(images.float()).detach()
        output =torch.argmax(output, dim=-1)
        current_correct_num = output == labels
        acc_cum += np.sum(current_correct_num.to('cpu').numpy(), axis=-1)
        test_size += current_correct_num.shape[0]
    acc = acc_cum / test_size
    print('accuracy: %.3f'%(acc))


accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
accuracy: 0.990
