In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.datasets as datasets

from LeNet5 import LeNet5

In [2]:
# Preprocessing applied to every image: resize to 32x32, convert to a tensor,
# then normalise with mean 0.1307 and standard deviation 0.3081.
preprocessing_steps = [
    torchvision.transforms.Resize((32, 32)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
]
input_transform = torchvision.transforms.Compose(preprocessing_steps)

In [3]:
# Both MNIST splits live under ./data (downloaded on demand) and share the
# same preprocessing pipeline; only the `train` flag differs.
mnist_kwargs = dict(root='./data', download=True, transform=input_transform)
train_dataset = datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = datasets.MNIST(train=False, **mnist_kwargs)

# Show the number of samples in each split (train first, then test).
for split in (train_dataset, test_dataset):
    print(len(split))

60000
10000


In [4]:
# Mini-batch size shared by the training and evaluation loaders.
batch_size = 64

# Training batches are drawn in a freshly shuffled order.
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

# Evaluation gains nothing from shuffling, so the test set is iterated in a
# fixed order. This keeps the "first test batch" inspected below the same on
# every run.
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False)

In [5]:
# Number of mini-batches each loader yields per full pass over its dataset.
# (Message text is kept verbatim so the printed output is unchanged.)
n_train_batches = len(train_loader)
n_test_batches = len(test_loader)
print(f'==>>> total trainning batch number: {n_train_batches}')
print(f'==>>> total testing batch number: {n_test_batches}')

==>>> total trainning batch number: 938
==>>> total testing batch number: 157


In [34]:
# Pull a single batch from the test loader and look at its first image.
examples = enumerate(test_loader)
batch_idx, example_batch = next(examples)
example_data, example_targets = example_batch

first_image = example_data[0]
print(first_image.shape)   # shape of one preprocessed sample
print(first_image[0])      # pixel values of its first channel

torch.Size([1, 32, 32])
tensor([[-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
        [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
        [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
        ...,
        [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
        [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242],
        [-0.4242, -0.4242, -0.4242,  ..., -0.4242, -0.4242, -0.4242]])


In [35]:
# Largest and smallest value in the first channel of the first example image.
first_channel = example_data[0][0]
print(first_channel.max())
print(first_channel.min())

tensor(2.8215)
tensor(-0.4242)


In [None]:
# Sanity check: where raw pixel values 0 and 1 land after subtracting 0.1307
# and dividing by 0.3081 (the constants passed to Normalize above).
norm_mean, norm_std = 0.1307, 0.3081
for raw_pixel in (0, 1):
    print((raw_pixel - norm_mean) / norm_std)

In [19]:
# Build a throwaway, default-constructed LeNet5 purely to display its layers.
# It is bound to its own name so that running this cell (in any order) never
# replaces the `model` that is trained, evaluated, and exported elsewhere in
# the notebook.
architecture_preview = LeNet5()
architecture_preview

LeNet5(
  (layer1): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Tanh()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (layer2): Sequential(
    (0): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (1): Tanh()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (fc1): Sequential(
    (0): Linear(in_features=400, out_features=120, bias=True)
    (1): Tanh()
  )
  (fc2): Sequential(
    (0): Linear(in_features=120, out_features=84, bias=True)
    (1): Tanh()
  )
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [8]:
# Hyper-parameters for the classification task
num_classes = 10
learning_rate = 0.001
num_epochs = 10

# Use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Network to be trained, placed on the selected device
model = LeNet5(num_classes)
model = model.to(device)

# Loss function: cross-entropy between the network outputs and the labels
cost = nn.CrossEntropyLoss()

# Adam optimiser over all of the model's parameters
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Batches per epoch; shown in the progress message printed during training
total_step = len(train_loader)

In [9]:
# Train `model` for `num_epochs` passes over `train_loader`, printing the
# current mini-batch loss every 400 steps.
total_step = len(train_loader)

# Explicitly select training mode so the loop behaves the same when this cell
# is re-run after the model has been switched to evaluation mode.
model.train()

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Move the batch to the same device as the model
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = cost(outputs, labels)

        # Backward and optimize: clear old gradients, backpropagate, update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i + 1) % 400 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))

Epoch [1/10], Step [400/938], Loss: 0.0978
Epoch [1/10], Step [800/938], Loss: 0.0530
Epoch [2/10], Step [400/938], Loss: 0.0601
Epoch [2/10], Step [800/938], Loss: 0.0130
Epoch [3/10], Step [400/938], Loss: 0.0125
Epoch [3/10], Step [800/938], Loss: 0.0186
Epoch [4/10], Step [400/938], Loss: 0.0478
Epoch [4/10], Step [800/938], Loss: 0.0475
Epoch [5/10], Step [400/938], Loss: 0.0217
Epoch [5/10], Step [800/938], Loss: 0.0111
Epoch [6/10], Step [400/938], Loss: 0.0295
Epoch [6/10], Step [800/938], Loss: 0.0187
Epoch [7/10], Step [400/938], Loss: 0.0077
Epoch [7/10], Step [800/938], Loss: 0.0130
Epoch [8/10], Step [400/938], Loss: 0.0013
Epoch [8/10], Step [800/938], Loss: 0.0211
Epoch [9/10], Step [400/938], Loss: 0.0370
Epoch [9/10], Step [800/938], Loss: 0.0022
Epoch [10/10], Step [400/938], Loss: 0.0202
Epoch [10/10], Step [800/938], Loss: 0.0007


In [10]:
# Measure classification accuracy over the whole test loader.

# Put the model in evaluation mode before scoring it, rather than relying on
# a later cell to do so.
model.eval()

correct = 0
total = 0

# Gradients are not needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Index of the largest output along dim 1 is the predicted class
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Report the number of images actually evaluated instead of a fixed count.
print('Accuracy of the network on the {} test images: {} %'.format(total, 100 * correct / total))

Accuracy of the network on the 10000 test images: 98.39 %


In [11]:
# Switch the model to evaluation mode. The call's return value is the model
# itself, which the notebook displays as this cell's output.
model.eval()

LeNet5(
  (layer1): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Tanh()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (layer2): Sequential(
    (0): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (1): Tanh()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (fc): Sequential(
    (0): Linear(in_features=400, out_features=120, bias=True)
    (1): Tanh()
  )
  (fc1): Sequential(
    (0): Linear(in_features=120, out_features=84, bias=True)
    (1): Tanh()
  )
  (fc2): Linear(in_features=84, out_features=10, bias=True)
)

In [12]:
# Export a TorchScript trace of the model on the CPU to "model.pt".
# Tracing needs an example input; a random 1x1x32x32 tensor is used.
cpu_model = model.to("cpu")
example_input = torch.rand(1, 1, 32, 32)
traced_script_module = torch.jit.trace(cpu_model, example_input)
traced_script_module.save("model.pt")

In [18]:
# Export a TorchScript trace of the model on the GPU to "model_cuda.pt".
# The export is only attempted when CUDA is available, so the notebook still
# runs to completion on CPU-only machines.
if torch.cuda.is_available():
    traced_script_module = torch.jit.trace(model.to("cuda"), torch.rand(1, 1, 32, 32).to("cuda"))
    traced_script_module.save("model_cuda.pt")
else:
    print("CUDA is not available; skipping the model_cuda.pt export.")