In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters 
input_size = 784
hidden_size = 500
num_classes = 10
num_epochs = 5
batch_size = 100
learning_rate = 0.001

In [2]:
trainset = torchvision.datasets.MNIST(root='./mnist', train=True,
                                        download=True, transform=transforms.ToTensor())
print("train : " + str(len(trainset)) + ' images')

testset = torchvision.datasets.MNIST(root='./mnist', train=False,
                                        download=True, transform=transforms.ToTensor())
print("test : " + str(len(trainset)) + ' images')

# Data loader
trainloader = torch.utils.data.DataLoader(dataset=trainset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

testloader = torch.utils.data.DataLoader(dataset=testset, 
                                          batch_size=batch_size, 
                                          shuffle=False)



train : 60000 images
test : 60000 images


In [3]:
class Net(nn.Module):
    
    #define the learnable paramters by calling the respective modules (nn.Conv2d, nn.MaxPool2d etc.)
    def __init__(self):
        super(Net, self).__init__()
        
        #fully connected layers
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes) 
    
    #defining the structure of the network
    def forward(self, x):
        
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out
                        

model = Net()
if torch.cuda.is_available():
    model = model.cuda()
    
model    

Net(
  (fc1): Linear(in_features=784, out_features=500, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=500, out_features=10, bias=True)
)

In [4]:

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  


In [5]:
########################################################################
# Train the network
# ^^^^^^^^^^^^^^^^^^^^

def train(epoch, trainloader, optimizer, criterion):
    running_loss = 0.0
    
    for i, (inputs,labels) in enumerate(tqdm(trainloader), 0):
        # get the inputs
        inputs = inputs.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
            
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # addup loss
        running_loss += loss.item()

    print('epoch %d training loss: %.3f' %
            (epoch + 1, running_loss / (len(trainloader))))
    return running_loss / (len(trainloader))   

In [6]:
########################################################################
# Let us look at how the network performs on the test dataset.

def test(testloader, model):
    running_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for (inputs,labels) in tqdm(testloader):
            # get the inputs
            inputs = inputs.reshape(-1, 28*28).to(device)
            labels = labels.to(device)

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # addup loss
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy of the network on the 10000 test images: %d %%, loss = %f' % (
                                    100 * correct / total, running_loss / len(testloader)))
    return running_loss / len(testloader)


In [7]:
import os
from tqdm import tqdm
print('Start Training')
if not os.path.exists('./models'):
    os.mkdir('./models')

training_losses = []
testing_losses = []
num_epochs = 10


for epoch in range(num_epochs):  # loop over the dataset multiple times
    print('epoch ', epoch + 1)
    train_loss = train(epoch, trainloader, optimizer, criterion)
    test_loss = test(testloader, model)
#     classwise_test(testloader, net)
    
    
    training_losses.append(train_loss)
    testing_losses.append(test_loss)

print('Finished Training')


  1%|▏         | 8/600 [00:00<00:07, 78.57it/s]

Start Training
epoch  1


100%|██████████| 600/600 [00:05<00:00, 109.92it/s]
 12%|█▏        | 12/100 [00:00<00:00, 114.43it/s]

epoch 1 training loss: 0.297


100%|██████████| 100/100 [00:00<00:00, 119.68it/s]
  2%|▏         | 11/600 [00:00<00:05, 103.81it/s]

Accuracy of the network on the 10000 test images: 95 %, loss = 0.145779
epoch  2


100%|██████████| 600/600 [00:05<00:00, 109.05it/s]
 12%|█▏        | 12/100 [00:00<00:00, 114.85it/s]

epoch 2 training loss: 0.117


100%|██████████| 100/100 [00:00<00:00, 111.83it/s]
  2%|▏         | 10/600 [00:00<00:06, 95.28it/s]

Accuracy of the network on the 10000 test images: 96 %, loss = 0.103900
epoch  3


100%|██████████| 600/600 [00:05<00:00, 107.16it/s]
 12%|█▏        | 12/100 [00:00<00:00, 117.00it/s]

epoch 3 training loss: 0.076


100%|██████████| 100/100 [00:00<00:00, 121.26it/s]
  2%|▏         | 11/600 [00:00<00:05, 108.73it/s]

Accuracy of the network on the 10000 test images: 97 %, loss = 0.075954
epoch  4


100%|██████████| 600/600 [00:05<00:00, 110.81it/s]
 12%|█▏        | 12/100 [00:00<00:00, 115.27it/s]

epoch 4 training loss: 0.054


100%|██████████| 100/100 [00:00<00:00, 120.96it/s]
  2%|▏         | 11/600 [00:00<00:05, 103.38it/s]

Accuracy of the network on the 10000 test images: 97 %, loss = 0.072981
epoch  5


100%|██████████| 600/600 [00:05<00:00, 109.59it/s]
 12%|█▏        | 12/100 [00:00<00:00, 115.83it/s]

epoch 5 training loss: 0.040


100%|██████████| 100/100 [00:00<00:00, 120.36it/s]
  2%|▏         | 11/600 [00:00<00:05, 103.85it/s]

Accuracy of the network on the 10000 test images: 97 %, loss = 0.067757
epoch  6


100%|██████████| 600/600 [00:05<00:00, 108.89it/s]
 13%|█▎        | 13/100 [00:00<00:00, 120.70it/s]

epoch 6 training loss: 0.029


100%|██████████| 100/100 [00:00<00:00, 122.71it/s]
  2%|▏         | 11/600 [00:00<00:05, 106.24it/s]

Accuracy of the network on the 10000 test images: 97 %, loss = 0.064904
epoch  7


100%|██████████| 600/600 [00:05<00:00, 105.35it/s]
 12%|█▏        | 12/100 [00:00<00:00, 114.61it/s]

epoch 7 training loss: 0.022


100%|██████████| 100/100 [00:00<00:00, 119.60it/s]
  2%|▏         | 10/600 [00:00<00:05, 99.72it/s]

Accuracy of the network on the 10000 test images: 97 %, loss = 0.064587
epoch  8


100%|██████████| 600/600 [00:05<00:00, 109.07it/s]
 12%|█▏        | 12/100 [00:00<00:00, 112.57it/s]

epoch 8 training loss: 0.017


100%|██████████| 100/100 [00:00<00:00, 120.71it/s]
  2%|▏         | 11/600 [00:00<00:05, 105.25it/s]

Accuracy of the network on the 10000 test images: 98 %, loss = 0.068411
epoch  9


100%|██████████| 600/600 [00:05<00:00, 110.23it/s]
 12%|█▏        | 12/100 [00:00<00:00, 115.31it/s]

epoch 9 training loss: 0.014


100%|██████████| 100/100 [00:00<00:00, 121.15it/s]
  2%|▏         | 11/600 [00:00<00:05, 102.29it/s]

Accuracy of the network on the 10000 test images: 98 %, loss = 0.069623
epoch  10


100%|██████████| 600/600 [00:05<00:00, 110.86it/s]
 12%|█▏        | 12/100 [00:00<00:00, 116.93it/s]

epoch 10 training loss: 0.010


100%|██████████| 100/100 [00:00<00:00, 121.01it/s]

Accuracy of the network on the 10000 test images: 98 %, loss = 0.068435
Finished Training



