In [16]:
# based on tutorial: https://www.youtube.com/watch?v=Jy4wM2X21u0
# imports
import torch # entire lib
import torch.nn as nn # all neuronetwork modules
import torch.optim as optim # optimization algorithms
import torch.nn.functional as F # activation functions
from torch.utils.data import DataLoader # dataset management
import torchvision.datasets as datasets # standard datasets (for easy importing)
import torchvision.transforms as transforms # transformations to perform on a dataset

In [17]:
# create fully connected network
class NN(nn.Module): # inherit from nn.Module
    """Fully connected classifier with a single hidden layer.

    Args:
        input_size: number of features per sample (28*28=784 for flattened MNIST images).
        num_classes: number of scores produced per sample (one per class).
        hidden_size: width of the hidden layer; defaults to 50.
    """

    def __init__(self, input_size, num_classes, hidden_size=50):
        super().__init__() # run nn.Module's own initialization before registering layers
        self.fc1 = nn.Linear(input_size, hidden_size) # input -> hidden layer
        self.fc2 = nn.Linear(hidden_size, num_classes) # hidden layer -> one raw score per class

    def forward(self, x):
        """Return the raw class scores for ``x`` (no softmax is applied here).

        The last dimension of ``x`` must hold ``input_size`` features.
        """
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# smoke test: push one random batch through a freshly built network and look at the output shape
model = NN(input_size=784, num_classes=10) # 784 = 28*28 input pixels, 10 = digit classes 0-9
x = torch.randn((64, 784)) # stand-in for a batch of 64 flattened images
# torch.randn fills the tensor with samples from the standard normal distribution (mean 0, variance 1)

print(model(x).size()) # must be 64 by 10

torch.Size([64, 10])


In [18]:
# choose where tensors and the model live: the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [19]:
# reproducibility: seed torch's default random number generator before the network
# and the data loaders are created, so a Restart & Run All starts from the same random state
seed = 42
torch.manual_seed(seed)

# hyperparameters
input_size = 784 # 28*28 pixels per flattened image
num_classes = 10 # digits 0-9
learning_rate = 0.001
batch_size = 64
num_epochs = 10

In [20]:
# load data
data_root = 'dataset/' # directory where the MNIST files are downloaded and kept
to_tensor = transforms.ToTensor() # turns each image into a tensor; shared by both splits
train_dataset = datasets.MNIST(root=data_root, train=True, transform=to_tensor, download=True)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True) # reshuffle so batches differ from epoch to epoch
test_dataset = datasets.MNIST(root=data_root, train=False, transform=to_tensor, download=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False) # accuracy does not depend on sample order, so keep evaluation order fixed

In [21]:
# build the network from the configured sizes, then move its parameters to the selected device
model = NN(input_size, num_classes)
model = model.to(device)

In [22]:
# Adam updates the network's parameters; cross-entropy compares the class scores with the labels
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [25]:
# train network
model.train() # make sure the network is in training mode, even if an earlier evaluation was interrupted
for epoch in range(num_epochs): # 1 epoch means that network has seen all the images in the dataset
    for data, targets in train_loader: # data - batch of images, targets - correct label for each image
        # get data to cuda if possible
        data = data.to(device=device) # to - returns a Tensor on the specified device
        targets = targets.to(device=device)

        # get to correct shape: unroll every image to one long vector, '-1' flattens all remaining dimensions
        data = data.reshape(data.shape[0], -1) # from torch.Size([64, 1, 28, 28]) to torch.Size([64, 784]) (1 is the single grayscale channel)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        # backward
        optimizer.zero_grad() # clear gradients left over from the previous batch
        loss.backward() # compute gradients of the loss w.r.t. the weights (nothing is updated yet)

        # gradient descent or adam step
        optimizer.step() # this is where the weights are actually updated
print('Train finished')

Train finished


In [27]:
# check accuracy on training and test to see how good is our model
def check_accuracy(loader, model):
    """Print how many samples from ``loader`` the ``model`` classifies correctly.

    Args:
        loader: iterable yielding ``(inputs, labels)`` batches. If ``loader.dataset``
            has a ``train`` attribute, it decides whether the report is labelled
            as training or test data; otherwise a generic heading is printed.
        model: network returning one score per class for every flattened sample.

    Returns:
        None. The result is only printed.

    The model is switched to evaluation mode while predictions are made and is put
    back into the mode it was in before the call, even if evaluation raises.
    """
    is_train = getattr(getattr(loader, 'dataset', None), 'train', None)
    if is_train is None:
        print('Checking accuracy')
    elif is_train:
        print('Checking accuracy on training data')
    else:
        print('Checking accuracy on test data')

    # run on the device the model's parameters live on instead of relying on a global `device`
    first_param = next(model.parameters(), None)
    model_device = None if first_param is None else first_param.device

    num_correct = 0
    num_samples = 0
    was_training = model.training # remembered so the caller's mode can be restored afterwards
    model.eval() # you can call either model.eval() or model.train(mode=False) to tell that you are testing

    try:
        with torch.no_grad(): # no gradients are needed for evaluation
            for x, y in loader:
                if model_device is not None:
                    x = x.to(device=model_device)
                    y = y.to(device=model_device)
                x = x.reshape(x.shape[0], -1) # flatten every sample to a vector

                scores = model(x) # shape: (batch, num_classes)
                _, predictions = scores.max(1) # index of the highest score = predicted class
                num_correct += (predictions == y).sum().item() # .item() keeps the counter a plain int
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)

    if num_samples == 0:
        # nothing was evaluated; avoid dividing by zero below
        print('No samples to evaluate')
        return

    print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}')

# report accuracy on the training split first, then on the held-out test split (97.26 in the recorded run)
for loader in (train_loader, test_loader):
    check_accuracy(loader, model)

Checking accuracy on training data
Got 59679 / 60000 with accuracy 99.47
Checking accuracy on test data
Got 9726 / 10000 with accuracy 97.26
