In [2]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

BATCH_SIZE = 128
EPOCHS = 10
LEARNING_RATE = 0.001

## Mnist dataset traininig

### Download dataset

In [4]:
# Download dataset

def download_mnist_datasets():
    # Dataset MNIST class is concrete implementation of dataset class that comes with Pytorch.
    # Dataset class allows us to store our data in orderly manner (labels, annoteations, images, etc.) and then use them in our model to train.
    train_data = datasets.MNIST(
        # Where to store the dataset, which we downloading -> in a new folder "data" in working dir.
        root="data",
        # If the download has not beed downloaded yet, download it.
        download=True,
        # we are intereseted in the training set.
        train=True,
        # transform the data to a tensor.
        transform=ToTensor(),
    )
    validation_data = datasets.MNIST(
    root="data",
    download=True,
    # we are not intereseted in the training set.
    train=False,
    transform=ToTensor(),
)
    return train_data, validation_data

### Data loader 

In [8]:
if __name__ == "__main__":
    #Download mnist dataset
    train_data, _ = download_mnist_datasets()
    print("Mnist dataset downloaded")
    
    ## Create Data loader :
    # Data loader - Class that we can use to wrap a dataset and fetch (load) data in batches
    # Data loader is an iterable object, allows to load datasets that are queit heavy and not fit in memory.

    # Pass the datasetand batch size
    train_data_loader = DataLoader(train_data, batch_size=BATCH_SIZE)

Mnist dataset downloaded


### Create the model

In [13]:
#Build the model:

# Models that are created with Pytorch are subclasses of nn.Module class.
# A model is a subclass of nn.Module class, when we create a model we inherit from nn.Module class and override the __init__ and forward() methods.

# Out NN class:
class FeedForwardNet(nn.Module):
    # In Pytorch we need to define constructor (__init__ method) and forward method.
    
    #constructor:
    def __init__(self):
        #super() - allows us to use all the methods and properties of the parent class (nn.Module).
        super(FeedForwardNet ,self).__init__()
        # Store all the leyers as attributes of the class.
        # nn.Flatten - layer that converts the input into 1D tensor.
        self.flatten = nn.Flatten()
        # nn.Sequential() - container for multiple layers, that are executed sequentially.
        self.dense_layers = nn.Sequential(
            nn.Linear(28*28, 256), # 1st layer - Linear = Dense, 28*28 = input size, 256 = output size.
            nn.ReLU(), # Activation function.
            nn.Linear(256, 10) # 2nd layer - Linear = Dense, 256 = input size, 10 = output size.
        )
        self.softmax = nn.Softmax(dim=1) # Transforms the output into probability distribution . Kind of normalization.
    
    # Defining forward method
    # Forward method indicates Pytroch in what sequence to process the data.
    def forward(self, input_data):
        flattened_data = self.flatten(input_data)
        logits = self.dense_layers(flattened_data)
        predictions = self.softmax(logits)
        return predictions

### Build Model

In [15]:
# Build model
# Create an instance of the model:
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

feed_forward_net = FeedForwardNet().to(device)

### Train Function

In [17]:
# Train model:

def train_one_epoch(model, data_loader, loss_fn, optimizer, device):
    # cerate a loop that will iterate over the data loader
    for inputs, targets in data_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        
        # calcualte loss
        predictions = model(inputs)
        loss = loss_fn(predictions, targets)
        
        # backpropagate loss and update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f"Loss: {loss.item()}")
               
def train(model, data_loader, loss_fn, optimizer, device, epochs):
    for i in range(epochs):
        print(f"Epoch {i+1}")
        train_one_epoch(model, data_loader, loss_fn, optimizer, device)
        print("---------------------------")
    print("Finished training")

In [23]:
#Instantuiate optimizer and loss function

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(feed_forward_net.parameters(), lr=LEARNING_RATE)

### Training the model

In [24]:
train(feed_forward_net, train_data_loader, loss_fn, optimizer, device, EPOCHS)

# Saves model .state_dict() - method that returns the state of the model as a dictionary.
torch.save(feed_forward_net.state_dict(), "feedforwardnet.pth")

print("Model trained and saved")

Epoch 1
Loss: 1.5129016637802124
---------------------------
Epoch 2
Loss: 1.5009535551071167
---------------------------
Epoch 3
Loss: 1.5004338026046753
---------------------------
Epoch 4
Loss: 1.4847851991653442
---------------------------
Epoch 5
Loss: 1.4795503616333008
---------------------------
Epoch 6
Loss: 1.4752353429794312
---------------------------
Epoch 7
Loss: 1.4729794263839722
---------------------------
Epoch 8
Loss: 1.4723771810531616
---------------------------
Epoch 9
Loss: 1.4725545644760132
---------------------------
Epoch 10
Loss: 1.4722226858139038
---------------------------
Finished training
Model trained and saved


## Inference making predictions

### Prediction and mapping function

In [25]:
class_mapping = [
    "0",
    "1",
    "2",
    "3",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
]

def predict(model, input, target, class_mapping):
    # eval is method that changes how the pytorch model behaves , when activatted it will disable dropout and batch normalization.
    model.eval()
    # context manager to disable calculation of gradient descent
    with torch.no_grad():
        predictions = model(input)
        # 2D tensors (1 num of inputs, 10 num of classes) -> [[0.1, 0.01, ... , 0.6]]
        predicted_index = predictions[0].argmax(0)
        predicted = class_mapping[predicted_index]
        expected = class_mapping[target]
    return predicted, expected

### Running Inference

In [29]:
if __name__ =="__main__":
    # load back the model
    feed_forward_net = FeedForwardNet()
    state_dict = torch.load("feedforwardnet.pth")
    feed_forward_net.load_state_dict(state_dict)
    
    # load MNIST validation dataset
    _, validation_data = download_mnist_datasets()
    
    # get a sample from the validation dataset for inference
    # get the first item and target from the validation dataset
    input, target = validation_data[7][0], validation_data[7][1]
    
    # make an inference
    predicted, expected = predict(feed_forward_net, input, target, class_mapping)
    
    print(f"Predicted: '{predicted}', expected: '{expected}'")

Predicted: '9', expected: '9'
