1. Saving a model: We can save a modle using pytorch `torch.save` method. We can also save the ***leraned parameter*** in an internal state dictionary called `state_dict()`. ***`Syntax: torch.save(model.state_dict(), "data/model.pth")`.***
2. load the model: To load the model, we will define the model class which contains the state and parameters of the neural network used to train the model.When loading model weights, we needed to instantiate the model class first, because the class defines the structure of a network. Next, we load the parameters using the `load_state_dict()` method.

In [27]:
# import necessay modules
import torch
import torchinfo
import torch.nn.functional as F  # Parameterless functions, like (some) activation functions
import torchvision.datasets as datasets  # Standard datasets
from torch.utils.data import Dataset, DataLoader # Gives easier dataset managment by creating mini batches etc.
import torchvision.transforms as transforms  # Transformations we can perform on our dataset for augmentation
from torch import optim  # For optimizers like SGD, Adam, etc.
from torch import nn  # All neural network modules
from tqdm import tqdm # for nice 

In [4]:
# device config
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu') # 

In [5]:
# create CNN layer:
class CNNNet(nn.Module):
    def __init__(self, in_channels =1, num_classes=10):
        super(CNNNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels= 8, kernel_size=(3,3),stride=(1,1), padding=(1,1))
        self.pool = nn.MaxPool2d(kernel_size=(2,2), stride=(2,2))
        self.conv2 = nn.Conv2d(in_channels = 8, out_channels= 16, kernel_size=(3,3),stride=(1,1), padding=(1,1))
        self.fc1 = nn.Linear(16*7*7, num_classes)
    
    def forward(self,x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fc1(x)

        return x


In [19]:
# Hyperparameter
in_channel = 1 # for minist 28*28
num_classes = 10 # 0-9
learning_rate = 0.001
batch_size = 64
num_epochs = 3
load_model = True

In [7]:
# Load Data
train_dataset = datasets.MNIST(
    root="./data", train=True, transform=transforms.ToTensor(), download=True
)
test_dataset = datasets.MNIST(
    root="./data", train=False, transform=transforms.ToTensor(), download=True
)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)


[notice] A new release of pip is available: 23.1.2 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [37]:
# Initialize network
model = CNNNet().to(device)
print(torchinfo.summary(model))

Layer (type:depth-idx)                   Param #
CNNNet                                   --
├─Conv2d: 1-1                            80
├─MaxPool2d: 1-2                         --
├─Conv2d: 1-3                            1,168
├─Linear: 1-4                            7,850
Total params: 9,098
Trainable params: 9,098
Non-trainable params: 0


In [9]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [16]:
def save_checkpoint(state, file_name="checkpoints/my_checkpoint.pth.tar"):
    print("=>saving the checkpoint")
    torch.save(state, file_name)

In [20]:
def load_checkpoint(model, checkpoint):
    print("=> loading checkpoint:: ")
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])

In [22]:
# Train Network
def train_network(num_epochs, loader, model, device, loss_fn, optimizer):
    if load_model:
        load_checkpoint(model, torch.load("checkpoints/my_checkpoint.pth.tar"))
    
    for epoch in range(num_epochs):
        losses = []
        check_point = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
        if(epoch==2):
            save_checkpoint(check_point)
        
        for batch_idx, (data, targets) in enumerate(tqdm(loader)):
            # Get data to cuda if possible
            data = data.to(device=device)
            targets = targets.to(device=device)

            # Forward
            scores = model(data)
            loss = loss_fn(scores, targets)
            losses.append(loss.item())

            # Backward
            optimizer.zero_grad()
            loss.backward()

            # Gradient descent or adam step
            optimizer.step()
            
        mean_loss = sum(losses)/len(losses)
        print(f"loss at each epoch {mean_loss:.5f}")
            
train_network(num_epochs, train_loader, model, device, criterion, optimizer)

=> loading checkpoint:: 


  0%|          | 1/938 [00:00<02:36,  5.99it/s]

100%|██████████| 938/938 [00:21<00:00, 43.52it/s]


loss at each epoch 0.02685


100%|██████████| 938/938 [00:18<00:00, 49.95it/s]


loss at each epoch 0.02519
=>saving the checkpoint


100%|██████████| 938/938 [00:18<00:00, 49.50it/s]

loss at each epoch 0.02254





In [23]:
train_network(num_epochs, train_loader, model, device, criterion, optimizer)

=> loading checkpoint:: 


100%|██████████| 938/938 [00:21<00:00, 43.23it/s]


loss at each epoch 0.02301


100%|██████████| 938/938 [00:18<00:00, 49.49it/s]


loss at each epoch 0.02069
=>saving the checkpoint


100%|██████████| 938/938 [00:19<00:00, 49.06it/s]

loss at each epoch 0.01947





In [28]:
# Check accuracy on training & test to see how good our model
def check_accuracy(loader, model):
    num_correct = 0
    num_samples = 0
    model.eval()

    # We don't need to keep track of gradients here so we wrap it in torch.no_grad()
    with torch.no_grad():
        # Loop through the data
        for x, y in loader:

            # Move data to device
            x = x.to(device=device)
            y = y.to(device=device)

            # Forward pass
            scores = model(x)
            _, predictions = scores.max(1)

            # Check how many we got correct
            num_correct += (predictions == y).sum()

            # Keep track of number of samples
            num_samples += predictions.size(0)

    model.train()
    return num_correct / num_samples

In [29]:
# Check accuracy on training & test to see how good our model
print(f"Accuracy on training set: {check_accuracy(train_loader, model)*100:.2f}")
print(f"Accuracy on test set: {check_accuracy(test_loader, model)*100:.2f}")

Accuracy on training set: 98.05
Accuracy on test set: 98.00
