In [69]:
import numpy as np
import matplotlib.pyplot as plt
import warnings
import torch
from torchvision.datasets import MNIST
from torchvision import transforms
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import functional as F
from google.colab import drive
warnings.filterwarnings("ignore")
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [70]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [71]:
train_data = MNIST(root='./data', train=True, download=True, transform=transform)
test_data = MNIST(root='./data', train=False, download=True, transform=transform)

In [72]:
VALIDATION = 0.2
BATCH_SIZE = 64
EPOCHS = 10

In [73]:
idx = np.arange(len(train_data))
np.random.shuffle(idx)
split = int(np.floor(VALIDATION * len(train_data)))
train_idx, validation_idx = idx[split:], idx[:split]

train_sample = torch.utils.data.sampler.SubsetRandomSampler(train_idx)
validation_sample = torch.utils.data.sampler.SubsetRandomSampler(validation_idx)

# when use sampler the shuffle is ignored
train_loader = DataLoader(train_data, batch_size=BATCH_SIZE, sampler=train_sample)
validation_loader = DataLoader(train_data, batch_size=BATCH_SIZE, sampler=validation_sample)
test_loader = DataLoader(test_data, batch_size=BATCH_SIZE)

In [74]:
for data, target in train_loader:
    print(data.shape, target.shape)
    break

torch.Size([64, 1, 28, 28]) torch.Size([64])


# nn.Sequential:
Below we have defined the Deep Neural Network archietecture with the help of nn.Sequential.

In [101]:
model = nn.Sequential(
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Dropout(0.2),

    nn.Linear(256, 64),
    nn.ReLU(),
    nn.Dropout(0.2),

    nn.Linear(64, 32),
    nn.ReLU(),
    nn.Dropout(0.2),

    nn.Linear(32, 10)
)

In [102]:
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# GPU Support:
First check that your GPU is working in Pytorch:

In [103]:
print(torch.cuda.is_available())

True


In [104]:
device = torch.device(
    "cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda


In [105]:
model.to(device)

Sequential(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.2, inplace=False)
  (3): Linear(in_features=256, out_features=64, bias=True)
  (4): ReLU()
  (5): Dropout(p=0.2, inplace=False)
  (6): Linear(in_features=64, out_features=32, bias=True)
  (7): ReLU()
  (8): Dropout(p=0.2, inplace=False)
  (9): Linear(in_features=32, out_features=10, bias=True)
)

In [107]:
for epoch in range(EPOCHS):
  train_loss, valid_loss = [], []

  model.train()
  for data, target in train_loader:

    data, target = data.to(device), target.to(device)

    data = data.view(data.shape[0], -1)

    optimizer.zero_grad()

    output = model(data)

    loss_value = loss(output, target)

    loss_value.backward()

    optimizer.step()

    train_loss.append(loss_value.item())

  print ("Epoch:", epoch, "Training Loss: ", np.mean(train_loss))

Epoch: 0 Training Loss:  0.24178626283506552
Epoch: 1 Training Loss:  0.20305461174001296
Epoch: 2 Training Loss:  0.18748830536256234
Epoch: 3 Training Loss:  0.16939764230449994
Epoch: 4 Training Loss:  0.15851584869995713
Epoch: 5 Training Loss:  0.14813509132837255
Epoch: 6 Training Loss:  0.14135003662109374
Epoch: 7 Training Loss:  0.13241480752887824
Epoch: 8 Training Loss:  0.13126647907868028
Epoch: 9 Training Loss:  0.12394213820373019


# Save & Load The Model:
As now the model has been trained , we will save the model and load again for future use.

In [108]:
print("printing our model: \n\n", model)


printing our model: 

 Sequential(
  (0): Linear(in_features=784, out_features=256, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.2, inplace=False)
  (3): Linear(in_features=256, out_features=64, bias=True)
  (4): ReLU()
  (5): Dropout(p=0.2, inplace=False)
  (6): Linear(in_features=64, out_features=32, bias=True)
  (7): ReLU()
  (8): Dropout(p=0.2, inplace=False)
  (9): Linear(in_features=32, out_features=10, bias=True)
)


To see the weights and biases of the model

The parameters for PyTorch models are stored in a model's state_dict. state_dict containts the weights & biases of each of the layer , which can be accesed by state_dict().keys().

Below we can see that , every layer's weight and biases have been printed out -

In [109]:
print("Models layer keys: \n\n", model.state_dict().keys())

Models layer keys: 

 odict_keys(['0.weight', '0.bias', '3.weight', '3.bias', '6.weight', '6.bias', '9.weight', '9.bias'])


# Weights and Bias Values

In [110]:
for params, values in model.state_dict().items():
    print(params, ":", values)
    break

0.weight : tensor([[ 0.0349, -0.0043, -0.0057,  ...,  0.0188,  0.0100,  0.0073],
        [-0.0158, -0.0011, -0.0023,  ..., -0.0206,  0.0304, -0.0233],
        [-0.0259, -0.0173,  0.0187,  ...,  0.0158,  0.0196, -0.0012],
        ...,
        [ 0.0149,  0.0209,  0.0136,  ...,  0.0182,  0.0537,  0.0171],
        [ 0.0120, -0.0070, -0.0152,  ...,  0.0267,  0.0261,  0.0423],
        [ 0.0391,  0.0377,  0.0366,  ..., -0.0065,  0.0196,  0.0039]],
       device='cuda:0')


# Model's statedict can be saved using the **torch.save** which also accepts the models name as parameter as - model.pth

In [116]:
torch.save(model.state_dict(), 'model.pth')

# Saved model can also be loaded using the **torch.load()** using the saved model's path

In [117]:
state_dict = torch.load('model.pth')
print(state_dict.keys())

odict_keys(['0.weight', '0.bias', '3.weight', '3.bias', '6.weight', '6.bias', '9.weight', '9.bias'])


# To load the state dict in to the new model, you do **model.load_state_dict(state_dict)**.

In [118]:
model.load_state_dict(state_dict)

<All keys matched successfully>

# **Important Note**: Loading the state dict will work only if the new model architecture is exactly the same as the saved's model's architecture