In [2]:
%matplotlib inline
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
print("Modules loaded")

Modules loaded


In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

Using cpu device


In [4]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
        nn.Linear(28*28, 512),
        nn.ReLU(),
        nn.Linear(512, 512),
        nn.ReLU(),
        nn.Linear(512, 10),
        nn.ReLU()
    )
    
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [5]:
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)


In [6]:
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([7])


In [7]:
print(f"First Linear weights: {model.linear_relu_stack[0].weight} \n")
print(f"First Linear weights: {model.linear_relu_stack[0].bias} \n")

First Linear weights: Parameter containing:
tensor([[ 0.0352, -0.0019,  0.0276,  ...,  0.0038, -0.0181,  0.0119],
        [-0.0243, -0.0040,  0.0111,  ...,  0.0047,  0.0204,  0.0301],
        [ 0.0155, -0.0048, -0.0231,  ..., -0.0274, -0.0220,  0.0199],
        ...,
        [ 0.0328, -0.0177,  0.0161,  ..., -0.0144, -0.0251, -0.0011],
        [-0.0026,  0.0220, -0.0269,  ..., -0.0010,  0.0203,  0.0187],
        [-0.0158, -0.0356, -0.0045,  ..., -0.0121, -0.0208, -0.0275]],
       requires_grad=True) 

First Linear weights: Parameter containing:
tensor([-1.4680e-02, -1.5305e-02, -1.2052e-02, -2.0271e-02,  1.1510e-02,
         2.0742e-02,  1.1668e-02, -3.5465e-02,  3.2835e-02,  1.1606e-02,
        -3.3731e-02, -3.4751e-02,  8.6885e-03,  2.0563e-02,  1.7087e-02,
         3.1093e-02, -2.3801e-02,  1.9098e-03, -1.4252e-02,  1.9442e-02,
        -1.7422e-02,  2.0704e-02, -1.0668e-02,  4.1207e-03,  2.8590e-02,
        -1.1370e-02, -7.6407e-03, -1.2068e-02, -3.0384e-02,  3.3704e-03,
        -3.

In [8]:
input_image = torch.rand(3,28,28)
print(input_image.size())

torch.Size([3, 28, 28])


In [9]:
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.size())

torch.Size([3, 784])


In [10]:
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.size())

torch.Size([3, 20])


In [11]:
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[ 0.2382, -0.3125,  0.4690, -0.3651, -0.1723,  0.2923, -0.1185,  0.0232,
          0.1019, -0.0761, -0.1298, -0.0152, -0.4816,  0.0981,  0.2682, -0.4743,
          0.4099, -0.0503, -0.0365,  0.0605],
        [ 0.5000, -0.1458,  0.2309, -0.0489, -0.1231, -0.0740,  0.1045,  0.0135,
          0.2630,  0.0781, -0.1491,  0.0503, -0.3803,  0.0650,  0.0217, -0.0301,
          0.6531,  0.2403, -0.0524, -0.1480],
        [ 0.5194,  0.1616,  0.1917, -0.2057, -0.0661, -0.3417,  0.2378, -0.1309,
          0.1831,  0.2072, -0.0525,  0.1594, -0.1588, -0.0906,  0.1708,  0.0611,
          0.2773,  0.1345,  0.2013, -0.2320]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.2382, 0.0000, 0.4690, 0.0000, 0.0000, 0.2923, 0.0000, 0.0232, 0.1019,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0981, 0.2682, 0.0000, 0.4099, 0.0000,
         0.0000, 0.0605],
        [0.5000, 0.0000, 0.2309, 0.0000, 0.0000, 0.0000, 0.1045, 0.0135, 0.2630,
         0.0781, 0.0000, 0.0503, 0.0000, 0.0650, 0.02

In [12]:
seq_modules = nn.Sequential(
flatten,
layer1,
nn.ReLU(),
nn.Linear(20, 10)
)
input_image = torch.rand(3,28,28)
logits = seq_modules(input_image)

In [13]:
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

In [15]:
print("Model structure: ", model, "\n\n")
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure:  NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
) 


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0352, -0.0019,  0.0276,  ...,  0.0038, -0.0181,  0.0119],
        [-0.0243, -0.0040,  0.0111,  ...,  0.0047,  0.0204,  0.0301]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0147, -0.0153], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0096,  0.0330, -0.0159,  ...,  0.0287, -0.0343, -0.0419],
        [-0.0367, -0.0219, -0.0334,  ...,  0.0017, -0.0235,  0.0381]],
       grad_fn=<SliceBackward0>) 

Layer: linear_re

Check your knowledge
A. The base class for all neural network modules in PyTorch is torch.nn.Module
1. True
2. False

In [None]:
## True