In [1]:
# In this notebook, you learn:
# 
# 1) How to use nn.Linear in PyTorch.

In [2]:
import torch
import torch.nn as nn

## [nn.Linear](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html#linear)

In [3]:
# Useful resources:
# 
# 1) https://docs.kanaries.net/topics/Python/nn-linear
#       -- A very good blog post that explains what 'nn.Linear' is and how to use it.

In [6]:
# nn.Linear applies a linear transformation to its input:
#     output = input @ weight.T + bias
#
# The layer built below consumes inputs with 5 features and produces 1 feature,
# so a 1D input tensor of size 5 yields a 1D output tensor of size 1.
layer_kwargs = {"in_features": 5, "out_features": 1, "bias": True}
linear_layer = nn.Linear(**layer_kwargs)
print(linear_layer)

Linear(in_features=5, out_features=1, bias=True)


In [7]:
# A PyTorch module such as linear_layer is itself callable: calling it with an input
# tensor applies the layer's linear transformation and returns the result.
input1 = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float)
print(input1.shape, input1, sep="\n")
print("-" * 150)
output1 = linear_layer(input1)
print(output1.shape, output1, sep="\n")

torch.Size([5])
tensor([1., 2., 3., 4., 5.])
------------------------------------------------------------------------------------------------------------------------------------------------------
torch.Size([1])
tensor([0.1204], grad_fn=<ViewBackward0>)


In [8]:
# linear_layer holds two parameter tensors, both used by the linear transformation:
#   weight -- shape (out_features, in_features) = (1, 5)
#   bias   -- shape (out_features,)             = (1,)
# Each one is printed together with its shape, weight first and bias second.
for parameter in (linear_layer.weight, linear_layer.bias):
    print(parameter, parameter.shape)

Parameter containing:
tensor([[-0.1352,  0.1167,  0.0176,  0.1402, -0.0964]], requires_grad=True) torch.Size([1, 5])
Parameter containing:
tensor([-0.1091], requires_grad=True) torch.Size([1])


In [9]:
# Now, what happens if the input is not a 1D tensor? What if the input is a 2D tensor or 
# a 3D tensor or some other high dimensional tensor?
#
# A linear layer always operates on the last dimension of the input tensor. The input tensor
# can have any shape, provided that the size of its last dimension is equal to the number
# of input features (in_features) expected by the linear layer.

In [12]:
input2 = torch.tensor(data=[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]], dtype=torch.float)
print(input2.shape)
print(input2)
print("-" * 150)

# input2 is a 2D tensor containing two 1D tensors: [1, 2, 3, 4, 5] and [6, 7, 8, 9, 10].
# The linear transformation operates on the last dimension, i.e., it transforms the
# two 1D tensors independently of each other:
#
# output2[0] == linear_layer([1, 2, 3, 4, 5])  --> Same as output3
# output2[1] == linear_layer([6, 7, 8, 9, 10]) --> Same as output4
#
# The concrete numbers depend on the randomly initialized weight and bias, so they are
# verified by the assertions at the end of this cell instead of being hard-coded here.
#
# Also, notice that every dimension except the last one keeps its size in the output;
# the last dimension changes from in_features (5) to out_features (1): (2, 5) --> (2, 1).
output2 = linear_layer(input2)
print(output2.shape)
print(output2)
print("-" * 150)

output3 = linear_layer(torch.tensor(data=[1, 2, 3, 4, 5], dtype=torch.float))
print(output3)
print("-" * 150)

output4 = linear_layer(torch.tensor(data=[6, 7, 8, 9, 10], dtype=torch.float))
print(output4)

# Executable version of the claims above. allclose (rather than ==) is used because the
# batched and the unbatched calls may differ by floating-point rounding.
assert output2.shape == (*input2.shape[:-1], linear_layer.out_features)
assert torch.allclose(output2[0], output3)
assert torch.allclose(output2[1], output4)

torch.Size([2, 5])
tensor([[ 1.,  2.,  3.,  4.,  5.],
        [ 6.,  7.,  8.,  9., 10.]])
------------------------------------------------------------------------------------------------------------------------------------------------------
torch.Size([2, 1])
tensor([[0.1204],
        [0.3343]], grad_fn=<AddmmBackward0>)
------------------------------------------------------------------------------------------------------------------------------------------------------
tensor([0.1204], grad_fn=<ViewBackward0>)
------------------------------------------------------------------------------------------------------------------------------------------------------
tensor([0.3343], grad_fn=<ViewBackward0>)


In [13]:
input3 = torch.tensor(data=[[[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]], [[11, 12, 13, 14, 15], [16, 17, 18, 19, 20]]], dtype=torch.float)
print(input3.shape)
print(input3)
print("-" * 150)
# input3 is a 3D tensor containing four 1D tensors: [1, 2, 3, 4, 5]; [6, 7, 8, 9, 10];
# [11, 12, 13, 14, 15] and [16, 17, 18, 19, 20].
# The linear transformation operates on the last dimension, i.e., it transforms the
# four 1D tensors independently of each other:
#
# output5[0, 0] == linear_layer([1, 2, 3, 4, 5])      --> Same as output6
# output5[0, 1] == linear_layer([6, 7, 8, 9, 10])     --> Same as output7
# output5[1, 0] == linear_layer([11, 12, 13, 14, 15]) --> Same as output8
# output5[1, 1] == linear_layer([16, 17, 18, 19, 20]) --> Same as output9
#
# The concrete numbers depend on the randomly initialized weight and bias, so they are
# verified by the assertions at the end of this cell instead of being hard-coded here.
#
# Also, notice that every dimension except the last one keeps its size in the output;
# the last dimension changes from in_features (5) to out_features (1):
# (2, 2, 5) --> (2, 2, 1).
output5 = linear_layer(input3)
print(output5.shape)
print(output5)
print("-" * 150)

output6 = linear_layer(torch.tensor(data=[1, 2, 3, 4, 5], dtype=torch.float))
print(output6)
print("-" * 150)

output7 = linear_layer(torch.tensor(data=[6, 7, 8, 9, 10], dtype=torch.float))
print(output7)
print("-" * 150)

output8 = linear_layer(torch.tensor(data=[11, 12, 13, 14, 15], dtype=torch.float))
print(output8)
print("-" * 150)

output9 = linear_layer(torch.tensor(data=[16, 17, 18, 19, 20], dtype=torch.float))
print(output9)

# Executable version of the claims above. allclose (rather than ==) is used because the
# batched and the unbatched calls may differ by floating-point rounding.
assert output5.shape == (*input3.shape[:-1], linear_layer.out_features)
assert torch.allclose(output5[0, 0], output6)
assert torch.allclose(output5[0, 1], output7)
assert torch.allclose(output5[1, 0], output8)
assert torch.allclose(output5[1, 1], output9)

torch.Size([2, 2, 5])
tensor([[[ 1.,  2.,  3.,  4.,  5.],
         [ 6.,  7.,  8.,  9., 10.]],

        [[11., 12., 13., 14., 15.],
         [16., 17., 18., 19., 20.]]])
------------------------------------------------------------------------------------------------------------------------------------------------------
torch.Size([2, 2, 1])
tensor([[[0.1204],
         [0.3343]],

        [[0.5483],
         [0.7622]]], grad_fn=<ViewBackward0>)
------------------------------------------------------------------------------------------------------------------------------------------------------
tensor([0.1204], grad_fn=<ViewBackward0>)
------------------------------------------------------------------------------------------------------------------------------------------------------
tensor([0.3343], grad_fn=<ViewBackward0>)
------------------------------------------------------------------------------------------------------------------------------------------------------
tensor([0.5483]

In [14]:
input4 = torch.tensor(data=[[1, 2, 3], [4, 5, 6]], dtype=torch.float)
print(input4)
print(input4.shape)

# The size of the last dimension (3) is different from the number of input features (5)
# expected by linear_layer, so the call below raises a RuntimeError.
# The error is the point of this cell, so it is caught and printed instead of being left
# uncaught, which would abort a "Restart Kernel -> Run All".
try:
    output10 = linear_layer(input4)
except RuntimeError as error:
    print(f"{type(error).__name__}: {error}")

tensor([[1., 2., 3.],
        [4., 5., 6.]])
torch.Size([2, 3])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x3 and 5x1)