# Emulation of torch.nn.Linear #

This notebook executes a `torch.nn.Linear` forward pass, then repeats the same operation using matrix multiplication with a bias vector, **B**, kept separate from **X** and **W**, and finally repeats it again with the bias components integrated into **X** and **W**.

All three approaches produce the same results, up to floating-point rounding.

Thus we can directly relate **Z = W.X** from the theory sessions to **Z = X.t(W)**, which is what PyTorch does.

In [1]:
#
# Torch - Z = X.t(W) + B
#
# Note that B is the vector of bias weights. The bias unit constants are implicitly 1
#
# It's not uncommon for the bias unit constants to be represented by a column of all
# 1's added to X, rather than a separate vector
#
import torch
import torch.nn as nn
import numpy as np
#
# Seed the global Torch RNG before the layer is created. nn.Linear initialises its
# weight and bias randomly, so without a seed every value printed below changes on
# each Restart & Run All
#
SEED = 0
torch.manual_seed(SEED)
#
# Problem dimensions: rows in X, inputs per row, outputs per row
#
N_SAMPLES = 5
IN_FEATURES = 3
OUT_FEATURES = 6
#
# Define linear layer. Its weight is stored as (OUT_FEATURES, IN_FEATURES), which is
# why the forward pass multiplies X by the transpose of W
#
linear_layer = nn.Linear(IN_FEATURES, OUT_FEATURES)
#
# Multiply X by 2 to distinguish from the bias, which is internal to Torch
#
X_torch = torch.ones(N_SAMPLES, IN_FEATURES) * 2
#
# Forward pass
#
Z_torch = linear_layer(X_torch)
#
print("X:")
print(X_torch)
print()
#
print("Bias:")
print(linear_layer.bias)
print()
#
print("W:")
print(linear_layer.weight)
print()
#
print("Z:")
print(Z_torch)
print()
#

X:
tensor([[2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.]])

Bias:
Parameter containing:
tensor([ 0.4651, -0.0523,  0.4757, -0.4737, -0.2291, -0.4987],
       requires_grad=True)

W:
Parameter containing:
tensor([[-0.1085,  0.0158, -0.3217],
        [-0.1135, -0.2007, -0.2412],
        [ 0.1758,  0.1456,  0.3941],
        [ 0.3927,  0.4872,  0.4312],
        [-0.3661, -0.3290, -0.5376],
        [ 0.3550, -0.0855, -0.5272]], requires_grad=True)

Z:
tensor([[-0.3639, -1.1630,  1.9065,  2.1483, -2.6946, -1.0142],
        [-0.3639, -1.1630,  1.9065,  2.1483, -2.6946, -1.0142],
        [-0.3639, -1.1630,  1.9065,  2.1483, -2.6946, -1.0142],
        [-0.3639, -1.1630,  1.9065,  2.1483, -2.6946, -1.0142],
        [-0.3639, -1.1630,  1.9065,  2.1483, -2.6946, -1.0142]],
       grad_fn=<AddmmBackward0>)



In [2]:
#
# Matrix multiplication - Z = X.t(W) + B
#
# Get data from Torch. The weight and bias are Parameters with requires_grad=True,
# so they are detached before being converted to NumPy arrays
#
X = X_torch.numpy()
W = linear_layer.weight.detach().numpy()
B = linear_layer.bias.detach().numpy()
#
print("X:")
print(X)
print()
#
print("W:")
print(W)
print()
#
print("B:")
print(B)
print()
#
# Linear sum. B has one entry per output column and is broadcast across the rows
# of the matrix product
#
Z = np.matmul(X, np.transpose(W)) + B
#
print("Z:")
print(Z)
print()
#
# Verify, rather than compare by eye, that this reproduces the Torch forward pass.
# A small tolerance is allowed because the two libraries may round differently
#
np.testing.assert_allclose(Z, Z_torch.detach().numpy(), rtol=1e-5, atol=1e-6)
#

X:
[[2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]]

W:
[[-0.10854128  0.01580048 -0.321738  ]
 [-0.1135062  -0.20068005 -0.24117315]
 [ 0.17577547  0.1455571   0.39408076]
 [ 0.39267427  0.48715985  0.43115246]
 [-0.36613792 -0.32896414 -0.53762877]
 [ 0.354953   -0.08546546 -0.5272183 ]]

B:
[ 0.46506703 -0.052306    0.47567463 -0.4736848  -0.2290915  -0.49872625]

Z:
[[-0.3638906 -1.1630247  1.9065013  2.1482882 -2.6945531 -1.0141878]
 [-0.3638906 -1.1630247  1.9065013  2.1482882 -2.6945531 -1.0141878]
 [-0.3638906 -1.1630247  1.9065013  2.1482882 -2.6945531 -1.0141878]
 [-0.3638906 -1.1630247  1.9065013  2.1482882 -2.6945531 -1.0141878]
 [-0.3638906 -1.1630247  1.9065013  2.1482882 -2.6945531 -1.0141878]]



In [3]:
#
# Matrix multiplication - Z = X.t(W), where X contains the bias constants and W contains the bias weights
#
# A column of 1's (the bias constants) is prepended to X and the bias weights B are
# prepended to W as a matching column, so the single matrix product also adds the bias.
# The shapes are taken from X and B rather than hard-coded, so this cell keeps working
# if the layer or batch dimensions are changed
#
# Note: np.ones defaults to float64, so the concatenated X and the result are float64
# and print with more digits than the earlier results
#
X_with_bias = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
W_with_bias = np.concatenate((B.reshape(-1, 1), W), axis=1)
Z_with_bias = np.matmul(X_with_bias, np.transpose(W_with_bias))
#
print("X_with_bias:")
print(X_with_bias)
print()
#
print("W_with_bias")
print(W_with_bias)
print()
#
print("Z_with_bias")
print(Z_with_bias)
print()
#
# Verify that folding the bias into X and W gives the same result as adding B
# separately, allowing for floating-point rounding
#
np.testing.assert_allclose(Z_with_bias, Z, rtol=1e-5, atol=1e-6)
#

X_with_bias:
[[1. 2. 2. 2.]
 [1. 2. 2. 2.]
 [1. 2. 2. 2.]
 [1. 2. 2. 2.]
 [1. 2. 2. 2.]]

W_with_bias
[[ 0.46506703 -0.10854128  0.01580048 -0.321738  ]
 [-0.052306   -0.1135062  -0.20068005 -0.24117315]
 [ 0.47567463  0.17577547  0.1455571   0.39408076]
 [-0.4736848   0.39267427  0.48715985  0.43115246]
 [-0.2290915  -0.36613792 -0.32896414 -0.53762877]
 [-0.49872625  0.354953   -0.08546546 -0.5272183 ]]

Z_with_bias
[[-0.36389059 -1.16302478  1.90650129  2.14828837 -2.69455317 -1.01418775]
 [-0.36389059 -1.16302478  1.90650129  2.14828837 -2.69455317 -1.01418775]
 [-0.36389059 -1.16302478  1.90650129  2.14828837 -2.69455317 -1.01418775]
 [-0.36389059 -1.16302478  1.90650129  2.14828837 -2.69455317 -1.01418775]
 [-0.36389059 -1.16302478  1.90650129  2.14828837 -2.69455317 -1.01418775]]

