In [1]:
import numpy as np
import pandas as pd
import os
import torch

In [2]:
# broadcasting

In [3]:
# With torch broadcasting, operations are applied element-wise - so "*" here is element-wise multiplication, NOT matrix multiplication (that would be "@" / torch.matmul)

# Broadcasting happens along singleton dimensions, if any exist (an axis whose size is 1). We basically stretch (repeat) the contents along that axis until the shapes match.

In [4]:
# consider the following example

In [5]:
# column vector of shape (3, 1), drawn from a standard normal;
# no seed is set, so the values differ on every fresh run
x = torch.randn(3, 1)

In [6]:
x

tensor([[ 0.1485],
        [-1.3646],
        [-0.4325]])

In [7]:
x.shape

torch.Size([3, 1])

In [8]:
# row vector of shape (1, 3), drawn from a standard normal;
# no seed is set, so the values differ on every fresh run
y = torch.randn(1, 3)

In [9]:
y

tensor([[ 1.1200,  1.6239, -0.2311]])

In [10]:
x*y

tensor([[ 0.1663,  0.2411, -0.0343],
        [-1.5284, -2.2160,  0.3153],
        [-0.4844, -0.7023,  0.0999]])

In [11]:
y*x

tensor([[ 0.1663,  0.2411, -0.0343],
        [-1.5284, -2.2160,  0.3153],
        [-0.4844, -0.7023,  0.0999]])

In [12]:
# note both of these return the same output

In [13]:
# What happens here?

In [14]:
# # x has dimensions 3*1. Its contents are stretched column-wise (the single column is repeated 3 times)
# # therefore x becomes

# x = [0.1485 0.1485 0.1485
#      -1.3646 -1.3646 -1.3646
#      -0.4325 -0.4325 -0.4325
# ]
# # y is stretched along row axis

# y  =  [ 1.1200  1.6239 -0.2311
#         1.1200  1.6239 -0.2311
#         1.1200  1.6239 -0.2311]

# # now the element-wise multiplication

# x becomes 3*3, and y becomes 3*3, and the resulting element-wise multiplication will give a 3*3 matrix

In [36]:
# the first row should look something like this
0.1485*1.1200, 0.1485*1.6239, 0.1485*-0.2311

(0.16632, 0.24114914999999998, -0.03431835)

In [37]:
# second row of x*y: every element of y scaled by x[1] = -1.3646
1.1200*-1.3646,  1.6239*-1.3646, -0.2311*-1.3646

(-1.5283520000000002, -2.21597394, 0.31535906)

In [38]:
# third row of x*y: every element of y scaled by x[2] = -0.4325
# (the middle factor is y[0, 1] = 1.6239, so the result matches the -0.7023 entry of x*y)
1.1200*-0.4325,  1.6239*-0.4325, -0.2311*-0.4325

(-0.48440000000000005, -0.70233675, 0.09995075)

In [18]:
# Okay, now what if we introduce a third tensor, of shape (2,1,1)?

In [19]:
# 3-D tensor of shape (2, 1, 1), drawn from a standard normal;
# no seed is set, so the values differ on every fresh run
a = torch.randn(2, 1, 1)

In [20]:
a

tensor([[[-0.2526]],

        [[ 0.0676]]])

In [21]:
a.shape

torch.Size([2, 1, 1])

In [22]:
# what will x*y*a look like?

In [25]:
# evaluated left to right: x (3,1) * y (1,3) broadcasts to (3,3);
# that (3,3) is then aligned as (1,3,3) against a (2,1,1), giving (2,3,3)
result = x*y*a

In [26]:
result

tensor([[[-0.0420, -0.0609,  0.0087],
         [ 0.3861,  0.5598, -0.0797],
         [ 0.1224,  0.1774, -0.0252]],

        [[ 0.0112,  0.0163, -0.0023],
         [-0.1034, -0.1499,  0.0213],
         [-0.0328, -0.0475,  0.0068]]])

In [27]:
result.shape

torch.Size([2, 3, 3])

In [28]:
# how does this happen?

In [29]:
# x is 3,1
# y is 1,3

In [30]:
# we need to make both these 2D matrices 3D to do the above operation

# do this by prepending a dimension of size 1 (broadcasting aligns shapes starting from the trailing dimension)

# x becomes 1,3,1
# y becomes 1,1,3

# now x*y here will first be 1,3,3, and then this times a (shape 2,1,1) will be 2,3,3

# x*y = [ 0.1663 0.2411 -0.0343
#         -1.5284 -2.2160 0.3153
#         -0.4844 -0.7023 0.0999]

# a = [[[-0.2526]],

#         [[ 0.0676]]]

In [34]:
# first 3x3 slice of result: the (rounded) x*y entries, each scaled by a[0] = -0.2526
print(0.1663*-0.2526, 0.2411*-0.2526, -0.0343*-0.2526)
print(-1.5284*-0.2526, -2.2160*-0.2526, 0.3153*-0.2526)
print(-0.4844*-0.2526, -0.7023*-0.2526, 0.0999*-0.2526)

-0.04200738 -0.06090186 0.008664179999999999
0.38607384 0.5597616000000001 -0.07964478
0.12235944 0.17740098 -0.02523474


In [35]:
# second 3x3 slice of result: the (rounded) x*y entries, each scaled by a[1] = 0.0676
print(0.1663*0.0676, 0.2411*0.0676, -0.0343*0.0676)
print(-1.5284*0.0676, -2.2160*0.0676, 0.3153*0.0676)
print(-0.4844*0.0676, -0.7023*0.0676, 0.0999*0.0676)

0.01124188 0.016298359999999998 -0.0023186799999999996
-0.10331983999999998 -0.1498016 0.021314279999999998
-0.032745439999999994 -0.04747548 0.00675324
