In [1]:
import torch

import matplotlib.pyplot as plt
import seaborn as sns

# Plot appearance: seaborn-flavoured matplotlib style sheet with monospace text.
plt.style.use("seaborn-v0_8")
plt.rcParams.update({"font.family": "monospace"})

In [2]:
# Pick the compute device once: the first CUDA GPU when available, otherwise
# the CPU. The tensors in this notebook are created with device=DEVICE.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print("CUDA found, bounding..." if DEVICE.type == 'cuda' else "NO CUDA found, bounding to CPU...")

CUDA found, bounding...


In [3]:
# Device properties can only be queried for CUDA devices; on the CPU fallback
# selected above this call would raise, so only show them when DEVICE is a GPU.
torch.cuda.get_device_properties(DEVICE) if DEVICE.type == "cuda" else None

_CudaDeviceProperties(name='NVIDIA GeForce GTX 1650', major=7, minor=5, total_memory=4095MB, multi_processor_count=16)

In [4]:
# Floating-point precision passed as dtype= to the tensors in this notebook.
DTYPE = torch.float32

# Seed the random number generator so the torch.rand examples below produce
# the same values on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED);  # trailing ';' hides the returned Generator repr

# Elementwise Operations
A: (M, N)
B: (M, N)
Result: (M, N)

-> Shapes MUST match
Each element is combined (--operation--) with the element at the same location in the other tensor
ADDING, SUBTRACTING, MULTIPLYING, DIVIDING etc...

In [12]:
# First operand: a 5x5 matrix of random values from torch.rand.
A = torch.rand(5, 5, dtype=DTYPE, device=DEVICE)
A, A.size()

(tensor([[0.4384, 0.1983, 0.8377, 0.3858, 0.2744],
         [0.8071, 0.0273, 0.1593, 0.9037, 0.9704],
         [0.6785, 0.0208, 0.2913, 0.0522, 0.8723],
         [0.1333, 0.3109, 0.3667, 0.2385, 0.3994],
         [0.8602, 0.2686, 0.4060, 0.6478, 0.9957]]),
 torch.Size([5, 5]))

In [13]:
# Second operand: another random 5x5 matrix, same dtype and device as A.
B = torch.rand(5, 5, dtype=DTYPE, device=DEVICE)
B, B.size()

(tensor([[3.9433e-01, 6.2108e-04, 4.1310e-01, 7.9625e-01, 1.4572e-01],
         [5.2915e-01, 5.8589e-01, 5.8698e-01, 5.2897e-01, 8.2075e-01],
         [1.2902e-01, 9.3988e-02, 5.9249e-01, 4.7929e-01, 4.7052e-01],
         [7.4067e-01, 9.1130e-01, 2.9639e-02, 2.6085e-01, 1.3902e-01],
         [7.5832e-01, 6.5414e-01, 6.8506e-01, 3.3213e-01, 1.8475e-01]]),
 torch.Size([5, 5]))

In [14]:
# Adding: the + operator combines A and B element by element
# (each output entry is the sum of the two entries at the same position).
AB = A + B
AB, AB.shape

(tensor([[0.8327, 0.1989, 1.2508, 1.1820, 0.4201],
         [1.3363, 0.6132, 0.7463, 1.4327, 1.7911],
         [0.8075, 0.1148, 0.8838, 0.5315, 1.3428],
         [0.8740, 1.2222, 0.3964, 0.4993, 0.5384],
         [1.6186, 0.9228, 1.0911, 0.9800, 1.1805]]),
 torch.Size([5, 5]))

In [15]:
# Check that the + operator and torch.add produce the same tensor.
# Compare element by element first, then reduce with .all(): calling .all() on
# each side before comparing would only test whether both results contain no
# zeros, not whether they are equal.
((A + B) == torch.add(A, B)).all()

tensor(True)

In [16]:
# Subtracting: torch.sub(A, B) subtracts B from A element by element.
AB = torch.sub(A, B)
AB, AB.shape

(tensor([[ 0.0440,  0.1977,  0.4246, -0.4105,  0.1286],
         [ 0.2780, -0.5586, -0.4277,  0.3748,  0.1496],
         [ 0.5495, -0.0732, -0.3011, -0.4271,  0.4017],
         [-0.6074, -0.6004,  0.3371, -0.0224,  0.2604],
         [ 0.1019, -0.3855, -0.2791,  0.3157,  0.8110]]),
 torch.Size([5, 5]))

In [17]:
# Check that torch.sub and the - operator produce the same tensor.
# Compare elementwise first, then reduce; .all() on each side separately would
# only compare "every element is non-zero" flags.
(torch.sub(A, B) == (A - B)).all()

tensor(True)

In [18]:
# Multiplying: torch.mul(A, B) multiplies entries at the same position
# (elementwise product, not a matrix product).
AB = torch.mul(A, B)
AB, AB.shape

(tensor([[1.7286e-01, 1.2316e-04, 3.4604e-01, 3.0718e-01, 3.9982e-02],
         [4.2710e-01, 1.5985e-02, 9.3518e-02, 4.7805e-01, 7.9643e-01],
         [8.7536e-02, 1.9520e-03, 1.7262e-01, 2.5014e-02, 4.1042e-01],
         [9.8727e-02, 2.8333e-01, 1.0870e-02, 6.2208e-02, 5.5523e-02],
         [6.5233e-01, 1.7573e-01, 2.7814e-01, 2.1517e-01, 1.8397e-01]]),
 torch.Size([5, 5]))

In [19]:
# Check that torch.mul and the * operator produce the same tensor.
# Compare elementwise first, then reduce; .all() on each side separately would
# only compare "every element is non-zero" flags.
(torch.mul(A, B) == (A * B)).all()

tensor(True)

In [20]:
# Dividing: torch.div(A, B) divides each entry of A by the entry of B at the
# same position.
AB = torch.div(A, B)
AB, AB.shape

(tensor([[1.1117e+00, 3.1927e+02, 2.0278e+00, 4.8450e-01, 1.8828e+00],
         [1.5254e+00, 4.6568e-02, 2.7142e-01, 1.7085e+00, 1.1823e+00],
         [5.2590e+00, 2.2098e-01, 4.9173e-01, 1.0889e-01, 1.8538e+00],
         [1.7997e-01, 3.4117e-01, 1.2374e+01, 9.1423e-01, 2.8729e+00],
         [1.1344e+00, 4.1069e-01, 5.9266e-01, 1.9506e+00, 5.3895e+00]]),
 torch.Size([5, 5]))

In [21]:
# Check that torch.div and the / operator produce the same tensor.
# Compare elementwise first, then reduce; .all() on each side separately would
# only compare "every element is non-zero" flags.
(torch.div(A, B) == (A / B)).all()

tensor(True)

In [22]:
# Exponential: raise Euler's number (torch.e) to the power of each element of A.
torch.e ** A

tensor([[1.5502, 1.2193, 2.3110, 1.4708, 1.3157],
        [2.2415, 1.0277, 1.1727, 2.4688, 2.6389],
        [1.9709, 1.0210, 1.3382, 1.0536, 2.3923],
        [1.1426, 1.3647, 1.4430, 1.2693, 1.4909],
        [2.3637, 1.3082, 1.5008, 1.9114, 2.7067]])

In [23]:
# Check that torch.exp(A) matches torch.e ** A element by element.
# torch.isclose is used instead of == because the two spellings may differ by
# floating-point rounding; .all() then reduces the elementwise result.
# (Calling .all() on each side first would only compare "non-zero" flags.)
torch.isclose(torch.exp(A), torch.e ** A).all()

tensor(True)

In [24]:
# Square Root: raising each element of A to the power 0.5.
A ** .5

tensor([[0.6621, 0.4453, 0.9152, 0.6211, 0.5238],
        [0.8984, 0.1652, 0.3991, 0.9506, 0.9851],
        [0.8237, 0.1441, 0.5398, 0.2285, 0.9339],
        [0.3651, 0.5576, 0.6056, 0.4883, 0.6320],
        [0.9275, 0.5183, 0.6372, 0.8049, 0.9979]])

In [25]:
# Check that torch.sqrt(A) matches A ** .5 element by element.
# torch.isclose tolerates floating-point rounding differences between the two
# spellings; .all() then reduces the elementwise result.
# (Calling .all() on each side first would only compare "non-zero" flags.)
torch.isclose(torch.sqrt(A), A ** .5).all()

tensor(True)

What happens if shapes DO NOT MATCH?

v: (N) -> N elements
M: (M, N) -> M rows, N columns

Broadcasting -> "Stretching" the lower-dimensional tensor so that its shape matches the other one.

V: (M, N)

EXAMPLE

v: [[0., 1., 2., 3., 4.]] -> (1, 5)

M: [0., 1., 2., 0., 5.], -> (3, 5)
   [3., 0., 1., 1., 1.],
   [10., 3., 2., 1., 4.]

After Broadcasting:

V: [0., 1., 2., 3., 4.], -> (3, 5)
   [0., 1., 2., 3., 4.],
   [0., 1., 2., 3., 4.]

Result:

[0., 1., 4., 0., 20.],
[0., 0., 2., 3., 4.],
[0., 3., 4., 3., 16.]

In [26]:
# 1-D vector [0, 1, 2, 3, 4]; arange starts at 0 and steps by 1 by default.
v = torch.arange(5, dtype=DTYPE, device=DEVICE)
v, v.size(), v.dim()

(tensor([0., 1., 2., 3., 4.]), torch.Size([5]), 1)

In [27]:
# 3x5 matrix from the broadcasting example in the notes.
M = torch.tensor(
    [[0., 1., 2., 0., 5.],
     [3., 0., 1., 1., 1.],
     [10., 3., 2., 1., 4.]],
    dtype=DTYPE,
    device=DEVICE,
)
M, M.size(), M.dim()

(tensor([[ 0.,  1.,  2.,  0.,  5.],
         [ 3.,  0.,  1.,  1.,  1.],
         [10.,  3.,  2.,  1.,  4.]]),
 torch.Size([3, 5]),
 2)

In [28]:
# Elementwise product of the (3, 5) matrix with the 5-element vector:
# v is broadcast across the rows of M.
Mv = M * v
Mv, Mv.size(), Mv.dim()

(tensor([[ 0.,  1.,  4.,  0., 20.],
         [ 0.,  0.,  2.,  3.,  4.],
         [ 0.,  3.,  4.,  3., 16.]]),
 torch.Size([3, 5]),
 2)

# Transpose of a Tensor
A: (M, N) -> M rows, N columns

Transposing: Swapping the dimensions with each other.
(Rows become Columns, Columns become Rows)

A.T: (N, M)

In [29]:
# Recap of M and its shape before taking the transpose.
M, M.shape

(tensor([[ 0.,  1.,  2.,  0.,  5.],
         [ 3.,  0.,  1.,  1.,  1.],
         [10.,  3.,  2.,  1.,  4.]]),
 torch.Size([3, 5]))

In [30]:
# Transpose of the 2-D matrix M: rows and columns swap places.
MT = M.t()
MT, MT.size()

(tensor([[ 0.,  3., 10.],
         [ 1.,  0.,  3.],
         [ 2.,  1.,  2.],
         [ 0.,  1.,  1.],
         [ 5.,  1.,  4.]]),
 torch.Size([5, 3]))

In [31]:
# Recap of v and its shape; the following cell reshapes it.
v, v.shape

(tensor([0., 1., 2., 3., 4.]), torch.Size([5]))

In [32]:
# Give v an explicit row dimension: 5 elements laid out as a (1, 5) matrix.
v = torch.reshape(v, (1, 5))
v, v.size(), v.dim()

(tensor([[0., 1., 2., 3., 4.]]), torch.Size([1, 5]), 2)

In [33]:
# Transposing the (1, 5) row turns it into a (5, 1) column.
vT = v.t()
vT, vT.size(), vT.dim()

(tensor([[0.],
         [1.],
         [2.],
         [3.],
         [4.]]),
 torch.Size([5, 1]),
 2)

# Dot Product

Perceptrons (in NNs) -> Biological Neurons
* X: Input
* y: Output
* W: Weights
* b: Bias
* f: Activation Functions

y = f(torch.dot(X, W) + b)

THOUSANDS of Perceptrons -> Neural Networks

Dot Product:
1. Vector - Vector
2. Matrix - Vector
3. Matrix - Matrix

# 1. Vector - Vector
-> Vectors must have SAME # of elements!
A: (N)
B: (N)

Result: Scalar

y = sum(A * B)

A: [0., 1., 2., 3., 4.]
B: [ 0., 1., 1., 2., 10.]

A * B = [0., 1., 2., 6., 40.]
y = sum(A * B) = sum([0., 1., 2., 6., 40.]) = 0. + 1. + 2. + 6. + 40. = 49.

Result: 49.

In [6]:
# First vector of the dot-product example: [0, 1, 2, 3, 4].
A = torch.arange(5, dtype=DTYPE, device=DEVICE)
A, A.size(), A.dim()

(tensor([0., 1., 2., 3., 4.], device='cuda:0'), torch.Size([5]), 1)

In [7]:
# Second vector of the dot-product example (5 elements).
B = torch.tensor(
    (0., 1., 1., 2., 10.),
    dtype=DTYPE,
    device=DEVICE,
)
B, B.size(), B.dim()

(tensor([ 0.,  1.,  1.,  2., 10.], device='cuda:0'), torch.Size([5]), 1)

In [9]:
# Built-in dot product of two 1-D tensors.
DP = torch.dot(A, B)

# The same value by hand: elementwise product, then add everything up.
# torch.sum reduces the tensor directly, whereas Python's builtin sum() would
# iterate over it one element at a time.
DPs = torch.sum(A * B)

# Both routes must give the same scalar.
assert torch.isclose(DP, DPs)
DPs, DPs.shape, DPs.ndim

(tensor(49., device='cuda:0'), torch.Size([]), 0)

# 2. Matrix - Vector

A: (M, N)
B: (N)

A -> viewed as N column vectors with M elements each (the columns of A are the rows of A.T)

Result: (M)

A: -> (3, 5)
[ 1.,  1.,  0.,  2.,  3.],
[ 0.,  5.,  2.,  0., 10.],
[ 1.,  1.,  4.,  3., 10.]

A.T: -> (5, 3)
[1., 0., 1.],
[1., 5., 1.],
[0., 2., 4.],
[2., 0., 3.],
[3., 10., 10.]

B: -> (5)
[ 0., 1., 1., 2., 10.]

A.T -> [A0],
       [A1],
       [A2],
       [A3],
       [A4]

A0 = [1., 0., 1.],
A1 = [1., 5., 1.],
A2 = [0., 2., 4.],
A3 = [2., 0., 3.],
A4 = [3., 10., 10.]

A * B = [(A0 * 0.), (A1 * 1.), (A2 * 1.), (A3 * 2.), (A4 * 10.)] = [[0., 0., 0.], [1., 5., 1.], [0., 2., 4.], [4., 0., 6.], [30., 100., 100.]]
y = sum(A * B) = [0., 0., 0.] + [1., 5., 1.] + [0., 2., 4.] + [4., 0., 6.] + [30., 100., 100.] = [35., 107., 111.]  (the scaled columns are added together position by position)

Result: [35., 107., 111.] -> (3)

In [13]:
# 3x5 matrix for the matrix-vector example.
A = torch.tensor(
    [[1., 1., 0., 2., 3.],
     [0., 5., 2., 0., 10.],
     [1., 1., 4., 3., 10.]],
    dtype=DTYPE,
    device=DEVICE,
)
A, A.size(), A.dim()

(tensor([[ 1.,  1.,  0.,  2.,  3.],
         [ 0.,  5.,  2.,  0., 10.],
         [ 1.,  1.,  4.,  3., 10.]], device='cuda:0'),
 torch.Size([3, 5]),
 2)

In [14]:
# 5-element vector; its length matches the number of columns of A.
B = torch.tensor(
    (0., 1., 1., 2., 10.),
    dtype=DTYPE,
    device=DEVICE,
)
B, B.size(), B.dim()

(tensor([ 0.,  1.,  1.,  2., 10.], device='cuda:0'), torch.Size([5]), 1)

In [15]:
# Matrix-vector product of the (3, 5) matrix A with the 5-element vector B.
DP = A.mv(B)
DP, DP.size(), DP.dim()

(tensor([ 35., 107., 111.], device='cuda:0'), torch.Size([3]), 1)