In [1]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import time

In [2]:
n = np.linspace(0,1,5)
t = torch.linspace(0,1,5)

print(n)
print(t)

[0.   0.25 0.5  0.75 1.  ]
tensor([0.0000, 0.2500, 0.5000, 0.7500, 1.0000])


In [4]:
n = np.arange(48).reshape(3,4,4)
t = torch.arange(48).reshape(3,4,4)

print(n.shape)
print(t.shape)

(3, 4, 4)
torch.Size([3, 4, 4])


In [5]:
a = np.array([1,2])
b = np.array([3,4])

a*b

array([3, 8])

General Broadcasting Rules
When operating on two arrays, NumPy compares their shapes element-wise. It starts with the trailing (i.e. rightmost) dimensions and works its way left. Two dimensions are compatible when

they are equal, or
one of them is 1
Example: The following are compatible

Shape 1: (1,6,4,1,7,2)

Shape 2: (5,6,1,3,1,2)

In [7]:
a = np.ones((6,5))
b = np.arange(5).reshape((1,5))

a

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [8]:
b

array([[0, 1, 2, 3, 4]])

In [10]:
a + b

array([[1., 2., 3., 4., 5.],
       [1., 2., 3., 4., 5.],
       [1., 2., 3., 4., 5.],
       [1., 2., 3., 4., 5.],
       [1., 2., 3., 4., 5.],
       [1., 2., 3., 4., 5.]])

In [11]:
a = torch.ones((6,5))
b = torch.arange(5).reshape(1,5)

a+b

tensor([[1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.],
        [1., 2., 3., 4., 5.]])

The arrays/tensors don't need to have the same number of dimenions. If one of the arrays/tensors has less dimensions than the other

Example: Scaling each other the color channels of an image by a different amount:

Image  (3d array): 256 x 256 x 3
Scale  (1d array):             3
Result (3d array): 256 x 256 x 3

In [14]:
Image = torch.ones((2,2,3))
Scale = torch.tensor([0.5,1.5,1]) # scalare pentru RGB (compatibilitate de marime)

Result = Image * Scale
Result

tensor([[[0.5000, 1.5000, 1.0000],
         [0.5000, 1.5000, 1.0000]],

        [[0.5000, 1.5000, 1.0000],
         [0.5000, 1.5000, 1.0000]]])

Example: One has an array of 2 images and wants to scale the color channels of each image by a slightly different amount:

Images  (4d array): 2 x 256 x 256 x 3
Scales  (4d array): 2 x 1 x 1 x 3
Results  (4d array): 2 x 256 x 256 x 3

In [15]:
Images = torch.ones((2,4,4,3)) # two 4x4 RGB images
Scales = torch.tensor([0.5, 1.5,1,1.5,1,0.5]).reshape(2,1,1,3)

Result = Images * Scales

Result

tensor([[[[0.5000, 1.5000, 1.0000],
          [0.5000, 1.5000, 1.0000],
          [0.5000, 1.5000, 1.0000],
          [0.5000, 1.5000, 1.0000]],

         [[0.5000, 1.5000, 1.0000],
          [0.5000, 1.5000, 1.0000],
          [0.5000, 1.5000, 1.0000],
          [0.5000, 1.5000, 1.0000]],

         [[0.5000, 1.5000, 1.0000],
          [0.5000, 1.5000, 1.0000],
          [0.5000, 1.5000, 1.0000],
          [0.5000, 1.5000, 1.0000]],

         [[0.5000, 1.5000, 1.0000],
          [0.5000, 1.5000, 1.0000],
          [0.5000, 1.5000, 1.0000],
          [0.5000, 1.5000, 1.0000]]],


        [[[1.5000, 1.0000, 0.5000],
          [1.5000, 1.0000, 0.5000],
          [1.5000, 1.0000, 0.5000],
          [1.5000, 1.0000, 0.5000]],

         [[1.5000, 1.0000, 0.5000],
          [1.5000, 1.0000, 0.5000],
          [1.5000, 1.0000, 0.5000],
          [1.5000, 1.0000, 0.5000]],

         [[1.5000, 1.0000, 0.5000],
          [1.5000, 1.0000, 0.5000],
          [1.5000, 1.0000, 0.5000],
          [1.5

Operations Across Dimensions
This is so fundamental for pytorch. Obviously simple operations can be done one 1 dimensional tensors:

In [16]:
t = torch.tensor([0.5,1,3,4])
torch.mean(t), torch.std(t), torch.max(t), torch.min(t)

(tensor(2.1250), tensor(1.6520), tensor(4.), tensor(0.5000))

But suppose we have a 2d tensor, for example, and want to compute the mean value of each columns:

Note: taking the mean of each column means taking the mean across the rows (which are the first dimension)

In [26]:
t = torch.arange(20,dtype=float).reshape(5,4)
t

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.],
        [16., 17., 18., 19.]], dtype=torch.float64)

In [18]:
t[0]

tensor([0., 1., 2., 3.], dtype=torch.float64)

In [19]:
torch.mean(t, axis=0) # mean across the rows

tensor([ 8.,  9., 10., 11.], dtype=torch.float64)

In [29]:
t = torch.randn(5,256,256,3)

# Take the mean across the batch (size 5)

torch.mean(t,axis=0).shape

torch.Size([256, 256, 3])

In [30]:
# Take the mean across the color channels

torch.mean(t,axis=-1).shape # average of brightness

torch.Size([5, 256, 256])

Take only the maximum color channel values (and get the corresponding indices):

This is done all the time in image segmentation models (i.e. take an image, decide which pixels correspond to, say, a car)

In [31]:
values, indices = torch.max(t,axis=-1)

indices # tells you when red was maximum (e.g. element == 2) or blue, or green

tensor([[[2, 0, 1,  ..., 1, 1, 2],
         [1, 0, 2,  ..., 2, 2, 2],
         [2, 1, 1,  ..., 1, 2, 1],
         ...,
         [0, 1, 2,  ..., 1, 1, 1],
         [2, 1, 2,  ..., 0, 2, 0],
         [2, 2, 0,  ..., 0, 0, 0]],

        [[0, 2, 0,  ..., 0, 0, 0],
         [0, 1, 1,  ..., 1, 0, 2],
         [2, 0, 1,  ..., 0, 1, 0],
         ...,
         [2, 2, 0,  ..., 2, 0, 1],
         [2, 0, 2,  ..., 0, 2, 2],
         [0, 1, 1,  ..., 1, 0, 0]],

        [[0, 1, 1,  ..., 0, 2, 0],
         [1, 0, 0,  ..., 1, 1, 1],
         [1, 2, 2,  ..., 2, 1, 0],
         ...,
         [0, 0, 0,  ..., 1, 0, 0],
         [0, 1, 2,  ..., 1, 0, 0],
         [2, 1, 1,  ..., 1, 1, 1]],

        [[1, 1, 2,  ..., 0, 0, 1],
         [2, 2, 0,  ..., 2, 1, 1],
         [0, 2, 1,  ..., 2, 0, 2],
         ...,
         [1, 2, 0,  ..., 2, 2, 2],
         [0, 2, 0,  ..., 1, 1, 2],
         [0, 2, 0,  ..., 1, 0, 0]],

        [[1, 2, 1,  ..., 0, 0, 2],
         [0, 1, 2,  ..., 2, 0, 0],
         [2, 0, 2,  ..., 2

# So Where Do Pytorch and Numpy Differ?
Pytorch starts to really differ from numpy in terms of automatically computing gradients of operations


has a gradient

In [32]:
x = torch.tensor([[5.,8.],[4.,6.]], requires_grad=True)

x

tensor([[5., 8.],
        [4., 6.]], requires_grad=True)

In [33]:
y = x.pow(3).sum()
y

tensor(917., grad_fn=<SumBackward0>)

In [34]:
# Compute the gradient:

y.backward() #compute the gradient
x.grad #print the gradient (everything that has happened to x)

tensor([[ 75., 192.],
        [ 48., 108.]])

In [35]:
3 * x**2

tensor([[ 75., 192.],
        [ 48., 108.]], grad_fn=<MulBackward0>)

The automatic computation of gradients is the backbone of training deep learning models. Unlike in the example above, most gradient computations don't have an analytical formula, so the automatic computation of gradients is essential. In general, if one has

Then pytorch can compute
. For each of element of the vector
. In the context of machine learning,
 contains all the weights (also known as parameters) of the neural network and  is the Loss Function of the neural network.

Additional Benefits
In addition, any sort of large matrix multiplication problem is faster with torch tensors than it is with numpy arrays, especially if you're running on a GPU

Using torch: (with a CPU. With GPU, this is much much faster)

In [57]:
A = torch.randn((1000,1000))
B = torch.randn((1000,1000))

t1 = time.perf_counter()
torch.matmul(A,B)
t2 = time.perf_counter()
print(t2-t1)

0.01982990000396967


In [53]:
A = np.random.randn(int(1e6)).reshape((1000,1000))
B = np.random.randn(int(1e6)).reshape((1000,1000))

t1 = time.perf_counter()
A@B
t2 = time.perf_counter()
print(t2-t1)

0.03208850000373786


In [58]:
!wget https://raw.githubusercontent.com/lukepolson/youtube_channel/main/Python%20Shorts/crossentropy.ipynb

--2022-08-21 21:37:33--  https://raw.githubusercontent.com/lukepolson/youtube_channel/main/Python%20Shorts/crossentropy.ipynb
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 326747 (319K) [text/plain]
Saving to: 'crossentropy.ipynb'

     0K .......... .......... .......... .......... .......... 15%  727K 0s
    50K .......... .......... .......... .......... .......... 31% 4.61M 0s
   100K .......... .......... .......... .......... .......... 47% 1.22M 0s
   150K .......... .......... .......... .......... .......... 62% 1.61M 0s
   200K .......... .......... .......... .......... .......... 78% 1.61M 0s
   250K .......... .......... .......... .......... .......... 94% 8.92M 0s
   300K .......... .........                                  100% 7.42M=0.2s
