In [1]:
import numpy as np
import torch

# Creating Tensors

## Ranges of Values

In [2]:
# 1D NumPy array holding 0, 1, ..., 11 as 32-bit floats
x_numpy = np.arange(0, 12, dtype=np.float32)
# 1D Torch tensor holding the same twelve values, also as 32-bit floats
x_torch = torch.arange(0, 12, dtype=torch.float32)

In [6]:
print(type(x_numpy), type(x_torch))
print(x_numpy.shape, x_torch.shape)

<class 'numpy.ndarray'> <class 'torch.Tensor'>
(12,) torch.Size([12])


## Ones and Zeros

In [9]:
# Array/tensor containing only ones, created with ones_like from x_numpy / x_torch
# (the sums printed in the next cell show 12 elements each)
y_numpy_ones = np.ones_like(x_numpy)
y_torch_ones = torch.ones_like(x_torch)

# Array/tensor containing only zeros, created with zeros_like from the all-ones array/tensor above
z_numpy_zeros = np.zeros_like(y_numpy_ones)
z_torch_zeros = torch.zeros_like(y_torch_ones)

In [10]:
print(y_numpy_ones.sum(), y_torch_ones.sum())
print(z_numpy_zeros.sum(), z_torch_zeros.sum())

12.0 tensor(12.)
0.0 tensor(0.)


In [15]:
# Count the number of elements in the tensors/array. This is a case where the two libraries differ a bit
print(y_torch_ones.numel())
print(y_numpy_ones.size)

12
12


## Random Values

In [33]:
# Let's look at creating some random values. Let's say we want a 2D tensor of shape 5x5 with random 16-bit floating point values sampled from a Gaussian distribution
# NOTE: no random seed is set in this cell, so the values displayed below will differ from run to run
r_numpy = np.random.randn(5, 5).astype(np.float16)
r_torch = torch.randn((5,5), dtype=torch.float16)

In [34]:
r_numpy

array([[-0.417  ,  0.754  , -1.606  , -0.6123 ,  1.52   ],
       [ 2.244  , -1.29   , -0.05856, -0.4785 , -0.679  ],
       [-0.5835 ,  2.047  , -0.1962 ,  0.1606 , -0.04477],
       [-0.789  , -0.9805 , -0.5522 , -0.394  ,  0.8716 ],
       [-0.0983 , -1.545  ,  0.0902 ,  1.551  ,  0.04935]], dtype=float16)

In [35]:
r_torch

tensor([[-2.0605,  0.1648,  1.2422,  0.5728, -1.2871],
        [ 1.0342,  0.1401,  0.3108,  0.2815, -0.0604],
        [-0.8438, -0.5664,  1.6270,  1.1523,  0.3770],
        [ 0.0344,  0.6152,  1.2246, -0.0669, -0.1643],
        [ 0.9082,  1.0449, -0.9507, -1.3271,  0.7451]], dtype=torch.float16)

# Reshaping Tensors

In [16]:
x_numpy.reshape(3,4)

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]], dtype=float32)

In [17]:
x_torch.reshape(3,4)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])

In [21]:
# we can also ask Numpy/Torch to automatically infer one of the dimensions
x_numpy.reshape(3, -1)

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]], dtype=float32)

In [22]:
x_torch.reshape(-1, 4)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])

# Indexing and Slicing

In [59]:
# Let's create a tensor of shape 3,4,5, containing random integers from 0 to 9
# (torch.randint's upper bound, 10 here, is exclusive)
# The indexing and slicing mechanisms are the same for Numpy and Torch, so we will just use Torch for now
t_torch = torch.randint(0, 10, (3,4,5), dtype=torch.int8)

In [60]:
t_torch

tensor([[[3, 4, 9, 0, 3],
         [5, 4, 0, 4, 5],
         [2, 1, 4, 1, 8],
         [5, 8, 7, 5, 0]],

        [[6, 3, 6, 4, 2],
         [0, 9, 3, 8, 3],
         [5, 0, 2, 3, 9],
         [4, 4, 4, 1, 3]],

        [[5, 3, 9, 4, 7],
         [1, 7, 9, 7, 2],
         [9, 9, 2, 0, 8],
         [9, 0, 2, 9, 3]]], dtype=torch.int8)

## Grab just the first 4x5 tensor

In [61]:
t_torch[0]

tensor([[3, 4, 9, 0, 3],
        [5, 4, 0, 4, 5],
        [2, 1, 4, 1, 8],
        [5, 8, 7, 5, 0]], dtype=torch.int8)

## Grab the last row in the last 4x5 tensor

In [62]:
t_torch[-1][-1]

tensor([9, 0, 2, 9, 3], dtype=torch.int8)

In [63]:
# Equivalent way of doing this with slicing
t_torch[-1,-1,:]

tensor([9, 0, 2, 9, 3], dtype=torch.int8)

## Grab just the first 2 columns in the first row of the second 4x5 tensor

In [64]:
t_torch[1,0,0:2]

tensor([6, 3], dtype=torch.int8)

## Grab the element in the 2nd row and 3rd column of the third 4x5 tensor

In [65]:
# this is equivalent to, but faster than t_torch[2][1][2]
t_torch[2,1,2]

tensor(9, dtype=torch.int8)

## Assigning Values
### You can rewrite values of a tensor using the same indexing and slicing mechanisms

In [66]:
my_tensor = torch.tensor([4, 4, 3, 3, 2, 2, 1, 1, 0], dtype=torch.float16)

In [67]:
my_tensor[0] = 5

In [68]:
my_tensor

tensor([5., 4., 3., 3., 2., 2., 1., 1., 0.], dtype=torch.float16)

In [69]:
my_tensor[-2:] = 0

In [70]:
my_tensor

tensor([5., 4., 3., 3., 2., 2., 1., 0., 0.], dtype=torch.float16)

In [73]:
my_other_tensor = torch.tensor([[1, 2, 3], [3, 2, 1], [0, 1, 0], [0, 0, 0]])

In [74]:
my_other_tensor.shape

torch.Size([4, 3])

In [76]:
my_other_tensor[1, 0:2] = torch.tensor([8, 9])

In [77]:
my_other_tensor

tensor([[1, 2, 3],
        [8, 9, 1],
        [0, 1, 0],
        [0, 0, 0]])

# Combining Tensors

In [79]:
tensor1 = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.int8)
tensor2 = torch.tensor([[7, 8 ,9], [10, 11, 12]], dtype=torch.int8)

In [80]:
# we can concatenate tensors together. To do so, you tell Torch on which axis you want to concatenate
# let's first concatenate along rows
tensor3 = torch.cat((tensor1, tensor2), dim=0)
tensor3

tensor([[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8,  9],
        [10, 11, 12]], dtype=torch.int8)

In [81]:
# now let's concatenate along columns
tensor4 = torch.cat((tensor1, tensor2), dim=1)
tensor4

tensor([[ 1,  2,  3,  7,  8,  9],
        [ 4,  5,  6, 10, 11, 12]], dtype=torch.int8)

# Unary Operators

In [84]:
x = torch.tensor([0, 2, 4, 6, 8], dtype=torch.float64)
x

tensor([0., 2., 4., 6., 8.], dtype=torch.float64)

In [85]:
# absolute value
x.abs()

tensor([0., 2., 4., 6., 8.], dtype=torch.float64)

In [86]:
# sqrt
x.sqrt()

tensor([0.0000, 1.4142, 2.0000, 2.4495, 2.8284], dtype=torch.float64)

In [88]:
# e^x
x.exp()

tensor([1.0000e+00, 7.3891e+00, 5.4598e+01, 4.0343e+02, 2.9810e+03],
       dtype=torch.float64)

# Binary Operators

In [89]:
x = torch.tensor([0, 2, 4, 6, 8], dtype=torch.float64)
y = torch.tensor([1, 3, 2, 1, 4], dtype=torch.float64)

In [90]:
# elementwise addition of two vectors
x + y

tensor([ 1.,  5.,  6.,  7., 12.], dtype=torch.float64)

In [94]:
# elementwise subtraction of two vectors
x - y

tensor([-1., -1.,  2.,  5.,  4.], dtype=torch.float64)

In [91]:
# elementwise multiplication of two vectors
x * y

tensor([ 0.,  6.,  8.,  6., 32.], dtype=torch.float64)

In [92]:
# elementwise division of two vectors
x / y

tensor([0.0000, 0.6667, 2.0000, 6.0000, 2.0000], dtype=torch.float64)

In [93]:
# elementwise exponentiation of two vectors
x ** y

tensor([   0.,    8.,   16.,    6., 4096.], dtype=torch.float64)

# Broadcasting

### Elementwise vector operations can sometimes be applied to tensors of differing shape, using broadcasting

In [106]:
x = torch.tensor([[0], [1], [2]])
y = torch.tensor([[0, 1]])

In [107]:
x,y

(tensor([[0],
         [1],
         [2]]),
 tensor([[0, 1]]))

### In the following operation, x will expand from 1 to 2 columns to result in a 3x2 tensor, copying column 0 of x into column 1. Likewise, y will expand from a 1x2 tensor to a 3x2 tensor by copying row 0 twice. The expanded forms of x and y will then be added

In [108]:
x + y

tensor([[0, 1],
        [1, 2],
        [2, 3]])

# Memory Allocation

### Operations on tensors can create additional memory overhead in some cases

In [112]:
x = torch.tensor([0,1,2])
y = torch.tensor([0,1,2])

In [113]:
# id() returns the identity of the Python object each name is bound to
# (in CPython this is that object's memory address). It identifies the tensor
# object itself, not the underlying data buffer.
id(x), id(y)

(5770442800, 5770439520)

In [114]:
y = x + y

In [115]:
id(y)

5766704448

### As we can see, reassigning y to x + y ends up storing the result in a different memory location.

In [116]:
x = torch.tensor([0,1,2])
y = torch.tensor([0,1,2])
id(x), id(y)

(6131474304, 6131482304)

In [117]:
y += x

In [118]:
id(y)

6131482304

### Using y += x, which is also equivalent to y[:] = y + x, performs the operation in-place, so id(y) is unchanged

# Numpy/Torch Conversions

In [119]:
# convert a torch tensor to a Numpy array
y_numpy = y.numpy()
type(y_numpy)

numpy.ndarray

In [120]:
# convert a Numpy array to a Torch tensor

In [121]:
x_numpy = np.arange(3)
x_torch = torch.from_numpy(x_numpy)
type(x_torch)

torch.Tensor

# Tensor Operations

In [126]:
A = torch.ones((3,4)) * 2
B = torch.ones((3,4)) * 3
A,B

(tensor([[2., 2., 2., 2.],
         [2., 2., 2., 2.],
         [2., 2., 2., 2.]]),
 tensor([[3., 3., 3., 3.],
         [3., 3., 3., 3.],
         [3., 3., 3., 3.]]))

## Hadamard Product

In [127]:
A * B

tensor([[6., 6., 6., 6.],
        [6., 6., 6., 6.],
        [6., 6., 6., 6.]])

## Reduction Operations
### These operations reduce the order of tensors along one or more axes

In [133]:
# Reduction of an nth order tensor to a 0th order tensor (a scalar)
# total sum and total mean
print(A.sum(), A.mean())

tensor(24.) tensor(2.)


In [135]:
# reduction of nth order tensor to n-1'th order tensor
# sum and mean over axis 0: the row axis is collapsed, giving one value per column
A.sum(axis=0), A.mean(axis=0)

(tensor([6., 6., 6., 6.]), tensor([2., 2., 2., 2.]))

In [136]:
# sum and mean over axis 1: the column axis is collapsed, giving one value per row
A.sum(axis=1), A.mean(axis=1)

(tensor([8., 8., 8.]), tensor([2., 2., 2.]))

## Non-reducing Sum or Mean
We can set `keepdim=True` to keep the reduced axis (with size 1) instead of removing it

In [140]:
# reducing sum
C = A.sum(axis=1)
print(C, C.shape)

tensor([8., 8., 8.]) torch.Size([3])


In [141]:
# non-reducing sum
C = A.sum(axis=1, keepdim=True)
print(C, C.shape)

tensor([[8.],
        [8.],
        [8.]]) torch.Size([3, 1])


# Norms

In [144]:
x = torch.tensor([0, 4, 1, 3, 8], dtype=torch.float32)

## L2 Norm

In [145]:
x.norm()

tensor(9.4868)

## L1 Norm

In [146]:
x.abs().sum()

tensor(16.)

## Euclidean distance between two vectors

In [148]:
x = torch.tensor([0, 4, 1, 3, 8], dtype=torch.float32)
y = torch.tensor([-8, 5, 2, -1, 4], dtype=torch.float32)

# The Euclidean distance is the L2 norm of the difference vector.
# torch.linalg.vector_norm (2-norm by default) is used instead of torch.norm,
# which the PyTorch documentation marks as deprecated.
torch.linalg.vector_norm(x - y)

tensor(9.8995)

# Vector Dot Product

In [172]:
u = torch.tensor([1, 2, 3], dtype=torch.float32)
v = torch.tensor([4, 1, -5], dtype=torch.float32)

In [173]:
u@v

tensor(-9.)

In [174]:
# the following is identical to above
torch.dot(u, v)

tensor(-9.)

# Matrix Vector Product

## Example of dimensionality reduction

In [167]:
A = torch.tensor([[1, 0, 1], [0, 1, -1]], dtype=torch.float32)
x = torch.tensor([1, 2, 3], dtype=torch.float32)
A.shape, x.shape

(torch.Size([2, 3]), torch.Size([3]))

In [168]:
A@x

tensor([ 4., -1.])

In [169]:
# the following is identical to above
torch.mv(A, x)

tensor([ 4., -1.])

## Example of 90 degree rotation operation

In [170]:
A = torch.tensor([[0, -1], [1, 0]], dtype=torch.float32)
x = torch.tensor([1, 0], dtype=torch.float32)
A.shape, x.shape

(torch.Size([2, 2]), torch.Size([2]))

In [171]:
A@x

tensor([0., 1.])

# Matrix Matrix Product

In [175]:
A = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32)
B = torch.tensor([[1, 1, 1, 1],[2, 2, 2, 2],[3, 3, 3, 3]], dtype=torch.float32)
A.shape, B.shape

(torch.Size([2, 3]), torch.Size([3, 4]))

In [176]:
A@B

tensor([[14., 14., 14., 14.],
        [32., 32., 32., 32.]])

In [177]:
# the following is identical to above
torch.mm(A, B)

tensor([[14., 14., 14., 14.],
        [32., 32., 32., 32.]])

# Autograd

## Let's First Consider a Scalar-valued Function

### $y = 2 \mathbf{x}^\top \mathbf{x}$

Let $\mathbf{x}$ be a column vector in $\mathbb{R}^n$. The function is defined as:
$$
y = 2 \mathbf{x}^\top \mathbf{x}
$$
This is equivalent to:
$$
y = 2 \sum_{i=1}^n x_i^2
$$

In PyTorch, we can compute this function and its gradient using **Autograd**.


In [180]:
import torch

# Column vector x of shape 4x1; requires_grad=True asks autograd to track operations on it
x = torch.tensor([[0.0], [1.0], [2.0], [3.0]], requires_grad=True)

# y = 2 * x^T x, written with the @ operator (the result is a 1x1 tensor)
y = 2 * (x.T @ x)

# Backpropagate from y so that x.grad gets populated
y.backward()

# Show the gradient, label on one line and tensor on the next
print("Gradient of y with respect to x:", x.grad, sep="\n")


Gradient of y with respect to x:
tensor([[ 0.],
        [ 4.],
        [ 8.],
        [12.]])


In [181]:
x.grad.zero_()  # Reset the gradient
y = x.sum()  # Redefine a different function
y.backward()  # backpropagate
x.grad  # get the gradient

tensor([[1.],
        [1.],
        [1.],
        [1.]])

## Let's Now Consider a Vector-valued Function

Let:
$$
\mathbf{u}(\mathbf{x}) =
\begin{bmatrix}
u_1(\mathbf{x}) \\
u_2(\mathbf{x})
\end{bmatrix} =
\begin{bmatrix}
x_1^2 + x_2 \\
\sin(x_1) + x_2^3
\end{bmatrix}.
$$

The Jacobian matrix is:
$$
J_{\mathbf{u}}(\mathbf{x}) =
\begin{bmatrix}
\frac{\partial u_1}{\partial x_1} & \frac{\partial u_1}{\partial x_2} \\
\frac{\partial u_2}{\partial x_1} & \frac{\partial u_2}{\partial x_2}
\end{bmatrix}
=
\begin{bmatrix}
2x_1 & 1 \\
\cos(x_1) & 3x_2^2
\end{bmatrix}.
$$

At
$$
\mathbf{x} = \begin{bmatrix} 1 \\ 2 \end{bmatrix},
$$
we have
$$
J_{\mathbf{u}}(\mathbf{x}) =
\begin{bmatrix}
2(1) & 1 \\
\cos(1) & 3(2)^2
\end{bmatrix}
=
\begin{bmatrix}
2 & 1 \\
\cos(1) & 12
\end{bmatrix}.
$$


In [183]:
import torch

# Input point x = [x1, x2], tracked by autograd
x = torch.tensor([1.0, 2.0], requires_grad=True)

# Components of the vector-valued function u(x)
u1 = x[0] ** 2 + x[1]             # u1 = x1^2 + x2
u2 = torch.sin(x[0]) + x[1] ** 3  # u2 = sin(x1) + x2^3
u = torch.stack([u1, u2])         # u = [u1, u2]

# Jacobian of u with respect to x, assembled row by row before the gradient of y is taken:
# row i is the gradient of u[i] with respect to x.
# retain_graph=True keeps the graph usable for the next row and for the backward pass below;
# create_graph=True also records the gradient computation itself, which is why the printed
# Jacobian carries a grad_fn.
jacobian = torch.stack([
    torch.autograd.grad(u[i], x, retain_graph=True, create_graph=True)[0]
    for i in range(len(u))
])

print("Jacobian of u with respect to x:", jacobian, sep="\n")

# Scalar function of u: y = 2*u1 + 3*u2
y = 2 * u[0] + 3 * u[1]

# Backpropagate from y into x.grad; the graph is retained for subsequent operations
y.backward(retain_graph=True)

# Report dy/dx
print("Gradient of y with respect to x:", x.grad)


Jacobian of u with respect to x:
tensor([[ 2.0000,  1.0000],
        [ 0.5403, 12.0000]], grad_fn=<StackBackward0>)
Gradient of y with respect to x: tensor([ 5.6209, 38.0000])
