In [None]:
import numpy as np
import torch

# Creating Tensors

## Ranges of Values

In [None]:
# 1D NumPy array holding 0, 1, ..., 11 as 32-bit floats (start, stop and step spelled out)
x_numpy = np.arange(0, 12, 1, dtype=np.float32)
# The Torch counterpart: a 1D float32 tensor with the same twelve values
x_torch = torch.arange(0, 12, 1, dtype=torch.float32)

In [None]:
print(type(x_numpy), type(x_torch))
print(x_numpy.shape, x_torch.shape)

## Ones and Zeros

In [None]:
# Containing only ones with same data types and size as before
y_numpy_ones = np.ones_like(x_numpy)
y_torch_ones = torch.ones_like(x_torch)

# Containing only zeros with same data types and size as before
z_numpy_zeros = np.zeros_like(y_numpy_ones)
z_torch_zeros = torch.zeros_like(y_torch_ones)

In [None]:
print(y_numpy_ones.sum(), y_torch_ones.sum())
print(z_numpy_zeros.sum(), z_torch_zeros.sum())

In [None]:
# Count the number of elements in the tensors/array. This is a case where the two libraries differ a bit
print(y_torch_ones.numel())
print(y_numpy_ones.size)

## Random Values

In [None]:
# Let's look at creating some random values. Let's say we want a 2D tensor of shape 5x5
# with random 16-bit floating-point values sampled from a Gaussian distribution.
# NumPy draws the samples first and then casts them to float16 with astype();
# Torch is given the dtype directly.
r_numpy = np.random.randn(5, 5).astype(np.float16)
r_torch = torch.randn((5,5), dtype=torch.float16)

In [None]:
r_numpy

In [None]:
r_torch

# Reshaping Tensors

In [None]:
x_numpy.reshape(3,4)

In [None]:
x_torch.reshape(3,4)

In [None]:
# we can also ask Numpy/Torch to automatically infer one of the dimensions
x_numpy.reshape(3, -1)

In [None]:
x_torch.reshape(-1, 4)

# Indexing and Slicing

In [None]:
# Let's create a tensor of shape (3, 4, 5) containing random integers from 0 to 9
# (the upper bound passed to torch.randint, 10 here, is exclusive)
# The indexing and slicing mechanisms are the same for Numpy and Torch, so we will just use Torch for now
t_torch = torch.randint(0, 10, (3,4,5), dtype=torch.int8)

In [None]:
t_torch

## Grab just the first 4x5 tensor

In [None]:
t_torch[0]

## Grab the last row in the last 4x5 tensor

In [None]:
t_torch[-1][-1]

In [None]:
# Equivalent way of doing this with slicing
t_torch[-1,-1,:]

## Grab just the first 2 columns in the first row of the second 4x5 tensor

In [None]:
t_torch[1,0,0:2]

## Grab the element in the 2nd row and 3rd column of the third 4x5 tensor

In [None]:
# this is equivalent to, but faster than t_torch[2][1][2]
t_torch[2,1,2]

## Assigning Values
### You can rewrite values of a tensor using the same indexing and slicing mechanisms

In [None]:
my_tensor = torch.tensor([4, 4, 3, 3, 2, 2, 1, 1, 0], dtype=torch.float16)

In [None]:
my_tensor[0] = 5

In [None]:
my_tensor

In [None]:
my_tensor[-2:] = 0

In [None]:
my_tensor

In [None]:
my_other_tensor = torch.tensor([[1, 2, 3], [3, 2, 1], [0, 1, 0], [0, 0, 0]])

In [None]:
my_other_tensor.shape

In [None]:
my_other_tensor[1, 0:2] = torch.tensor([8, 9])

In [None]:
my_other_tensor

# Combining Tensors

In [None]:
tensor1 = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.int8)
tensor2 = torch.tensor([[7, 8 ,9], [10, 11, 12]], dtype=torch.int8)

In [None]:
# we can concatenate tensors together. To do so, you tell Torch on which axis you want to concatenate
# let's first concatenate along rows
tensor3 = torch.cat((tensor1, tensor2), dim=0)
tensor3

In [None]:
# now let's concatenate along columns
tensor4 = torch.cat((tensor1, tensor2), dim=1)
tensor4

# Unary Operators

In [None]:
x = torch.tensor([0, 2, 4, 6, 8], dtype=torch.float64)
x

In [None]:
# absolute value
x.abs()

In [None]:
# sqrt
x.sqrt()

In [None]:
# e^x
x.exp()

# Binary Operators

In [None]:
x = torch.tensor([0, 2, 4, 6, 8], dtype=torch.float64)
y = torch.tensor([1, 3, 2, 1, 4], dtype=torch.float64)

In [None]:
# elementwise addition of two vectors
x + y

In [None]:
# elementwise subtraction of two vectors
x - y

In [None]:
# elementwise multiplication of two vectors
x * y

In [None]:
# elementwise division of two vectors
x / y

In [None]:
# elementwise exponentiation of two vectors
x ** y

# Broadcasting

### Elementwise vector operations can sometimes be applied to tensors of differing shape, using broadcasting

In [None]:
x = torch.tensor([[0], [1], [2]])
y = torch.tensor([[0, 1]])

In [None]:
x,y

### In the following operation, x will expand from 1 to 2 columns to result in a 3x2 tensor, copying column 0 of x into column 1. Likewise, y will expand from a 1x2 tensor to a 3x2 tensor by copying row 0 twice. The expanded forms of x and y will then be added

In [None]:
x + y

# Memory Allocation

### Operations on tensors can create additional memory overhead in some cases

In [187]:
x = torch.tensor([0,1,2])
y = torch.tensor([0,1,2])

In [188]:
# id() returns the identity of a Python object (in CPython, its memory address),
# so two different ids mean x and y are two different tensor objects
id(x), id(y)

(13047931792, 13047935792)

In [189]:
y = x + y

In [190]:
id(y)

13047990608

### As we can see, reassigning y to x + y ends up storing the result in a different memory location.

In [191]:
x = torch.tensor([0,1,2])
y = torch.tensor([0,1,2])
id(x), id(y)

(13047927712, 13047923792)

In [192]:
y += x

In [193]:
id(y)

13047923792

### Using `y += x`, the operation is performed in-place: `y` keeps the same id. Writing `y[:] = y + x` also stores the result in `y`'s existing memory.

# Numpy/Torch Conversions

In [None]:
# convert a torch tensor to a Numpy array
y_numpy = y.numpy()
type(y_numpy)

In [None]:
# convert a Numpy array to a Torch tensor

In [None]:
x_numpy = np.arange(3)
x_torch = torch.from_numpy(x_numpy)
type(x_torch)

# Tensor Operations

In [None]:
A = torch.ones((3,4)) * 2
B = torch.ones((3,4)) * 3
A,B

## Hadamard Product

In [None]:
A * B

## Reduction Operations
### These operations reduce the order of tensors along one or more axes

In [None]:
# Reduction of an nth order tensor to a 0th order tensor (a scalar)
# total sum and total mean over all elements
print(A.sum(), A.mean())

In [None]:
# reduction of nth order tensor to n-1'th order tensor
# sum and mean along axis 0: the rows are collapsed, leaving one value per column
A.sum(axis=0), A.mean(axis=0)

In [None]:
# sum and mean along axis 1: the columns are collapsed, leaving one value per row
A.sum(axis=1), A.mean(axis=1)

## Non-reducing Sum or Mean
We can set `keepdim=True` (PyTorch's spelling; NumPy calls it `keepdims`) to keep the reduced axis as a dimension of size 1

In [None]:
# reducing sum
C = A.sum(axis=1)
print(C, C.shape)

In [None]:
# non-reducing sum
C = A.sum(axis=1, keepdim=True)
print(C, C.shape)

# Norms

In [None]:
x = torch.tensor([0, 4, 1, 3, 8], dtype=torch.float32)

## L2 Norm

In [None]:
# L2 norm of the vector. torch.linalg.vector_norm (default ord=2) is the documented
# replacement for the deprecated norm() API.
torch.linalg.vector_norm(x)

## L1 Norm

In [None]:
x.abs().sum()

## Euclidean distance between two vectors

In [None]:
x = torch.tensor([0, 4, 1, 3, 8], dtype=torch.float32)
y = torch.tensor([-8, 5, 2, -1, 4], dtype=torch.float32)

# Euclidean distance = L2 norm of the difference vector.
# torch.norm is deprecated; torch.linalg.vector_norm is its documented replacement
# for vectors (its default ord=2 is the L2 norm).
torch.linalg.vector_norm(x - y)

# Vector Dot Product

In [None]:
u = torch.tensor([1, 2, 3], dtype=torch.float32)
v = torch.tensor([4, 1, -5], dtype=torch.float32)

In [None]:
u@v

In [None]:
# the following is identical to above
torch.dot(u, v)

# Matrix Vector Product

## Example of dimensionality reduction

In [None]:
A = torch.tensor([[1, 0, 1], [0, 1, -1]], dtype=torch.float32)
x = torch.tensor([1, 2, 3], dtype=torch.float32)
A.shape, x.shape

In [None]:
A@x

In [None]:
# the following is identical to above
torch.mv(A, x)

## Example of 90 degree rotation operation

In [None]:
A = torch.tensor([[0, -1], [1, 0]], dtype=torch.float32)
x = torch.tensor([1, 0], dtype=torch.float32)
A.shape, x.shape

In [None]:
A@x

# Matrix Matrix Product

In [None]:
A = torch.tensor([[1, 2, 3], [4, 5, 6]], dtype=torch.float32)
B = torch.tensor([[1, 1, 1, 1],[2, 2, 2, 2],[3, 3, 3, 3]], dtype=torch.float32)
A.shape, B.shape

In [None]:
A@B

In [None]:
# the following is identical to the above
torch.mm(A, B)

# Autograd

## Let's First Consider a Scalar-valued Function

### $y = 2 \mathbf{x}^\top \mathbf{x}$

Let $\mathbf{x}$ be a column vector in $\mathbb{R}^n$. The function is defined as:
$$
y = 2 \mathbf{x}^\top \mathbf{x}
$$
This is equivalent to:
$$
y = 2 \sum_{i=1}^n x_i^2
$$

In PyTorch, we can compute this function and its gradient using **Autograd**.


In [None]:
import torch

# Column vector x of shape (4, 1); requires_grad=True makes autograd track operations on it
x = torch.tensor([[0.0], [1.0], [2.0], [3.0]], requires_grad=True)

# y = 2 * x^T x, written with the @ operator: (1x4) @ (4x1) leaves a single element
y = 2 * (x.T @ x)

# Backpropagate from y; the derivative with respect to x is stored in x.grad
y.backward()

# Show dy/dx
print("Gradient of y with respect to x:")
print(x.grad)


In [None]:
# Gradients accumulate in x.grad across backward() calls, so clear the previous result first
x.grad.zero_()
# A different scalar function of x: the sum of its entries
y = torch.sum(x)
# Backpropagate from the new y
y.backward()
# Display the resulting gradient
x.grad

## Let's Now Consider a Vector-valued Function

Let:
$$
\mathbf{u}(\mathbf{x}) =
\begin{bmatrix}
u_1(\mathbf{x}) \\
u_2(\mathbf{x})
\end{bmatrix} =
\begin{bmatrix}
x_1^2 + x_2 \\
\sin(x_1) + x_2^3
\end{bmatrix}.
$$

The Jacobian matrix is:
$$
J_{\mathbf{u}}(\mathbf{x}) =
\begin{bmatrix}
\frac{\partial u_1}{\partial x_1} & \frac{\partial u_1}{\partial x_2} \\
\frac{\partial u_2}{\partial x_1} & \frac{\partial u_2}{\partial x_2}
\end{bmatrix}
=
\begin{bmatrix}
2x_1 & 1 \\
\cos(x_1) & 3x_2^2
\end{bmatrix}.
$$

At $\mathbf{x} = \begin{bmatrix} 1 \\ 2 \end{bmatrix}$ we have
$$
J_{\mathbf{u}}(\mathbf{x}) =
\begin{bmatrix}
2(1) & 1 \\
\cos(1) & 3(2)^2
\end{bmatrix}
=
\begin{bmatrix}
2 & 1 \\
\cos(1) & 12
\end{bmatrix}.
$$


In [None]:
# Input vector x = [x1, x2]; requires_grad=True lets autograd differentiate with respect to it
# (torch is already imported in the notebook's first cell)
x = torch.tensor([1.0, 2.0], requires_grad=True)

# Components of the vector-valued function u(x)
u1 = x[0]**2 + x[1]  # u1 = x1^2 + x2
u2 = torch.sin(x[0]) + x[1]**3  # u2 = sin(x1) + x2^3
u = torch.stack([u1, u2])  # u = [u1, u2]

# Build the Jacobian one row at a time: row i is the gradient of u_i with respect to x.
# retain_graph=True keeps the graph alive for the next row and for y.backward() below.
# create_graph is left at its default (False): only first derivatives are needed, so the
# Jacobian comes out as a plain tensor that is not attached to the autograd graph.
jacobian = torch.stack([
    torch.autograd.grad(u[i], x, retain_graph=True)[0]
    for i in range(len(u))
])

print("Jacobian of u with respect to x:")
print(jacobian)

# Define a scalar function y(u)
y = 2 * u[0] + 3 * u[1]  # y = 2*u1 + 3*u2

# Backpropagate; the gradient of y with respect to x is accumulated into x.grad
y.backward(retain_graph=True)  # keep the graph so it can be differentiated again afterwards

# Print the gradient of y with respect to x
print("Gradient of y with respect to x:", x.grad)

# Chain-rule check: dy/dx = J^T (dy/du), with dy/du = [2, 3]
assert torch.allclose(x.grad, jacobian.T @ torch.tensor([2.0, 3.0]))
