In [5]:
import torch
torch.__version__

torch.mps.is_available() # For Mac OS
print(torch.backends.mps.is_available())
# Mac has an Apple Silicon chip, to accelerate PyTorch code.

True


#### 2. Common Tensor Operations 

In [15]:
# 1. Tensors are data-containers for array-like
tensor0d = torch.tensor(1) # 0d Tensor
tensor1d = torch.tensor([1, 2, 3]) # 1D Tensor
tensor2d = torch.tensor([[1, 2, 3], [3, 4, 4]]) # 2D Tensor, From Nested Python List
tensor3d = torch.tensor([[[1, 2], [3, 4]], [[1, 6], [2, 9]]])

# 2. Tensor DataTypes
print(tensor1d.dtype)

# Float Data Type
floatvec = torch.tensor([1.0, 3.0, 5.0, 6.9])
print(floatvec.dtype) # 32-bit
# A 32 bit offers sufficient precision, consume less memory & resources
# Most GPU Arch are optimized for 32-Bits computations.
# Hence Speed Up Model Trainig & inference

# Possible to change the precision using `.to` method
floatvec = tensor1d.to(torch.float32)
print(floatvec.dtype)

# 3 Tensor Operations
print(tensor2d)
print(tensor2d.shape) # Tensor has 2 rows and 3 columns
tensor2d_reshape = tensor2d.reshape(3, 2)
print(tensor2d_reshape)
print(tensor2d.shape)

tensor2d.view(3, 2) # Most common way to reshape

tensor2d.T # Transpose the tensor, flipping it across its diagonal

tensor2d.matmul(tensor2d.T) # matmul or @
tensor2d @ tensor2d.T

torch.int64
torch.float32
torch.float32
tensor([[1, 2, 3],
        [3, 4, 4]])
torch.Size([2, 3])
tensor([[1, 2],
        [3, 3],
        [4, 4]])
torch.Size([2, 3])


tensor([[14, 23],
        [23, 41]])

#### 3. Seeing Model as Computattional Graph 

In [18]:
# PyTorch’s automatic differentiation engine, also known as autograd
# to compute gradients in dynamic computational graphs automatically

# computation graph  -> lays out the sequence of calculations needed to 
# compute the output of a neural network – would be required 
# to compute the required gradients for backpropagation, 
# which is the main training algorithm for neural networks.
import torch.nn.functional as F

y = torch.tensor([1.0]) # True label
x1 = torch.tensor([1.1]) # Input Feature
w1 = torch.tensor([2.2]) # weight parameter
b1 = torch.tensor([0.0]) # bias unit

z = x1 * w1 + b1 # net input
a = torch.sigmoid(z) # sigmoid activation

loss = F.binary_cross_entropy(a, y)
print(loss)

# Can use gradient of loss function w.r.t w1 & b1 (model parameters), 
# to train model


tensor(0.0852)


#### 4. Automatic Differetiation Made Easy

In [23]:
# build such a graph internally by default if one of its terminal nodes 
# has the requires_grad attribute set to True.

# Gradients are required when training neural networks
# via the popular backpropagation algorithm

# Partial Derivative - Rate at which function changes w.r.t to one of its variables
# Gradient - Vector of Partial Derivative of mutivariate function
# Provides info to update each of the parameter that minimizes loss function (gradient descent)
# loss function serves as proxy for the model performance, 

# PyTorch’s autograd engine constructs a computational graph 
# in the background. Then, calling the grad function, 
# we can compute the gradient of the loss with respect to model parameter w1 

import torch.nn.functional as F
from torch.autograd import grad

y = torch.tensor([1.0])
x1 = torch.tensor([1.1])
w1 = torch.tensor([2.2], requires_grad=True)
b1 = torch.tensor([0.0], requires_grad=True)

z = w1 * x1 + b1
a = torch.sigmoid(z)

loss = F.binary_cross_entropy(a, y)
grad_L_w1 = grad(loss, w1, retain_graph=True)
grad_L_b1 = grad(loss, b1, retain_graph=True)

# PyTorch destroys the computation graph after calculating the gradients 
# to free memory, hence `retain_graph=True`

print(grad_L_w1)
print(grad_L_b1)

# we can call .backward on the loss, and PyTorch will compute 
# the gradients of all the leaf nodes in the graph, 
# which will be stored via the tensors’ .grad attributes:

loss.backward()
print(w1.grad)
print(b1.grad)

(tensor([-0.0898]),)
(tensor([-0.0817]),)
tensor([-0.0898])
tensor([-0.0817])


In [None]:
#### 5. Implementing Multilayer Neural Network
