In [5]:
import torch

# Kept under a name for reference: a bare `torch.__version__` in the middle of
# a cell is evaluated but never displayed (Jupyter only renders the cell's
# final expression). Use print(torch_version) to show it.
torch_version = torch.__version__

# Apple-Silicon Macs accelerate PyTorch through the Metal Performance Shaders
# (MPS) backend. `torch.backends.mps.is_available()` is the documented check;
# the extra `torch.mps.is_available()` call was dropped because its result was
# discarded and it only duplicated this one.
mps_available = torch.backends.mps.is_available()
print(mps_available)

True


#### 2. Common Tensor Operations 

In [15]:
# --- 1. Tensors: containers for array-like data of any rank -----------------
tensor0d = torch.tensor(1)                      # rank 0 (scalar)
tensor1d = torch.tensor([1, 2, 3])              # rank 1 (vector)
tensor2d = torch.tensor([[1, 2, 3],
                         [3, 4, 4]])            # rank 2, built from a nested Python list
tensor3d = torch.tensor([[[1, 2], [3, 4]],
                         [[1, 6], [2, 9]]])     # rank 3

# --- 2. Data types -----------------------------------------------------------
# Python ints become 64-bit integers by default.
print(tensor1d.dtype)

# Python floats become 32-bit floats by default. 32-bit precision is usually
# sufficient, uses less memory, and is what most GPU architectures are
# optimized for, which speeds up both training and inference.
floatvec = torch.tensor([1.0, 3.0, 5.0, 6.9])
print(floatvec.dtype)

# `.to` converts an existing tensor to another dtype (here: int64 -> float32).
floatvec = tensor1d.to(dtype=torch.float32)
print(floatvec.dtype)

# --- 3. Common operations ----------------------------------------------------
# The tensor itself, then its shape: 2 rows x 3 columns.
print(tensor2d, tensor2d.shape, sep="\n")

# `reshape` returns a new 3 x 2 tensor; the original keeps its shape.
tensor2d_reshape = torch.reshape(tensor2d, (3, 2))
print(tensor2d_reshape, tensor2d.shape, sep="\n")

# `view` is the more common way to reshape; it needs a compatible memory layout.
tensor2d.view(3, 2)

# `.T` transposes the tensor (flips it across its diagonal).
tensor2d.T

# Matrix multiplication: `matmul` and the `@` operator are equivalent.
torch.matmul(tensor2d, tensor2d.T)
tensor2d @ tensor2d.T

torch.int64
torch.float32
torch.float32
tensor([[1, 2, 3],
        [3, 4, 4]])
torch.Size([2, 3])
tensor([[1, 2],
        [3, 3],
        [4, 4]])
torch.Size([2, 3])


tensor([[14, 23],
        [23, 41]])

#### 3. Seeing Models as Computational Graphs

In [18]:
# Autograd is PyTorch's automatic differentiation engine: it computes
# gradients in dynamic computational graphs automatically.
#
# A computation graph lays out the sequence of calculations needed to produce
# a network's output. That same sequence is what backpropagation (the main
# training algorithm for neural networks) walks to obtain the gradients.
import torch.nn.functional as F

# Forward pass of a single-feature logistic-regression unit.
y = torch.tensor([1.0])    # true label
x1 = torch.tensor([1.1])   # input feature
w1 = torch.tensor([2.2])   # weight parameter
b1 = torch.tensor([0.0])   # bias unit

z = x1 * w1 + b1           # net input
a = z.sigmoid()            # sigmoid activation

loss = F.binary_cross_entropy(input=a, target=y)
print(loss)

# The gradient of this loss with respect to the model parameters (w1, b1)
# is what is used to train the model.


tensor(0.0852)


#### 4. Automatic Differentiation Made Easy

In [23]:
# PyTorch builds a computation graph internally by default as soon as one of
# the graph's terminal (leaf) nodes has `requires_grad=True`.
#
# Why gradients matter:
#   * Partial derivative: the rate at which a function changes with respect
#     to one of its variables.
#   * Gradient: the vector of all partial derivatives of a multivariate function.
#   * Gradient descent uses it to update each parameter so that the loss
#     function, a proxy for model performance, is minimized.
#
# With the graph built in the background, `grad` returns the gradient of the
# loss with respect to a chosen model parameter.

import torch.nn.functional as F
from torch.autograd import grad

y = torch.tensor([1.0])                         # true label
x1 = torch.tensor([1.1])                        # input feature
w1 = torch.tensor([2.2], requires_grad=True)    # trainable weight
b1 = torch.tensor([0.0], requires_grad=True)    # trainable bias

z = w1 * x1 + b1
a = z.sigmoid()
loss = F.binary_cross_entropy(input=a, target=y)

# PyTorch frees the computation graph after a gradient computation to save
# memory; `retain_graph=True` keeps it alive so it can be traversed again below.
grad_L_w1 = grad(outputs=loss, inputs=w1, retain_graph=True)
grad_L_b1 = grad(outputs=loss, inputs=b1, retain_graph=True)

print(grad_L_w1, grad_L_b1, sep="\n")

# Higher-level alternative: `.backward()` on the loss computes the gradients
# of all leaf nodes at once and stores them in each tensor's `.grad` attribute.
loss.backward()
print(w1.grad, b1.grad, sep="\n")

(tensor([-0.0898]),)
(tensor([-0.0817]),)
tensor([-0.0898])
tensor([-0.0817])


#### 5. Implementing Multilayer Neural Network


In [None]:
import torch
class NeuralNetwork(torch.nn.Module):
    """Multilayer perceptron with two ReLU hidden layers (30 and 20 units).

    Instances are meant to be called directly (`model(X)`): that goes through
    `torch.nn.Module.__call__`, which runs any registered forward pre-/post-
    hooks around `forward`.

    Args:
        num_in: number of input features.
        num_out: number of output units (one logit per class).
    """

    def __init__(self, num_in, num_out):
        super().__init__()

        stack = [
            # 1st hidden layer
            torch.nn.Linear(num_in, 30),
            torch.nn.ReLU(),
            # 2nd hidden layer
            torch.nn.Linear(30, 20),
            torch.nn.ReLU(),
            # Output layer: deliberately NOT followed by a nonlinear activation.
            # PyTorch's cross-entropy loss combines the softmax operation with
            # the negative log-likelihood loss for numerical efficiency and
            # stability, so the network returns raw logits.
            torch.nn.Linear(20, num_out),
        ]
        self.layers = torch.nn.Sequential(*stack)

    def forward(self, x):
        """Run `x` through the layer stack built in `__init__` and return the logits."""
        return self.layers(x)

#=========Model Arch ============#
# Seed BEFORE building the model: the Linear layers draw their initial weights
# from the global RNG, so without this the numbers printed below would change
# on every fresh kernel run.
torch.manual_seed(123)
model = NeuralNetwork(50, 3)
print(model)

#============ Parameters =========#
# Every parameter with requires_grad=True counts as trainable. Here they all
# live in the nn.Linear (fully connected) layers.
num_params = sum(
    p.numel() for p in model.parameters() if p.requires_grad
)
print("Total number of trainable model parameters : ", num_params)

# Weight matrix of the first Linear layer, stored as (out_features, in_features).
# Weights are initialized with small random numbers to break symmetry during training.
print(model.layers[0].weight.shape)

#========== Model Call ============#
# Re-seed so the random input does not depend on how many random numbers the
# model initialization consumed.
torch.manual_seed(123)
X = torch.rand((1, 50))  # a single random training example with 50 features
out = model(X)  # calling the model executes its forward pass
print(out)

# The result holds three scores (logits) plus a grad_fn, which PyTorch uses to
# compute gradients during backpropagation. For pure prediction after training,
# building that computation graph is wasteful: it costs extra computation and
# memory. The torch.no_grad() context manager turns gradient tracking off.

#========== Inference =============#
with torch.no_grad():
    # Softmax turns the logits into class-membership probabilities.
    out = torch.softmax(model(X), dim=1)
print(out)
# The values can be interpreted as class-membership probabilities that sum to 1.
# The values can be interpreted as class-membership that sum up to 1

NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)
Total number of trainable model parameters :  2213
torch.Size([30, 50])
tensor([[-0.0879,  0.1729,  0.1534]], grad_fn=<AddmmBackward0>)
tensor([[0.2801, 0.3635, 0.3565]])


#### 6. Setting up efficient data loaders


In [None]:
# A Dataset class defines how each individual record is loaded.
# A DataLoader handles how the data is shuffled and assembled into batches.

# Toy dataset: 5 training and 2 test examples with 2 features each.
# torch.tensor takes ONE array-like data argument, so the rows must be wrapped
# in an outer list; passing each row as its own positional argument raises a
# TypeError.
X_train = torch.tensor([
    [-1.2, 3.1],
    [-0.9, 2.9],
    [-0.5, 2.6],
    [2.3, -1.1],
    [2.7, -1.5],
])
y_train = torch.tensor([0, 0, 0, 1, 1])  # class labels, one per training row

X_test = torch.tensor([
    [-0.8, 2.8],
    [2.6, -1.6],
])
y_test = torch.tensor([0, 1])  # class labels, one per test row

# Sanity check: every example has exactly one label.
assert X_train.shape[0] == y_train.shape[0]
assert X_test.shape[0] == y_test.shape[0]
