In [34]:
import numpy as np

x = np.array([2.0])
y = x**3 + 5

grad_x = 3 * (x**2)

print(f"Gradient Value : {grad_x[0]}")


Gradient Value : 12.0


In [35]:
import torch

x = torch.tensor([2.0], requires_grad = True)

y = x**3 + 5

y.backward()

print(f"Gradient Value: {x.grad.item()}")

Gradient Value: 12.0


In [36]:
import torch.nn as nn

layer = nn.Linear(in_features = 10, out_features = 5)

print(f"Weight Shape: {layer.weight.shape}")
print(f"Bias Shape: {layer.bias.shape}")

# Automatically assigns Weights and bias respective to input features and hidden layer neuron counts
x = torch.randn(1,10)

output = layer(x)
print(output)

Weight Shape: torch.Size([5, 10])
Bias Shape: torch.Size([5])
tensor([[ 0.3628, -1.0431,  0.4777, -0.8565,  0.0311]],
       grad_fn=<AddmmBackward0>)


In [37]:
print(x)
x.relu()

tensor([[ 0.3739,  1.3974,  0.0834, -0.4271, -1.0409, -0.4697,  1.4135, -0.5688,
          0.4498,  0.0772]])


tensor([[0.3739, 1.3974, 0.0834, 0.0000, 0.0000, 0.0000, 1.4135, 0.0000, 0.4498,
         0.0772]])

**Out of Place Memory Operation vs In Place Memory Operation**

    *By adding _(Underscore) after a function we can make the same function work as In place operation*

In [38]:
print("Out of Place Calculation...")

a = torch.tensor([1.0, 2.0, 3.0])

print(f"Original A Memory Address: {id(a)}")
print(f"Original A Values: {a}")


b = torch.exp(a)
print(f"New B memory address:{id(b)}")
print(f"Values of B: {b}")
print(f"Values of A (unchanged):   {a}")

Out of Place Calculation...
Original A Memory Address: 139987236432272
Original A Values: tensor([1., 2., 3.])
New B memory address:139987237401488
Values of B: tensor([ 2.7183,  7.3891, 20.0855])
Values of A (unchanged):   tensor([1., 2., 3.])


In [39]:
print("In Place Calculation...")

X = torch.tensor([1.0, 2.0, 3.0])

print(f"Original X Memory Address: {id(X)}")
print(f"Original X Values: {X}")


y = torch.exp_(X)
print(f"New Y memory address:{id(y)}")
print(f"Values of Y: {y}")
print(f"Values of X (changed): {X}")

In Place Calculation...
Original X Memory Address: 139987636180928
Original X Values: tensor([1., 2., 3.])
New Y memory address:139987636180928
Values of Y: tensor([ 2.7183,  7.3891, 20.0855])
Values of X (changed): tensor([ 2.7183,  7.3891, 20.0855])


In [40]:
import torch

x = torch.tensor([2.0], requires_grad=True)

try:
    y = x.pow_(2)
    y.backward()
except RuntimeError as e:
    print(f"Caught Expected Error:\n{e}")

# This is using in place calculation  while autograd where it gives error due to change in the function value

Caught Expected Error:
a leaf Variable that requires grad is being used in an in-place operation.


In [41]:
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
device
# it will assign cuda if there is GPU

device(type='cuda')

In [42]:
x = torch.tensor([1.0, 2.0, 3.0], device = device)
print(x.device)

cuda:0


In [43]:
cpu_tensor = torch.tensor([10.0, 25.0, 20.0])
cpu_tensor.device

device(type='cpu')

In [44]:
gpu_tensor = cpu_tensor.to(device)
gpu_tensor.device

# changing to gpu using to function which helps in convert data to gpu after loading the data
# We need to assign the changed variable else it will stay in CPU

device(type='cuda', index=0)

In [45]:
x = torch.tensor([2.0], requires_grad = True)
print(x.requires_grad)

True


In [46]:
with torch.no_grad():
  y = x*3
  print(f"Y requires grad:{y.requires_grad}")
  print(f"Y grad Function: {y.grad_fn}")

Y requires grad:False
Y grad Function: None


In [47]:
z = x * 2
print(f"Outside no_grad: {z.requires_grad}")

# If we perform operations outside the no_grad the grad computations all be calculated

Outside no_grad: True


In [48]:
with torch.inference_mode():

  y = x ** 2
  print(f"Inside Inference_node: {y.requires_grad}")

  # By using inference_mode we completely cutoff the tracking such as version and view tracking by removing extra logs

Inside Inference_node: False


In [49]:
w = torch.tensor([10.0], requires_grad=True)
loss = w * 5

detached_loss = loss.detach()

print(f"Original Loss: {loss.requires_grad}")
print(f"Detached Loss: {detached_loss.requires_grad}")

Original Loss: True
Detached Loss: False


In [50]:
detached_loss.add_(1)
print(loss)

tensor([51.], grad_fn=<MulBackward0>)


In [51]:
import torch

x = torch.tensor([2.0], requires_grad=True)

y = x**2

z = y**3

z.backward()

print(f"Gradient dx: {x.grad}")

Gradient dx: tensor([192.])


In [53]:
x = torch.tensor([2.0], requires_grad=True)

y = x**2

# y_detached has the same value as y (4.0) but NO relation to x.
y_detached = y.detach()

z = y_detached**3

z.backward()

print(f"Gradient dx: {x.grad}")

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [54]:
import torch

a = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)

b = a.detach()

print(f"a before: {a}")
print(f"b before: {b}")

b.zero_() # In place operation will change the original also

print(f"b after change: {b}")
print(f"a after change: {a}")

# The detach function will be usefull when we need a part of gradient step to calculate some function without altering the gradient or computation graphs

a before: tensor([1., 2., 3.], requires_grad=True)
b before: tensor([1., 2., 3.])
b after change: tensor([0., 0., 0.])
a after change: tensor([0., 0., 0.], requires_grad=True)


In [55]:
t = torch.tensor(2.0, requires_grad=True)
z = t.cos().exp_()
z.backward()
z

tensor(0.6596, grad_fn=<ExpBackward0>)

In [56]:
t = torch.tensor(2.0, requires_grad=True)
z = t.cos_().exp()
z.backward()

# we changed the input tensor by calculating in memory operation using directly with t.cos_() so it will cause error

RuntimeError: a leaf Variable that requires grad is being used in an in-place operation.

In [57]:
t = torch.tensor(2.0, requires_grad=True)
z = t.exp().cos_()
z.backward()

# any function that changes the input values which required during back propagation of chain rule should not be performed using in place operations

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor []], which is output 0 of ExpBackward0, is at version 1; expected version 0 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

In [58]:
import torch
import torch.nn as nn

# In_features = 100, Out_features = 200
layer = nn.Linear(in_features=100, out_features=200)

print(f"Weight Shape: {layer.weight.shape}")

print(f"Bias Shape:   {layer.bias.shape}")

Weight Shape: torch.Size([200, 100])
Bias Shape:   torch.Size([200])


In [59]:
import torch
import torch.nn as nn
import torch.optim as optim

X = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
y_true = torch.tensor([[12.0], [14.0], [16.0], [18.0]])

model = nn.Linear(1, 1)

criterion = nn.MSELoss() # Mean Squared Error
optimizer = optim.SGD(model.parameters(), lr=0.01) # Stochastic Gradient Descent

epochs = 100

print(f"Initial params: Weight={model.weight.item():.2f}, Bias={model.bias.item():.2f}")

for epoch in range(epochs):
    #Set Mode
    model.train()

    # Forward Pass
    y_pred = model(X)

    # Compute Loss
    loss = criterion(y_pred, y_true)

    # We clear previous gradients BEFORE calculating new ones
    optimizer.zero_grad()

    # Calculate gradients
    loss.backward()

    # Update weights
    optimizer.step()

    if (epoch + 1) % 20 == 0:
        print(f"Epoch {epoch+1}: Loss = {loss.item():.4f}")

print(f"Final params: Weight={model.weight.item():.2f}, Bias={model.bias.item():.2f}")


# A simple feed forward neural network with backpropagation


Initial params: Weight=-0.95, Bias=0.34
Epoch 20: Loss = 9.2603
Epoch 40: Loss = 7.9649
Epoch 60: Loss = 7.0645
Epoch 80: Loss = 6.2661
Epoch 100: Loss = 5.5579
Final params: Weight=3.96, Bias=4.25


In [60]:
import torch
from torch.utils.data import DataLoader
import os

count_cpu = os.cpu_count()
optimal_workers = min(count_cpu, 16)

train_loader = DataLoader(
    dataset = demo,
    batch_size = 64,
    shuffle = True,
    num_workers = optimal_workers,   # create multiple process to handle multiple tasks such as text processing and data loading at the same time
    persistent_workers = True,       # The memory allocated for each epoch didnt kill and ready to process the next epoch
    pin_memory = True,                # Fast RAM to VRAM transfer as it always stores the data batches in cpu fixed memory and ready to load
    prefetch_factor=2                  # This assigns two batches per worker
)



# how to handle data loading with efficient and fast loading time to GPU

NameError: name 'demo' is not defined

In [61]:
import torch.nn as nn
import torch

model = nn.Linear(10,1)
criterion = nn.BCEWithLogitsLoss()

input_data = torch.randn(32, 10)
target = torch.empty(32, 1).random_(2)

logits = model(input_data)
loss = criterion(logits, target)

print(loss)


# Used for binary class classification to calculate directly from logits then it also apply by default sigmoid while using BCEwith LogitLoss

tensor(0.8415, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)


In [62]:
model = nn.Linear(10, 3)

criterion = nn.CrossEntropyLoss()

input_data = torch.randn(5, 10)
target = torch.tensor([0, 2, 1, 0, 2], dtype=torch.long)

logits = model(input_data)
loss = criterion(logits, target)
loss

# This does Multiclassclassification

tensor(1.2440, grad_fn=<NllLossBackward0>)

In [63]:
import torch
import torch.nn as nn

class SensitivityNet(nn.Module):
    def __init__(self):
        super().__init__()

        # Dropout: 50% chance to zero out a value
        self.dropout = nn.Dropout(p=0.5)

        # BatchNorm: Normalizes data
        self.bn = nn.BatchNorm1d(num_features=1)
        self.linear = nn.Linear(1, 1, bias=False)

        # Manually setting weight to 1.0
        with torch.no_grad():
            self.linear.weight.fill_(1.0)

    def forward(self, x):
        x = self.bn(x)
        x = self.dropout(x)
        x = self.linear(x)
        return x

model = SensitivityNet()
input_data = torch.tensor([[10.0], [10.0]]) # Batch of two identical inputs

print("--- TRAINING MODE ---")
model.train() # SWITCH Training mode
output_train = model(input_data)
# Dropout is ACTIVE: Some values might be 0.
# BatchNorm is using THIS BATCH to calculate mean/std.

print(output_train)

print("\n--- EVALUATION MODE ---")
model.eval() # SWITCH Evaluation mode
output_eval = model(input_data)

print(output_eval)

--- TRAINING MODE ---
tensor([[-0.0001],
        [ 0.0000]], grad_fn=<MmBackward0>)

--- EVALUATION MODE ---
tensor([[9.4868],
        [9.4868]], grad_fn=<MmBackward0>)


In [64]:

def logic_gate(x):
    if x.sum() > 0:
        return x
    else:
        return -x

dummy_input = torch.tensor([10.0])

# It records ONLY: "return x". It ignores the "else" block completely.
traced_func = torch.jit.trace(logic_gate, dummy_input)

test_input = torch.tensor([-10.0])

print(f"Traced Output: {traced_func(test_input)}")

Traced Output: tensor([-10.])


  if x.sum() > 0:


In [65]:
# The compiler reads the source code. It sees the "if". It preserves the logic.
scripted_func = torch.jit.script(logic_gate)

test_input = torch.tensor([-10.0])

print(f"Scripted Output: {scripted_func(test_input)}")

print(scripted_func.code)

Scripted Output: tensor([10.])
def logic_gate(x: Tensor) -> Tensor:
  if bool(torch.gt(torch.sum(x), 0)):
    _0 = x
  else:
    _0 = torch.neg(x)
  return _0



In [66]:
x = torch.tensor(1.2, requires_grad=True)
y = torch.tensor(3.4, requires_grad=True)

# PyTorch builds the computational graph dynamically here
f = torch.sin((x**2) * y)
f.backward()

dx = x.grad
dy = y.grad

print(f"Function value f(1.2, 3.4): {f.item():.4f}")
print(f"Gradient vector [df/dx, df/dy]: [{dx.item():.4f}, {dy.item():.4f}]")

Function value f(1.2, 3.4): -0.9832
Gradient vector [df/dx, df/dy]: [1.4899, 0.2629]
