<a href="https://colab.research.google.com/github/Arpitsinghvampire/PyTorch_codes/blob/main/TRANSFORMS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
#we use  transforms to perform some manipulation of teh data and make it suitable for training

#all torchvision  datasets hasd two parameters  -transform to modify features
#-target_transform to modify the labels  (modify target vectors)
import torch
from torchvision import datasets
from torchvision.transforms import ToTensor , Lambda
from torch import nn

dataset = datasets.FashionMNIST(
    root = "data",
    train = True,
    download = True,
    transform = ToTensor(),
    target_transform = Lambda(lambda y: torch.zeros(10, dtype = torch.float).scatter_(0,torch.tensor(y) , value = 1)) #this basically does  one hot encoded part
)

dataset = datasets.FashionMNIST(
    root = "data",
    train = False,
    download = True,
    transform = ToTensor(),
    target_transform = Lambda(lambda y: torch.zeros(10, dtype = torch.float).scatter_(0,torch.tensor(y) , value = 1)) #this basically does  one hot encoded part
)

In [6]:
seq_module = nn.Sequential(
    nn.Flatten(),
    nn.Sequential(
        nn.Linear(28*28 , 200),
        nn.ReLU() ,
        nn.Linear(200,100)
    ),
    nn.Linear(100,10)
)

input_image = torch.rand(3,28,28)
logits = seq_module(input_image)

nn.SOFTMAX

In [9]:
softmax = nn.Softmax(dim = 1) #the column sum should sum upto 1
pred_proba = softmax(logits) #applies softmax to the logits


#now lets plot the models
print(f"Model Structure {seq_module} \n")
for name , param in seq_module.named_parameters():
    print(f"Layer: {name} , Size = {param.size()} , values = {param[:2]} \n")

Model Structure Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Sequential(
    (0): Linear(in_features=784, out_features=200, bias=True)
    (1): ReLU()
    (2): Linear(in_features=200, out_features=100, bias=True)
  )
  (2): Linear(in_features=100, out_features=10, bias=True)
) 

Layer: 1.0.weight , Size = torch.Size([200, 784]) , values = tensor([[-0.0114,  0.0007,  0.0291,  ...,  0.0344, -0.0128,  0.0287],
        [ 0.0087, -0.0356, -0.0270,  ..., -0.0224, -0.0263, -0.0332]],
       grad_fn=<SliceBackward0>) 

Layer: 1.0.bias , Size = torch.Size([200]) , values = tensor([-0.0094,  0.0081], grad_fn=<SliceBackward0>) 

Layer: 1.2.weight , Size = torch.Size([100, 200]) , values = tensor([[ 0.0680, -0.0473, -0.0173,  0.0424,  0.0323,  0.0348,  0.0413, -0.0577,
         -0.0159,  0.0066,  0.0308, -0.0581, -0.0564, -0.0605,  0.0045,  0.0623,
         -0.0702,  0.0118, -0.0457, -0.0410,  0.0659, -0.0573,  0.0531, -0.0029,
          0.0674, -0.0411, -0.0403, -0.0109, -0.0419,  0

AUTOMATIC DIFFERENTIATION WITH torch.autograd

In [18]:
import torch

x = torch.ones(5)
y = torch.zeros(3)
w = torch.randn(5 , 3 , requires_grad = True)
b = torch.randn(3,requires_grad = True)

z = torch.matmul(x , w)+b
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)



In [19]:

print(f"Gradient function for z = {z.grad_fn}")
print(f"Gradient function for loss = {loss.grad_fn}")

loss.backward(retain_graph = True)

print(w.grad)
print(b.grad)

Gradient function for z = <AddBackward0 object at 0x7c4c32148f40>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x7c4c32148700>
tensor([[0.2815, 0.0065, 0.3312],
        [0.2815, 0.0065, 0.3312],
        [0.2815, 0.0065, 0.3312],
        [0.2815, 0.0065, 0.3312],
        [0.2815, 0.0065, 0.3312]])
tensor([0.2815, 0.0065, 0.3312])


In [24]:
#by default gradient tracking is always on
z = torch.matmul(x,w) + b
print(z.requires_grad)
#in some cases we do not want to track the gradients accumulated so far , we might want to get the predictions of the current model , in that case
#we do not want the gradients  thus we stop the gradient tracking
with torch.no_grad():
  z = torch.matmul(x,w)+b
print(z.requires_grad)

True
False


In [26]:
#another way to get teh same result is known as detach()  method on the tensor
z = torch.matmul(x,w) + b
z_detached = z.detach() #stop the gradient tracking
print(z_detached.requires_grad)

#the backward pas kicks off when .backward() is called on the DAG root .Autograd then

#computes the gradient from each  gradient_fn
#accumulates thme in ot respective tesnors .grad  attribute


False


In [4]:
import torch

inp = torch.eye(4,5 , requires_grad = True) #the gradient is tracked for all variables where the requires_grad part is True
out = (inp+1).pow(2).t()
#.backward() takes in a  scalar value  , but since we have a matrix , the pytorch needs to know how much each element contributes to the graph
#torch.ones means that all the elements contribute equally
#sum all gradients into inp.grad
out.backward(torch.ones_like(out) , retain_graph = True) #this takes the  gradient of the output
#this basically gives how much importance to be given to each location of the matrix  , finds the gradient of input
print("FIRST TIME GRADIENT")
print(inp.grad)

#now we do this one more time
out.backward(torch.ones_like(out) , retain_graph = True)
print("SECOND TIME  GRADIENT")
print(inp.grad)

print("GRADIENT AFTER ZEROING THE GRADIENT")
#now lets give the gradient as 0
inp.grad.zero_()
#this makes the gradient 0
#Now lets print the gradient
print(inp.grad)






FIRST TIME GRADIENT
tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.]])
SECOND TIME  GRADIENT
tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.]])
GRADIENT AFTER ZEROING THE GRADIENT
tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])


Notice that when we call backward for the second time with the same argument, the value of the gradient is different. This happens because when doing backward propagation, PyTorch accumulates the gradients, i.e. the value of computed gradients is added to the grad property of all leaf nodes of computational graph. If you want to compute the proper gradients, you need to zero out the grad property before. In real-life training an optimizer helps us to do this.



Previously we were calling backward() function without any parameters .This is equivalent to  backward(torch.tensor(1.0)) , whiuch is used for calaculating the gradients  in case of a scalar valued function .

now lets use and load the model


In [None]:
import torch
import torch.vision.models as models  #from here we will use the torch vision models

model = models.vgg(weights = "IMAGENET1K_V1")
#now we save the models
torch.save(model.state_dict() , 'model_weights.pth')

In [5]:
model = models.vgg16()
model.load_state_dict(torch.load('model_weights.pth') , weights_only = True)
model.eval()

NameError: name 'models' is not defined