### Gradients of parameters in PyTorch

In [9]:
# Model definition

import torch

class sampleClass(torch.nn.Module):
    """Tiny feed-forward network: Linear(3 -> 4) -> ReLU -> Linear(4 -> 2).

    Each layer is stored both as a direct attribute (``linear1``, ``relu``,
    ``linear2``) and as a stage of the ``ff`` pipeline, so both views refer
    to the same layer objects.
    """

    def __init__(self) -> None:
        super().__init__()
        # Layers are created in the same order they are applied.
        self.linear1 = torch.nn.Linear(in_features=3, out_features=4)
        self.relu = torch.nn.ReLU()
        self.linear2 = torch.nn.Linear(in_features=4, out_features=2)
        # Chain the already-created layers into a single callable pipeline.
        stages = (self.linear1, self.relu, self.linear2)
        self.ff = torch.nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the ``ff`` pipeline and return its output."""
        out = self.ff(x)
        return out



In [14]:
# named_parameters(): print the gradient of every model parameter after backward()

x = torch.randn(15).reshape(5, 3)
target = torch.tensor([1.0, -1.0])
model = sampleClass()
y = model(x)

# Squared error averaged over the last dimension, then over the remaining one.
per_row = (target - y).square().mean(dim=-1)
loss = per_row.mean()
loss.backward()

for name, param in model.named_parameters():
    print(name, '\n', param.grad, '\n')

linear1.weight 
 tensor([[-0.0211,  0.0067, -0.0182],
        [-0.1526, -0.0175,  0.1424],
        [ 0.0459, -0.0131,  0.0071],
        [ 0.2053, -0.0216, -0.2782]]) 

linear1.bias 
 tensor([-0.0190,  0.1522,  0.0158, -0.5933]) 

linear2.weight 
 tensor([[-0.1196, -0.2009, -0.1631, -0.4590],
        [ 0.0122,  0.0975, -0.0093,  0.1346]]) 

linear2.bias 
 tensor([-1.0909,  0.4438]) 



backward() vs autograd.grad()

In [17]:
# loss.backward(): gradients end up in the .grad attribute of the tensors
model = sampleClass()
model.train()

target = torch.tensor([1.0, -1.0])

input = torch.randn(2, 3, requires_grad=True)
output = model(input)
# Squared error averaged over the last dimension, then summed over the rows.
loss = (target - output).square().mean(dim=-1).sum()
loss.backward()

# The derivative values are stored in .grad of the model's weights, its
# biases, and the input.
print("Input\n", input.grad)
for name, param in model.named_parameters():
    # One blank line after each parameter's gradient.
    print(name, '\n', param.grad, end='\n\n')


Input
 tensor([[-0.0122, -0.0181,  0.0241],
        [ 0.0000,  0.0000,  0.0000]])
linear1.weight 
 tensor([[ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [-0.0460,  0.0031,  0.0577]])

linear1.bias 
 tensor([0.0000, 0.0000, 0.0000, 0.0428])

linear2.weight 
 tensor([[ 0.0000,  0.0000,  0.0000, -0.6578],
        [ 0.0000,  0.0000,  0.0000,  0.7662]])

linear2.bias 
 tensor([-2.3440,  2.3521])



In [18]:
# torch.autograd.grad: gradients are returned to the caller, not stored in .grad
model2 = sampleClass()
model2.train()

input = torch.randn(2, 3, requires_grad=True)
output = model2(input)
# `target` is not defined in this cell; it is reused from the previous cell.
loss = (target - output).square().mean(dim=-1).sum()

# Only returns the gradient of loss with respect to input (nothing is
# written to .grad).
input_grads = torch.autograd.grad(outputs=loss, inputs=input)
print(input_grads)

# No derivative values are stored in .grad of the model's weights, its
# biases, or the input.
print("Input\n", input.grad)
for name, param in model2.named_parameters():
    print(name, '\n', param.grad, '\n')


(tensor([[ 0.0637,  0.0095, -0.1384],
        [-0.0573, -0.0337,  0.0637]]),)
Input
 None
linear1.weight 
 None 

linear1.bias 
 None 

linear2.weight 
 None 

linear2.bias 
 None 



In [None]:
# optimizer.zero_grad() # Reset gradients
# for i, (inputs, labels) in enumerate(dataloader):
#     outputs = model(inputs)
#     loss = criterion(outputs, labels)
#     loss.backward() # Gradients are accumulated
    
#     if (i + 1) % gradient_accumulation_steps == 0:
#         optimizer.step() # Update weights
#         optimizer.zero_grad() # Reset gradients for the next iteration

