# Simple Autograd Function

In [None]:
import torch
from torch.autograd import Function

In [None]:
class ModifiedGradient(Function):
    """Custom autograd op whose backward pass is deliberately not the true derivative.

    The forward pass scales the input by -10000, while the backward pass
    returns ``grad_output * 10000 * input`` rather than the analytic
    gradient of the forward computation (``grad_output * -10000``).
    """

    @staticmethod
    def forward(ctx, input):
        """Remember ``input`` for the backward pass and return ``input * -10000``."""
        scaled = input * -10000
        ctx.save_for_backward(input)
        return scaled

    @staticmethod
    def backward(ctx, grad_output):
        """Return the hand-crafted gradient ``grad_output * 10000 * input``."""
        (saved_input,) = ctx.saved_tensors
        amplified = grad_output * 10000
        return amplified * saved_input


In [None]:
# Run the custom op on a scalar leaf tensor and backpropagate through it.
input = torch.tensor(2.0, requires_grad=True)
output = ModifiedGradient.apply(input)
output.backward()
# ModifiedGradient.backward returns grad_output * 10000 * input, so with the
# implicit grad_output of 1 and input = 2.0 this prints tensor(20000.), not 2.0.
print(input.grad)


In [None]:
# Show the forward result, i.e. input * -10000 as computed by ModifiedGradient.forward.
output

# Autograd Function with Network

In [9]:
import numpy as np
import random
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.autograd import Function
from torch.optim.optimizer import Optimizer
from operators import operators

class grad_generator(Function):
    """Identity autograd op: a hook point for experimenting with custom gradients.

    Both passes are currently pass-through, so inserting this op into a graph
    changes neither the value nor the gradient that flows through it.
    """

    @staticmethod
    def forward(ctx, input):
        """Return ``input`` unchanged.

        Nothing is saved on ``ctx``: ``backward`` does not read the input, so
        saving it would only keep the tensor alive until backward runs.
        """
        return input

    @staticmethod
    def backward(ctx, grad_output):
        """Pass the incoming gradient through unmodified."""
        return grad_output


class MyNetwork(nn.Module):
    """Minimal model: a single ``nn.Linear(1, 1)`` layer (one weight, one bias)."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        return self.linear(x)


class SGD(Optimizer):
    """Gradient-descent optimizer with an optional custom update operator.

    Args:
        params: Iterable of parameters, or of dicts defining parameter groups.
        operator: Optional callable invoked as ``operator(p, old_param, lr)``.
            ``p`` is the current parameter and ``old_param`` is a detached
            snapshot of the parameter as it was before the previous update,
            carrying that step's gradient in ``old_param.grad``. On the first
            step there is no previous update, so the snapshot is of the
            current values and gradient. The return value is converted to a
            tensor and applied as ``p -= lr * value``. When ``None``, the
            plain update ``p -= lr * p.grad`` is used.
        lr: Learning rate.

    The snapshot is kept per parameter in ``self.state[p]['old_param']`` so
    that parameters in different groups never share or overwrite each
    other's history.
    """

    def __init__(self, params, operator=None, lr=0.03):
        defaults = dict(lr=lr, operator=operator)
        super().__init__(params, defaults)

    @staticmethod
    def _snapshot(p):
        """Return a detached copy of ``p`` with a copy of its gradient attached."""
        snap = p.data.clone().detach()
        snap.grad = p.grad.clone()
        return snap

    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: Optional callable that re-evaluates the model and
                returns the loss; it is run with gradients enabled.

        Returns:
            The value returned by ``closure``, or ``None`` if none was given.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            lr = group['lr']
            operator = group['operator']
            for p in group['params']:
                if p.grad is None:
                    continue

                if operator is None:
                    p.data.add_(p.grad, alpha=-lr)
                    continue

                state = self.state[p]
                first_step = 'old_param' not in state
                if first_step:
                    # No history yet: the operator sees a snapshot of the
                    # current values as the "old" parameter.
                    state['old_param'] = self._snapshot(p)
                grad_values = operator(p, state['old_param'], lr)
                if not first_step:
                    # Record the pre-update values for the next step only
                    # after the operator has consumed the previous snapshot.
                    state['old_param'] = self._snapshot(p)

                # as_tensor avoids re-copying (and the accompanying warning)
                # when the operator already returns a tensor; detach keeps
                # the in-place update out of any autograd graph.
                update = torch.as_tensor(
                    grad_values, dtype=p.dtype, device=p.device
                ).detach()
                p.data.add_(update, alpha=-lr)

        return loss


# Seed torch, NumPy and Python's RNGs so the initial parameters printed below
# are repeatable from run to run.
torch.manual_seed(1)
np.random.seed(1)
random.seed(1)
model = MyNetwork()
# Show the initial weight and bias of the single linear layer.
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[0.5153]], requires_grad=True)
Parameter containing:
tensor([-0.4414], requires_grad=True)


In [10]:
# Mean-squared-error loss for the single-sample regression below.
criterion = torch.nn.MSELoss()
# optim = SGD(model.parameters(), operators(None).fractional_temp)
# operator=None selects the plain `p -= lr * grad` branch of the custom SGD
# defined in this notebook (lr defaults to 0.03).
optim = SGD(model.parameters(), None)
# optim = Adam(model.parameters())
# One training sample: x = 1, y = 1.
input = torch.tensor([[1.]], requires_grad=True)
target = torch.tensor([[1.]])
# Clear any existing gradients before the forward pass.
optim.zero_grad()
output = model(input)

In [11]:
# Re-derive the forward pass and dL/dw by hand; w and b are copied from the
# parameter values printed right after the model was created.
w = 0.5153
b = - 0.4414
O = w * input[0] + b
print(f"O: {O}")
# For L = (O - target)^2 the chain rule gives dL/dw = 2 * (O - target) * x.
print(f"rL/rW: {(2 * O - 2 * target[0]) * input[0]}")
# Squared error taken from the network's own output, not the hand-computed O.
print(f"Loss: {(target[0] - output)**2}")

O: tensor([0.0739], grad_fn=<AddBackward0>)
rL/rW: tensor([-1.8522], grad_fn=<MulBackward0>)
Loss: tensor([[0.8577]], grad_fn=<PowBackward0>)


In [12]:
# MSE between the network output and the target.
loss = criterion(output, target)

In [13]:
# Route the loss through the custom autograd op. Its forward returns the
# input unchanged, so the value stays the same and only the grad_fn differs.
loss = grad_generator.apply(loss)

In [14]:
# No optimizer step has run yet, so the parameters still hold their initial values.
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[0.5153]], requires_grad=True)
Parameter containing:
tensor([-0.4414], requires_grad=True)


In [15]:
# Inspect the loss tensor returned by grad_generator.apply.
loss

tensor(0.8577, grad_fn=<grad_generatorBackward>)

In [16]:
# Backpropagate through grad_generator and the network, then show the
# gradients stored on the weight and the bias.
loss.backward()
for param in model.parameters():
    print(param.grad)

tensor([[-1.8522]])
tensor([-1.8522])


In [17]:
# Apply one update (p -= lr * grad, because operator is None) and show the
# updated parameters.
optim.step()
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[0.5708]], requires_grad=True)
Parameter containing:
tensor([-0.3858], requires_grad=True)


In [18]:
# Hand-check of the weight update: w - grad * lr, with grad = -1.8522 taken
# from the printed gradients and lr = 0.03 (the custom SGD's default).
w - (-1.8522) * 0.03

0.570866

In [19]:
# Same hand-check for the bias: b - grad * lr with grad = -1.8522, lr = 0.03.
b - (-1.8522) * 0.03

-0.385834

In [20]:
# Second training sample: x = 3, y = 3. Reset the gradients, then run the
# forward pass with the already-updated parameters.
input = torch.tensor([[3.]], requires_grad=True)
target = torch.tensor([[3.]])
optim.zero_grad()
output = model(input)


In [21]:
# This time the loss goes straight to backward() without passing through
# grad_generator; then show the gradients on the weight and the bias.
loss = criterion(output, target)
loss.backward()
for param in model.parameters():
    print(param.grad)

tensor([[-10.0399]])
tensor([-3.3466])


In [22]:
# Second optimizer update, followed by the updated parameters.
optim.step()
for param in model.parameters():
    print(param)

Parameter containing:
tensor([[0.8720]], requires_grad=True)
Parameter containing:
tensor([-0.2854], requires_grad=True)
