In [1]:
import numpy as np
import torch
import torch.nn as nn
from scipy.misc import derivative 
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Two evenly spaced ramps of 100 samples each (endpoints included);
# they are reshaped into 10x10 matrices in the next cell.
a1 = np.linspace(start=2, stop=20, num=100)
a2 = np.linspace(start=7, stop=40, num=100)

In [3]:
# View each 100-element ramp as a 10x10 matrix and wrap it in a tensor.
t1 = torch.from_numpy(a1.reshape(10, 10))
t2 = torch.from_numpy(a2.reshape(10, 10))

In [4]:
# Give each 10x10 matrix a leading axis of size 1 and join them along it,
# producing a (2, 10, 10) batch.
batch = torch.cat([t1[None], t2[None]], dim=0)

In [5]:
class Fmodel(nn.Module):
    """Small two-layer perceptron: Linear(10, 6) -> ReLU -> Linear(6, 4).

    In training mode ``forward`` returns the raw outputs of the second
    linear layer (logits). In eval mode it returns softmax probabilities
    computed over the last dimension, i.e. over the 4 output features.
    """

    def __init__(self, ):
        super().__init__()
        self.layer1 = nn.Linear(10, 6)
        self.act = nn.ReLU()
        self.layer2 = nn.Linear(6, 4)
        # Created once with an explicit dim. Without ``dim`` PyTorch falls
        # back to a deprecated implicit choice, which for 3-D inputs is
        # dim 0 -- that would normalise across the batch, not the classes.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Run the network on ``x`` whose last dimension has size 10.

        Returns a tensor with the same leading dimensions as ``x`` and a
        last dimension of size 4: logits while ``self.training`` is True,
        probabilities summing to 1 along the last dimension otherwise.
        """
        x = self.layer1(x)
        x = self.act(x)
        x = self.layer2(x)

        # Probabilities are only produced at inference time.
        if not self.training:
            x = self.softmax(x)

        return x

In [6]:
model = Fmodel()
# Call the module itself rather than ``.forward`` so that nn.Module.__call__
# runs (it dispatches to forward and also executes any registered hooks).
# ``.float()`` converts the batch to float32 before it reaches the layers.
model(batch.float())

tensor([[[ 0.4590,  0.2191, -0.7105, -0.5532],
         [ 0.8409,  0.1559, -1.1488, -0.8073],
         [ 1.2228,  0.0927, -1.5870, -1.0613],
         [ 1.6048,  0.0295, -2.0253, -1.3153],
         [ 1.9940, -0.0580, -2.4415, -1.5860],
         [ 2.3837, -0.1473, -2.8561, -1.8579],
         [ 2.7734, -0.2366, -3.2708, -2.1297],
         [ 3.1631, -0.3259, -3.6854, -2.4016],
         [ 3.5529, -0.4152, -4.1000, -2.6734],
         [ 3.9426, -0.5045, -4.5146, -2.9453]],

        [[ 1.5043, -0.0634, -1.9354, -1.3272],
         [ 2.2068, -0.1870, -2.7319, -1.7982],
         [ 2.9213, -0.3507, -3.4920, -2.2966],
         [ 3.6358, -0.5144, -4.2522, -2.7950],
         [ 4.3503, -0.6781, -5.0123, -3.2934],
         [ 5.0647, -0.8418, -5.7725, -3.7919],
         [ 5.7792, -1.0055, -6.5326, -4.2903],
         [ 6.4937, -1.1692, -7.2928, -4.7887],
         [ 7.2082, -1.3329, -8.0529, -5.2871],
         [ 7.9227, -1.4966, -8.8131, -5.7855]]], grad_fn=<ViewBackward0>)

**Task 2**
* Implement the Adagrad optimizer
* accumulated += gradient^2
* adapt_lr = lr / sqrt(accumulated)
* w = w - adapt_lr*gradient

In [7]:
class Adagrad:
    """Adagrad optimizer for a layer that stores NumPy parameters.

    Update rule applied on every call to ``step``::

        accumulated += gradient ** 2
        w -= lr / (sqrt(accumulated) + eps) * gradient

    The wrapped ``model`` is duck-typed: it must expose the arrays ``w`` and
    ``b`` (parameters) and ``d_w`` and ``d_b`` (their current gradients).
    """

    def __init__(self, model, lr=0.001, eps=1e-8):
        """Store the hyper-parameters and create zeroed accumulators.

        Args:
            model: object with array attributes ``w``, ``b``, ``d_w``, ``d_b``.
            lr: base learning rate.
            eps: small constant added to the denominator so that a zero
                accumulated gradient does not cause a division by zero.
        """
        self.lr = lr
        self.eps = eps
        self.model = model

        # Running sums of squared gradients, one entry per parameter.
        self.accum_w = np.zeros_like(model.w)
        self.accum_b = np.zeros_like(model.b)

    def step(self):
        """Accumulate the squared gradients and update ``w`` and ``b`` in place."""
        self.accum_w += self.model.d_w ** 2
        self.accum_b += self.model.d_b ** 2

        # The per-parameter learning rate is recomputed on every step because
        # the accumulators keep growing.
        adapt_lr_w = self.lr / (np.sqrt(self.accum_w) + self.eps)
        adapt_lr_b = self.lr / (np.sqrt(self.accum_b) + self.eps)

        self.model.w -= adapt_lr_w * self.model.d_w
        self.model.b -= adapt_lr_b * self.model.d_b

    def zero_grad(self):
        """Reset the model's gradients to zeros of the same shape."""
        self.model.d_w = np.zeros_like(self.model.d_w)
        self.model.d_b = np.zeros_like(self.model.d_b)

**Task 3**

In [8]:
# Solve x**2 - 9 = 0 (a quadratic a*x**2 + b*x + c = 0 with a=1, b=0, c=-9)
# using Newton's method.

def function(x):
    """Quadratic whose root is searched for: f(x) = x**2 - 9."""
    return x**2 - 9


def step(x_new, x_prev, iterations=100, func=None, dx=1.0, tol=0.0):
    """Find a root of ``func`` with Newton's method and print the result.

    Each iteration applies ``x <- x - f(x) / f'(x)``, where ``f'`` is
    approximated by the central difference
    ``(f(x + dx) - f(x - dx)) / (2 * dx)``. This is computed locally because
    ``scipy.misc.derivative`` is deprecated and absent from recent SciPy
    releases; with the default ``dx=1.0`` it is the same formula that
    function used by default, and it is exact for quadratics.

    Args:
        x_new: starting point of the iteration.
        x_prev: unused; kept only so existing calls keep working.
        iterations: maximum number of Newton updates.
        func: function whose root is sought; defaults to the module-level
            ``function``.
        dx: half-width of the central-difference stencil. Pass a small
            value (e.g. 1e-6) for functions that are not low-degree
            polynomials.
        tol: stop as soon as ``abs(func(x)) <= tol``. The default 0.0 stops
            only on an exact zero.

    Returns:
        The last iterate.

    Raises:
        ZeroDivisionError: if the estimated slope is zero, since the Newton
            update is undefined there.
    """
    if func is None:
        func = function

    points = 1  # counts the starting point plus every accepted update
    for _ in range(iterations):
        value = func(x_new)
        # Written as "not >" so that a NaN value also ends the loop.
        if not abs(value) > tol:
            break
        slope = (func(x_new + dx) - func(x_new - dx)) / (2 * dx)
        if slope == 0:
            raise ZeroDivisionError(
                f"zero derivative estimate at x = {x_new}; Newton step is undefined"
            )
        x_new = x_new - value / slope
        points += 1

    print(f"step {points}: {x_new}")
    return x_new

In [9]:
# Start Newton's iteration from x = -4.
step(x_new=-4, x_prev=0)

step 6: -3.0
