In [None]:
# Домашнее задание



In [None]:
# задача 1

# Найти корни квадратного уравнения методом градиентного спуска
# x ** 2 - 5 * x + 4 = 0

# надо начать движение от начальной точки в направлении антиградиента с заданным шагом
# x = x - lr * grad(x)
# всегда ли сойдемся за приемлемое количество шагов?
# важна ли начальная точка?
# как найти второй корень?
# как влияет LR (learning rate)?

In [None]:
# Задача 2

# Реализовать адаптивный оптимизатор с подстраивающимся LR

In [4]:
import numpy as np

In [None]:
# Task 2
# Implement forward and backward pass for a linear layer with sigmoid activation

In [14]:
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        NumPy scalar/array of the same shape as ``x`` with values in [0, 1].
    """
    # 1 / (1 + exp(-x)) == exp(-log(1 + exp(-x))). np.logaddexp(0, -x) computes
    # log(exp(0) + exp(-x)) stably, so very negative x no longer pushes exp()
    # to inf (which previously triggered an overflow RuntimeWarning).
    return np.exp(-np.logaddexp(0., np.negative(x)))

def sigmoid_backward(da, x):
    """Backpropagate ``da`` through a sigmoid evaluated at pre-activation ``x``.

    Multiplies the incoming gradient by the sigmoid derivative
    ``s * (1 - s)``, where ``s = sigmoid(x)``.
    """
    s = sigmoid(x)
    scaled = da * s
    return scaled * (1 - s)

def relu(x):
    """Rectified linear unit: element-wise maximum of 0 and ``x``."""
    floor = 0.
    return np.maximum(floor, x)

def relu_backward(da, x):
    """Backpropagate ``da`` through a ReLU evaluated at pre-activation ``x``.

    Returns a copy of ``da`` in which every position with ``x <= 0`` is zeroed;
    the caller's ``da`` is left unmodified.
    """
    blocked = x <= 0
    passed = np.array(da, copy=True)
    passed[blocked] = 0
    return passed

In [6]:
def mse_loss(t, y):
    """Element-wise squared error between target ``t`` and prediction ``y``.

    No reduction is applied: the result has the broadcast shape of ``t - y``.
    """
    residual = t - y
    return residual ** 2

def d_mse_loss(t, y):
    """Derivative of ``(t - y) ** 2`` with respect to the prediction ``y``."""
    residual = y - t
    return 2 * residual


In [7]:
class LinearLayer:
    """Dense layer computing ``activation(w @ x + b)`` with hand-written backprop.

    Samples are stored column-wise: ``forward`` takes ``x`` of shape
    ``(n_inp, m)`` and returns ``(n_out, m)``. ``backward`` averages the
    parameter gradients over the ``m`` columns.

    The ``'sigmoid'`` and ``'relu'`` options use the module-level ``sigmoid`` /
    ``relu`` functions and their ``*_backward`` counterparts.
    """

    def __init__(self, n_inp, n_out, activation='sigmoid'):
        """Create the layer with small random parameters.

        Args:
            n_inp: number of input features.
            n_out: number of output units.
            activation: ``'sigmoid'``, ``'relu'``, or ``'None'`` / ``None`` for
                a purely linear layer.

        Raises:
            ValueError: if ``activation`` is not one of the supported values.
        """
        self.w = np.random.randn(n_out, n_inp) * 0.1
        self.b = np.random.randn(n_out, 1) * 0.1
        # A single exclusive if/elif chain: previously the 'sigmoid' test was a
        # separate `if`, so the default activation fell through to the `else`
        # branch and raised "Unknown activation".
        if activation is None or activation == 'None':
            self.activ = None
        elif activation == 'sigmoid':
            self.activ = sigmoid
        elif activation == 'relu':
            self.activ = relu
        else:
            raise ValueError(f'Unknown activation "{activation}"')
        self._clear_state()

    def _clear_state(self):
        """Drop the cached forward values and the stored parameter gradients."""
        self.lin = None
        self.inp = None
        self.d_w = None
        self.d_b = None

    def forward(self, x):
        """Compute the layer output and cache what ``backward`` needs.

        Args:
            x: array-like of shape ``(n_inp, m)``.

        Returns:
            Array of shape ``(n_out, m)``.
        """
        # Coerce so that nested-list inputs also work in backward(), which
        # reads `.shape` and `.T` from the cached input.
        self.inp = np.asarray(x)
        self.lin = np.dot(self.w, self.inp) + self.b
        activ = self.activ(self.lin) if self.activ is not None else self.lin

        return activ

    def backward(self, grad):
        """Backpropagate through the layer.

        Stores the batch-averaged gradients in ``self.d_w`` and ``self.d_b``.

        Args:
            grad: gradient of the loss with respect to this layer's output,
                shape ``(n_out, m)``.

        Returns:
            Gradient with respect to the layer input, shape ``(n_inp, m)``.

        Raises:
            RuntimeError: if called before ``forward`` (or after the cached
                state was cleared), or if ``self.activ`` has no known
                derivative.
        """
        if self.inp is None or self.lin is None:
            raise RuntimeError('backward() called before forward()')

        # Gradient w.r.t. the pre-activation. Exclusive chain: previously the
        # sigmoid result was immediately overwritten by the trailing `else`.
        if self.activ is None:
            grad_lin = grad
        elif self.activ is sigmoid:
            grad_lin = sigmoid_backward(grad, self.lin)
        elif self.activ is relu:
            grad_lin = relu_backward(grad, self.lin)
        else:
            raise RuntimeError('no backward rule for the configured activation')

        # d lin / d w and d lin / d b, averaged over the m columns of the batch.
        m = self.inp.shape[1]
        self.d_w = grad_lin @ self.inp.T / m
        self.d_b = np.sum(grad_lin, axis=1, keepdims=True) / m

        # Gradient handed to the previous layer (not averaged).
        grad = np.dot(self.w.T, grad_lin)

        return grad

In [16]:
from typing import Tuple

class Model:
    """Sequential stack of ``LinearLayer`` objects.

    Every layer except the last uses the requested activation; the last layer
    is always built with ``activation='None'`` (purely linear output).
    """

    def __init__(self, arch: Tuple[Tuple[int, int]], activation):
        """Build one layer per ``(n_inp, n_out)`` entry of ``arch``.

        Args:
            arch: sequence of ``(n_inp, n_out)`` pairs, one per layer.
            activation: activation name passed to every layer but the last.
        """
        last_idx = len(arch) - 1
        self.layers = []
        for idx, dims in enumerate(arch):
            # The output layer stays linear regardless of `activation`.
            layer_activation = 'None' if idx >= last_idx else activation
            layer = LinearLayer(dims[0], dims[1], activation=layer_activation)
            self.layers.append(layer)
        self._clear_state()

    def _clear_state(self):
        """Reset the cached state of every layer."""
        for layer in self.layers:
            layer._clear_state()

    def forward(self, x):
        """Feed ``x`` through the layers in order and return the final output."""
        out = x
        for layer in self.layers:
            out = layer.forward(out)

        return out

    def backward(self, grad):
        """Propagate ``grad`` from the last layer back to the first.

        Returns the gradient with respect to the model input.
        """
        upstream = grad
        for layer in self.layers[::-1]:
            upstream = layer.backward(upstream)

        return upstream

In [None]:
# Task 3
# Implement SGD Momentum optimizer
# velocity = momentum * velocity - lr * gradient
# w = w + velocity

In [9]:
# Optimizer for the whole model
class SGDMomentum:
    """SGD with momentum applied to every layer of a ``Model``.

    Update rule per parameter:
        velocity = momentum * velocity - lr * gradient
        parameter = parameter + velocity
    """

    def __init__(self, model: Model, lr= 0.0001, momentum=0.9):
        """Store the hyper-parameters and create zero velocities.

        Args:
            model: model whose ``layers`` expose ``w``, ``b``, ``d_w``, ``d_b``.
            lr: learning rate.
            momentum: fraction of the previous velocity that is kept.
        """
        self.model = model
        self.lr = lr
        self.m = momentum
        # One [weight velocity, bias velocity] pair per layer.
        self.vel = []
        for layer in model.layers:
            self.vel.append([np.zeros_like(layer.w), np.zeros_like(layer.b)])

    def step(self):
        """Update velocities from the stored gradients and move the parameters."""
        for idx, layer in enumerate(self.model.layers):
            state = self.vel[idx]
            state[0] = state[0] * self.m - self.lr * layer.d_w
            state[1] = state[1] * self.m - self.lr * layer.d_b
            # Parameters are updated in place.
            layer.w += state[0]
            layer.b += state[1]

    def zero_grad(self):
        """Clear the model's cached state, which includes the stored gradients."""
        self.model._clear_state()

In [10]:
# Synthetic regression data: 20000 inputs drawn uniformly from [-3, 3) with
# targets y = x**2 plus small Gaussian noise.
x = np.random.uniform(-3, 3, 20000)
# Draw one noise value per sample. A bare np.random.randn() returns a single
# scalar, which would shift every target by the same constant instead of
# adding independent noise.
y = x**2 + np.random.randn(*x.shape)*0.01


In [26]:
# Fit y = x**2 with a 1 -> 100 -> 1 ReLU network: 20 epochs, one sample per update.
model = Model(((1, 100), (100, 1)), activation='relu')
optim = SGDMomentum(model, lr=0.00001)
for e in range(20):
    # Predictions at a few probe inputs, taken before this epoch's updates.
    print(e, *(model.forward([[probe]]) for probe in (1, 2, -1, -2)))
    for i, (val, t) in enumerate(zip(x, y)):
        optim.zero_grad()
        # Each sample is fed as a (1, 1) column.
        pred = model.forward(np.array([[val]]))
        # The loss value is computed for reference; only its derivative drives the update.
        loss = mse_loss(t, pred)
        grad = d_mse_loss(t, pred)
        model.backward(grad)
        optim.step()
              


0 [[0.14292589]] [[0.15361447]] [[-0.06802049]] [[-0.25212235]]
1 [[1.73691623]] [[4.52343168]] [[1.71785465]] [[4.4722784]]
2 [[1.27641013]] [[4.52847582]] [[1.24859775]] [[4.47038131]]
3 [[1.07475161]] [[4.49743378]] [[1.04777576]] [[4.44256043]]
4 [[0.99191461]] [[4.45324514]] [[1.02059846]] [[4.40566313]]
5 [[1.01759544]] [[4.40409629]] [[1.00609816]] [[4.36605054]]
6 [[1.02322281]] [[4.35743983]] [[0.98782629]] [[4.32870643]]
7 [[1.01726055]] [[4.31567394]] [[0.97312012]] [[4.29390448]]
8 [[1.01804996]] [[4.27841353]] [[0.96933206]] [[4.26155704]]
9 [[1.01842707]] [[4.24453055]] [[0.9759856]] [[4.23142456]]
10 [[1.01748672]] [[4.21364916]] [[0.97994257]] [[4.20315236]]
11 [[1.0175353]] [[4.18512827]] [[0.98562422]] [[4.17610405]]
12 [[1.01881712]] [[4.1587174]] [[0.99191542]] [[4.15092475]]
13 [[1.01894928]] [[4.13438639]] [[0.99592802]] [[4.12752445]]
14 [[1.01840589]] [[4.11202279]] [[0.9984531]] [[4.10520499]]
15 [[1.01746739]] [[4.09109989]] [[1.00000837]] [[4.08437488]]
16 [[

In [22]:
# Spot-check the trained model; 103 lies far outside the [-3, 3) range that x was sampled from.
print(e, *(model.forward([[probe]]) for probe in (1, 2, -1, 103)))

19 [[1.01222796]] [[4.01537406]] [[1.02153453]] [[553.49301296]]
