In [None]:
# Домашнее задание



In [None]:
# задача 1

# Найти корни квадратного уравнения методом градиентного спуска
# x ** 2 - 5 * x + 4 = 0

# надо начать движение от начальной точки в направлении антградиента с заданным шагом
# x = x - lr * grad(x)
# всегда ли сойдемся за приемлемое количество шагов?
# важна ли начальная точка?
# как найти второй корень?
# как влияет ЛР?

In [15]:
import pandas as pd
import numpy as np

In [2]:
def func(x):
    return x ** 2 - 5 * x + 4

def grad(x):
    return 2 * x - 5

def gradient_descent(starting_x, lr, epochs):
    x = starting_x
    for i in range(epochs):
        gradient = grad(x)
        x = x - lr * gradient
        if abs(gradient) < 1e-6:  # условие остановки, если градиент близок к нулю
            break
    return x

# Параметры
starting_x = 0  # начальная точка
lr = 0.01       # величина шага
epochs = 1000   # количество итераций

root1 = gradient_descent(starting_x, lr, epochs)
print(f"Найденный корень: {root1}")

# Для нахождения второго корня, можно изменить начальную точку
starting_x2 = 10  # другая начальная точка
root2 = gradient_descent(starting_x2, lr, epochs)
print(f"Найденный второй корень: {root2}")

Найденный корень: 2.499999514836731
Найденный второй корень: 2.5000004888968688


In [None]:
# Задача 2

# Реализовать адаптивний оптимизатор с подстраивающимся LR

In [None]:
# Task 2
# Realize forward and backward pass for linear layer with sigmoid activation

In [16]:
def sigmoid(x):
    return 1. / (1 + np.exp(-x))

def sigmoid_backward(da, x):
    sig = sigmoid(x)
    
    return da * sig * (1 - sig)

def relu(x):
    return np.maximum(0., x)

def relu_backward(da, x):
    da = np.array(da, copy = True)
    da[x <= 0] = 0
    return da

In [17]:
def mse_loss(t, y):
    return (t - y) ** 2

def d_mse_loss(t, y):
    return 2 * (y - t) 


In [18]:
class LinearLayer:
    def __init__(self, n_inp, n_out, activation='sigmoid'):
        self.w = np.random.randn(n_out, n_inp) * 0.1
        self.b = np.random.randn(n_out, 1) * 0.1
        if activation == 'sigmoid':
            self.activ = sigmoid
        if activation == 'relu':
            self.activ = relu
        elif activation == 'None':
            self.activ = None
        else:
            raise Exception(f'Unknown activation "{activation}"')
        self._clear_state()

    def _clear_state(self):
        self.lin = None
        self.inp = None
        self.d_w = None
        self.d_b = None

    def forward(self, x):
        self.inp = x
        self.lin = np.dot(self.w, x) + self.b
        activ = self.activ(self.lin) if self.activ is not None else self.lin
        
        return activ

    def backward(self, grad): # grad = d L / d z    Dout 
        # grad * dz / d lin
        if self.activ == sigmoid:
            grad_lin = sigmoid_backward(grad, self.lin)
        if self.activ == relu:
            grad_lin = relu_backward(grad, self.lin)
        else:
            grad_lin = grad
        # grad_lin * d lin / d w 
        m = self.inp.shape[1]
        self.d_w = grad_lin @ self.inp.T / m
        # grad_lin * d lin / d b 
        self.d_b = np.sum(grad_lin, axis=1, keepdims=True) / m
        
        grad = np.dot(self.w.T, grad_lin)
        
        return grad

In [19]:
from typing import Tuple

class Model:
    def __init__(self, arch: Tuple[Tuple[int, int]], activation):
        self.layers = []
        for i, p in enumerate(arch):
            self.layers.append(
                LinearLayer(p[0], p[1], 
                            activation=activation if i < len(arch)-1 else 'None')
                )
        self._clear_state()
    
    def _clear_state(self):
        for l in self.layers:
            l._clear_state()

    def forward(self, x):
        for l in self.layers:
            x = l.forward(x)
        
        return x

    def backward(self, grad):
        for l in reversed(self.layers):
            grad = l.backward(grad)

        return grad 

In [None]:
# Task 3
# Realize SGD 
# velocity = SGD * velocity - lr * gradient
# w = w + velocity

In [12]:
class SGD:
    def __init__(self, model, lr=0.0001):
        self.model = model
        self.lr = lr

    def step(self):
        for layer in self.model.layers:
            layer.w -= self.lr * layer.d_w
            layer.b -= self.lr * layer.d_b

    def zero_grad(self):
        self.model._clear_state()

# Пример использования оптимизатора SGD
model = Model(((1, 100), (100, 1)), activation='relu')
optim = SGD(model, lr=0.00001)
for e in range(20):
    print(e, model.forward([[1]]), model.forward([[2]]), model.forward([[-1]]), model.forward([[-2]]))
    for i, (val, t) in enumerate(zip(x, y)):
        optim.zero_grad()
        pred = model.forward(np.array([[val]]))
        loss = mse_loss(t, pred)
        grad = d_mse_loss(t, pred)
        model.backward(grad)
        optim.step()

0 [[-0.16162085]] [[-0.21652563]] [[-0.060777]] [[-0.03738077]]
1 [[1.86022001]] [[2.76312979]] [[2.18840731]] [[3.33059818]]
2 [[2.50503733]] [[4.10633088]] [[2.55191573]] [[4.17771888]]
3 [[2.43576914]] [[4.32115588]] [[2.42342654]] [[4.29521622]]
4 [[2.29616482]] [[4.38141625]] [[2.27300725]] [[4.34876254]]
5 [[2.16104474]] [[4.41558102]] [[2.13320263]] [[4.3823481]]
6 [[2.03733733]] [[4.43945792]] [[2.00541798]] [[4.4036934]]
7 [[1.92558706]] [[4.45607778]] [[1.89111635]] [[4.41969409]]
8 [[1.82619793]] [[4.4677268]] [[1.79043316]] [[4.431811]]
9 [[1.73869118]] [[4.47618792]] [[1.70222216]] [[4.44072034]]
10 [[1.66214826]] [[4.48237257]] [[1.62547847]] [[4.44778599]]
11 [[1.59540231]] [[4.48679181]] [[1.55890164]] [[4.45287245]]
12 [[1.53732645]] [[4.48989759]] [[1.50128098]] [[4.45656507]]
13 [[1.48675231]] [[4.49196248]] [[1.45121616]] [[4.45899567]]
14 [[1.44261975]] [[4.49321944]] [[1.40768949]] [[4.46062291]]
15 [[1.40389316]] [[4.4938533]] [[1.36953734]] [[4.46150711]]
16 [[1

In [14]:
print(e, model.forward([[1]]), model.forward([[2]]), model.forward([[-1]]), model.forward([[103]]))

19 [[1.26110143]] [[4.48985439]] [[1.22942235]] [[326.19511883]]
