In [169]:
import torch
import torch.nn as nn

In [172]:
use_gpu = False
device = 'cuda' if torch.cuda.is_available() and use_gpu else 'cpu'

**Задание 1**

Написать на PyTorch forward и backward полносвязного слоя без использования autograd

In [173]:
# Функция активации и ее производная
def sigmoid (x):
    return 1/(1 + torch.exp(-x))

def derivatives_sigmoid(x):
    return x * (1 - x)

In [174]:
class NN(nn.Module):
    def __init__(self, input_dim: int, output_dim: int):
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim)
        self.activation = sigmoid
        self.saved_tensors = None

    def forward(self, input_x):
        self.saved_tensors = input_x
        return self.activation(self.linear(input_x))

    def backward(self, grad_output):
        return grad_output * derivatives_sigmoid(self.saved_tensors)

In [175]:
test_tensor = torch.rand([9, 9]).to(device=device)

In [176]:
model = NN(input_dim=9, output_dim=9)
model.train()
model.to(device=device)

NN(
  (linear): Linear(in_features=9, out_features=9, bias=True)
)

In [177]:
result = model(test_tensor)
result

tensor([[0.5541, 0.5383, 0.4738, 0.6457, 0.5474, 0.6664, 0.5918, 0.4006, 0.4379],
        [0.5459, 0.5670, 0.5318, 0.5698, 0.4788, 0.6731, 0.5361, 0.3977, 0.4624],
        [0.5440, 0.5672, 0.4988, 0.5366, 0.5123, 0.6452, 0.4958, 0.3895, 0.4970],
        [0.5569, 0.5453, 0.4865, 0.5086, 0.4844, 0.6084, 0.5532, 0.4196, 0.4661],
        [0.5443, 0.6556, 0.4524, 0.5720, 0.4954, 0.6671, 0.5224, 0.4532, 0.4831],
        [0.5482, 0.6074, 0.4579, 0.5372, 0.4936, 0.6083, 0.5206, 0.4317, 0.4671],
        [0.5610, 0.5971, 0.4463, 0.6407, 0.5313, 0.6478, 0.5520, 0.4025, 0.4591],
        [0.5688, 0.5629, 0.4711, 0.5275, 0.5205, 0.6105, 0.5831, 0.3888, 0.4759],
        [0.5879, 0.5134, 0.4587, 0.5353, 0.5178, 0.6085, 0.6095, 0.3873, 0.4556]],
       grad_fn=<MulBackward0>)

**Задание 2**

Написать 1-2 адаптивных оптимизатора


In [181]:
# Пример из лекции
class SDGMomentum:
    def __init__(self, momentum, lr, model):
        self.momentum = momentum
        self.lr = lr
        self.velocity = torch.zeros_like(model)
        self.model = model

    def step(self, grad):
        self.velocity = self.momentum * self.velocity - self.lr * grad
        self.model += self.velocity

In [220]:
class AdaGrad:
    def __init__(self, lr, model):
        self.accumulated = torch.zeros_like(model)
        self.lr = lr
        self.adapt_lr = lr
        self.model = model

    def step(self, grad):
        self.accumulated += grad**2
        self.adapt_lr = self.lr / torch.sqrt(self.accumulated)
        self.model -= self.adapt_lr * grad

In [235]:
class RMSprop:
    def __init__(self, rho, lr, model):
        self.accumulated = torch.zeros_like(model)
        self.rho = rho
        self.lr = lr
        self.adapt_lr = lr
        self.model = model

    def step(self, grad):
        self.accumulated += self.rho * self.accumulated + (1 - self.rho) * grad**2
        self.adapt_lr = self.lr / torch.sqrt(self.accumulated)
        self.model -= self.adapt_lr * grad

**Задание 3**

Решить задачу нахождения корней квадратного уравнения методом градиентного спуска

In [263]:
def func(x):
    return (3 * x + 9) ** 2

def grad_f(x):
    return 2 * (3 * x + 9)

def solver(init_x, optimizer, max_iter=1000):
    g = grad_f(init_x)
    optim = optimizer
    for i in range(max_iter):
        optim.step(g)
        g = grad_f(optim.model)
    print(optim.model)
    return optim.model

In [288]:
x = torch.tensor(6.)
_ = solver(init_x=x, optimizer=SDGMomentum(lr=0.01, momentum=0.95, model=x))

tensor(-3.)


In [294]:
x = torch.tensor(6.)
_ = solver(init_x=x, optimizer=AdaGrad(lr=0.3, model=x), max_iter=10000)

tensor(-3.0000)


In [307]:
x = torch.tensor(6.)
_ = solver(init_x=x, optimizer=RMSprop(lr=0.9, rho=0.99, model=x), max_iter=1000)

tensor(-3.)
