In [1]:
import numpy as np

In [2]:
def sigmoid(x):
    return 1. / (1 + np.exp(-x))

def sigmoid_backward(da, x):
    sig = sigmoid(x)
    
    return da * sig * (1 - sig)

def relu(x):
    return np.maximum(0., x)

def relu_backward(da, x):
    da = np.array(da, copy = True)
    da[x <= 0] = 0;
    return da;

In [3]:
def mse_loss(t, y):
    return (t - y) ** 2

def d_mse_loss(t, y):
    return 2 * (y - t) 


In [4]:
class LinearLayer:
    def __init__(self, n_inp, n_out, activation='sigmoid'):
        self.w = np.random.randn(n_out, n_inp) * 0.1
        self.b = np.random.randn(n_out, 1) * 0.1
        if activation == 'sigmoid':
            self.activ = sigmoid
        if activation == 'relu':
            self.activ = relu
        elif activation == 'None':
            self.activ = None
        else:
            raise Exception(f'Unknown activation "{activation}"')
        self._clear_state()

    def _clear_state(self):
        self.lin = None
        self.inp = None
        self.d_w = None
        self.d_b = None

    def forward(self, x):
        self.inp = x
        self.lin = np.dot(self.w, x) + self.b
        activ = self.activ(self.lin) if self.activ is not None else self.lin

        return activ

    def backward(self, grad): # grad = d L / d z    Dout 
        # grad * dz / d lin
        if self.activ == sigmoid:
            grad_lin = sigmoid_backward(grad, self.lin) 
        elif self.activ == relu:
            grad_lin = relu_backward(grad, self.lin)
        else:
            grad_lin = grad
        # grad_lin * d lin / d w 
        m = self.inp.shape[1]
        self.d_w = np.dot(grad_lin, self.inp.T) / m    # d_in dOut
        # grad_lin * d lin / d b 
        self.d_b = np.sum(grad_lin, axis=1, keepdims=True) / m

        grad = np.dot(self.w.T, grad_lin)

        return grad


In [11]:
from typing import Tuple

class Model:
    def __init__(self, arch: Tuple[Tuple[int, int]], activation):
        self.layers = []
        for i, p in enumerate(arch):
            self.layers.append(
                LinearLayer(p[0], p[1], 
                            activation=activation if i < len(arch)-1 else 'None')
                )
        self._clear_state()
    
    def _clear_state(self):
        for l in self.layers:
            l._clear_state()

    def forward(self, x):
        for layer in self.layers:
            x = layer.forward(x)
        
        return x

    def backward(self, grad):
        for layer in reversed(self.layers):
            grad = layer.backward(grad)

        return grad 

In [None]:
# Task 2
# Realize SGD Momentum optimizer
# velocity = momentum * velocity - lr * gradient
# w = w + velocity

In [8]:
#для одного слоя
class SGDMomentum:
    def __init__(self, model: LinearLayer, lr=0.001, momentum=0.99):
        self.lr = lr
        self.m = momentum
        self.model = model

        self.vel_w = np.zeros_like(model.w)
        self.vel_b = np.zeros_like(model.b)

    def step(self):
        self.vel_w = self.m * self.vel_w - self.lr * self.model.d_w
        self.vel_b = self.m * self.vel_b - self.lr * self.model.d_b

        self.model.w += self.vel_w
        self.model.b += self.vel_b

    def zero_grad(self):
        self.model.d_w = np.zeros_like(self.model.d_w)
        self.model.d_b = np.zeros_like(self.model.d_b)




In [9]:
x = np.random.uniform(-2, 2, 20000)
y = x**2 + np.random.randn()*0.1


In [10]:
model = Model(((1, 100), (100, 1)), activation='relu')
optim = SGDMomentum(model)
for e in range(20):
    for i, (val, t) in enumerate(zip(x, y)):
        optim.zero_grad()
        pred = model.forward(np.array([[val]]))
        loss = mse_loss(t, pred)
        grad = d_mse_loss(t, pred)
        model.backward(grad)
        optim.step()
        
    print(e, model.forward([[1]]), model.forward([[2]]), model.forward([[-1]]), model.forward([[-2]]))

AttributeError: 'Model' object has no attribute 'w'

In [None]:
# Task 3
# Find the roots of square equation by gradient descent
# x ** 2 - 6 * x + 4 = 0


# посчитать производную от преобразованной функции
# надо начать движение от начальной точки в направлении антиградиента с заданным шагом
# x = x - lr * grad(x)
# всегда ли сойдемся за приемлемое количество шагов?
# важна ли начальная точка?
# как найти второй корень?
# как вляет ЛР?


In [None]:
# Знакомство с PyTorch
import torch
import torch.nn as nn

In [None]:
t1 = torch.tensor([1, 2, 3])
t2 = torch.tensor([2, 4, 8])

In [None]:
t.size(), t.dim()

(torch.Size([3]), 1)

In [None]:
t1.unsqueeze(0)

tensor([[1, 2, 3]])

In [None]:
batch = torch.cat((t1.unsqueeze(0), t2.unsqueeze(0)), dim=0)

In [None]:
linear = nn.Linear(3, 5)

linear(batch.float())

tensor([[ 0.0818, -0.2182,  0.5167, -0.1722,  2.6926],
        [ 0.2587, -0.9833,  1.4870, -1.0197,  5.7247]],
       grad_fn=<AddmmBackward0>)

In [None]:
class MyModel(nn.Module):
    def __init__(self,):
        super().__init__()

        self.layer1 = nn.Linear(3, 5)
        self.activation = nn.Sigmoid()
        self.layer2 = nn.Linear(5, 3)
    
    def forward(self, x):
        x = self.layer1(x)
        x = self.activation(x)
        x = self.layer2(x)
        
        if not self.training:
            x = nn.Softmax()(x)
        
        return x


model = MyModel()

model.train()
# model.eval() 

model(batch.float())

tensor([[0.2445, 0.2656, 0.5629],
        [0.1066, 0.2646, 0.5433]], grad_fn=<AddmmBackward0>)