# Deep Learning Techniques

In [3]:
import numpy as np

### 1, SGD (stochastic gradient descent)

$$
W \leftarrow W - \eta \frac{\partial L}{\partial W}
$$

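This means that the value on the left side is updated with the value on the right side. Here $\eta$ is the learning rate and $\frac{\partial L}{\partial W}$ is the gradient of the loss $L$ with respect to the weights $W$.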

In [4]:
class SGD:
    """Plain stochastic gradient descent optimizer.

    Every parameter is moved against its gradient, scaled by the
    learning rate: ``W <- W - lr * dL/dW``.
    """

    def __init__(self, lr=0.01):
        # lr: learning rate, i.e. the factor applied to each gradient.
        self.lr = lr

    def update(self, params, grads):
        """Apply one SGD step to every entry of ``params``.

        ``params`` and ``grads`` are mappings indexed by the same keys.
        Each ``params[key]`` is updated with the augmented assignment
        ``-=``; nothing is returned.
        """
        for name in params:
            step = self.lr * grads[name]
            params[name] -= step

### 2, Momentum

$$
v \leftarrow \alpha v - \eta \frac{\partial L}{\partial W}
$$

$$
W \leftarrow W + v
$$

In [5]:
class Momentum:
    """Gradient descent with momentum.

    A velocity ``v`` is kept for every parameter and updated as
    ``v <- momentum * v - lr * dL/dW``; the parameter is then moved by
    ``W <- W + v``.
    """

    def __init__(self, lr=0.01, momentum=0.9):
        # lr: learning rate.
        # momentum: corresponds to alpha in the velocity formula.
        self.lr = lr
        self.momentum = momentum
        # Per-parameter velocities; created on the first update() call,
        # because the parameter shapes are not known before then.
        self.v = None

    def update(self, params, grads):
        """Apply one momentum step to every entry of ``params``.

        ``params`` and ``grads`` are mappings indexed by the same keys.
        Each ``params[key]`` is updated with the augmented assignment
        ``+=``; nothing is returned.
        """
        if self.v is None:
            # Start every velocity at zero, matching each parameter.
            self.v = {key: np.zeros_like(val) for key, val in params.items()}

        for key in params:
            self.v[key] = self.momentum * self.v[key] - self.lr * grads[key]
            params[key] += self.v[key]

    # The method used to be named ``__update``, which Python mangles to
    # ``_Momentum__update`` and which therefore could not be called as
    # ``optimizer.update(...)`` like the other optimizers. Keep the mangled
    # name as an alias so any existing caller of it keeps working.
    __update = update

### 3, AdaGrad

$$
h \leftarrow h + \frac{\partial L}{\partial W} \odot \frac{\partial L}{\partial W}
$$

$$
W \leftarrow W - \eta \frac{1}{\sqrt{h}} \frac{\partial L}{\partial W}
$$

In [None]:
class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None

    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)

        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)