In [1]:
# Clone the project repository, then make its DeepLearning directory the
# working directory for the rest of the notebook.
# NOTE(review): presumably this is what lets the later `dezero` imports
# resolve -- confirm that the package lives in this directory.
!git clone https://github.com/Leejunho123/5G_project.git
%cd 5G_project/DeepLearning

Cloning into '5G_project'...
remote: Enumerating objects: 441, done.
remote: Counting objects: 100% (441/441), done.
remote: Compressing objects: 100% (278/278), done.
remote: Total 11469 (delta 333), reused 263 (delta 162), pack-reused 11028
Receiving objects: 100% (11469/11469), 245.94 MiB | 31.13 MiB/s, done.
Resolving deltas: 100% (2871/2871), done.
Checking out files: 100% (248/248), done.
/content/5G_project/DeepLearning


## 46 Optimizer로 수행하는 매개변수 갱신

### 46.1 Optimizer 클래스

In [2]:
class Optimizer:
    """Base class that applies parameter updates to a target.

    The update rule itself is supplied by subclasses through ``update_one``;
    this class only selects the parameters, runs the registered hooks and
    dispatches to ``update_one``.
    """

    def __init__(self):
        # No target is attached until ``setup`` is called.
        self.target = None
        # Callables invoked with the parameter list at the start of ``update``.
        self.hooks = []

    def setup(self, target):
        """Attach ``target`` and return this optimizer so calls can be chained."""
        self.target = target
        return self

    def update(self):
        """Run every hook, then update each parameter that has a gradient."""
        # Only parameters whose ``grad`` is not None take part in the update.
        trainable = []
        for candidate in self.target.params():
            if candidate.grad is not None:
                trainable.append(candidate)

        # Every hook receives the full list before any parameter is changed.
        for hook in self.hooks:
            hook(trainable)

        for selected in trainable:
            self.update_one(selected)

    def update_one(self, param):
        """Update a single parameter; subclasses must override this."""
        raise NotImplementedError()

    def add_hook(self, f):
        """Register ``f`` to be called with the parameter list on each ``update``."""
        self.hooks.append(f)
    

### 46.2 SGD 클래스 구현

In [None]:
class SGD(Optimizer):
    """Optimizer that moves each parameter against its gradient, scaled by ``lr``."""

    def __init__(self, lr=0.01):
        super().__init__()
        # Multiplier applied to the gradient on every update.
        self.lr = lr

    def update_one(self, param):
        """Subtract ``lr * param.grad.data`` from ``param.data``."""
        step = self.lr * param.grad.data
        # Augmented assignment, exactly as before, so ``param.data`` is
        # updated through ``-=`` rather than rebuilt with a plain subtraction.
        param.data -= step

### 46.3 SGD 클래스를 사용한 문제 해결

In [2]:
import numpy as np
from dezero import Variable
from dezero import optimizers
import dezero.functions as F
from dezero.models import MLP



# Fix the seed so the random draws below are repeatable.
np.random.seed(0)

# Toy dataset: 100 uniform inputs, targets are sin(2*pi*x) plus uniform noise.
# The draw order (x first, then the noise) is kept so the data is unchanged.
x = np.random.rand(100, 1)
y = np.sin(2 * np.pi * x) + np.random.rand(100, 1)

# Hyperparameters read by the training loop.
lr = 0.2
max_iter = 10000
hidden_size = 10

# MLP built from the sizes (hidden_size, 1), with an SGD optimizer attached.
model = MLP((hidden_size, 1))
optimizer = optimizers.SGD(lr)
optimizer.setup(model)

<dezero.optimizers.SGD at 0x7f09ebe48950>

In [3]:
for i in range(max_iter):
    # Forward pass and loss for the current parameters.
    y_pred = model(x)
    loss = F.mean_squared_error(y, y_pred)

    # cleargrads() is called before backward() on every iteration.
    model.cleargrads()
    loss.backward()

    # Let the optimizer apply its update to the model.
    optimizer.update()

    # Print the loss on iterations 0, 1000, 2000, ...
    if i % 1000 == 0:
        print(loss)

variable(0.8165178492839196)
variable(0.24990280802148895)
variable(0.24609876581126014)
variable(0.2372159081431807)
variable(0.20793216413350177)
variable(0.12311905720649349)
variable(0.07888166506355149)
variable(0.07655073683421633)
variable(0.07637803086238223)
variable(0.07618764131185574)


### 46.4 SGD 이외의 최적화 기법

In [6]:
import numpy as np
from dezero import Optimizer

class MomentumSGD(Optimizer):
    """Optimizer that keeps one velocity array per parameter.

    On every update the stored velocity is scaled by ``momentum``, reduced by
    ``lr * grad`` and then added to the parameter's data.
    """

    def __init__(self, lr=0.01, momentum=0.9):
        super().__init__()
        self.lr = lr
        self.momentum = momentum
        # Velocity arrays keyed by id(param); entries are created lazily
        # the first time update_one sees a parameter.
        self.vs = {}

    def update_one(self, param):
        """Advance the velocity stored for ``param`` and apply it to ``param.data``."""
        key = id(param)
        try:
            velocity = self.vs[key]
        except KeyError:
            # First update for this parameter: start from zeros shaped like
            # its data.
            velocity = self.vs[key] = np.zeros_like(param.data)

        # In-place operations, so the array held in self.vs carries the
        # updated velocity into the next call.
        velocity *= self.momentum
        velocity -= self.lr * param.grad.data
        param.data += velocity