# 파이토치 과적합 방지를 위한 방법들 
1. weight decay (가중치 감소)
2. dropout (드롭아웃)
3. early stopping (조기 종료)

In [59]:
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

In [60]:
import pandas as pd

# Download the feature and label spreadsheets and convert each straight to a
# tensor. header=None makes pandas treat the first row as data rather than as
# column names. torch.Tensor(...) builds a tensor of PyTorch's default
# floating-point type from the underlying NumPy array.
X_train = torch.Tensor(
    pd.read_excel(
        "https://ds-lecture-data.s3.ap-northeast-2.amazonaws.com/MouseProtein/mouse_protein_X.xls",
        header=None,
    ).values
)
y_train = torch.Tensor(
    pd.read_excel(
        "https://ds-lecture-data.s3.ap-northeast-2.amazonaws.com/MouseProtein/mouse_protein_label.xls",
        header=None,
    ).values
)


In [61]:
# Baseline model for the PyTorch tutorial
class PytorchBaselineModel(nn.Module):
  """One linear layer followed by a sigmoid.

  Maps an input whose last dimension is ``input_dim`` to a single value in
  the open interval (0, 1).
  """

  def __init__(self, input_dim):
    """Create the layers.

    Args:
      input_dim: number of input features fed to the linear layer.
    """
    super().__init__()
    self.linear = nn.Linear(in_features=input_dim, out_features=1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Return ``sigmoid(linear(x))`` for the input tensor ``x``."""
    logits = self.linear(x)
    return self.sigmoid(logits)

In [62]:
# Instantiate the model; the feature count is the second dimension of X_train.
input_dim = X_train.shape[1]
model = PytorchBaselineModel(input_dim=input_dim)

In [63]:
# Training hyperparameters: epoch budget and an Adam optimizer over the
# model's parameters.
epochs = 10_000
optimizer = optim.Adam(model.parameters(), lr=0.1)

### 1. weight decay  
가중치 값이 너무 커지지 않도록 손실함수나 옵티마이저에 규제항을 추가함

1. L1 규제 : 가중치 w들의 절대값 합계를 비용 함수에 추가

2. L2 규제 : 모든 가중치 w들의 제곱합을 비용 함수에 추가  

In [64]:
# Pass `weight_decay` to the optimizer; for Adam this is an L2 penalty on the
# parameters.
optimizer = optim.Adam(
    model.parameters(),
    lr=0.1,
    weight_decay=1e-5,
)

### 2. dropout
학습 중 일부 값을 무작위로 0으로 만들어 모델이 특정 특징이나 뉴런에 과도하게 의존하지 않도록 함

In [65]:
# Baseline model for the PyTorch tutorial, with dropout added
class PytorchBaselineModel(nn.Module):
  """Dropout on the input features, then one linear layer and a sigmoid.

  Maps an input whose last dimension is ``input_dim`` to a single value in
  the open interval (0, 1). Dropout is only active in training mode
  (``model.train()``); in evaluation mode (``model.eval()``) the output is
  exactly ``sigmoid(linear(x))``.
  """

  def __init__(self, input_dim):
    """Create the layers.

    Args:
      input_dim: number of input features fed to the linear layer.
    """
    super().__init__()
    self.linear = nn.Linear(input_dim, 1)
    self.dropout = nn.Dropout(0.25)  # fraction of elements zeroed in training
    self.sigmoid = nn.Sigmoid()

  def forward(self, x):
    """Return the model output for the input tensor ``x``."""
    # Dropout must act on the features, not on the single output logit:
    # zeroing the logit would force the prediction to sigmoid(0) = 0.5 and
    # rescale every surviving logit by 1 / (1 - p).
    x = self.dropout(x)
    x = self.linear(x)
    return self.sigmoid(x)

### 3. early stopping
파이토치는 케라스와 달리 조기 종료 기능을 기본으로 제공하지 않으므로  
직접 class로 구현해야 한다!

In [70]:
# Early-stopping helper
class EarlyStopping:
  """Signal when a monitored loss has stopped improving.

  Call ``step(loss)`` once per epoch, then check ``is_stop()``.

  Attributes:
    loss: lowest loss accepted as an improvement so far (starts at +inf).
    patience: number of consecutive ``step`` calls without an improvement.
    patience_limit: value of ``patience`` at which ``is_stop()`` turns True.
    min_delta: amount by which a loss must undercut ``loss`` to count as an
      improvement.
  """

  def __init__(self, patience=5, min_delta=0.0):
    """Set up the tracker.

    Args:
      patience: number of consecutive non-improving steps to tolerate.
      min_delta: minimum decrease of the loss that counts as an improvement.
        The default of 0.0 accepts any strict decrease, however small.
    """
    self.loss = float("inf")
    self.patience = 0
    self.patience_limit = patience
    self.min_delta = min_delta

  def step(self, loss):
    """Record one loss value and update the non-improvement counter.

    A NaN loss compares as "not lower" and therefore counts as a
    non-improving step.
    """
    if loss < self.loss - self.min_delta:
      self.loss = loss
      self.patience = 0
    else:
      self.patience += 1

  def is_stop(self):
    """Return True once ``patience`` has reached ``patience_limit``."""
    return self.patience >= self.patience_limit

In [71]:
# Training settings: epoch budget and a fresh Adam optimizer with a smaller
# learning rate.
epochs = 10_000
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

In [74]:
# Training loop with early stopping
early_stop = EarlyStopping(patience=5)

for epoch in range(epochs):
  # Clear gradients left over from the previous iteration.
  optimizer.zero_grad()

  # Forward pass over the whole training set, then MSE against the labels.
  hypothesis = model(X_train)
  loss = F.mse_loss(hypothesis, y_train)
  loss.backward()

  # The monitored value is the training loss itself. Stop, without applying
  # this iteration's gradients, once it has failed to improve for `patience`
  # consecutive epochs.
  early_stop.step(loss.item())
  if early_stop.is_stop():
    break

  optimizer.step()

  print(f'Epoch {epoch:4d}/{epochs} Cost: {loss.item()}')


Epoch    0/10000 Cost: 0.47075343132019043
Epoch    1/10000 Cost: 0.47075343132019043
Epoch    2/10000 Cost: 0.47075343132019043
Epoch    3/10000 Cost: 0.47075340151786804
Epoch    4/10000 Cost: 0.47075334191322327
Epoch    5/10000 Cost: 0.47075334191322327
Epoch    6/10000 Cost: 0.4707533121109009
Epoch    7/10000 Cost: 0.4707533121109009
Epoch    8/10000 Cost: 0.4707532823085785
Epoch    9/10000 Cost: 0.4707532823085785
Epoch   10/10000 Cost: 0.4707532823085785
Epoch   11/10000 Cost: 0.4707532823085785
Epoch   12/10000 Cost: 0.4707532823085785
Epoch   13/10000 Cost: 0.4707532525062561
Epoch   14/10000 Cost: 0.4707532525062561
Epoch   15/10000 Cost: 0.4707532525062561
Epoch   16/10000 Cost: 0.4707532525062561
Epoch   17/10000 Cost: 0.4707532525062561
Epoch   18/10000 Cost: 0.4707532227039337
Epoch   19/10000 Cost: 0.4707532227039337
Epoch   20/10000 Cost: 0.4707532227039337
Epoch   21/10000 Cost: 0.4707532227039337
Epoch   22/10000 Cost: 0.4707532227039337
