# 파이토치 미니 프로젝트: 보스턴 집값 예측
유명한 데이터셋인 보스턴 집값 데이터셋으로 배운 것들을 활용해보겠습니다.

1. 파이토치 모델 선언
2. 가중치 초기화
3. 과적합 방지기술 3종 

In [360]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [361]:
# 데이터 임포트
from sklearn.datasets import load_boston
df = load_boston()
X_train = pd.DataFrame(df['data'], columns=df['feature_names'])
y_train = pd.DataFrame(df['target'], columns=['target'])


    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_h

In [362]:
X_train.head(3)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03


In [363]:
y_train.head(3)

Unnamed: 0,target
0,24.0
1,21.6
2,34.7


In [364]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X_train, y_train,
                                                    test_size=0.1)

X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train,
                                                      test_size=0.2)

In [365]:
X_train = torch.Tensor(X_train.values)
X_valid = torch.Tensor(X_valid.values)
X_test = torch.Tensor(X_test.values)
y_train = torch.Tensor(y_train.values)
y_valid = torch.Tensor(y_valid.values)
y_test = torch.Tensor(y_test.values)

In [366]:
X_train.shape, y_train.shape

(torch.Size([364, 13]), torch.Size([364, 1]))

In [367]:
class Model(nn.Module):
  def __init__(self):
    super().__init__()
    self.layer1 = nn.Sequential(
        nn.Linear(13, 32)    
    )
    self.layer2 = nn.Sequential(
        nn.Linear(32, 16)
    )
    self.layer3 = nn.Sequential(
        nn.Linear(16, 1)
    )

  def forward(self, x):
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    return x

  def weight_initialization(self):
    self.layer1[0].weight.data = nn.init.kaiming_uniform_(self.layer1[0].weight.data)
    self.layer2[0].weight.data = nn.init.kaiming_uniform_(self.layer2[0].weight.data)

In [368]:
model = Model()

In [369]:
model.weight_initialization()

In [370]:
model

Model(
  (layer1): Sequential(
    (0): Linear(in_features=13, out_features=32, bias=True)
  )
  (layer2): Sequential(
    (0): Linear(in_features=32, out_features=16, bias=True)
  )
  (layer3): Sequential(
    (0): Linear(in_features=16, out_features=1, bias=True)
  )
)

In [371]:
epochs=1000
optimizer = optim.Adam(model.parameters(), lr=0.01)

In [372]:
for epoch in range(epochs):
  prediction = model(X_train)
  loss = F.mse_loss(prediction, y_train)
  # early_stop.step(loss.item())
  # if early_stop.is_stop():
  #   break
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  print(f'epoch: {epoch:4d}/{epochs}  loss: {loss.item():.6f}')

epoch:    0/1000  loss: 932.285217
epoch:    1/1000  loss: 26983.765625
epoch:    2/1000  loss: 2004.777954
epoch:    3/1000  loss: 5632.539551
epoch:    4/1000  loss: 13666.547852
epoch:    5/1000  loss: 9625.527344
epoch:    6/1000  loss: 2700.078613
epoch:    7/1000  loss: 109.267265
epoch:    8/1000  loss: 2096.355469
epoch:    9/1000  loss: 4852.842285
epoch:   10/1000  loss: 5339.650879
epoch:   11/1000  loss: 3570.031494
epoch:   12/1000  loss: 1309.992920
epoch:   13/1000  loss: 124.192322
epoch:   14/1000  loss: 421.121918
epoch:   15/1000  loss: 1518.475952
epoch:   16/1000  loss: 2364.759766
epoch:   17/1000  loss: 2347.887451
epoch:   18/1000  loss: 1594.168945
epoch:   19/1000  loss: 675.949097
epoch:   20/1000  loss: 130.004974
epoch:   21/1000  loss: 147.679108
epoch:   22/1000  loss: 547.533936
epoch:   23/1000  loss: 965.517090
epoch:   24/1000  loss: 1109.308350
epoch:   25/1000  loss: 915.417480
epoch:   26/1000  loss: 535.377075
epoch:   27/1000  loss: 200.043716
ep

In [373]:
# 모델 테스트
from sklearn.metrics import mean_squared_error, r2_score

with torch.no_grad():
  prediction = model(X_valid)
  mse = mean_squared_error(y_valid, prediction)
  r2 = r2_score(y_valid, prediction)

mse, r2

(32.51355, 0.5812235194320388)

In [375]:
# 드롭아웃의 추가

class Model(nn.Module):
  def __init__(self):
    super().__init__()
    self.layer1 = nn.Sequential(
        nn.Linear(13, 32),
        nn.Dropout(0.2)    
    )
    self.layer2 = nn.Sequential(
        nn.Linear(32, 16),
        nn.Dropout(0.2)
    )
    self.layer3 = nn.Sequential(
        nn.Linear(16, 1)
    )

  def forward(self, x):
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    return x

  def weight_initialization(self):
    self.layer1[0].weight.data = nn.init.kaiming_uniform_(self.layer1[0].weight.data)
    self.layer2[0].weight.data = nn.init.kaiming_uniform_(self.layer2[0].weight.data)

In [376]:
model = Model()
model.weight_initialization()
model

Model(
  (layer1): Sequential(
    (0): Linear(in_features=13, out_features=32, bias=True)
    (1): Dropout(p=0.2, inplace=False)
  )
  (layer2): Sequential(
    (0): Linear(in_features=32, out_features=16, bias=True)
    (1): Dropout(p=0.2, inplace=False)
  )
  (layer3): Sequential(
    (0): Linear(in_features=16, out_features=1, bias=True)
  )
)

In [377]:
# 조기종료의 추가

class EarlyStopping:
  def __init__(self, patience=5):
    self.loss = np.inf
    self.patience = 0
    self.patience_limit = patience
        
  def step(self, loss):
    if self.loss > loss:
      self.loss = loss
      self.patience = 0
    else:
      self.patience += 1
  
  def is_stop(self):
    return self.patience >= self.patience_limit

In [378]:
early_stop = EarlyStopping(patience=15)

In [379]:
# 규제항 추가

epochs=1000
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)

In [380]:
for epoch in range(epochs):
  prediction = model(X_train)
  loss = F.mse_loss(prediction, y_train)
  early_stop.step(loss.item())
  if early_stop.is_stop():
    break
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  print(f'epoch: {epoch:4d}/{epochs}  loss: {loss.item():.6f}')

epoch:    0/1000  loss: 47759.003906
epoch:    1/1000  loss: 16481.246094
epoch:    2/1000  loss: 15606.392578
epoch:    3/1000  loss: 22969.232422
epoch:    4/1000  loss: 17331.005859
epoch:    5/1000  loss: 11777.899414
epoch:    6/1000  loss: 8292.807617
epoch:    7/1000  loss: 6207.759766
epoch:    8/1000  loss: 6724.564453
epoch:    9/1000  loss: 8174.574707
epoch:   10/1000  loss: 8274.653320
epoch:   11/1000  loss: 6756.573730
epoch:   12/1000  loss: 5354.865723
epoch:   13/1000  loss: 3782.728271
epoch:   14/1000  loss: 2602.690918
epoch:   15/1000  loss: 2126.123779
epoch:   16/1000  loss: 1669.335205
epoch:   17/1000  loss: 1548.643188
epoch:   18/1000  loss: 1776.994873
epoch:   19/1000  loss: 1763.064697
epoch:   20/1000  loss: 1616.739868
epoch:   21/1000  loss: 1722.711914
epoch:   22/1000  loss: 1835.878906
epoch:   23/1000  loss: 1649.150391
epoch:   24/1000  loss: 1440.202637
epoch:   25/1000  loss: 1276.914185
epoch:   26/1000  loss: 1163.411255
epoch:   27/1000  loss

In [381]:
# 모델 테스트
from sklearn.metrics import mean_squared_error, r2_score

with torch.no_grad():
  prediction = model(X_valid)
  mse = mean_squared_error(y_valid, prediction)
  r2 = r2_score(y_valid, prediction)

mse, r2

(98.112915, -0.2637001309392699)

확실히 학습이 덜 되는 모습을 볼 수 있다... 너무 성능이 떨어지네. 적당한 지점을 찾는게 능력일 듯 하다. 