# Lab 7-1: Tips

## imports

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

#For reproducibility
torch.manual_seed(1)

<torch._C.Generator at 0x7fb44a014850>

## Training and Test Datasets

In [2]:
x_train = torch.FloatTensor([[1, 2, 1],
                             [1, 3, 2],
                             [1, 3, 4],
                             [1, 5, 5],
                             [1, 7, 5],
                             [1, 2, 5],
                             [1, 6, 6],
                             [1, 7, 7]
                            ])
y_train = torch.LongTensor([2, 2, 2, 1, 1, 1, 0, 0])

In [3]:
x_test = torch.FloatTensor([[2, 1, 1], [3, 1, 2], [3, 3, 4]])
y_test = torch.LongTensor([2, 2, 2])

## Model

In [4]:
class SoftmaxClassifierModel(nn.Module):
  def __init__(self):
    super().__init__()
    self.linear = nn.Linear(3, 3)

  def forward(self, x):
    return self.linear(x)

model = SoftmaxClassifierModel()

In [5]:
#optimizer 설정
optimizer = optim.SGD(model.parameters(), lr= 0.1)

In [6]:
#train
def train(model, optimizer, x_train, y_train):
  nb_epochs= 60
  for epoch in range(nb_epochs + 1):
    #H 계산
    prediction = model(x_train)
    #cost 계산
    cost = F.cross_entropy(prediction, y_train)

    #개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    #로그 출력
    if epoch % 10 == 0:
      print('Epoch {:4d}/{} Cost: {:.6f}'.format(
        epoch, nb_epochs, cost.item()
      ))

In [7]:
#test
def test(model, optimizer, x_test, y_test):
  prediction = model(x_test)
  prediction_classes = prediction.max(1)[1] #max(dim)[1]은 인덱스를 반환함
  correct_count = (prediction_classes == y_test).sum().item()
  cost = F.cross_entropy(prediction, y_test)

  print('Accuracy: {}% Cost: {:.6f}'.format(
      correct_count / len(y_test) * 100, cost.item()
  ))

In [8]:
train(model, optimizer, x_train, y_train)

Epoch    0/60 Cost: 2.203667
Epoch   10/60 Cost: 1.048378
Epoch   20/60 Cost: 0.983424
Epoch   30/60 Cost: 0.929585
Epoch   40/60 Cost: 0.884410
Epoch   50/60 Cost: 0.846174
Epoch   60/60 Cost: 0.813527


In [9]:
test(model, optimizer, x_test, y_test)

Accuracy: 100.0% Cost: 0.428444


## Learning Rate
Gradient Descent에서 $\alpha$값  
learning rate가 너무 크면 cost가 발산(diverge)하여, 점점 커진다.

In [10]:
model = SoftmaxClassifierModel()
optimizerLRe5 = optim.SGD(model.parameters(), lr=1e5) #lr=1e5 <- 이 부분에서 learning rate를 결정한다.

train(model, optimizerLRe5, x_train, y_train)

Epoch    0/60 Cost: 1.280268
Epoch   10/60 Cost: 1397263.250000
Epoch   20/60 Cost: 200090.921875
Epoch   30/60 Cost: 1676443.125000
Epoch   40/60 Cost: 400614.625000
Epoch   50/60 Cost: 1282693.125000
Epoch   60/60 Cost: 660331.375000


learning rate가 너무 작으면 cost가 거의 줄어들지 않는다.

In [11]:
model = SoftmaxClassifierModel()
optimizerLRe_10 = optim.SGD(model.parameters(), lr=1e-10) #lr=1e-10 <- 이 부분에서 learning rate를 결정한다.

train(model, optimizerLRe_10, x_train, y_train)

Epoch    0/60 Cost: 3.187324
Epoch   10/60 Cost: 3.187324
Epoch   20/60 Cost: 3.187324
Epoch   30/60 Cost: 3.187324
Epoch   40/60 Cost: 3.187324
Epoch   50/60 Cost: 3.187324
Epoch   60/60 Cost: 3.187324


따라서, 적절한 숫자로 시작하여 cost가 발산하면 작게. cost가 줄어들지 않으면 크게 조정해야한다.

## Data Preprocessing - 데이터 전처리

훈련 데이터를 전처리 하면 더 좋은 결과를 얻을 수도 있다.[\[#\]](https://youtu.be/1jPjVoDV_uo?t=260)  
이 예제에서는 데이터를 standardization(표준화)하는 과정을 다룬다(원본이 되는 문서에는 zero-center하고 normalize한다고 되어있는데, 잘못 작성된 듯 하다).  
standardization은 데이터의 중심이 0이 되고, 표준편차가 1이 되도록 만드는 것이다.normalize와는 다르다.

In [12]:
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70]])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

$$ x'_j=\cfrac{x_j-\mu_j}{\sigma_j} $$

여기서 $\sigma$는 standard deviation(표준 편차), $\mu$는 평균값이다.  


In [13]:
print(x_train.mean(dim=0))
print(x_train.std(dim=0))

tensor([84.8000, 84.6000, 85.6000])
tensor([11.0544, 12.2393, 12.6214])


In [14]:
norm_x_train = (x_train - x_train.mean(dim= 0)) / x_train.std(dim= 0)
print(norm_x_train)

tensor([[-1.0674, -0.3758, -0.8398],
        [ 0.7418,  0.2778,  0.5863],
        [ 0.3799,  0.5229,  0.3486],
        [ 1.0132,  1.0948,  1.1409],
        [-1.0674, -1.5197, -1.2360]])


standardization한 데이터로 학습하여 성능을 보자  


In [15]:
class MultivariateLinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3, 1)

    def forward(self, x):
        return self.linear(x)

model = MultivariateLinearRegressionModel()

In [16]:
#optimizer
optimizer = optim.SGD(model.parameters(), lr=1e-1)

In [17]:
def train(model, optimizer, x_train, y_train):
    nb_epochs = 20
    for epoch in range(nb_epochs +1):

        # H(x) 계산
        prediction = model(x_train)

        # cost 계산
        cost = F.mse_loss(prediction, y_train)

        # cost로 H(x) 개선
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

In [18]:
train(model, optimizer, norm_x_train, y_train)

Epoch    0/20 Cost: 29729.949219
Epoch    1/20 Cost: 18889.082031
Epoch    2/20 Cost: 12048.976562
Epoch    3/20 Cost: 7699.844727
Epoch    4/20 Cost: 4924.701660
Epoch    5/20 Cost: 3151.021240
Epoch    6/20 Cost: 2016.563110
Epoch    7/20 Cost: 1290.709229
Epoch    8/20 Cost: 826.215942
Epoch    9/20 Cost: 528.952271
Epoch   10/20 Cost: 338.703308
Epoch   11/20 Cost: 216.940033
Epoch   12/20 Cost: 139.006989
Epoch   13/20 Cost: 89.125130
Epoch   14/20 Cost: 57.196075
Epoch   15/20 Cost: 36.757317
Epoch   16/20 Cost: 23.672049
Epoch   17/20 Cost: 15.293401
Epoch   18/20 Cost: 9.927165
Epoch   19/20 Cost: 6.488902
Epoch   20/20 Cost: 4.284752


## Overfitting
너무 학습 데이터에 맞게 학습할 경우, 테스트 데이터에 대해서는 좋은 성능을 내지 못할 수 있다.
이를 막기 위해서는 다음 세 가지 방법을 주로 사용한다:
1. 더 많은 학습데이터
2. 더 적은 양의 feature(input 차원 개수)
3.**Regularization**

Regularization: 가중치(weight)가 너무 크지 않도록 하는 것  
여기서는 Regularization을 적용해 본다.

In [19]:
def train_with_regularization(model, optimizer, x_train, y_train):
  nb_epochs = 20
  for epoch in range(nb_epochs + 1):
    prediction = model(x_train)
    cost = F.mse_loss(prediction, y_train)

    #l2 regularization
    l2_reg = 0
    regLambda = 0.01
    for param in model.parameters():
      l2_reg += param.sum()
    l2_reg *= regLambda

    cost += l2_reg

    #H 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    print('Epoch {:4d}/{} Cost: {:.6f}'.format(
      epoch, nb_epochs, cost.item()
    ))

In [21]:
model = MultivariateLinearRegressionModel()
optimizer = optim.SGD(model.parameters(), lr= 1e-1)

train_with_regularization(model, optimizer, norm_x_train, y_train)

Epoch    1/20 Cost: 29615.738281
Epoch    2/20 Cost: 18804.660156
Epoch    3/20 Cost: 11992.252930
Epoch    4/20 Cost: 7663.357422
Epoch    5/20 Cost: 4901.892090
Epoch    6/20 Cost: 3137.173096
Epoch    7/20 Cost: 2008.507446
Epoch    8/20 Cost: 1286.373169
Epoch    9/20 Cost: 824.261475
Epoch   10/20 Cost: 528.518188
Epoch   11/20 Cost: 339.238495
Epoch   12/20 Cost: 218.091675
Epoch   13/20 Cost: 140.549042
Epoch   14/20 Cost: 90.913239
Epoch   15/20 Cost: 59.137978
Epoch   16/20 Cost: 38.793537
Epoch   17/20 Cost: 25.765297
Epoch   18/20 Cost: 17.419693
Epoch   19/20 Cost: 12.071230
Epoch   20/20 Cost: 8.641249
Epoch   21/20 Cost: 6.439342
