# Lab 7-1: Tips

Author: Seungjae Lee (이승재)

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
# For reproducibility
torch.manual_seed(1)

<torch._C.Generator at 0x1edd7269230>

## Training and Test Datasets

In [3]:
x_train = torch.FloatTensor([[1, 2, 1],
                             [1, 3, 2],
                             [1, 3, 4],
                             [1, 5, 5],
                             [1, 7, 5],
                             [1, 2, 5],
                             [1, 6, 6],
                             [1, 7, 7]
                            ])
y_train = torch.LongTensor([2, 2, 2, 1, 1, 1, 0, 0])

In [4]:
x_test = torch.FloatTensor([[2, 1, 1], [3, 1, 2], [3, 3, 4]])
y_test = torch.LongTensor([2, 2, 2])

## Model

In [13]:
class SoftmaxClassifierModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3, 8) #x_train의 shape과 y_train의 class의 갯수를 고려해보세요.
    def forward(self, x):
        return self.linear(x)

In [14]:
model = SoftmaxClassifierModel()# #SoftmaxClassifierModel()로 모델을 초기화하세요.

In [15]:
# optimizer 설정
optimizer = optim.SGD(model.parameters(),lr=0.1)# #SGD optimizer를 사용하고 learning rate는 0.1로 적용하세요.

In [16]:
def train(model, optimizer, x_train, y_train):
    nb_epochs = 20
    for epoch in range(nb_epochs):

        # H(x) 계산
        prediction = model(x_train) #모델을 생성해서 예측값을 구해보세요.

        # cost 계산
        cost = F.cross_entropy(prediction,y_train) #PyTorch에서 제공하는 F.cross_entropy함수를 사용해 cost를 구현해보세요.

        # cost로 H(x) 개선
        optimizer.zero_grad()  # gradient를 0으로 초기화
        cost.backward()  # 비용 함수를 미분하여 gradient 계산
        optimizer.step()  # W와 b를 업데이트

        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

In [17]:
def test(model, optimizer, x_test, y_test):
    prediction = model(x_test)
    predicted_classes = prediction.max(1)[1] # 각각의 데이터 마다 가장 큰 값의 인덱스를 취함 => class를 예측하는 것
    correct_count = (predicted_classes == y_test).sum().item()
    cost = F.cross_entropy(prediction, y_test)

    print('Accuracy: {}% Cost: {:.6f}'.format(
         correct_count / len(y_test) * 100, cost.item()
    ))

In [18]:
train(model, optimizer, x_train, y_train)

Epoch    0/20 Cost: 1.991479
Epoch    1/20 Cost: 1.663707
Epoch    2/20 Cost: 1.484954
Epoch    3/20 Cost: 1.387691
Epoch    4/20 Cost: 1.340652
Epoch    5/20 Cost: 1.308806
Epoch    6/20 Cost: 1.284747
Epoch    7/20 Cost: 1.264902
Epoch    8/20 Cost: 1.247684
Epoch    9/20 Cost: 1.232249
Epoch   10/20 Cost: 1.218112
Epoch   11/20 Cost: 1.204964
Epoch   12/20 Cost: 1.192604
Epoch   13/20 Cost: 1.180891
Epoch   14/20 Cost: 1.169724
Epoch   15/20 Cost: 1.159025
Epoch   16/20 Cost: 1.148738
Epoch   17/20 Cost: 1.138815
Epoch   18/20 Cost: 1.129221
Epoch   19/20 Cost: 1.119926


In [19]:
test(model, optimizer, x_test, y_test)

Accuracy: 0.0% Cost: 2.241292


## Learning Rate

Gradient Descent 에서의 $\alpha$ 값

`optimizer = optim.SGD(model.parameters(), lr=0.1)` 에서 `lr=0.1` 이다

Q1. Learning Rate가 다음과 같을 때 어떤 일이 발생할까요?

Learing rate가 클 경우 : 변화값이 커 가장 작은 gradient를 지나칠 수 있음

Learing rate가 작을 경우 : gradient값을 작게 하는데 시간이 너무 오래 걸림

Large learning rate

In [20]:
model = SoftmaxClassifierModel()

In [21]:
optimizer = optim.SGD(model.parameters(), lr=1e5)

In [22]:
train(model, optimizer, x_train, y_train)

Epoch    0/20 Cost: 2.610557
Epoch    1/20 Cost: 859732.187500
Epoch    2/20 Cost: 2240830.000000
Epoch    3/20 Cost: 103330.054688
Epoch    4/20 Cost: 1747232.250000
Epoch    5/20 Cost: 1649857.625000
Epoch    6/20 Cost: 792392.500000
Epoch    7/20 Cost: 1869107.250000
Epoch    8/20 Cost: 388137.812500
Epoch    9/20 Cost: 1000217.750000
Epoch   10/20 Cost: 982670.125000
Epoch   11/20 Cost: 1387705.000000
Epoch   12/20 Cost: 1519107.250000
Epoch   13/20 Cost: 951767.500000
Epoch   14/20 Cost: 786155.250000
Epoch   15/20 Cost: 887357.687500
Epoch   16/20 Cost: 1579892.500000
Epoch   17/20 Cost: 1253482.250000
Epoch   18/20 Cost: 1143955.000000
Epoch   19/20 Cost: 584492.812500


Small learning rate

In [23]:
model = SoftmaxClassifierModel()

In [24]:
optimizer = optim.SGD(model.parameters(), lr=1e-10)

In [25]:
train(model, optimizer, x_train, y_train)

Epoch    0/20 Cost: 3.777051
Epoch    1/20 Cost: 3.777051
Epoch    2/20 Cost: 3.777051
Epoch    3/20 Cost: 3.777051
Epoch    4/20 Cost: 3.777051
Epoch    5/20 Cost: 3.777051
Epoch    6/20 Cost: 3.777051
Epoch    7/20 Cost: 3.777051
Epoch    8/20 Cost: 3.777051
Epoch    9/20 Cost: 3.777051
Epoch   10/20 Cost: 3.777051
Epoch   11/20 Cost: 3.777051
Epoch   12/20 Cost: 3.777051
Epoch   13/20 Cost: 3.777051
Epoch   14/20 Cost: 3.777051
Epoch   15/20 Cost: 3.777051
Epoch   16/20 Cost: 3.777051
Epoch   17/20 Cost: 3.777051
Epoch   18/20 Cost: 3.777051
Epoch   19/20 Cost: 3.777051


적절한 숫자로 시작해 발산하면 작게, cost가 줄어들지 않으면 크게 조정하자.

In [26]:
model = SoftmaxClassifierModel()

In [27]:
optimizer = optim.SGD(model.parameters(), lr=1e-1)

In [28]:
train(model, optimizer, x_train, y_train)

Epoch    0/20 Cost: 4.354907
Epoch    1/20 Cost: 2.100595
Epoch    2/20 Cost: 1.474521
Epoch    3/20 Cost: 1.095100
Epoch    4/20 Cost: 1.039693
Epoch    5/20 Cost: 1.008386
Epoch    6/20 Cost: 0.985914
Epoch    7/20 Cost: 0.968357
Epoch    8/20 Cost: 0.953931
Epoch    9/20 Cost: 0.941626
Epoch   10/20 Cost: 0.930854
Epoch   11/20 Cost: 0.921229
Epoch   12/20 Cost: 0.912500
Epoch   13/20 Cost: 0.904484
Epoch   14/20 Cost: 0.897053
Epoch   15/20 Cost: 0.890110
Epoch   16/20 Cost: 0.883581
Epoch   17/20 Cost: 0.877408
Epoch   18/20 Cost: 0.871546
Epoch   19/20 Cost: 0.865958


## Data Preprocessing (데이터 전처리)

데이터를 zero-center하고 normalize하자.

In [29]:
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70]])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

$$ x'_j = \frac{x_j - \mu_j}{\sigma_j} $$

여기서 $\sigma$ 는 standard deviation, $\mu$ 는 평균값 이다.

In [30]:
mu = x_train.mean(dim=0)

In [31]:
sigma = x_train.std(dim=0)

In [32]:
norm_x_train = (x_train - mu) / sigma

In [33]:
print(norm_x_train)

tensor([[-1.0674, -0.3758, -0.8398],
        [ 0.7418,  0.2778,  0.5863],
        [ 0.3799,  0.5229,  0.3486],
        [ 1.0132,  1.0948,  1.1409],
        [-1.0674, -1.5197, -1.2360]])


Normalize와 zero center한 X로 학습해서 성능을 보자

In [34]:
class MultivariateLinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3, 1)

    def forward(self, x):
        return self.linear(x)

In [35]:
model = MultivariateLinearRegressionModel()

In [36]:
optimizer = optim.SGD(model.parameters(), lr=1e-1)

In [37]:
def train(model, optimizer, x_train, y_train):
    nb_epochs = 20
    for epoch in range(nb_epochs):

        # H(x) 계산
        prediction = model(x_train)

        # cost 계산
        cost = F.mse_loss(prediction, y_train)

        # cost로 H(x) 개선
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

In [45]:
train(model, optimizer,norm_x_train, y_train)

Epoch    0/20 Cost: 6.655849
Epoch    1/20 Cost: 4.381054
Epoch    2/20 Cost: 2.947375
Epoch    3/20 Cost: 2.033244
Epoch    4/20 Cost: 1.446264
Epoch    5/20 Cost: 1.067212
Epoch    6/20 Cost: 0.820901
Epoch    7/20 Cost: 0.659572
Epoch    8/20 Cost: 0.552737
Epoch    9/20 Cost: 0.480899
Epoch   10/20 Cost: 0.431577
Epoch   11/20 Cost: 0.396801
Epoch   12/20 Cost: 0.371456
Epoch   13/20 Cost: 0.352255
Epoch   14/20 Cost: 0.337107
Epoch   15/20 Cost: 0.324669
Epoch   16/20 Cost: 0.314054
Epoch   17/20 Cost: 0.304717
Epoch   18/20 Cost: 0.296290
Epoch   19/20 Cost: 0.288540


Q2. 이렇게 Data Preprocessing을 하는 이유가 무엇일까요?

=> 데이터에서 누락된 부분, 오차, 가공할 부분있는지 확인하기 위해서

## Overfitting

Q3. Overfitting의 개념은?

=> 훈련 데이터에 지나치게 맞춰져서 다른 데이터로 테스트할 때 오류가 발생하는 것

Q4. 그렇다면 Overfitting을 방지하는 방법에는 무엇이 있을까요?

=> train data, test data, validation data를 구분하고 훈련시킨다.

Regularization: Let's not have too big numbers in the weights

In [41]:
def train_with_regularization(model, optimizer, x_train, y_train):
    nb_epochs = 20
    for epoch in range(nb_epochs):

        # H(x) 계산
        prediction = model(x_train)

        # cost 계산
        cost = F.mse_loss(prediction, y_train)
        
        # l2 norm 계산
        l2_reg = 0
        for param in model.parameters():
            l2_reg += torch.norm(param)
            
        cost += l2_reg

        # cost로 H(x) 개선
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch+1, nb_epochs, cost.item()
        ))

In [42]:
model = MultivariateLinearRegressionModel()

In [43]:
optimizer = optim.SGD(model.parameters(), lr=1e-1)

In [44]:
train_with_regularization(model, optimizer, norm_x_train, y_train)

Epoch    1/20 Cost: 29657.029297
Epoch    2/20 Cost: 18852.949219
Epoch    3/20 Cost: 12092.840820
Epoch    4/20 Cost: 7794.446777
Epoch    5/20 Cost: 5051.640625
Epoch    6/20 Cost: 3298.616455
Epoch    7/20 Cost: 2177.366211
Epoch    8/20 Cost: 1459.961182
Epoch    9/20 Cost: 1000.875244
Epoch   10/20 Cost: 707.071472
Epoch   11/20 Cost: 519.036621
Epoch   12/20 Cost: 398.690338
Epoch   13/20 Cost: 321.664398
Epoch   14/20 Cost: 272.363312
Epoch   15/20 Cost: 240.806305
Epoch   16/20 Cost: 220.605591
Epoch   17/20 Cost: 207.673233
Epoch   18/20 Cost: 199.392700
Epoch   19/20 Cost: 194.089783
Epoch   20/20 Cost: 190.692474
