## **Imports**

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# For reproducibility: seed PyTorch's global random number generator with a fixed value.
torch.manual_seed(1)

<torch._C.Generator at 0x7f35caed3850>

## **Training and Test Datasets**

In [22]:
# Toy classification training set: eight samples, three features each,
# with integer class labels drawn from {0, 1, 2}.
train_features = [
    [1, 2, 1],
    [1, 3, 2],
    [1, 3, 4],
    [1, 5, 5],
    [1, 7, 5],
    [1, 2, 5],
    [1, 6, 6],
    [1, 7, 7],
]
train_labels = [2, 2, 2, 1, 1, 1, 0, 0]

x_train = torch.FloatTensor(train_features)
y_train = torch.LongTensor(train_labels)

In [23]:
# Held-out evaluation set: three samples, all labeled class 2.
test_features = [
    [2, 1, 1],
    [3, 1, 2],
    [3, 3, 4],
]
test_labels = [2, 2, 2]

x_test = torch.FloatTensor(test_features)
y_test = torch.LongTensor(test_labels)

## **Model**

In [24]:
class SoftmaxClassifierModel(nn.Module):
    """Single linear layer that maps input features to per-class scores.

    ``forward`` returns the raw linear output; no softmax is applied here.

    Args:
        in_features: Number of input features per sample (default 3).
        num_classes: Number of output scores per sample (default 3).
    """

    def __init__(self, in_features=3, num_classes=3):
        super().__init__()
        self.linear = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return the linear layer's output for ``x``."""
        return self.linear(x)

In [25]:
# Fresh classifier, optimized with plain SGD over all of its parameters.
model = SoftmaxClassifierModel()
optimizer = optim.SGD(params=model.parameters(), lr=0.1)

In [43]:
def train(model, optimizer, x_train, y_train, loss_func='CE', regularization=False,
          nb_epochs=20):
    """Run full-batch training on ``model`` and print the cost after every epoch.

    Args:
        model: Callable mapping ``x_train`` to predictions.
        optimizer: Optimizer whose ``zero_grad``/``step`` update the model.
        x_train: Input tensor passed to ``model`` as a single batch.
        y_train: Targets handed to the selected loss together with the predictions.
        loss_func: ``'CE'`` selects ``F.cross_entropy``; ``'MSE'`` selects ``F.mse_loss``.
        regularization: If True, the sum of the L2 norms of all model parameters
            is added to the cost before back-propagation.
        nb_epochs: Number of update steps to perform (default 20).

    Raises:
        ValueError: If ``loss_func`` is neither ``'CE'`` nor ``'MSE'``.
    """
    # Resolve the loss once, up front, so an unsupported name fails with a clear
    # error instead of leaving ``cost`` undefined inside the loop.
    if loss_func == 'CE':
        criterion = F.cross_entropy
    elif loss_func == 'MSE':
        criterion = F.mse_loss
    else:
        raise ValueError(
            "loss_func must be 'CE' or 'MSE', got {!r}".format(loss_func)
        )

    for epoch in range(nb_epochs):

        # Compute H(x)
        prediction = model(x_train)

        # Compute the cost
        cost = criterion(prediction, y_train)

        # Add the L2 penalty
        if regularization:
            # Sum of each parameter tensor's (unsquared) L2 norm, with weight 1.
            l2_reg = sum(torch.norm(param) for param in model.parameters())
            cost = cost + l2_reg

        # Improve H(x) using the cost
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        print('Epoch {:4d}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

In [27]:
def test(model, optimizer, x_test, y_test, loss_func='CE'):
    prediction = model(x_test)
    predicted_classes = prediction.max(1)[1]
    correct_count = (predicted_classes == y_test).sum().item()

    if loss_func == 'CE':
        cost = F.cross_entropy(prediction, y_test)
    elif loss_func == 'MSE':
        cost = F.mse_loss(prediction, y_test)

    print('Accuracy: {}% Cost: {:.6f}'.format(
         correct_count / len(y_test) * 100, cost.item()
    ))

## **Train & Test**

In [28]:
# Fit the classifier on the training set with cross-entropy loss.
train(model=model, optimizer=optimizer, x_train=x_train, y_train=y_train, loss_func='CE')

Epoch    0/20 Cost: 1.566329
Epoch    1/20 Cost: 1.273856
Epoch    2/20 Cost: 1.185181
Epoch    3/20 Cost: 1.164505
Epoch    4/20 Cost: 1.151169
Epoch    5/20 Cost: 1.139832
Epoch    6/20 Cost: 1.129630
Epoch    7/20 Cost: 1.120178
Epoch    8/20 Cost: 1.111199
Epoch    9/20 Cost: 1.102585
Epoch   10/20 Cost: 1.094248
Epoch   11/20 Cost: 1.086148
Epoch   12/20 Cost: 1.078252
Epoch   13/20 Cost: 1.070544
Epoch   14/20 Cost: 1.063008
Epoch   15/20 Cost: 1.055635
Epoch   16/20 Cost: 1.048418
Epoch   17/20 Cost: 1.041350
Epoch   18/20 Cost: 1.034427
Epoch   19/20 Cost: 1.027643


In [29]:
# Report accuracy and cross-entropy cost on the held-out test set.
test(model=model, optimizer=optimizer, x_test=x_test, y_test=y_test, loss_func='CE')

Accuracy: 0.0% Cost: 1.654612


## **Learning rate**

In [30]:
# Re-create the classifier and use a very large learning rate (1e5).
model = SoftmaxClassifierModel()
optimizer = optim.SGD(params=model.parameters(), lr=1e5)

In [31]:
# Train with the large-learning-rate optimizer and watch the per-epoch cost.
train(model=model, optimizer=optimizer, x_train=x_train, y_train=y_train, loss_func='CE')

Epoch    0/20 Cost: 2.221630
Epoch    1/20 Cost: 1538446.875000
Epoch    2/20 Cost: 1726642.000000
Epoch    3/20 Cost: 534731.812500
Epoch    4/20 Cost: 258759.343750
Epoch    5/20 Cost: 1871575.125000
Epoch    6/20 Cost: 1154767.125000
Epoch    7/20 Cost: 947821.812500
Epoch    8/20 Cost: 1993450.125000
Epoch    9/20 Cost: 511884.375000
Epoch   10/20 Cost: 1207824.250000
Epoch   11/20 Cost: 1396954.500000
Epoch   12/20 Cost: 1179072.000000
Epoch   13/20 Cost: 1468450.125000
Epoch   14/20 Cost: 758040.375000
Epoch   15/20 Cost: 769030.562500
Epoch   16/20 Cost: 630706.125000
Epoch   17/20 Cost: 1144696.875000
Epoch   18/20 Cost: 1602825.125000
Epoch   19/20 Cost: 723665.375000


learning rate가 너무 크면 위와 같이 cost가 발산한다. 반대로 learning rate가 너무 작으면 cost가 거의 줄어들지 않는다.

In [33]:
# Re-create the classifier and use a very small learning rate (1e-10).
model = SoftmaxClassifierModel()
optimizer = optim.SGD(params=model.parameters(), lr=1e-10)

In [34]:
# Train with the small-learning-rate optimizer and watch the per-epoch cost.
train(model=model, optimizer=optimizer, x_train=x_train, y_train=y_train, loss_func='CE')

Epoch    0/20 Cost: 2.678931
Epoch    1/20 Cost: 2.678931
Epoch    2/20 Cost: 2.678931
Epoch    3/20 Cost: 2.678931
Epoch    4/20 Cost: 2.678931
Epoch    5/20 Cost: 2.678931
Epoch    6/20 Cost: 2.678931
Epoch    7/20 Cost: 2.678931
Epoch    8/20 Cost: 2.678931
Epoch    9/20 Cost: 2.678931
Epoch   10/20 Cost: 2.678931
Epoch   11/20 Cost: 2.678931
Epoch   12/20 Cost: 2.678931
Epoch   13/20 Cost: 2.678931
Epoch   14/20 Cost: 2.678931
Epoch   15/20 Cost: 2.678931
Epoch   16/20 Cost: 2.678931
Epoch   17/20 Cost: 2.678931
Epoch   18/20 Cost: 2.678931
Epoch   19/20 Cost: 2.678931


적절한 숫자로 시작해 발산하면 작게, cost가 줄어들지 않으면 크게 조정하자.

## **Data Preprocessing**

데이터를 zero-center하고 normalize하자.

In [35]:
# Regression data: five samples with three features each and one target per sample.
# Note: this rebinds x_train / y_train, replacing the classification data above.
regression_features = [
    [73, 80, 75],
    [93, 88, 93],
    [89, 91, 90],
    [96, 98, 100],
    [73, 66, 70],
]
regression_targets = [[152], [185], [180], [196], [142]]

x_train = torch.FloatTensor(regression_features)
y_train = torch.FloatTensor(regression_targets)

In [36]:
# Standardize each feature column: subtract the column mean, then divide by
# the column standard deviation (both reduced over dim 0, i.e. across samples).
mu, sigma = x_train.mean(dim=0), x_train.std(dim=0)
norm_x_train = (x_train - mu) / sigma
print(norm_x_train)

tensor([[-1.0674, -0.3758, -0.8398],
        [ 0.7418,  0.2778,  0.5863],
        [ 0.3799,  0.5229,  0.3486],
        [ 1.0132,  1.0948,  1.1409],
        [-1.0674, -1.5197, -1.2360]])


In [54]:
class MultivariateLinearRegressionModel(nn.Module):
    """Linear regression implemented as a single ``nn.Linear`` layer.

    Args:
        in_features: Number of input features per sample (default 3).
        out_features: Number of predicted values per sample (default 1).
    """

    def __init__(self, in_features=3, out_features=1):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return the linear layer's output for ``x``."""
        return self.linear(x)

In [55]:
# Fresh regression model, optimized with plain SGD over all of its parameters.
model = MultivariateLinearRegressionModel()
optimizer = optim.SGD(params=model.parameters(), lr=1e-1)

In [56]:
# Fit the regression model on the standardized inputs with mean-squared-error loss.
train(model=model, optimizer=optimizer, x_train=norm_x_train, y_train=y_train, loss_func="MSE")

Epoch    0/20 Cost: 29549.496094
Epoch    1/20 Cost: 18774.693359
Epoch    2/20 Cost: 11976.083984
Epoch    3/20 Cost: 7653.282227
Epoch    4/20 Cost: 4894.922852
Epoch    5/20 Cost: 3131.966309
Epoch    6/20 Cost: 2004.366455
Epoch    7/20 Cost: 1282.899292
Epoch    8/20 Cost: 821.213989
Epoch    9/20 Cost: 525.747437
Epoch   10/20 Cost: 336.648041
Epoch   11/20 Cost: 215.621246
Epoch   12/20 Cost: 138.159622
Epoch   13/20 Cost: 88.579605
Epoch   14/20 Cost: 56.843933
Epoch   15/20 Cost: 36.528706
Epoch   16/20 Cost: 23.522770
Epoch   17/20 Cost: 15.195030
Epoch   18/20 Cost: 9.861450
Epoch   19/20 Cost: 6.444228


## **Prevent Overfitting**

In [60]:
# Start again from a newly initialized regression model for the regularized run.
model = MultivariateLinearRegressionModel()
optimizer = optim.SGD(params=model.parameters(), lr=1e-1)

In [61]:
# Same MSE training run, this time with the parameter-norm penalty added to the cost.
train(model=model, optimizer=optimizer, x_train=norm_x_train, y_train=y_train,
      loss_func='MSE', regularization=True)

Epoch    0/20 Cost: 29490.474609
Epoch    1/20 Cost: 18806.806641
Epoch    2/20 Cost: 12064.233398
Epoch    3/20 Cost: 7776.406738
Epoch    4/20 Cost: 5040.172363
Epoch    5/20 Cost: 3291.299316
Epoch    6/20 Cost: 2172.690186
Epoch    7/20 Cost: 1456.970093
Epoch    8/20 Cost: 998.961365
Epoch    9/20 Cost: 705.846252
Epoch   10/20 Cost: 518.252075
Epoch   11/20 Cost: 398.187592
Epoch   12/20 Cost: 321.342346
Epoch   13/20 Cost: 272.156982
Epoch   14/20 Cost: 240.673645
Epoch   15/20 Cost: 220.520340
Epoch   16/20 Cost: 207.618347
Epoch   17/20 Cost: 199.357346
Epoch   18/20 Cost: 194.066711
Epoch   19/20 Cost: 190.677444
