# Multivariate Linear Regression
- Matrix Data
- Use nn.Module
- Mini-Batch GD


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import handcalcs.render

## Matrix data
$$
\begin{pmatrix}
x_1  x_2  x_3
\end{pmatrix}
\cdot
\begin{pmatrix}
w_1 \\
w_2 \\
w_3 \\
\end{pmatrix}
=
\begin{pmatrix}
x_1w_1 + x_2w_2 + x_3w_3
\end{pmatrix}
$$
$$ H(X) = XW $$

In [None]:
# data set
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70]])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])
print(x_train.shape, y_train.shape)

torch.Size([5, 3]) torch.Size([5, 1])


In [None]:
# 모델 초기화
W = torch.zeros((3,1), requires_grad=True)
b = torch.zeros(1,requires_grad=True)
# optimizer 설정
optimizer = optim.SGD([W,b], lr = 1e-5)

nb_epochs = 20
for epoch in range(nb_epochs + 1):
    
    # H(x) 계산
    hypo = x_train.matmul(W) + b
    # cost 계산
    cost = torch.mean((hypo - y_train)**2)

    # cost로 H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    print("Epoch {:4d}/{} H: {} Cost: {:.6f}".format(epoch,nb_epochs,hypo.squeeze().detach(),cost.item()))


Epoch    0/20 H: tensor([0., 0., 0., 0., 0.]) Cost: 29661.800781
Epoch    1/20 H: tensor([67.2578, 80.8397, 79.6523, 86.7394, 61.6605]) Cost: 9298.520508
Epoch    2/20 H: tensor([104.9128, 126.0990, 124.2466, 135.3015,  96.1821]) Cost: 2915.712402
Epoch    3/20 H: tensor([125.9942, 151.4381, 149.2133, 162.4896, 115.5097]) Cost: 915.040527
Epoch    4/20 H: tensor([137.7967, 165.6247, 163.1911, 177.7112, 126.3307]) Cost: 287.936096
Epoch    5/20 H: tensor([144.4044, 173.5674, 171.0168, 186.2332, 132.3891]) Cost: 91.371071
Epoch    6/20 H: tensor([148.1035, 178.0143, 175.3980, 191.0042, 135.7812]) Cost: 29.758249
Epoch    7/20 H: tensor([150.1744, 180.5042, 177.8509, 193.6753, 137.6805]) Cost: 10.445267
Epoch    8/20 H: tensor([151.3336, 181.8983, 179.2240, 195.1707, 138.7440]) Cost: 4.391237
Epoch    9/20 H: tensor([151.9824, 182.6789, 179.9928, 196.0079, 139.3396]) Cost: 2.493121
Epoch   10/20 H: tensor([152.3454, 183.1161, 180.4231, 196.4765, 139.6732]) Cost: 1.897688
Epoch   11/20 H: 

## Use nn.Module

In [None]:
class MultivariateRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3,1)    # 입력 차원 3, 출력 차원 1

    def forward(self,x):
        return self.linear(x)

In [None]:
# 모델 초기화
model = MultivariateRegressionModel()
# optimizer 설정
optimizer = optim.SGD(model.parameters(), lr = 1e-5)

nb_epochs = 20
for epoch in range(nb_epochs + 1):
    # h(x) 계산
    hypo = model(x_train)
    # cost 계산
    cost = F.mse_loss(hypo, y_train)
    # h(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    print('Epoch {:4d}/{} Cost: {:.6f}'.format(
        epoch, nb_epochs, cost.item()
    ))


Epoch    0/20 Cost: 11805.096680
Epoch    1/20 Cost: 3701.325684
Epoch    2/20 Cost: 1161.223267
Epoch    3/20 Cost: 365.035400
Epoch    4/20 Cost: 115.472191
Epoch    5/20 Cost: 37.247196
Epoch    6/20 Cost: 12.727512
Epoch    7/20 Cost: 5.041341
Epoch    8/20 Cost: 2.631833
Epoch    9/20 Cost: 1.876171
Epoch   10/20 Cost: 1.638941
Epoch   11/20 Cost: 1.564190
Epoch   12/20 Cost: 1.540381
Epoch   13/20 Cost: 1.532529
Epoch   14/20 Cost: 1.529690
Epoch   15/20 Cost: 1.528409
Epoch   16/20 Cost: 1.527630
Epoch   17/20 Cost: 1.526990
Epoch   18/20 Cost: 1.526437
Epoch   19/20 Cost: 1.525856
Epoch   20/20 Cost: 1.525290


## Mini-Batch GD
 DataLoader(dataset,batch_size=2,shuffle=True,)  
    - batch_size : 각 mini batch의 크기, 통상적으로 2의 제곱수  
    - shuffle : Epoch 마다 데이터 셋을 섞어서 순서 바꿈

In [49]:
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

class CustomDataset(Dataset):
    def __init__(self):
        self.x_data = [[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70]]
        self.y_data = [[152], [185], [180], [196], [142]]
    
    def __len__(self):  # dataset의 총 data 수
        return len(self.x_data)

    def __getitem__(self, idx):     # index 입력 받을 시, 상응하는 데이터 반환
        x = torch.FloatTensor(self.x_data[idx])
        y = torch.FloatTensor(self.y_data[idx])
        return x, y
    
dataset = CustomDataset()
dataloader = DataLoader(dataset,batch_size=2,shuffle=True)

for epoch in range(nb_epochs + 1):
    for batch_idx, samples in enumerate(dataloader):
        x_train, y_train = samples
        # h(x)
        hypo = model(x_train)
        # cost -> mse 방식
        cost = F.mse_loss(hypo,y_train)
        # cost 로 h(x) 개선
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        print('Epoch {:4d}/{} Batch {}/{} hypothesis: {} Cost: {:.6f}'.format(
            epoch, nb_epochs, batch_idx+1, len(dataloader), hypo.squeeze().detach(), cost.item()
        ))

Epoch    0/20 Batch 1/3 hypothesis: tensor([180.2531, 142.9367]) Cost: 0.470684
Epoch    0/20 Batch 2/3 hypothesis: tensor([196.8991, 185.9913]) Cost: 0.895543
Epoch    0/20 Batch 3/3 hypothesis: 149.28053283691406 Cost: 7.395502
Epoch    1/20 Batch 1/3 hypothesis: tensor([180.6496, 186.6332]) Cost: 1.544689
Epoch    1/20 Batch 2/3 hypothesis: tensor([142.8073, 149.7513]) Cost: 2.854131
Epoch    1/20 Batch 3/3 hypothesis: 197.31605529785156 Cost: 1.732002
Epoch    2/20 Batch 1/3 hypothesis: tensor([142.5072, 149.4253]) Cost: 3.443201
Epoch    2/20 Batch 2/3 hypothesis: tensor([186.1096, 197.0290]) Cost: 1.145004
Epoch    2/20 Batch 3/3 hypothesis: 179.5895538330078 Cost: 0.168466
Epoch    3/20 Batch 1/3 hypothesis: tensor([179.7891, 196.6517]) Cost: 0.234626
Epoch    3/20 Batch 2/3 hypothesis: tensor([142.4794, 185.6348]) Cost: 0.316393
Epoch    3/20 Batch 3/3 hypothesis: 149.1889190673828 Cost: 7.902176
Epoch    4/20 Batch 1/3 hypothesis: tensor([143.1799, 180.5753]) Cost: 0.861552
Ep