# Lab04-1 Multivariate Linear Regression

In [12]:
import torch

## Multivariate Linear Regression
입력이 여러개일 때 쓴다.

## Data

In [13]:
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70]])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

## Hypothesis Function
$$
H(x) = \mathbf{W} x + b
$$
$x$는 벡터 형태의 입력이고, $\mathbf{W}$는 matrix 형태이다.

$$
H(x) = w_1x_1 + w_2x_2 + w_3x_3 + b
$$
입력 변수가 3개라면 weight도 3개이다.

```py
# 단순 H(x)계산
hypothesis = x1_train * w1 + x2_train * w2 + x3_train * w3 + b

# matmul()함수 사용
hypothesis = x_train.matmul(W) + b
```

## Cost function: MSE

```py
cost = torch.mean((hypothesis - y_train) ** 2)
```

## Gradient Descent with torch.optim
```py
# optimizer 설정
optimizer = torch.optim.SGD([W, b], lr=1e-5)

optimizer.zero_grad()
cost.backward()
optimizer.step()
```

## Full Code with torch.optim

In [14]:
# 데이터
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70]])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

# 모델 초기화
W = torch.zeros((3, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# optimizer 설정
optimizer = torch.optim.SGD([W, b], lr=1e-5)

nb_epochs = 20
for epoch in range(nb_epochs+1):
    # H(x) 계산
    hypothesis = x_train.matmul(W) + b

    # cost 계산
    cost = torch.mean((hypothesis - y_train) ** 2)
    
    # cost 로 H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    print(f'Epoch {epoch:4d}/{nb_epochs} hypothesis: {hypothesis.squeeze().detach()}, Cost: {cost.item():.6f}')

Epoch    0/20 hypothesis: tensor([0., 0., 0., 0., 0.]), Cost: 29661.800781
Epoch    1/20 hypothesis: tensor([67.2578, 80.8397, 79.6523, 86.7394, 61.6605]), Cost: 9298.520508
Epoch    2/20 hypothesis: tensor([104.9128, 126.0990, 124.2466, 135.3015,  96.1821]), Cost: 2915.712402
Epoch    3/20 hypothesis: tensor([125.9942, 151.4381, 149.2133, 162.4896, 115.5097]), Cost: 915.040527
Epoch    4/20 hypothesis: tensor([137.7968, 165.6247, 163.1911, 177.7112, 126.3307]), Cost: 287.936005
Epoch    5/20 hypothesis: tensor([144.4044, 173.5674, 171.0168, 186.2332, 132.3891]), Cost: 91.371010
Epoch    6/20 hypothesis: tensor([148.1035, 178.0144, 175.3980, 191.0042, 135.7812]), Cost: 29.758139
Epoch    7/20 hypothesis: tensor([150.1744, 180.5042, 177.8508, 193.6753, 137.6805]), Cost: 10.445305
Epoch    8/20 hypothesis: tensor([151.3336, 181.8983, 179.2240, 195.1707, 138.7440]), Cost: 4.391228
Epoch    9/20 hypothesis: tensor([151.9824, 182.6789, 179.9928, 196.0079, 139.3396]), Cost: 2.493135
Epoch   

## nn.Module
```py
# 모델 초기화
W = torch.zeros((3, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# H(x) 계산
hypothesis = x_train.matmul(W) + b

# 대신

import torch.nn as nn

class MultivariateLinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3, 1)

    def forward(self, x):
        return self.linear(x)

hypothesis = model(x_train)
```

## F.mse_loss
```py
# cost 계산
cost = torch.mean((hypothesis - y_train) ** 2)

# 대신 

import torch.nn.functional as F

# cost 계산
cost = F.mse_loss(prediction, y_train)
```

## Full Code 

In [15]:
import torch.nn as nn
import torch.nn.functional as F

class MultivariateLinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(3, 1, bias=True)

    def forward(self, x):
        return self.linear.forward(x)

In [16]:
# 데이터
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70]])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

# 모델 초기화
model = MultivariateLinearRegressionModel()
W = model.linear.weight
b = model.linear.bias

# optimizer 설정
optimizer = torch.optim.SGD([W, b], lr=1e-5)

nb_epochs = 20
for epoch in range(nb_epochs+1):
    # H(x) 계산
    hypothesis = model(x_train)
    # cost 계산
    cost = F.mse_loss(hypothesis, y_train)
    
    # cost 로 H(x) 개선
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    print(f'Epoch {epoch:4d}/{nb_epochs} hypothesis: {hypothesis.squeeze().detach()}, Cost: {cost.item():.6f}')

Epoch    0/20 hypothesis: tensor([-32.4065, -37.8621, -37.7747, -42.1342, -27.8076]), Cost: 43328.320312
Epoch    1/20 hypothesis: tensor([48.8843, 59.8438, 58.4964, 62.7026, 46.7175]), Cost: 13581.401367
Epoch    2/20 hypothesis: tensor([ 94.3960, 114.5457, 112.3950, 121.3969,  88.4414]), Cost: 4257.321289
Epoch    3/20 hypothesis: tensor([119.8762, 145.1714, 142.5708, 154.2576, 111.8011]), Cost: 1334.718628
Epoch    4/20 hypothesis: tensor([134.1416, 162.3177, 159.4652, 172.6551, 124.8795]), Cost: 418.636963
Epoch    5/20 hypothesis: tensor([142.1282, 171.9173, 168.9236, 182.9552, 132.2016]), Cost: 131.494110
Epoch    6/20 hypothesis: tensor([146.5995, 177.2918, 174.2191, 188.7218, 136.3011]), Cost: 41.490135
Epoch    7/20 hypothesis: tensor([149.1027, 180.3009, 177.1838, 191.9503, 138.5963]), Cost: 13.278364
Epoch    8/20 hypothesis: tensor([150.5041, 181.9856, 178.8436, 193.7578, 139.8814]), Cost: 4.435555
Epoch    9/20 hypothesis: tensor([151.2886, 182.9288, 179.7728, 194.7698, 14

# Lab04-2 Loading Data


## "Minibatch" Gradient Descent
데이터 일부로 학습한다.

## PyTorch Dataset and DataLoader

In [22]:
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    def __init__(self) -> None:
        super().__init__()
        self.x_data = [[73, 80, 75],
                       [93, 88, 93],
                       [89, 91, 90],
                       [96, 98, 100],
                       [73, 66, 70]]
        self.y_data = [[152], [185], [180], [196], [142]]

    def __len__(self):
        return len(self.x_data)
    
    def __getitem__(self, idx):
        x = torch.FloatTensor(self.x_data[idx])
        y = torch.FloatTensor(self.y_data[idx])
        return x, y

dataset = CustomDataset()

In [23]:
from torch.utils.data import DataLoader

dataLoader = DataLoader(
    dataset,
    batch_size=2, # 미니배치 크기, 일반적으로 2의 제곱수로 설정한다.(2, 4, 8, ...)
    shuffle=True,
)

## Full code with Dataset and DataLoader

In [38]:
nb_epochs = 20
for epoch in range(nb_epochs+1):
    for batch_idx, samples in enumerate(dataLoader):
        x_train, y_train = samples
        # H(x) 계산
        prediction = model(x_train)

        # cost 계산
        cost = F.mse_loss(prediction, y_train)
        
        # cost 로 H(x) 개선
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        print(f'Epoch {epoch:4d}/{nb_epochs} Batch {batch_idx+1}/{len(dataLoader)} Cost: {cost.item():.6f}')

Epoch    0/20 Batch 1/3 Cost: 0.155487
Epoch    0/20 Batch 2/3 Cost: 1.003635
Epoch    0/20 Batch 3/3 Cost: 0.000126
Epoch    1/20 Batch 1/3 Cost: 0.306942
Epoch    1/20 Batch 2/3 Cost: 0.606459
Epoch    1/20 Batch 3/3 Cost: 0.202264
Epoch    2/20 Batch 1/3 Cost: 0.438182
Epoch    2/20 Batch 2/3 Cost: 0.157852
Epoch    2/20 Batch 3/3 Cost: 0.688954
Epoch    3/20 Batch 1/3 Cost: 0.166257
Epoch    3/20 Batch 2/3 Cost: 0.793525
Epoch    3/20 Batch 3/3 Cost: 0.008346
Epoch    4/20 Batch 1/3 Cost: 0.275853
Epoch    4/20 Batch 2/3 Cost: 0.629544
Epoch    4/20 Batch 3/3 Cost: 0.193495
Epoch    5/20 Batch 1/3 Cost: 0.050203
Epoch    5/20 Batch 2/3 Cost: 0.246860
Epoch    5/20 Batch 3/3 Cost: 1.225606
Epoch    6/20 Batch 1/3 Cost: 0.654072
Epoch    6/20 Batch 2/3 Cost: 0.059007
Epoch    6/20 Batch 3/3 Cost: 0.919358
Epoch    7/20 Batch 1/3 Cost: 0.895725
Epoch    7/20 Batch 2/3 Cost: 0.414997
Epoch    7/20 Batch 3/3 Cost: 0.057230
Epoch    8/20 Batch 1/3 Cost: 0.094718
Epoch    8/20 Batch 2/3 C