# Lab 4-2: Loading Data - 데이터 불러오기


## Review

### Slicing 2D Array - 2차원 배열 슬라이싱
1차원 배열 슬라이싱은 생략하고, 여기서는 2차원 배열 슬라이싱만 복습한다.

In [1]:
import numpy as np

In [2]:
# 3x4 integer matrix holding 1..12 in row-major order.
b = np.arange(1, 13).reshape(3, 4)
print(b)

[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]


In [3]:
b[:, 1]  # every row, column index 1 -> 1-D array

array([ 2,  6, 10])

In [4]:
b[-1:]  # slice starting at the last row; slicing keeps the row axis -> 2-D result

array([[ 9, 10, 11, 12]])

In [5]:
b[-1, ...]  # integer index drops the row axis; `...` selects everything else -> 1-D result

array([ 9, 10, 11, 12])

In [6]:
b[0:2, :]  # rows 0 and 1, all columns

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

### Loading Data from `.csv` file

In [7]:
import numpy as np

In [8]:
# Parse the comma-separated score file into a float32 matrix, then split it:
# every column except the last becomes x_data, the last column becomes y_data.
xy = np.loadtxt('data-01-test-score.csv', dtype=np.float32, delimiter=',')
x_data, y_data = xy[:, :-1], xy[:, [-1]]  # indexing with the list [-1] keeps y_data 2-D

In [9]:
# Print, in order: the shape of x_data, its length, and its first five rows.
for x_summary in (x_data.shape, len(x_data), x_data[:5]):
    print(x_summary)

(25, 3)
25
[[ 73.  80.  75.]
 [ 93.  88.  93.]
 [ 89.  91.  90.]
 [ 96.  98. 100.]
 [ 73.  66.  70.]]


In [10]:
# Print, in order: the shape of y_data, its length, and its first five rows.
for y_summary in (y_data.shape, len(y_data), y_data[:5]):
    print(y_summary)

(25, 1)
25
[[152.]
 [185.]
 [180.]
 [196.]
 [142.]]


## Imports

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [12]:
# For reproducibility: seed PyTorch's random number generator with a fixed value.
torch.manual_seed(1)

<torch._C.Generator at 0x7fe611858f90>

## Low-level Implementation

In [13]:
# Training data: wrap the NumPy arrays as float tensors.
x_train, y_train = torch.FloatTensor(x_data), torch.FloatTensor(y_data)

# Model parameters: weights and bias start at zero and are tracked by autograd.
W = torch.zeros((3, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# Plain SGD over the two parameter tensors.
optimizer = optim.SGD([W, b], lr=1e-5)

nb_epochs = 20
for epoch in range(nb_epochs + 1):
    # Hypothesis H(x) = xW + b (the @ operator is equivalent to .matmul / .mm here).
    hypothesis = x_train @ W + b

    # Cost: mean of the squared differences between hypothesis and targets.
    residual = hypothesis - y_train
    cost = (residual ** 2).mean()

    # Improve H(x): clear old gradients, backpropagate the cost, take one SGD step.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log the cost that was computed before this epoch's parameter update.
    print('Epoch {:4d}/{} Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))

Epoch    0/20 Cost: 26811.960938
Epoch    1/20 Cost: 9920.530273
Epoch    2/20 Cost: 3675.298340
Epoch    3/20 Cost: 1366.260498
Epoch    4/20 Cost: 512.542480
Epoch    5/20 Cost: 196.896637
Epoch    6/20 Cost: 80.190987
Epoch    7/20 Cost: 37.038696
Epoch    8/20 Cost: 21.081343
Epoch    9/20 Cost: 15.178760
Epoch   10/20 Cost: 12.993679
Epoch   11/20 Cost: 12.183023
Epoch   12/20 Cost: 11.880535
Epoch   13/20 Cost: 11.765958
Epoch   14/20 Cost: 11.720851
Epoch   15/20 Cost: 11.701438
Epoch   16/20 Cost: 11.691514
Epoch   17/20 Cost: 11.685116
Epoch   18/20 Cost: 11.680005
Epoch   19/20 Cost: 11.675380
Epoch   20/20 Cost: 11.670952


## High-level Implementation with `nn.Module`

In [14]:
class MultivariateLinearRegressionModel(nn.Module):
    """Linear regression model made of a single ``nn.Linear`` layer.

    Args:
        in_features: Number of input values per sample. Defaults to 3.
        out_features: Number of predicted values per sample. Defaults to 1.
    """

    def __init__(self, in_features: int = 3, out_features: int = 1):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return the linear layer's output for the input batch ``x``."""
        return self.linear(x)

In [15]:
# Training data: wrap the NumPy arrays as float tensors.
x_train, y_train = torch.FloatTensor(x_data), torch.FloatTensor(y_data)

# Model, and an SGD optimizer over the model's parameters.
model = MultivariateLinearRegressionModel()
optimizer = optim.SGD(model.parameters(), lr=1e-5)

nb_epochs = 20
for epoch in range(nb_epochs + 1):
    # Hypothesis H(x): run the full training set through the model.
    prediction = model(x_train)

    # Cost: mean squared error between predictions and targets.
    cost = F.mse_loss(prediction, y_train)

    # Improve H(x): clear old gradients, backpropagate the cost, take one SGD step.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log the cost that was computed before this epoch's parameter update.
    print('Epoch {:4d}/{} Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))

Epoch    0/20 Cost: 28693.490234
Epoch    1/20 Cost: 10618.750000
Epoch    2/20 Cost: 3936.015381
Epoch    3/20 Cost: 1465.219727
Epoch    4/20 Cost: 551.693726
Epoch    5/20 Cost: 213.934616
Epoch    6/20 Cost: 89.052269
Epoch    7/20 Cost: 42.876007
Epoch    8/20 Cost: 25.799639
Epoch    9/20 Cost: 19.482420
Epoch   10/20 Cost: 17.143093
Epoch   11/20 Cost: 16.274508
Epoch   12/20 Cost: 15.949721
Epoch   13/20 Cost: 15.825986
Epoch   14/20 Cost: 15.776565
Epoch   15/20 Cost: 15.754660
Epoch   16/20 Cost: 15.742919
Epoch   17/20 Cost: 15.734917
Epoch   18/20 Cost: 15.728307
Epoch   19/20 Cost: 15.722219
Epoch   20/20 Cost: 15.716357


## PyTorch Dataset and DataLoader
데이터가 너무 커서 한 번에 학습하기 어려우면, `Dataset`과 `DataLoader`를 사용해 데이터를 미니배치 단위로 나누어 학습한다.

In [16]:
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    """Dataset that pairs each feature row with the target row at the same index.

    Args:
        features: Indexable collection of input rows. When omitted, the
            module-level ``x_data`` is used.
        targets: Indexable collection of target rows, one per feature row.
            When omitted, the module-level ``y_data`` is used.

    Raises:
        ValueError: If ``features`` and ``targets`` have different lengths.
    """

    def __init__(self, features=None, targets=None):
        # Fall back to the module-level arrays so that ``CustomDataset()``
        # with no arguments keeps working.
        self.x_data = x_data if features is None else features
        self.y_data = y_data if targets is None else targets

        if len(self.x_data) != len(self.y_data):
            raise ValueError(
                'features and targets must have the same length, got {} and {}'.format(
                    len(self.x_data), len(self.y_data)
                )
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.x_data)

    def __getitem__(self, index):
        """Return the ``(x, y)`` pair at ``index`` as float32 tensors."""
        x = torch.as_tensor(self.x_data[index], dtype=torch.float32)
        y = torch.as_tensor(self.y_data[index], dtype=torch.float32)
        return x, y

# Build the dataset; called without arguments it wraps the module-level x_data / y_data arrays.
dataset = CustomDataset()

In [17]:
from torch.utils.data import DataLoader

# Serve the dataset in shuffled mini-batches of five samples each.
dataloader = DataLoader(dataset, shuffle=True, batch_size=5)

In [18]:
class MultivariateLinearRegressionModel(nn.Module):
    """Linear regression model made of a single ``nn.Linear`` layer.

    Args:
        in_features: Number of input values per sample. Defaults to 3.
        out_features: Number of predicted values per sample. Defaults to 1.
    """

    def __init__(self, in_features: int = 3, out_features: int = 1):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Return the linear layer's output for the input batch ``x``."""
        return self.linear(x)

# Fresh model, and an SGD optimizer over the model's parameters.
model = MultivariateLinearRegressionModel()
optimizer = optim.SGD(model.parameters(), lr=1e-5)

nb_epochs = 20
for epoch in range(nb_epochs + 1):
    # Each item yielded by the dataloader unpacks into one mini-batch of inputs and targets.
    for batch_index, (x_train, y_train) in enumerate(dataloader):
        # Hypothesis H(x) for this mini-batch.
        prediction = model(x_train)

        # Cost: mean squared error on this mini-batch.
        cost = F.mse_loss(prediction, y_train)

        # Improve H(x): clear old gradients, backpropagate the cost, take one SGD step.
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # Log progress; batch numbers are printed 1-based.
        print('Epoch {:4d}/{} Batch{}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, batch_index + 1, len(dataloader), cost.item()
        ))

Epoch    0/20 Batch1/5 Cost: 32866.136719
Epoch    0/20 Batch2/5 Cost: 16468.919922
Epoch    0/20 Batch3/5 Cost: 4846.291992
Epoch    0/20 Batch4/5 Cost: 1476.777344
Epoch    0/20 Batch5/5 Cost: 501.053619
Epoch    1/20 Batch1/5 Cost: 230.399734
Epoch    1/20 Batch2/5 Cost: 131.033630
Epoch    1/20 Batch3/5 Cost: 44.817348
Epoch    1/20 Batch4/5 Cost: 13.150156
Epoch    1/20 Batch5/5 Cost: 38.699871
Epoch    2/20 Batch1/5 Cost: 15.076666
Epoch    2/20 Batch2/5 Cost: 38.589161
Epoch    2/20 Batch3/5 Cost: 11.174902
Epoch    2/20 Batch4/5 Cost: 6.427827
Epoch    2/20 Batch5/5 Cost: 30.469776
Epoch    3/20 Batch1/5 Cost: 10.188444
Epoch    3/20 Batch2/5 Cost: 18.896860
Epoch    3/20 Batch3/5 Cost: 10.976049
Epoch    3/20 Batch4/5 Cost: 13.436320
Epoch    3/20 Batch5/5 Cost: 40.988941
Epoch    4/20 Batch1/5 Cost: 17.055258
Epoch    4/20 Batch2/5 Cost: 11.556127
Epoch    4/20 Batch3/5 Cost: 21.871704
Epoch    4/20 Batch4/5 Cost: 14.151070
Epoch    4/20 Batch5/5 Cost: 34.843025
Epoch    5/20