# lab04-1 multi-variable linear regression의 PyTorch 구현

### 2018.09.25(화)

#### DATA 
--> "data-01-test-score.csv"

x_1|x_2|x_3|Y|
-----|-----|-----|-----|
73|80|75|152
93|88|93|185
89|91|90|180
96|98|100|196
73|66|70|142
53|46|55|101


In [84]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
from torch import nn, optim
import numpy as np

In [91]:
class ScoreDataset(Dataset):
    """Test-score dataset backed by a comma-separated file.

    ``DataLoader`` needs a ``torch.utils.data.Dataset`` instance, so this
    subclass overrides ``__getitem__`` and ``__len__`` for our data. Every
    column except the last is stored as a feature; the last column is stored
    as the target.

    Args:
        path: CSV file to load. Defaults to ``'data-01-test-score.csv'``.
    """

    def __init__(self, path='data-01-test-score.csv'):
        # ndmin=2 keeps the array two-dimensional even when the file holds a
        # single row; without it loadtxt returns a 1-D array and the column
        # slicing below raises IndexError.
        xy = np.loadtxt(path, delimiter=',', dtype=np.float32, ndmin=2)
        self.len = xy.shape[0]
        self.x_data = torch.from_numpy(xy[:, 0:-1])
        # Indexing with a list keeps the target as a column of shape (rows, 1).
        self.y_data = torch.from_numpy(xy[:, [-1]])

    def __getitem__(self, index):
        """Return the ``(features, target)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of rows loaded from the file."""
        return self.len

In [95]:
# Load the score data and serve it in file order, three rows per mini-batch,
# with num_workers=2 loader workers.
dataset = ScoreDataset()
train_loader = DataLoader(dataset, batch_size=3, shuffle=False, num_workers=2)

In [96]:
# Module-level enumerated pass over the loader so mini-batches can be pulled
# one at a time with next(); yields (batch_index, batch) starting at index 0.
iter_data = enumerate(train_loader)

In [97]:
# Linear model mapping 3 input features to 1 output, with a bias term.
model = nn.Linear(3, 1, bias=True)
# `size_average=False` is deprecated; `reduction='sum'` is its documented
# equivalent and sums the squared errors over the batch instead of averaging.
criterion = torch.nn.MSELoss(reduction='sum')
# Plain stochastic gradient descent over the model's weight and bias.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)



In [102]:
# Variant: instead of training on every mini-batch in each step, consume a
# single mini-batch per iteration and alternate through the loader.
for t in range(2001):
    i, (x_batch, y_batch) = next(iter_data)
    # Once the last mini-batch has been drawn, rebuild the iterator so the
    # next iteration starts again from the first mini-batch.
    if i == len(train_loader) - 1:
        iter_data = enumerate(train_loader, 0)

    y_pred = model(x_batch)
    cost = criterion(y_pred, y_batch)

    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if t % 20 == 0:
        # detach() replaces the deprecated `.data` access; the printed values
        # are the same NumPy arrays as before.
        print(t, 'cost: ', cost.detach().numpy(), "\nweight : ",
              model.weight.detach().numpy(), model.bias.detach().numpy())
        # Evaluation only: no_grad() avoids building an autograd graph, and
        # plain tensors no longer need the deprecated Variable wrapper.
        with torch.no_grad():
            print("Prediction : \n", model(dataset.x_data).numpy())

0 cost:  2.2985172 
weight :  [[1.090768 0.693    0.225755]] [0.4155948]
Prediction : 
 [[152.41327]
 [183.83623]
 [180.8749 ]
 [195.61882]
 [141.5825 ]
 [102.52083]]
20 cost:  2.297848 
weight :  [[1.0911762  0.6930474  0.22530356]] [0.41546136]
Prediction : 
 [[152.41289]
 [183.83626]
 [180.87477]
 [195.61737]
 [141.58371]
 [102.51967]]
40 cost:  2.2971444 
weight :  [[1.0915834 0.6930947 0.2248533]] [0.41532794]
Prediction : 
 [[152.41249]
 [183.83627]
 [180.87466]
 [195.61595]
 [141.5849 ]
 [102.51853]]
60 cost:  2.2964892 
weight :  [[1.0919894  0.6931419  0.22440422]] [0.41519454]
Prediction : 
 [[152.4121 ]
 [183.83629]
 [180.87454]
 [195.61449]
 [141.58607]
 [102.51739]]
80 cost:  2.2957885 
weight :  [[1.0923945 0.6931891 0.2239562]] [0.41506118]
Prediction : 
 [[152.4117 ]
 [183.8363 ]
 [180.87442]
 [195.61307]
 [141.58727]
 [102.51626]]
100 cost:  2.2950876 
weight :  [[1.0927985  0.6932359  0.22350927]] [0.4149278]
Prediction : 
 [[152.41129]
 [183.8363 ]
 [180.87428]
 [195

In [82]:
# Variant: still process the data in mini-batches, but use every mini-batch
# in each outer iteration (one full pass over the loader per step).

for t in range(2001):
    # The batch index was unused, so iterate the loader directly.
    for x_batch, y_batch in train_loader:
        y_pred = model(x_batch)
        cost = criterion(y_pred, y_batch)
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
    if t % 20 == 0:
        # `cost` here is the loss of the last mini-batch of this pass.
        # detach() replaces the deprecated `.data` access; the printed values
        # are the same NumPy arrays as before.
        print(t, 'cost: ', cost.detach().numpy(), "\nweight : ",
              model.weight.detach().numpy(), model.bias.detach().numpy())
        # Evaluation only: no_grad() avoids building an autograd graph, and
        # plain tensors no longer need the deprecated Variable wrapper.
        with torch.no_grad():
            print("Prediction : \n", model(dataset.x_data).numpy())

0 cost:  8.648543 
weight :  [[0.87907785 0.5296523  0.5926111 ]] [-0.03012688]
Prediction : 
 [[150.96057 ]
 [183.44635 ]
 [179.74117 ]
 [195.52838 ]
 [140.5824  ]
 [103.518616]]
20 cost:  8.518045 
weight :  [[0.87977    0.53286976 0.5888615 ]] [-0.03039496]
Prediction : 
 [[150.987  ]
 [183.44487]
 [179.75783]
 [195.53491]
 [140.58252]
 [103.4968 ]]
40 cost:  8.392651 
weight :  [[0.88052046 0.5359688  0.5851669 ]] [-0.03065939]
Prediction : 
 [[151.01236]
 [183.44351]
 [179.77385]
 [195.54094]
 [140.58296]
 [103.47566]]
60 cost:  8.272191 
weight :  [[0.88132614 0.5389552  0.58152485]] [-0.03092034]
Prediction : 
 [[151.03667]
 [183.44229]
 [179.78928]
 [195.54648]
 [140.58368]
 [103.45517]]
80 cost:  8.156147 
weight :  [[0.8821842  0.54183346 0.57793397]] [-0.03117795]
Prediction : 
 [[151.06   ]
 [183.44116]
 [179.80412]
 [195.55159]
 [140.58466]
 [103.43529]]
100 cost:  8.044422 
weight :  [[0.88309175 0.54460853 0.57439244]] [-0.03143238]
Prediction : 
 [[151.08238]
 [183.4401