## **Imports**

In [1]:
# Mount Google Drive at /content/drive so files stored there can be read
# from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [30]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# For reproducibility: seed PyTorch's global random number generator.
# The call returns the Generator object, which the notebook echoes as output.
torch.manual_seed(1)

<torch._C.Generator at 0x7fd7d7950630>

In [7]:
cd /content/drive/Shareddrives/Data/Kaggle/D.COM_AI_COMPETITION_1th/pre_study

/content/drive/Shareddrives/Data/Kaggle/D.COM_AI_COMPETITION_1th/pre_study


## **Slicing Array**

In [4]:
# Slice assignment demo: replace the elements at indices 2 and 3 in place.
nums = list(range(5))

nums[2:4] = [8, 9]
print(nums)

[0, 1, 8, 9, 4]


## **Load data from ```.csv``` file**

In [8]:
# Read the comma-separated score table into a float32 NumPy array.
# The path is relative, so it depends on the current working directory.
xy = np.loadtxt('data/data-01-test-score.csv', delimiter=',', dtype=np.float32)

In [16]:
# Peek at the first three rows of the loaded table.
print(xy[:3])

[[ 73.  80.  75. 152.]
 [ 93.  88.  93. 185.]
 [ 89.  91.  90. 180.]]


In [13]:
# Features: every column except the last one.
x_data = xy[:, :-1]
# Target: the last column, picked with a list index so the result stays 2-D.
y_data = xy[:, [-1]]

In [17]:
# An integer column index drops that axis; a one-element list index keeps it.
print(xy[:, -1].shape, xy[:, [-1]].shape)

(25,) (25, 1)


In [18]:
print(x_data.shape)     # shape of the feature matrix
print(x_data.shape[0])  # number of rows
print(x_data[0:5])      # first five rows

(25, 3)
25
[[ 73.  80.  75.]
 [ 93.  88.  93.]
 [ 89.  91.  90.]
 [ 96.  98. 100.]
 [ 73.  66.  70.]]


In [19]:
print(y_data.shape)     # shape of the target column
print(y_data.shape[0])  # number of rows
print(y_data[0:5])      # first five rows

(25, 1)
25
[[152.]
 [185.]
 [180.]
 [196.]
 [142.]]


## **Low-level implementation**

In [25]:
# Data: wrap the NumPy arrays as float tensors
x_train = torch.FloatTensor(x_data)
y_train = torch.FloatTensor(y_data)

# Model parameters, initialised to zero: a (3, 1) weight matrix and a bias
W = torch.zeros((3, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)

# Plain SGD over the two parameter tensors
optimizer = optim.SGD([W, b], lr=1e-5)

nb_epochs = 1000
for epoch in range(0, nb_epochs + 1):

    # Forward pass: H(x) = xW + b
    hypothesis = x_train @ W + b

    # Cost: mean squared error against the targets
    cost = (hypothesis - y_train).pow(2).mean()

    # Improve H(x): clear old gradients, backpropagate, update parameters
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log only every 100th epoch (the cost shown was computed before the update)
    if epoch % 100 != 0:
        continue
    print('Epoch {:4d}/{} Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))

Epoch    0/1000 Cost: 26811.960938
Epoch  100/1000 Cost: 11.335222
Epoch  200/1000 Cost: 10.945315
Epoch  300/1000 Cost: 10.585911
Epoch  400/1000 Cost: 10.254474
Epoch  500/1000 Cost: 9.948803
Epoch  600/1000 Cost: 9.666830
Epoch  700/1000 Cost: 9.406623
Epoch  800/1000 Cost: 9.166456
Epoch  900/1000 Cost: 8.944720
Epoch 1000/1000 Cost: 8.739944


## **High-level implementation with ```nn.Module```**

In [22]:
class MultivariateLinearRegressionModel(nn.Module):
    """Linear regression over several input features, backed by one ``nn.Linear``.

    Args:
        in_features: Number of input features per sample. Defaults to 3,
            the layer size this notebook originally hard-coded.
        out_features: Number of predicted values per sample. Defaults to 1.
    """

    def __init__(self, in_features=3, out_features=1):
        super().__init__()
        # Single affine layer holding the weight matrix and the bias.
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output."""
        return self.linear(x)

In [26]:
# Model: a freshly initialised linear-regression module
model = MultivariateLinearRegressionModel()
# Optimizer: plain SGD over the module's parameters
optimizer = optim.SGD(model.parameters(), lr=1e-5)

# Data: wrap the NumPy arrays as float tensors
x_train = torch.FloatTensor(x_data)
y_train = torch.FloatTensor(y_data)

nb_epochs = 1000
for epoch in range(0, nb_epochs + 1):

    # Forward pass: H(x)
    prediction = model(x_train)

    # Cost: mean squared error against the targets
    cost = F.mse_loss(input=prediction, target=y_train)

    # Improve H(x): clear old gradients, backpropagate, update parameters
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log only every 100th epoch (the cost shown was computed before the update)
    if epoch % 100 != 0:
        continue
    print('Epoch {:4d}/{} Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))

Epoch    0/1000 Cost: 35661.042969
Epoch  100/1000 Cost: 17.405342
Epoch  200/1000 Cost: 16.464396
Epoch  300/1000 Cost: 15.602725
Epoch  400/1000 Cost: 14.813580
Epoch  500/1000 Cost: 14.090858
Epoch  600/1000 Cost: 13.428946
Epoch  700/1000 Cost: 12.822679
Epoch  800/1000 Cost: 12.267376
Epoch  900/1000 Cost: 11.758728
Epoch 1000/1000 Cost: 11.292800


## **Dataset & DataLoader**

In [28]:
class CustomDataset(Dataset):
    """Map-style dataset that serves (features, target) pairs as float tensors.

    Args:
        x_data: Optional sequence of feature rows. When omitted (together
            with ``y_data``), the five sample rows of three scores that this
            notebook originally hard-coded are used.
        y_data: Optional sequence of target rows, one per feature row.

    Raises:
        ValueError: If only one of ``x_data`` / ``y_data`` is given, or if
            the two sequences have different lengths.
    """

    def __init__(self, x_data=None, y_data=None):
        if (x_data is None) != (y_data is None):
            raise ValueError("x_data and y_data must be provided together")

        if x_data is None:
            # Built-in sample: three scores per row and the matching final score.
            x_data = [[73, 80, 75],
                      [93, 88, 93],
                      [89, 91, 90],
                      [96, 98, 100],
                      [73, 66, 70]]
            y_data = [[152], [185], [180], [196], [142]]

        if len(x_data) != len(y_data):
            raise ValueError(
                "x_data and y_data must have the same length "
                "(got {} and {})".format(len(x_data), len(y_data))
            )

        self.x_data = x_data
        self.y_data = y_data

    def __len__(self):
        """Return the number of samples."""
        return len(self.x_data)

    def __getitem__(self, indx):
        """Return the ``indx``-th (features, target) pair as float32 tensors."""
        # as_tensor with an explicit dtype accepts plain lists as well as arrays.
        x = torch.as_tensor(self.x_data[indx], dtype=torch.float32)
        y = torch.as_tensor(self.y_data[indx], dtype=torch.float32)

        return x, y

In [29]:
# Build the dataset with its built-in sample rows.
dataset = CustomDataset()

In [32]:
# Serve the dataset in mini-batches of two samples, in shuffled order.
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

In [52]:
# Fresh model and optimizer for the mini-batch run.
model = MultivariateLinearRegressionModel()
optimizer = optim.SGD(model.parameters(), lr=1e-5)

epochs = 600
for epoch in range(epochs+1):
    # Each batch gets its own names so the full-dataset x_train / y_train
    # tensors built in earlier cells are not overwritten by a single batch.
    for batch_indx, (x_batch, y_batch) in enumerate(dataloader):
        predict = model(x_batch)
        cost = F.mse_loss(predict, y_batch)

        # Clear old gradients, backpropagate, update parameters.
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # Log every batch of every 200th epoch.
        if epoch % 200 == 0:
            print('Epoch {:4d}/{} Batch {}/{} Cost: {:.6f}'.format(
                epoch, epochs, batch_indx+1, len(dataloader), cost.item()
            ))

Epoch    0/600 Batch 1/3 Cost: 9102.730469
Epoch    0/600 Batch 2/3 Cost: 2070.630127
Epoch    0/600 Batch 3/3 Cost: 497.563080
Epoch  200/600 Batch 1/3 Cost: 1.395558
Epoch  200/600 Batch 2/3 Cost: 1.910182
Epoch  200/600 Batch 3/3 Cost: 3.710279
Epoch  400/600 Batch 1/3 Cost: 0.185924
Epoch  400/600 Batch 2/3 Cost: 3.731120
Epoch  400/600 Batch 3/3 Cost: 0.785459
Epoch  600/600 Batch 1/3 Cost: 1.188651
Epoch  600/600 Batch 2/3 Cost: 0.201798
Epoch  600/600 Batch 3/3 Cost: 2.351507
