# 파트 소개 및 실습 환경 설정

## 도커 환경설정

* 도커 : 컨테이너 기반의 가상 시스템
    * LINUX 기반

# Machine Learning & PyTorch Basic

## Tensor Manipulation 1

In [None]:
import numpy as np
import torch

1D Array with NumPy

In [None]:
t = np.array([0., 1., 2., 3., 4., 5., 6.])
print(t)

[0. 1. 2. 3. 4. 5. 6.]


In [None]:
print('Rank of t : ', t.ndim)
print('Shape of t: ', t.shape)

Rank of t :  1
Shape of t:  (7,)


In [None]:
print('t[0] t[1] t[-1] = ', t[0], t[1], t[-1]) # Element
print('t[2:5] t[4:-1]  = ', t[2:5], t[4:-1])   # Slicing
print('t[:2] t[3:]     = ', t[:2], t[3:])      # Slicing 

t[0] t[1] t[-1] =  0.0 1.0 6.0
t[2:5] t[4:-1]  =  [2. 3. 4.] [4. 5.]
t[:2] t[3:]     =  [0. 1.] [3. 4. 5. 6.]


2D Array with NumPy

In [None]:
t = np.array([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.], [10., 11., 12.]])
print(t)

[[ 1.  2.  3.]
 [ 4.  5.  6.]
 [ 7.  8.  9.]
 [10. 11. 12.]]


In [None]:
print('Rank of t : ', t.ndim)
print('Shape of t: ', t.shape)

Rank of t :  2
Shape of t:  (4, 3)


1D Array with PyTorch

In [None]:
t = torch.FloatTensor([0., 1., 2., 3., 4., 5., 6.])
print(t)

tensor([0., 1., 2., 3., 4., 5., 6.])


In [None]:
print(t.dim()) # rank
print(t.shape) # shape
print(t.size())  # shape
print(t[0], t[1], t[-1])   # Element
print(t[2:5], t[4:-1])     # Slicing
print(t[:2], t[3:])        # Slicing

1
torch.Size([7])
torch.Size([7])
tensor(0.) tensor(1.) tensor(6.)
tensor([2., 3., 4.]) tensor([4., 5.])
tensor([0., 1.]) tensor([3., 4., 5., 6.])


2D Array with PyTorch

In [None]:
t = torch.FloatTensor([[1., 2., 3.], 
                       [4., 5., 6.], 
                       [7., 8., 9.], 
                       [10., 11., 12.]])

print(t)

tensor([[ 1.,  2.,  3.],
        [ 4.,  5.,  6.],
        [ 7.,  8.,  9.],
        [10., 11., 12.]])


In [None]:
print(t.dim())  # rank
print(t.size()) # shape
print(t[:, 1])
print(t[:, 1].size())
print(t[:, :-1])

2
torch.Size([4, 3])
tensor([ 2.,  5.,  8., 11.])
torch.Size([4])
tensor([[ 1.,  2.],
        [ 4.,  5.],
        [ 7.,  8.],
        [10., 11.]])


Broadcasting

In [None]:
# Same shape
m1 = torch.FloatTensor([[3, 3]])
m2 = torch.FloatTensor([[2, 2]])

print(m1 + m2)

tensor([[5., 5.]])


In [None]:
# Vector + scalar
m1 = torch.FloatTensor([[1, 2]])
m2 = torch.FloatTensor([3])  # one element: broadcast against m1 as if it were [[3, 3]]

print(m1 + m2)

tensor([[4., 5.]])


In [None]:
# 1 x 2 vector + 2 x 1 vector: both operands are broadcast to 2 x 2 before adding

m1 = torch.FloatTensor([[1, 2]])
m2 = torch.FloatTensor([[3], [4]])
print(m1 + m2)

tensor([[4., 5.],
        [5., 6.]])


Multiplication vs Matrix Multiplication

In [None]:
print()
print('-------------')
print('Mul vs Matmul')
print('-------------')

m1 = torch.FloatTensor([[1, 2], [3, 4]])
m2 = torch.FloatTensor([[1], [2]])

print('Shape of Matrix 1 : ', m1.shape) # 2 * 2
print('Shape of Matrix 2 : ', m2.shape) # 2 * 1
print(m1.matmul(m2)) # 2 * 1

m1 = torch.FloatTensor([[1, 2], [3, 4]])
m2 = torch.FloatTensor([[1], [2]])

print('Shape of Matrix 1 : ', m1.shape) # 2 * 2
print('Shape of Matrix 2 : ', m2.shape) # 2 * 1
print(m1 * m2) # 2 * 2
print(m1.mul(m2))


-------------
Mul vs Matmul
-------------
Shape of Matrix 1 :  torch.Size([2, 2])
Shape of Matrix 2 :  torch.Size([2, 1])
tensor([[ 5.],
        [11.]])
Shape of Matrix 1 :  torch.Size([2, 2])
Shape of Matrix 2 :  torch.Size([2, 1])
tensor([[1., 2.],
        [6., 8.]])
tensor([[1., 2.],
        [6., 8.]])


Mean

In [None]:
t = torch.FloatTensor([[1, 2]])
print(t.mean())

tensor(1.5000)


In [None]:
# Can't use mean() on integers
t = torch.LongTensor([1, 2])
try:
    print(t.mean())
except Exception as exc:
    print(exc)

mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long


In [None]:
t = torch.FloatTensor([[1, 2], [3, 4]])
print(t)

tensor([[1., 2.],
        [3., 4.]])


In [None]:
print(t.mean())
print(t.mean(dim=0)) # 1과 3의 평균, 2와 4의 평균
print(t.mean(dim=1)) # 1과 2의 평균, 3과 4의 평균
print(t.mean(dim=-1))# 1과 2의 평균, 3과 4의 평균

tensor(2.5000)
tensor([2., 3.])
tensor([1.5000, 3.5000])
tensor([1.5000, 3.5000])


Sum

In [None]:
t = torch.FloatTensor([[1, 2], [3, 4]])
print(t)

tensor([[1., 2.],
        [3., 4.]])


In [None]:
print(t.sum())
print(t.sum(dim=0)) # 1+3, 2+4
print(t.sum(dim=1)) # 1+2, 3+4
print(t.sum(dim=-1))# 1+2, 3+4

tensor(10.)
tensor([4., 6.])
tensor([3., 7.])
tensor([3., 7.])


Max and Argmax

In [None]:
t = torch.FloatTensor([[1, 2], [3, 4]])
print(t)

tensor([[1., 2.],
        [3., 4.]])


In [None]:
print(t.max()) # 가장 큰 값

tensor(4.)


In [None]:
print(t.max(dim = 0)) # 1과 3 / 2와 4 비교
print('Max : ', t.max(dim=0)[0]) # values
print('Argmax : ', t.max(dim=0)[1]) # indices

torch.return_types.max(
values=tensor([3., 4.]),
indices=tensor([1, 1]))
Max :  tensor([3., 4.])
Argmax :  tensor([1, 1])


In [None]:
print(t.max(dim = 1)) # 1과 2 / 3과 4 비교
print(t.max(dim = -1))

torch.return_types.max(
values=tensor([2., 4.]),
indices=tensor([1, 1]))
torch.return_types.max(
values=tensor([2., 4.]),
indices=tensor([1, 1]))


## Tensor Manipulation 2

View(Reshape)

In [None]:
t = np.array([[[0, 1, 2],
               [3, 4, 5]],
             
              [[6, 7, 8],
               [9, 10, 11]]])

ft = torch.FloatTensor(t)
print(ft.shape)

torch.Size([2, 2, 3])


In [None]:
print(ft.view([-1, 3])) # -1: this size is inferred from the element count and the other sizes
print(ft.view([-1, 3]).shape)

tensor([[ 0.,  1.,  2.],
        [ 3.,  4.,  5.],
        [ 6.,  7.,  8.],
        [ 9., 10., 11.]])
torch.Size([4, 3])


In [None]:
print(ft.view([-1, 1, 3]))
print(ft.view([-1, 1, 3]).shape)

tensor([[[ 0.,  1.,  2.]],

        [[ 3.,  4.,  5.]],

        [[ 6.,  7.,  8.]],

        [[ 9., 10., 11.]]])
torch.Size([4, 1, 3])


Squeeze

In [None]:
ft = torch.FloatTensor([[0], [1], [2]])
print(ft)
print(ft.shape)

tensor([[0.],
        [1.],
        [2.]])
torch.Size([3, 1])


In [None]:
print(ft.squeeze())
print(ft.squeeze().shape)

tensor([0., 1., 2.])
torch.Size([3])


squeeze(dim = ?) -> 해당 dim에 element 개수가 1이면 없애준다.

Unsqueeze -> 원하는 dim에 element 개수 1개를 넣어준다.

In [None]:
ft = torch.Tensor([0, 1, 2])
print(ft.shape)

torch.Size([3])


In [None]:
print(ft.unsqueeze(0))
print(ft.unsqueeze(0).shape)

tensor([[0., 1., 2.]])
torch.Size([1, 3])


In [None]:
print(ft.view([1,-1]))
print(ft.view([1,-1]).shape)

tensor([[0., 1., 2.]])
torch.Size([1, 3])


In [None]:
print(ft.unsqueeze(1))
print(ft.unsqueeze(1).shape)

tensor([[0.],
        [1.],
        [2.]])
torch.Size([3, 1])


In [None]:
print(ft.unsqueeze(-1))
print(ft.unsqueeze(-1).shape) # -1은 마지막 dim을 의미하므로 현재 1과 같다.

tensor([[0.],
        [1.],
        [2.]])
torch.Size([3, 1])


Type Casting

In [None]:
lt = torch.LongTensor([1, 2, 3, 4]) # LongTensor: holds integer values
print(lt)

tensor([1, 2, 3, 4])


In [None]:
print(lt.float())

tensor([1., 2., 3., 4.])


In [None]:
bt = torch.ByteTensor([True, False, False, True])
print(bt)

tensor([1, 0, 0, 1], dtype=torch.uint8)


In [None]:
print(bt.long())
print(bt.float())

tensor([1, 0, 0, 1])
tensor([1., 0., 0., 1.])


Concatenate

In [None]:
x = torch.FloatTensor([[1, 2], [3, 4]])
y = torch.FloatTensor([[5, 6], [7, 8]])

In [None]:
x

tensor([[1., 2.],
        [3., 4.]])

In [None]:
y

tensor([[5., 6.],
        [7., 8.]])

In [None]:
print(torch.cat([x, y], dim = 0))
print(torch.cat([x, y], dim = 1))

tensor([[1., 2.],
        [3., 4.],
        [5., 6.],
        [7., 8.]])
tensor([[1., 2., 5., 6.],
        [3., 4., 7., 8.]])


Stacking

In [None]:
x = torch.FloatTensor([1, 4])
y = torch.FloatTensor([2, 5]) 
z = torch.FloatTensor([3, 6]) 

In [None]:
print(torch.stack([x, y, z])) # 3 * 2
print(torch.stack([x, y, z], dim = 1)) # 2 * 3

tensor([[1., 4.],
        [2., 5.],
        [3., 6.]])
tensor([[1., 2., 3.],
        [4., 5., 6.]])


In [None]:
print(torch.cat([x.unsqueeze(0), y.unsqueeze(0), z.unsqueeze(0)], dim = 0))

tensor([[1., 4.],
        [2., 5.],
        [3., 6.]])


In [None]:
x.shape

torch.Size([2])

In [None]:
x.unsqueeze(0).shape

torch.Size([1, 2])

Ones and Zeros

In [None]:
x = torch.FloatTensor([[0,1,2],[2,1,0]])
print(x)

tensor([[0., 1., 2.],
        [2., 1., 0.]])


In [None]:
print(torch.ones_like(x))
print(torch.zeros_like(x))

tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[0., 0., 0.],
        [0., 0., 0.]])


In-place Operation

In [None]:
x = torch.FloatTensor([[1, 2], [3, 4]])

In [None]:
print(x)
print(x.mul(2.)) # x에 영구적으로 반영 안됨
print(x)
print(x.mul_(2.)) # '_'를 통해 x에 영구적으로 반영
print(x)

tensor([[1., 2.],
        [3., 4.]])
tensor([[2., 4.],
        [6., 8.]])
tensor([[1., 2.],
        [3., 4.]])
tensor([[2., 4.],
        [6., 8.]])
tensor([[2., 4.],
        [6., 8.]])


## Linear regression

In [None]:
# Define the data
x_train = torch.FloatTensor([[1], [2], [3]]) # inputs
y_train = torch.FloatTensor([[2], [4], [6]]) # targets

# Initialize the hypothesis parameters
W = torch.zeros(1, requires_grad = True)
b = torch.zeros(1, requires_grad = True)

# Define the optimizer
optimizer = torch.optim.SGD([W, b], lr = 0.01) # lr : learning rate

# Training loop
nb_epochs = 1000
for epoch in range(1, nb_epochs + 1):
    hypothesis = x_train * W + b # predict with the current W and b
    cost = torch.mean((hypothesis - y_train) ** 2) # mean squared error
    
    # Learn with the optimizer
    optimizer.zero_grad() # reset the gradients
    cost.backward() # compute the gradients
    optimizer.step() # update W and b

## Deeper Look at GD

In [None]:
# Data
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[1], [2], [3]])

# Initialize the model
W = torch.zeros(1)

# Set the learning rate
lr = 0.1

nb_epochs = 10
for epoch in range(nb_epochs + 1):

    # Compute H(x)
    hypothesis = x_train * W

    # Compute the cost and, by hand, the gradient used for the update
    cost = torch.mean((hypothesis - y_train) ** 2)
    # NOTE: this sums over the 3 samples and drops the factor 2, so it is 3/2
    # times the exact derivative of the mean-squared cost above
    # (2 * mean((W * x - y) * x)); the constant only rescales the step size.
    gradient = torch.sum((W * x_train - y_train) * x_train)

    print('Epoch {:4d}/{} W: {:.3f}, Cost: {:.6f}'.format(epoch, nb_epochs, W.item(), cost.item()))

    # Update W with the gradient
    W -= lr * gradient

Epoch    0/10 W: 0.000, Cost: 4.666667
Epoch    1/10 W: 1.400, Cost: 0.746666
Epoch    2/10 W: 0.840, Cost: 0.119467
Epoch    3/10 W: 1.064, Cost: 0.019115
Epoch    4/10 W: 0.974, Cost: 0.003058
Epoch    5/10 W: 1.010, Cost: 0.000489
Epoch    6/10 W: 0.996, Cost: 0.000078
Epoch    7/10 W: 1.002, Cost: 0.000013
Epoch    8/10 W: 0.999, Cost: 0.000002
Epoch    9/10 W: 1.000, Cost: 0.000000
Epoch   10/10 W: 1.000, Cost: 0.000000


torch.optim 활용

In [None]:
# Data
x_train = torch.FloatTensor([[1], [2], [3]])
y_train = torch.FloatTensor([[1], [2], [3]])

# Initialize the model
W = torch.zeros(1, requires_grad = True)

# Set up the optimizer
optimizer = torch.optim.SGD([W], lr = 0.15)

nb_epochs = 10
for epoch in range(nb_epochs + 1):

    # Compute H(x)
    hypothesis = x_train * W

    # Compute the cost
    cost = torch.mean((hypothesis - y_train) ** 2)

    print('Epoch {:4d}/{} W: {:.3f}, Cost: {:.6f}'.format(epoch, nb_epochs, W.item(), cost.item()))

    # Improve H(x) using the cost: reset gradients, backpropagate, update W
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

Epoch    0/10 W: 0.000, Cost: 4.666667
Epoch    1/10 W: 1.400, Cost: 0.746667
Epoch    2/10 W: 0.840, Cost: 0.119467
Epoch    3/10 W: 1.064, Cost: 0.019115
Epoch    4/10 W: 0.974, Cost: 0.003058
Epoch    5/10 W: 1.010, Cost: 0.000489
Epoch    6/10 W: 0.996, Cost: 0.000078
Epoch    7/10 W: 1.002, Cost: 0.000013
Epoch    8/10 W: 0.999, Cost: 0.000002
Epoch    9/10 W: 1.000, Cost: 0.000000
Epoch   10/10 W: 1.000, Cost: 0.000000


## Multivariable Linear regression

In [None]:
# Data
x_train = torch.FloatTensor([[73, 80, 75],
                            [93, 88, 93],
                            [89, 91, 90],
                            [96, 98, 100],
                            [73, 66, 70]])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])
# Initialize the model
W = torch.zeros((3, 1), requires_grad=True)
b = torch.zeros(1, requires_grad=True)
# Set up the optimizer
optimizer = torch.optim.SGD([W, b], lr=1e-5)


nb_epochs = 20
for epoch in range(nb_epochs + 1):

    # Compute H(x)
    hypothesis = x_train.matmul(W) + b # or .mm or @
    # Compute the cost
    cost = torch.mean((hypothesis - y_train) ** 2)
    # Improve H(x) using the cost
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()
    # hypothesis/cost printed here were computed before this epoch's update
    print('Epoch {:4d}/{} hypothesis: {} Cost: {:.6f}'.format(
        epoch, nb_epochs, hypothesis.squeeze().detach(),
        cost.item()
    ))

Epoch    0/20 hypothesis: tensor([0., 0., 0., 0., 0.]) Cost: 29661.800781
Epoch    1/20 hypothesis: tensor([67.2578, 80.8397, 79.6523, 86.7394, 61.6605]) Cost: 9298.520508
Epoch    2/20 hypothesis: tensor([104.9128, 126.0990, 124.2466, 135.3015,  96.1821]) Cost: 2915.712402
Epoch    3/20 hypothesis: tensor([125.9942, 151.4381, 149.2133, 162.4896, 115.5097]) Cost: 915.040527
Epoch    4/20 hypothesis: tensor([137.7967, 165.6247, 163.1911, 177.7112, 126.3307]) Cost: 287.936096
Epoch    5/20 hypothesis: tensor([144.4044, 173.5674, 171.0168, 186.2332, 132.3891]) Cost: 91.371063
Epoch    6/20 hypothesis: tensor([148.1035, 178.0143, 175.3980, 191.0042, 135.7812]) Cost: 29.758249
Epoch    7/20 hypothesis: tensor([150.1744, 180.5042, 177.8509, 193.6753, 137.6805]) Cost: 10.445267
Epoch    8/20 hypothesis: tensor([151.3336, 181.8983, 179.2240, 195.1707, 138.7440]) Cost: 4.391237
Epoch    9/20 hypothesis: tensor([151.9824, 182.6789, 179.9928, 196.0079, 139.3396]) Cost: 2.493121
Epoch   10/20 hypo

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class MultivariateLinearRegressionModel(nn.Module):
    """Linear regression module: one affine layer from 3 inputs to 1 output."""

    def __init__(self):
        super().__init__()
        # Single affine layer; its weight and bias are the only parameters.
        self.linear = nn.Linear(in_features=3, out_features=1)

    def forward(self, x):
        """Return the linear layer's output for ``x``."""
        prediction = self.linear(x)
        return prediction

In [None]:
# Data
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70]])

y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

# Initialize the model; the module replaces the hand-written parameters:
# W = torch.zeros((3, 1), requires_grad = True)
# b = torch.zeros(1, requires_grad = True)
model = MultivariateLinearRegressionModel()

# Set up the optimizer
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-5)

nb_epochs = 20
for epoch in range(nb_epochs + 1):

    # Compute H(x); calling the module replaces the manual forms:
    # hypothesis = x1_train * w1 + x2_train * w2 + x3_train * w3 + b
    # hypothesis = x_train.matmul(W) + b
    hypothesis = model(x_train)

    # Compute the cost; F.mse_loss replaces:
    # cost = torch.mean((hypothesis - y_train) ** 2)
    cost = F.mse_loss(hypothesis, y_train)

    # Improve H(x) using the cost
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # The cost printed here was computed before this epoch's update
    print('Epoch {:4d}/{} Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))

Epoch    0/20 Cost: 12986.235352
Epoch    1/20 Cost: 4071.806152
Epoch    2/20 Cost: 1277.605103
Epoch    3/20 Cost: 401.771942
Epoch    4/20 Cost: 127.243996
Epoch    5/20 Cost: 41.193333
Epoch    6/20 Cost: 14.220746
Epoch    7/20 Cost: 5.765716
Epoch    8/20 Cost: 3.115031
Epoch    9/20 Cost: 2.283747
Epoch   10/20 Cost: 2.022676
Epoch   11/20 Cost: 1.940341
Epoch   12/20 Cost: 1.914045
Epoch   13/20 Cost: 1.905329
Epoch   14/20 Cost: 1.902097
Epoch   15/20 Cost: 1.900598
Epoch   16/20 Cost: 1.899634
Epoch   17/20 Cost: 1.898863
Epoch   18/20 Cost: 1.898136
Epoch   19/20 Cost: 1.897412
Epoch   20/20 Cost: 1.896700


## Loading Data

In [None]:
import torch
from torch.utils.data import Dataset # torch.utils.data.Dataset 상속
from torch.utils.data import DataLoader # torch.utils.data.DataLader 사용

class CustomDataset(Dataset):
    """In-memory dataset of five samples: three input values and one target each."""

    def __init__(self):
        # Row 2 is [93, 88, 93], matching the other copies of this dataset in
        # the file (it previously read 89 here).
        self.x_data = [[73, 80, 75],
                       [93, 88, 93],
                       [89, 91, 90],
                       [96, 98, 100],
                       [73, 66, 70]]
        self.y_data = [[152], [185], [180], [196], [142]]

    def __len__(self):
        """Return the total number of samples in the dataset."""
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair at ``idx`` as two FloatTensors."""
        x = torch.FloatTensor(self.x_data[idx])
        y = torch.FloatTensor(self.y_data[idx])

        return x, y

dataset = CustomDataset()

dataloader = DataLoader(
    dataset,
    batch_size = 2, # size of each minibatch / conventionally a power of two
    shuffle = True, # reshuffle the dataset every epoch so the sample order changes
)

# NOTE: `model`, `optimizer` and `F` are not defined in this cell; they must
# already exist in the session (run the cells that create them first).
nb_epochs = 20
for epoch in range(nb_epochs + 1):
    for batch_idx, samples in enumerate(dataloader): # minibatch index and its data
        x_train, y_train = samples
        # Compute H(x)
        prediction = model(x_train)

        # Compute the cost
        cost = F.mse_loss(prediction, y_train)

        # Improve H(x) using the cost
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # The format string has five fields; the cost argument was missing,
        # which made str.format raise IndexError.
        print('Epoch {:4d}/{} Batch {}/{} Cost: {:.6f}'.format(
            epoch, nb_epochs, batch_idx+1, len(dataloader), # minibatches per epoch
            cost.item()
        ))

NameError: ignored

## Logistic Regression

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
# For reproducibility
torch.manual_seed(1)

<torch._C.Generator at 0x7f33d4a9b7b0>

In [None]:
# training data
x_data = [[1, 2], [2, 3], [3, 1], [4, 3], [5, 3], [6, 2]]
y_data = [[0], [0], [0], [1], [1], [1]]

In [None]:
x_train = torch.FloatTensor(x_data)
y_train = torch.FloatTensor(y_data)

In [None]:
print(x_train.shape)
print(y_train.shape)

torch.Size([6, 2])
torch.Size([6, 1])


In [None]:
print('e^1 equals: ', torch.exp(torch.FloatTensor([1])))

e^1 equals:  tensor([2.7183])


In [None]:
W = torch.zeros((2, 1), requires_grad = True)
b = torch.zeros(1, requires_grad = True)

In [None]:
# hypothesis = 1 / (1 + torch.exp(-(x_train.matmul(W) + b)))
hypothesis = torch.sigmoid(x_train.matmul(W) + b)

In [None]:
print(hypothesis)
print(hypothesis.shape)

tensor([[0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000],
        [0.5000]], grad_fn=<SigmoidBackward0>)
torch.Size([6, 1])


In [None]:
print(y_train)

tensor([[0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.]])


In [None]:
-(y_train[0] * torch.log(hypothesis[0]) + 
  (1 - y_train[0]) * torch.log(1-hypothesis[0]))

tensor([0.6931], grad_fn=<NegBackward0>)

In [None]:
losses = -(y_train * torch.log(hypothesis) + 
            (1 - y_train) * torch.log(1-hypothesis))
print(losses)

tensor([[0.6931],
        [0.6931],
        [0.6931],
        [0.6931],
        [0.6931],
        [0.6931]], grad_fn=<NegBackward0>)


In [None]:
cost = losses.mean()
print(cost)

tensor(0.6931, grad_fn=<MeanBackward0>)


In [None]:
F.binary_cross_entropy(hypothesis, y_train)

tensor(0.6931, grad_fn=<BinaryCrossEntropyBackward0>)

In [None]:
# Initialize the model
W = torch.zeros((2, 1), requires_grad = True)
b = torch.zeros(1, requires_grad = True)

# Set up the optimizer
optimizer = optim.SGD([W, b], lr = 1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):

    # Compute the cost
    hypothesis = torch.sigmoid(x_train.matmul(W) + b) # or .mm or @
    cost = F.binary_cross_entropy(hypothesis, y_train)

    # Improve H(x) using the cost
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log every 100 epochs
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'. format(
            epoch, nb_epochs, cost.item()
        ))

Epoch    0/1000 Cost: 0.693147
Epoch  100/1000 Cost: 0.134722
Epoch  200/1000 Cost: 0.080643
Epoch  300/1000 Cost: 0.057900
Epoch  400/1000 Cost: 0.045300
Epoch  500/1000 Cost: 0.037261
Epoch  600/1000 Cost: 0.031672
Epoch  700/1000 Cost: 0.027556
Epoch  800/1000 Cost: 0.024394
Epoch  900/1000 Cost: 0.021888
Epoch 1000/1000 Cost: 0.019852


In [None]:
hypothesis = torch.sigmoid(x_train.matmul(W) + b)
print(hypothesis[:5])

tensor([[2.7648e-04],
        [3.1608e-02],
        [3.8977e-02],
        [9.5622e-01],
        [9.9823e-01]], grad_fn=<SliceBackward0>)


In [None]:
prediction = hypothesis >= torch.FloatTensor([0.5])
print(prediction[:5])

tensor([[False],
        [False],
        [False],
        [ True],
        [ True]])


In [None]:
print(prediction[:5])
print(y_train[:5])

tensor([[False],
        [False],
        [False],
        [ True],
        [ True]])
tensor([[0.],
        [0.],
        [0.],
        [1.],
        [1.]])


In [None]:
correct_prediction = prediction.float() == y_train
print(correct_prediction[:5])

tensor([[True],
        [True],
        [True],
        [True],
        [True]])


In [None]:
class BinaryClassifier(nn.Module):
    """Logistic-regression module: a 2-input linear layer followed by a sigmoid."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=2, out_features=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        logits = self.linear(x)
        probabilities = self.sigmoid(logits)
        return probabilities

In [None]:
model = BinaryClassifier()

In [None]:
# Set up the optimizer
optimizer = optim.SGD(model.parameters(), lr = 1)

nb_epochs = 100
for epoch in range(nb_epochs + 1):

    # Compute H(x)
    hypothesis = model(x_train)

    # Compute the cost
    cost = F.binary_cross_entropy(hypothesis, y_train)

    # Improve H(x) using the cost
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log every 10 epochs
    if epoch % 10 == 0:
        # Threshold the outputs at 0.5 and compare with the targets; this uses
        # the hypothesis computed before this epoch's parameter update.
        prediction = hypothesis >= torch.FloatTensor([0.5])
        correct_prediction = prediction.float() == y_train
        accuracy = correct_prediction.sum().item() / len(correct_prediction)
        print('Epoch {:4d}/{} Cost: {:.6f} Accuracy {:2.2f}%'. format(
            epoch, nb_epochs, cost.item(), accuracy * 100,
        ))

Epoch    0/100 Cost: 1.580578 Accuracy 50.00%
Epoch   10/100 Cost: 0.466270 Accuracy 66.67%
Epoch   20/100 Cost: 0.395625 Accuracy 83.33%
Epoch   30/100 Cost: 0.334690 Accuracy 83.33%
Epoch   40/100 Cost: 0.282512 Accuracy 83.33%
Epoch   50/100 Cost: 0.235335 Accuracy 100.00%
Epoch   60/100 Cost: 0.193852 Accuracy 100.00%
Epoch   70/100 Cost: 0.163942 Accuracy 100.00%
Epoch   80/100 Cost: 0.147892 Accuracy 100.00%
Epoch   90/100 Cost: 0.137416 Accuracy 100.00%
Epoch  100/100 Cost: 0.128513 Accuracy 100.00%


y 클래스가 2개일 때 사용

## Softmax Classification

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
# For reproducibility
torch.manual_seed(1)

<torch._C.Generator at 0x7f33d4a9b7b0>

In [None]:
z = torch.FloatTensor([1, 2, 3])

In [None]:
hypothesis = F.softmax(z, dim = 0)
print(hypothesis)

tensor([0.0900, 0.2447, 0.6652])


In [None]:
hypothesis.sum()

tensor(1.)

In [None]:
z = torch.rand(3 ,5, requires_grad = True)
hypothesis = F.softmax(z, dim=1)
print(hypothesis)

tensor([[0.2645, 0.1639, 0.1855, 0.2585, 0.1277],
        [0.2430, 0.1624, 0.2322, 0.1930, 0.1694],
        [0.2226, 0.1986, 0.2326, 0.1594, 0.1868]], grad_fn=<SoftmaxBackward0>)


In [None]:
y = torch.randint(5, (3,)).long()
print(y)

tensor([2, 1, 0])


In [None]:
y_one_hot = torch.zeros_like(hypothesis)
y_one_hot.scatter_(1, y.unsqueeze(1), 1)

tensor([[0., 0., 1., 0., 0.],
        [0., 1., 0., 0., 0.],
        [1., 0., 0., 0., 0.]])

In [None]:
cost = (y_one_hot * -torch.log(hypothesis)).sum(dim=1).mean()
print(cost)

tensor(1.6682, grad_fn=<MeanBackward0>)


In [None]:
# Low level
torch.log(F.softmax(z, dim = 1))

tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]], grad_fn=<LogBackward0>)

In [None]:
# High level
F.log_softmax(z, dim=1)

tensor([[-1.3301, -1.8084, -1.6846, -1.3530, -2.0584],
        [-1.4147, -1.8174, -1.4602, -1.6450, -1.7758],
        [-1.5025, -1.6165, -1.4586, -1.8360, -1.6776]],
       grad_fn=<LogSoftmaxBackward0>)

In [None]:
# low level
(y_one_hot * -torch.log(F.softmax(z, dim=1))).sum(dim=1).mean()

tensor(1.6682, grad_fn=<MeanBackward0>)

In [None]:
# high level
F.nll_loss(F.log_softmax(z, dim=1), y)

tensor(1.6682, grad_fn=<NllLossBackward0>)

In [None]:
F.cross_entropy(z, y)

tensor(1.6682, grad_fn=<NllLossBackward0>)

In [None]:
x_train = [[1, 2, 1, 1],
           [2, 1, 3, 2],
           [3, 1, 3, 4],
           [4, 1, 5, 5],
           [1, 7, 5, 5],
           [1, 2, 5, 6],
           [1, 6, 6, 6],
           [1, 7, 7, 7]]
y_train = [2, 2, 2, 1, 1, 1, 0, 0]
x_train = torch.FloatTensor(x_train)
y_train = torch.LongTensor(y_train)

In [None]:
# Initialize the model
W = torch.zeros((4, 3), requires_grad = True)
# NOTE(review): b has a single element, so the same value is added to all 3
# class scores; a per-class bias would need 3 elements — confirm this is intended.
b = torch.zeros(1, requires_grad = True)

# Set up the optimizer
optimizer = optim.SGD([W, b], lr = 0.1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):

    # Cost computation (1): explicit softmax + one-hot form
    # hypothesis = F.softmax(x_train.matmul(W) + b, dim = 1)
    # y_one_hot = torch.zeros_like(hypothesis)
    # y_one_hot.scatter_(1, y_train.unsqueeze(1), 1)
    # cost = (y_one_hot * -torch.log(hypothesis)).sum(dim = 1).mean()

    # Cost computation (2): F.cross_entropy takes the raw scores z directly
    z = x_train.matmul(W) + b
    cost = F.cross_entropy(z, y_train)

    # Improve H(x) using the cost
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log every 100 epochs
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'. format(
            epoch, nb_epochs, cost.item()
        ))

Epoch    0/1000 Cost: 1.098612
Epoch  100/1000 Cost: 0.761050
Epoch  200/1000 Cost: 0.689991
Epoch  300/1000 Cost: 0.643229
Epoch  400/1000 Cost: 0.604117
Epoch  500/1000 Cost: 0.568256
Epoch  600/1000 Cost: 0.533922
Epoch  700/1000 Cost: 0.500291
Epoch  800/1000 Cost: 0.466908
Epoch  900/1000 Cost: 0.433507
Epoch 1000/1000 Cost: 0.399962


In [None]:
class SoftmaxClassifierModel(nn.Module):
    """Linear classifier mapping 4 inputs to 3 class scores (no softmax applied here)."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=4, out_features=3)

    def forward(self, x):
        """Return the raw class scores produced by the linear layer for ``x``."""
        scores = self.linear(x)
        return scores

In [None]:
model = SoftmaxClassifierModel()

In [None]:
# Set up the optimizer
optimizer = optim.SGD(model.parameters(), lr = 0.1)

nb_epochs = 1000
for epoch in range(nb_epochs + 1):

    # Compute H(x)
    prediction = model(x_train)

    # Compute the cost
    cost = F.cross_entropy(prediction, y_train)

    # Improve H(x) using the cost
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log every 100 epochs
    if epoch % 100 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'. format(
            epoch, nb_epochs, cost.item()
        ))

Epoch    0/1000 Cost: 1.648873
Epoch  100/1000 Cost: 0.726158
Epoch  200/1000 Cost: 0.639376
Epoch  300/1000 Cost: 0.581379
Epoch  400/1000 Cost: 0.531050
Epoch  500/1000 Cost: 0.483738
Epoch  600/1000 Cost: 0.437740
Epoch  700/1000 Cost: 0.392250
Epoch  800/1000 Cost: 0.346883
Epoch  900/1000 Cost: 0.301798
Epoch 1000/1000 Cost: 0.259684


y 클래스가 3개 이상일 때 사용

## Tips

* Overfitting 방지하는 방법
    * More Data : 데이터를 많이 모으자!!
    * Less features : 컬럼을 줄이자!!
    * Regularization : 정규화
        * Early Stopping
        * Reducing Network Size
        * Weight Decay
        * Dropout
        * Batch Normalization

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [None]:
# For reproducibility
torch.manual_seed(1)

<torch._C.Generator at 0x7f7bd0151870>

In [None]:
x_train = torch.FloatTensor([[1, 2, 1],
                             [1, 3, 2],
                             [1, 3, 4],
                             [1, 5, 5],
                             [1, 7, 5],
                             [1, 2, 5],
                             [1, 6, 6],
                             [1, 7, 7]
                            ])

y_train = torch.LongTensor([2, 2, 2, 1, 1, 1, 0, 0])

In [None]:
x_test = torch.FloatTensor([[2, 1, 1], [3, 1, 2], [3, 3, 4]])
y_test = torch.LongTensor([2, 2, 2])

In [None]:
class SoftmaxClassifierModel(nn.Module):
    """Linear classifier mapping 3 inputs to 3 class scores (no softmax applied here)."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=3, out_features=3)

    def forward(self, x):
        """Return the raw class scores produced by the linear layer for ``x``."""
        scores = self.linear(x)
        return scores

In [None]:
model = SoftmaxClassifierModel()

In [None]:
# optimizer 설정
optimizer = optim.SGD(model.parameters(), lr = 0.1)

In [None]:
def train(model, optimizer, x_train, y_train, nb_epochs=20):
    """Run full-batch training with a cross-entropy cost, printing the cost each epoch.

    Args:
        model: callable applied to ``x_train`` to produce the class scores.
        optimizer: object whose ``zero_grad()`` and ``step()`` are called every epoch.
        x_train: inputs passed to ``model``.
        y_train: targets passed to ``F.cross_entropy``.
        nb_epochs: number of epochs to run; defaults to 20, the value that
            used to be hard-coded.
    """
    for epoch in range(nb_epochs):

        # Compute H(x)
        prediction = model(x_train)

        # Compute the cost
        cost = F.cross_entropy(prediction, y_train)

        # Improve H(x) using the cost
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # The printed cost was computed before this epoch's update
        print('Epoch {:4d}/{} Cost : {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

In [None]:
def test(model, optimizer, x_test, y_test):
    """Evaluate ``model`` on one batch and print its accuracy and cross-entropy cost.

    Args:
        model: callable applied to ``x_test`` to produce the class scores.
        optimizer: unused; kept so existing calls keep working.
        x_test: inputs passed to ``model``.
        y_test: targets compared with the predicted classes and passed to
            ``F.cross_entropy``.
    """
    # Nothing is backpropagated here, so skip autograd tracking.
    with torch.no_grad():
        prediction = model(x_test)
        # max(1) returns (values, indices); the indices are the predicted classes
        predicted_classes = prediction.max(1)[1]
        correct_count = (predicted_classes == y_test).sum().item()
        cost = F.cross_entropy(prediction, y_test)

    print('Accuracy: {}% Cost: {:.6f}'.format(
        correct_count / len(y_test) * 100, cost.item()
    ))

In [None]:
train(model, optimizer, x_train, y_train)

Epoch    0/20 Cost : 2.203667
Epoch    1/20 Cost : 1.199645
Epoch    2/20 Cost : 1.142985
Epoch    3/20 Cost : 1.117769
Epoch    4/20 Cost : 1.100901
Epoch    5/20 Cost : 1.089523
Epoch    6/20 Cost : 1.079872
Epoch    7/20 Cost : 1.071320
Epoch    8/20 Cost : 1.063325
Epoch    9/20 Cost : 1.055720
Epoch   10/20 Cost : 1.048378
Epoch   11/20 Cost : 1.041245
Epoch   12/20 Cost : 1.034285
Epoch   13/20 Cost : 1.027478
Epoch   14/20 Cost : 1.020813
Epoch   15/20 Cost : 1.014279
Epoch   16/20 Cost : 1.007872
Epoch   17/20 Cost : 1.001586
Epoch   18/20 Cost : 0.995419
Epoch   19/20 Cost : 0.989365


In [None]:
test(model, optimizer, x_test, y_test)

Accuracy: 0.0% Cost: 1.425844


learning rate가 너무 크면 diverge하면서 cost가 점점 늘어난다.

In [None]:
model = SoftmaxClassifierModel()
optimizer = optim.SGD(model.parameters(), lr = 1e5) # 1e5 :  1 * 100000

In [None]:
train(model, optimizer, x_train, y_train)

Epoch    0/20 Cost : 1.280268
Epoch    1/20 Cost : 976950.750000
Epoch    2/20 Cost : 1279135.250000
Epoch    3/20 Cost : 1198378.875000
Epoch    4/20 Cost : 1098825.750000
Epoch    5/20 Cost : 1968197.750000
Epoch    6/20 Cost : 284763.250000
Epoch    7/20 Cost : 1532260.250000
Epoch    8/20 Cost : 1651503.750000
Epoch    9/20 Cost : 521878.593750
Epoch   10/20 Cost : 1397263.250000
Epoch   11/20 Cost : 750986.375000
Epoch   12/20 Cost : 918691.375000
Epoch   13/20 Cost : 1487888.250000
Epoch   14/20 Cost : 1582260.250000
Epoch   15/20 Cost : 685818.125000
Epoch   16/20 Cost : 1140048.875000
Epoch   17/20 Cost : 940566.375000
Epoch   18/20 Cost : 931638.250000
Epoch   19/20 Cost : 1971322.750000


learning rate가 너무 작으면 cost가 거의 줄어들지 않는다.

In [None]:
model = SoftmaxClassifierModel()
optimizer = optim.SGD(model.parameters(), lr = 1e-10)

In [None]:
train(model, optimizer, x_train, y_train)

Epoch    0/20 Cost : 3.187324
Epoch    1/20 Cost : 3.187324
Epoch    2/20 Cost : 3.187324
Epoch    3/20 Cost : 3.187324
Epoch    4/20 Cost : 3.187324
Epoch    5/20 Cost : 3.187324
Epoch    6/20 Cost : 3.187324
Epoch    7/20 Cost : 3.187324
Epoch    8/20 Cost : 3.187324
Epoch    9/20 Cost : 3.187324
Epoch   10/20 Cost : 3.187324
Epoch   11/20 Cost : 3.187324
Epoch   12/20 Cost : 3.187324
Epoch   13/20 Cost : 3.187324
Epoch   14/20 Cost : 3.187324
Epoch   15/20 Cost : 3.187324
Epoch   16/20 Cost : 3.187324
Epoch   17/20 Cost : 3.187324
Epoch   18/20 Cost : 3.187324
Epoch   19/20 Cost : 3.187324


적절한 숫자로 시작해 발산하면 작게, cost가 줄어들지 않으면 크게 조정하자

In [None]:
model = SoftmaxClassifierModel()
optimizer = optim.SGD(model.parameters(), lr = 1e-1)

train(model, optimizer, x_train, y_train)

Epoch    0/20 Cost : 1.341574
Epoch    1/20 Cost : 1.198802
Epoch    2/20 Cost : 1.150877
Epoch    3/20 Cost : 1.131977
Epoch    4/20 Cost : 1.116242
Epoch    5/20 Cost : 1.102514
Epoch    6/20 Cost : 1.089676
Epoch    7/20 Cost : 1.077479
Epoch    8/20 Cost : 1.065775
Epoch    9/20 Cost : 1.054511
Epoch   10/20 Cost : 1.043655
Epoch   11/20 Cost : 1.033187
Epoch   12/20 Cost : 1.023091
Epoch   13/20 Cost : 1.013356
Epoch   14/20 Cost : 1.003968
Epoch   15/20 Cost : 0.994917
Epoch   16/20 Cost : 0.986189
Epoch   17/20 Cost : 0.977775
Epoch   18/20 Cost : 0.969661
Epoch   19/20 Cost : 0.961836


Data Preprocessing

In [None]:
x_train = torch.FloatTensor([[73, 80, 75],
                             [93, 88, 93],
                             [89, 91, 90],
                             [96, 98, 100],
                             [73, 66, 70]])
y_train = torch.FloatTensor([[152], [185], [180], [196], [142]])

In [None]:
# standardization
mu = x_train.mean(dim=0)
mu

tensor([84.8000, 84.6000, 85.6000])

In [None]:
sigma = x_train.std(dim=0)
sigma

tensor([11.0544, 12.2393, 12.6214])

In [None]:
norm_x_train = (x_train - mu) / sigma
print(norm_x_train)

tensor([[-1.0674, -0.3758, -0.8398],
        [ 0.7418,  0.2778,  0.5863],
        [ 0.3799,  0.5229,  0.3486],
        [ 1.0132,  1.0948,  1.1409],
        [-1.0674, -1.5197, -1.2360]])


In [None]:
class MultivariateLinearRegressionModel(nn.Module):
    """Linear regression module: one affine layer from 3 inputs to 1 output."""

    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=3, out_features=1)

    def forward(self, x):
        """Return the linear layer's output for ``x``."""
        prediction = self.linear(x)
        return prediction

In [None]:
model = MultivariateLinearRegressionModel()
optimizer = optim.SGD(model.parameters(), lr = 1e-1)

In [None]:
def train(model, optimizer, x_train, y_train, nb_epochs=20):
    """Run full-batch training with a mean-squared-error cost, printing the cost each epoch.

    Args:
        model: callable applied to ``x_train`` to produce the predictions.
        optimizer: object whose ``zero_grad()`` and ``step()`` are called every epoch.
        x_train: inputs passed to ``model``.
        y_train: targets passed to ``F.mse_loss``.
        nb_epochs: number of epochs to run; defaults to 20, the value that
            used to be hard-coded.
    """
    for epoch in range(nb_epochs):

        # Compute H(x)
        prediction = model(x_train)

        # Compute the cost
        cost = F.mse_loss(prediction, y_train)

        # Improve H(x) using the cost
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # The printed cost was computed before this epoch's update
        print('Epoch {:4d}/{} Cost : {:.6f}'.format(
            epoch, nb_epochs, cost.item()
        ))

In [None]:
train(model, optimizer, norm_x_train, y_train)

Epoch    0/20 Cost : 29602.087891
Epoch    1/20 Cost : 18798.478516
Epoch    2/20 Cost : 11988.445312
Epoch    3/20 Cost : 7660.372559
Epoch    4/20 Cost : 4899.234375
Epoch    5/20 Cost : 3134.669922
Epoch    6/20 Cost : 2006.090454
Epoch    7/20 Cost : 1284.011108
Epoch    8/20 Cost : 821.937134
Epoch    9/20 Cost : 526.222473
Epoch   10/20 Cost : 336.965057
Epoch   11/20 Cost : 215.836151
Epoch   12/20 Cost : 138.308807
Epoch   13/20 Cost : 88.686180
Epoch   14/20 Cost : 56.922966
Epoch   15/20 Cost : 36.589779
Epoch   16/20 Cost : 23.571926
Epoch   17/20 Cost : 15.236008
Epoch   18/20 Cost : 9.896884
Epoch   19/20 Cost : 6.475713


## MNIST Introduction

In [18]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms 
from torch.utils.data import DataLoader

In [9]:
mnist_train = dsets.MNIST(root="MNIST_data/", train = True, transform = transforms.ToTensor(), download=True)

mnist_test = dsets.MNIST(root="MNIST_data/", train = False, transform = transforms.ToTensor(), download=True)

In [22]:
batch_size = 100

data_loader = DataLoader(mnist_train, batch_size = batch_size, shuffle=True, drop_last = True)