In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

## 딥러닝 모델

In [3]:
class NeuralNet(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear -> Softmax.

    Args:
        input_size: number of input features per sample.
        hidden_size: number of units in the hidden layer.
        num_classes: number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Layers are declared here in the same order forward() applies them.
        self.mlp1 = nn.Linear(input_size, hidden_size)   # input -> hidden
        self.relu = nn.ReLU()                            # activation function
        self.mlp2 = nn.Linear(hidden_size, num_classes)  # hidden -> output
        self.softmax = nn.Softmax(dim=1)                 # final activation

    def forward(self, x):
        """Run x through the layers declared in __init__ and return the softmax output."""
        hidden = self.relu(self.mlp1(x))
        return self.softmax(self.mlp2(hidden))

* cuda를 사용할 수 있으면 사용, 아니면 cpu환경으로

In [4]:
# Use CUDA when it is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cpu


In [5]:
# Hyper-parameters for the network.
input_size = 28 * 28 * 1  # MNIST image size
hidden_size = 100         # hyper parameter
num_classes = 10          # total number of classes

# Build the network, then move it to the selected device.
model = NeuralNet(input_size, hidden_size, num_classes)
model = model.to(device)
print(model)

NeuralNet(
  (mlp1): Linear(in_features=784, out_features=100, bias=True)
  (relu): ReLU()
  (mlp2): Linear(in_features=100, out_features=10, bias=True)
  (softmax): Softmax(dim=1)
)


In [6]:
# One random sample with 28*28 features, moved to the same device as the model.
data = torch.rand(1, 28 * 28).to(device)
print('data:', data.size())
print('-' * 40)

# Forward pass through the model.
pred = model(data)
print('pred:', pred)
print('-' * 40)

# Index of the highest probability along dim 1.
y_hat = torch.argmax(pred, dim=1)
print('y hat:', y_hat)

data: torch.Size([1, 784])
----------------------------------------
pred: tensor([[0.0959, 0.1064, 0.1097, 0.0860, 0.1225, 0.0702, 0.0962, 0.1140, 0.1001,
         0.0991]], grad_fn=<SoftmaxBackward0>)
----------------------------------------
y hat: tensor([4])


### nn.Linear

In [7]:
# Random tensor of shape (3, 28, 28) used by the nn.Linear examples below.
x = torch.randn((3, 28, 28))

In [8]:
# in_features=28*28: the data is flattened before being fed to the layer.
fc_layer = nn.Linear(28 * 28, 100)
x_after_fc = fc_layer(x.reshape(-1, 28 * 28))
print('x_after_fc:', x_after_fc.size())

x_after_fc: torch.Size([3, 100])


In [9]:
# nn.Flatten plays the same role as x.reshape(-1, 28*28):
# it turns a 28x28 image into an array of 784 pixel values.

flatten = nn.Flatten(start_dim=1, end_dim=-1)  # explicit form of the defaults
flat_x = flatten(x)
print(flat_x.size())

fc_layer = nn.Linear(28 * 28, 100)
x_after_fc = fc_layer(flat_x)
print('x_after_fc:', x_after_fc.size())

torch.Size([3, 784])
x_after_fc: torch.Size([3, 100])


### nn.ReLU

In [10]:
# Fresh random tensor of shape (3, 28, 28) for the ReLU example.
x = torch.randn((3, 28, 28))

In [13]:
# Display the first row of the first 28x28 slice.
x[0, 0]

tensor([-2.2632, -1.1601, -1.5708, -0.7098,  1.1240,  0.5832, -0.4920,  0.7810,
        -1.0638,  0.0208,  0.2019, -0.5706,  1.8864,  0.7079,  0.4409, -0.6911,
        -1.3627,  0.4620,  0.4089, -0.6376, -1.2141, -1.4225, -0.4005,  0.5099,
        -1.4147, -0.9065, -1.8268,  1.4372])

* ReLU : 음수들은 0으로, 양수는 양수 그대로

In [14]:
# ReLU is not applied in place here, so the input row can be printed after computing the output.
relu = nn.ReLU()
before_relu = x[0, 0]
after_relu = relu(before_relu)

print('before_relu :', before_relu, before_relu.shape)
print('-' * 70)
print('after_relu :', after_relu, after_relu.shape)

before_relu : tensor([-2.2632, -1.1601, -1.5708, -0.7098,  1.1240,  0.5832, -0.4920,  0.7810,
        -1.0638,  0.0208,  0.2019, -0.5706,  1.8864,  0.7079,  0.4409, -0.6911,
        -1.3627,  0.4620,  0.4089, -0.6376, -1.2141, -1.4225, -0.4005,  0.5099,
        -1.4147, -0.9065, -1.8268,  1.4372]) torch.Size([28])
----------------------------------------------------------------------
after_relu : tensor([0.0000, 0.0000, 0.0000, 0.0000, 1.1240, 0.5832, 0.0000, 0.7810, 0.0000,
        0.0208, 0.2019, 0.0000, 1.8864, 0.7079, 0.4409, 0.0000, 0.0000, 0.4620,
        0.4089, 0.0000, 0.0000, 0.0000, 0.0000, 0.5099, 0.0000, 0.0000, 0.0000,
        1.4372]) torch.Size([28])


### Softmax

* 뉴럴 네트워크의 output이라 생각(1,10)

In [15]:
# Random (1, 10) tensor standing in for a network output.
x = torch.randn((1, 10))
x

tensor([[-0.9890,  0.7847,  1.2137, -3.0404, -1.2543, -0.3276,  0.6636, -1.2009,
         -0.3729, -0.2062]])

In [16]:
# Softmax normalising along dimension 1.
softmax = nn.Softmax(1)
softmax

Softmax(dim=1)

In [17]:
# Apply the softmax and print the total of the resulting values.
pred = softmax(x)
print('pred:', pred)
print('sum of pred:', torch.sum(pred))

pred: tensor([[0.0347, 0.2043, 0.3137, 0.0045, 0.0266, 0.0672, 0.1810, 0.0281, 0.0642,
         0.0758]])
sum of pred: tensor(1.)


### nn.Sequential : 한번에 처리하기

* Neural Network

In [18]:
class NeuralNet(nn.Module):
    """MLP with one hidden layer, written with one attribute per layer.

    Args:
        input_size: number of input features per sample.
        hidden_size: number of units in the hidden layer.
        num_classes: number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.mlp1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.mlp2 = nn.Linear(in_features=hidden_size, out_features=num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Apply mlp1, relu, mlp2 and softmax in that order."""
        out = x
        for layer in (self.mlp1, self.relu, self.mlp2, self.softmax):
            out = layer(out)
        return out

In [19]:
# Hyper-parameters for the network.
input_size = 28 * 28 * 1  # MNIST image size
hidden_size = 100         # hyper parameter
num_classes = 10          # total number of classes

# Build the network, then move it to the selected device.
model = NeuralNet(input_size, hidden_size, num_classes)
model = model.to(device)
print(model)

NeuralNet(
  (mlp1): Linear(in_features=784, out_features=100, bias=True)
  (relu): ReLU()
  (mlp2): Linear(in_features=100, out_features=10, bias=True)
  (softmax): Softmax(dim=1)
)


In [20]:
# nn.Sequential: bundles several modules so they can be used as one

class NeuralNet2(nn.Module):
    """Linear -> ReLU -> Linear -> Softmax network built with nn.Sequential.

    Args:
        input_size: number of input features per sample.
        hidden_size: number of units in the hidden layer.
        num_classes: number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        # super() must name this class: NeuralNet2 does not inherit from
        # NeuralNet, so super(NeuralNet, self) raises TypeError.
        super(NeuralNet2, self).__init__()
        # Assigned to self so the container is registered as a submodule
        # and forward() can reach it.
        self.sequential = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes),
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Run x through the whole Sequential container in a single call."""
        out = self.sequential(x)
        return out

In [21]:
# Hyper-parameters for the network.
input_size = 28 * 28 * 1  # MNIST image size
hidden_size = 100         # hyper parameter
num_classes = 10          # total number of classes

# NOTE(review): this builds NeuralNet, not the NeuralNet2 class defined in the
# cell above, so the structure printed here is that of the original class.
model = NeuralNet(input_size, hidden_size, num_classes)
model = model.to(device)
print(model)

NeuralNet(
  (mlp1): Linear(in_features=784, out_features=100, bias=True)
  (relu): ReLU()
  (mlp2): Linear(in_features=100, out_features=10, bias=True)
  (softmax): Softmax(dim=1)
)


## 손실 함수

### Cross Entropy

Binary Cross Entropy Loss (nn.BCELoss())
- `Binary Class (0/1)`에 적용
- Loss 적용 전 `sigmoid`나 `softmax`를 취해줘야 함 => 확률 값으로 변경

In [22]:
# Three random raw values and their binary targets (each target is 0 or 1).
x = torch.randn((3,))
y = torch.tensor([0.0, 1.0, 0.0])

In [23]:
# Show the inputs and the targets.
print('x:', x)
print('y:', y)

x: tensor([ 0.0467, -0.5376, -0.6343])
y: tensor([0., 1., 0.])


In [24]:
# Convert the raw values to probabilities with a sigmoid before applying the loss.
sigmoid = nn.Sigmoid()
x_sigmoid = sigmoid(x)

# Binary Cross Entropy Loss
bce_loss = nn.BCELoss()
print('binary cross entropy loss value:', bce_loss(input=x_sigmoid, target=y))

binary cross entropy loss value: tensor(0.7133)


Binary Cross Entropy With Logits Loss (nn.BCEWithLogitsLoss())
- `Binary Class (0/1)`에 적용
- Loss안에 `Sigmoid가 내장`되어있음

In [25]:
# Binary Cross Entropy With Logits Loss: no separate sigmoid step is needed.
bce_with_logits_loss = nn.BCEWithLogitsLoss()

print('binary cross entropy loss value:', bce_with_logits_loss(input=x, target=y))

binary cross entropy loss value: tensor(0.7133)


Cross Entropy Loss (nn.CrossEntropyLoss())
- `Multi Class`에서 적용
- Loss안에 `Softmax가 내장`되어 있음 (정확히는 `LogSoftmax` + `NLLLoss`). 따라서 입력으로는 Softmax를 적용하지 않은 logit을 그대로 넣어야 함

In [26]:
# One sample with 10 raw scores, and its target class index.
x = torch.randn((1, 10))
y = torch.tensor([1])

In [27]:
# Show the scores and the target index.
print('x:', x)
print('y:', y)

x: tensor([[ 1.0158, -2.0461,  1.5416, -0.4591, -0.9404, -0.1783, -0.9782,  1.9946,
          1.0499, -1.0803]])
y: tensor([1])


In [28]:
# Cross Entropy Loss
# Softmax and log are applied first, then the cross entropy loss is computed.

cross_entropy_loss = nn.CrossEntropyLoss()

print('cross_entropy loss value:', cross_entropy_loss(input=x, target=y))

cross_entropy loss value: tensor(5.0589)


### MSE

In [29]:
# Two random (1, 10) tensors to compare with the MSE loss.
x = torch.randn((1, 10))
y = torch.randn((1, 10))

In [30]:
# Display both tensors as a tuple.
(x, y)

(tensor([[-1.0493,  0.7394,  1.2416, -2.7529, -1.1950, -0.4961, -0.0660,  0.3516,
          -0.1618,  0.3677]]),
 tensor([[-2.1191, -1.3712,  0.0992, -0.6513, -0.4538, -0.4753, -0.4755,  0.4516,
          -0.4128,  0.7341]]))

In [113]:
# Mean squared error between x and y.
mse_loss = nn.MSELoss()
# Label corrected: this cell prints an MSE loss, not a cross entropy loss.
print('mse loss value:', mse_loss(x,y))

cross_entropy loss value: tensor(1.4131)
