<a href="https://colab.research.google.com/github/2018007956/HYU/blob/main/Deep_Learning/04_Multi_layer_perceptrons.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

- Softmax Function
- 학습 관련 개념
- MNIST data 분석
- Multi-layer perceptron으로 MNIST data classifier model 만들기

# 1. Softmax function

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

# Toy dataset: 8 samples with 4 features each, and one class label (0, 1 or 2) per sample.
x_train = torch.tensor(
    [
        [1, 2, 1, 1],
        [2, 1, 3, 2],
        [3, 1, 3, 4],
        [4, 1, 5, 5],
        [1, 7, 5, 5],
        [1, 2, 5, 6],
        [1, 6, 6, 6],
        [1, 7, 7, 7],
    ],
    dtype=torch.float32,
)
# Integer class indices, one per row of x_train.
y_train = torch.tensor([2, 2, 2, 1, 1, 1, 0, 0], dtype=torch.int64)

class MultiLayerPerceptron(nn.Module):
  """Two-layer perceptron: Linear(4 -> 3), sigmoid, Linear(3 -> 3).

  `forward` returns the output of the second linear layer unchanged (raw
  logits); no softmax is applied inside the model.
  """

  def __init__(self):
    super().__init__()
    # Hidden layer and the non-linearity applied to its output.
    self.linear1 = nn.Linear(in_features=4, out_features=3)
    self.activation = nn.Sigmoid()

    # Output layer producing 3 values per input row.
    self.linear2 = nn.Linear(in_features=3, out_features=3)

  def forward(self, x):
    """Return the 3 output logits for each row of `x` (last dimension 4)."""
    hidden = self.activation(self.linear1(x))
    return self.linear2(hidden)

# Instantiate the network, switch it to training mode, and let plain SGD
# (learning rate 1) update all of its parameters.
model = MultiLayerPerceptron()
model.train()
optimizer = optim.SGD(params=model.parameters(), lr=1)

In [None]:
epochs = 8000

# CrossEntropyLoss is LogSoftmax followed by NLLLoss, so it takes the model's
# raw logits directly. The loss module is identical for every step, so it is
# built once here rather than once per iteration.
cross_entropy = nn.CrossEntropyLoss()

model.train()
for epoch in range(epochs):
  logits = model(x_train)  # forward propagation
  cost = cross_entropy(logits, y_train)  # get cost

  optimizer.zero_grad()  # reset gradients to zero
  cost.backward()  # backward propagation
  optimizer.step()  # update parameters

In [None]:
# Inference on the training inputs: eval mode, with autograd tracking disabled
# for both the forward pass and the softmax.
model.eval()
with torch.no_grad():
  logits = model(x_train)
  # Normalise each row of logits over the class dimension.
  probs = nn.Softmax(dim=1)(logits)

print('logit\n : {}'.format(logits))
print('predict with softmax\n : {}'.format(probs))
# The predicted class is the index of the largest probability in each row.
print('predict with argmax\n : {}'.format(probs.argmax(dim=1)))

logit
 : tensor([[-3.0547, -2.4338,  5.9909],
        [-3.3071, -1.8190,  5.5474],
        [-3.3492, -1.6786,  5.4255],
        [-6.2207,  6.4279, -0.8199],
        [ 2.4480,  1.7542, -4.8793],
        [-0.8131,  9.0675, -9.6797],
        [ 2.4480,  1.7542, -4.8794],
        [ 2.4480,  1.7542, -4.8794]])
predict with softmax
 : tensor([[1.1787e-04, 2.1931e-04, 9.9966e-01],
        [1.4263e-04, 6.3166e-04, 9.9923e-01],
        [1.5445e-04, 8.2091e-04, 9.9902e-01],
        [3.2098e-06, 9.9929e-01, 7.1125e-04],
        [6.6652e-01, 3.3305e-01, 4.3816e-04],
        [5.1156e-05, 9.9995e-01, 7.2138e-09],
        [6.6652e-01, 3.3304e-01, 4.3807e-04],
        [6.6652e-01, 3.3304e-01, 4.3806e-04]])
predict with argmax
 : tensor([2, 2, 2, 1, 0, 1, 0, 0])


# 2. 학습 관련 개념

- Epoch : 전체 Sample data를 한 번 모두 학습하는 것 (1 epoch = 전체 데이터 1회 학습)
- Step : 1 step당 weight와 Bias를 1회씩 업데이트함
- Batch Size : 1 step에서 사용한 데이터 수
- Learning rate : 경사 하강법에서 학습 단계별로 움직이는 학습 속도  
Ex) Batch Size가 100, Step이 10이면 약 1000개의 데이터 이용

# 3. MNIST data

Image: 28x28 (Dimension 784)  
Label: 0~9 

Train:Valid:Test = 55,000:5,000:10,000

# 4. MNIST data classifier

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
import torchvision
import torchvision.transforms as transforms

# MNIST training and test splits stored under MNIST_data/ (download=True);
# ToTensor converts each image to a tensor.
train_dataset = torchvision.datasets.MNIST(
    root="MNIST_data/",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_dataset = torchvision.datasets.MNIST(
    root="MNIST_data/",
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw



In [None]:
batch_size = 128

# Only the training loader shuffles its samples; the test loader is left
# unshuffled.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
class Model(nn.Module):
  """Three-layer perceptron: 784 -> 2352 -> 1568 -> 10, sigmoid between layers.

  `forward` returns the output of the last linear layer unchanged (raw
  logits); no softmax is applied inside the model.
  """

  def __init__(self):
    super().__init__()
    self.linear1 = nn.Linear(in_features=784, out_features=784 * 3)
    self.linear2 = nn.Linear(in_features=784 * 3, out_features=784 * 2)
    self.linear3 = nn.Linear(in_features=784 * 2, out_features=10)

    # A single sigmoid module is reused after both hidden layers.
    self.activation = nn.Sigmoid()

  def forward(self, x):
    """Return the 10 output logits for each row of `x` (last dimension 784)."""
    hidden = self.activation(self.linear1(x))
    hidden = self.activation(self.linear2(hidden))
    return self.linear3(hidden)

In [None]:
# Place the network on the selected device and switch it to training mode.
model = Model().to(device)
model.train()

# Plain SGD (learning rate 0.1) with cross-entropy computed on the model's logits.
optimizer = optim.SGD(params=model.parameters(), lr=0.1)
criterion = nn.CrossEntropyLoss()

In [None]:
epochs = 15

# Number of batches per epoch; used to turn the summed batch losses into a mean.
total_batch_num = len(train_dataloader)

model.train()
for epoch in range(epochs):
  avg_cost = 0

  for b_x, b_y in train_dataloader:
    # Flatten each 28x28 image into a 784-vector and move the batch and its
    # labels to the device the model lives on.
    b_x = b_x.view(-1, 28*28).to(device)
    b_y = b_y.to(device)

    logits = model(b_x) # forward propagation
    loss = criterion(logits, b_y) # get cost

    optimizer.zero_grad() # reset gradients to zero
    loss.backward() # backward propagation
    optimizer.step() # update parameters

    # .item() extracts a plain Python float, so the running average does not
    # hold on to each step's autograd graph.
    avg_cost += loss.item() / total_batch_num

  print('Epoch : {}/{}, cost:{}'.format(epoch+1, epochs, avg_cost))

Epoch : 1/15, cost:2.328648328781128
Epoch : 2/15, cost:1.452009916305542
Epoch : 3/15, cost:0.7514124512672424
