# Methods to Improve models

colab에서 열기: https://colab.research.google.com/github/SLCFLAB/Fintech2022/blob/main/ML_day10.ipynb

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

import torchvision.utils
import torchvision.datasets as dsets
import torchvision.transforms as transforms

import numpy as np
import random
import os

import matplotlib.pyplot as plt
%matplotlib inline

## Load data

In [2]:
# MNIST train and test splits stored under data/ (download=True asks
# torchvision to fetch the files); each image goes through ToTensor().
# The train split is built first, then the test split.
train_data, test_data = (
    dsets.MNIST(root='data/',
                train=is_train,
                transform=transforms.ToTensor(),
                download=True)
    for is_train in (True, False)
)

In [3]:
batch_size = 100  # number of samples processed per mini-batch

# The training batches are reshuffled; the test batches keep dataset order.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

## Drop out

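Dropout과 batch normalization layer는 train할 때와 inference할 때 동작이 다르다. Dropout은 inference 때 비활성화되고, batch normalization은 현재 배치의 통계 대신 학습 중에 누적한 running mean/variance를 사용한다. 따라서 inference 전에는 model.eval()을 호출해 evaluation 모드로 전환하고, 다시 학습할 때는 model.train()을 호출해야 한다.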

In [4]:
# Fully connected layers for flattened 28x28 inputs: 784 -> 512 -> 512 -> 512 -> 512 -> 10.
# The generator is consumed left to right, so the layers are created in the
# same order as their names.
linear1, linear2, linear3, linear4, linear5 = (
    torch.nn.Linear(n_in, n_out, bias=True)
    for n_in, n_out in [(784, 512), (512, 512), (512, 512), (512, 512), (512, 10)]
)

# Activation and dropout modules (each hidden unit is dropped with p=0.1 in train mode).
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.1)

In [5]:
# model
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell does not crash on a runtime without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Four hidden blocks of Linear -> ReLU -> Dropout followed by the output layer.
model = torch.nn.Sequential(linear1, relu, dropout,
                            linear2, relu, dropout,
                            linear3, relu, dropout,
                            linear4, relu, dropout,
                            linear5).to(device)

In [6]:
# Cross-entropy loss on the model outputs, optimised with plain SGD.
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)

In [7]:
total_batch = len(train_loader)
# Send each batch to whichever device the model's parameters live on instead
# of assuming CUDA.
device = next(model.parameters()).device
model.train()    # set the model to train mode (dropout active)
for epoch in range(20):
    avg_cost = 0.0

    for X, Y in train_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = loss(hypothesis, Y)
        cost.backward()
        optimizer.step()

        # .item() extracts a plain Python float; accumulating the tensor itself
        # would keep every batch's autograd history alive until the epoch ends.
        avg_cost += cost.item() / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 2.297901392
Epoch: 0002 cost = 2.275900602
Epoch: 0003 cost = 2.005052328
Epoch: 0004 cost = 0.945786715
Epoch: 0005 cost = 0.604985178
Epoch: 0006 cost = 0.480596662
Epoch: 0007 cost = 0.415942878
Epoch: 0008 cost = 0.369540066
Epoch: 0009 cost = 0.333335429
Epoch: 0010 cost = 0.300353795
Epoch: 0011 cost = 0.271818429
Epoch: 0012 cost = 0.248090923
Epoch: 0013 cost = 0.226623401
Epoch: 0014 cost = 0.208310455
Epoch: 0015 cost = 0.191368833
Epoch: 0016 cost = 0.176847771
Epoch: 0017 cost = 0.166522771
Epoch: 0018 cost = 0.154115245
Epoch: 0019 cost = 0.144841611
Epoch: 0020 cost = 0.137235180
Learning finished


In [8]:
# Test model and check accuracy
model.eval()    # set the model to evaluation mode (dropout disabled)
# Send each batch to whichever device the model's parameters live on instead
# of assuming CUDA.
device = next(model.parameters()).device
n_correct = 0   # plain int, so the cell also works if the loader yields no batches
with torch.no_grad():    # gradients are not needed for inference
    for data, target in test_loader:
        # flatten each image to a 784-vector, as in training
        X_test = data.view(-1, 28 * 28).to(device)
        Y_test = target.to(device)

        prediction = model(X_test)
        # predicted class = index of the largest output score
        correct_prediction = torch.argmax(prediction, 1) == Y_test
        n_correct += correct_prediction.sum().item()

accuracy = n_correct / len(test_loader.dataset)
print('Accuracy:', accuracy)

Accuracy: 0.9641


## Batch normalization

In [9]:
 # nn layers
linear1 = torch.nn.Linear(784, 32, bias=True)
linear2 = torch.nn.Linear(32, 32, bias=True)
linear3 = torch.nn.Linear(32, 10, bias=True)

relu = torch.nn.ReLU()
# One BatchNorm1d per hidden layer, each sized to that layer's 32 output features.
bn1 = torch.nn.BatchNorm1d(32)
bn2 = torch.nn.BatchNorm1d(32)

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell does not crash on a runtime without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model: batch normalization is applied between each hidden Linear and its ReLU
bn_model = torch.nn.Sequential(linear1, bn1, relu,
                               linear2, bn2, relu,
                               linear3).to(device)

In [10]:
# Cross-entropy loss and plain SGD for the batch-norm model.
bn_loss = torch.nn.CrossEntropyLoss()
bn_optimizer = torch.optim.SGD(params=bn_model.parameters(), lr=0.01)

In [11]:
# Send each batch to whichever device the model's parameters live on instead
# of assuming CUDA.
device = next(bn_model.parameters()).device
for epoch in range(20):
    bn_model.train()  # set the model to train mode (must be set explicitly)
    avg_cost = 0.0
    for X, Y in train_loader:
        # flatten each image to a 784-vector
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        bn_optimizer.zero_grad()
        bn_prediction = bn_model(X)
        bn_cost = bn_loss(bn_prediction, Y)
        bn_cost.backward()
        bn_optimizer.step()
        # .item() extracts a plain Python float; accumulating the tensor itself
        # would keep every batch's autograd history alive until the epoch ends.
        avg_cost += bn_cost.item() / len(train_loader)

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.948867857
Epoch: 0002 cost = 0.370169938
Epoch: 0003 cost = 0.266219616
Epoch: 0004 cost = 0.215652913
Epoch: 0005 cost = 0.187259838
Epoch: 0006 cost = 0.166151226
Epoch: 0007 cost = 0.151855528
Epoch: 0008 cost = 0.137558356
Epoch: 0009 cost = 0.127136320
Epoch: 0010 cost = 0.117978394
Epoch: 0011 cost = 0.110545211
Epoch: 0012 cost = 0.105900384
Epoch: 0013 cost = 0.100477427
Epoch: 0014 cost = 0.095583752
Epoch: 0015 cost = 0.091084093
Epoch: 0016 cost = 0.087018654
Epoch: 0017 cost = 0.084118955
Epoch: 0018 cost = 0.080683120
Epoch: 0019 cost = 0.077405237
Epoch: 0020 cost = 0.074686468
Learning finished


In [12]:
# Test model and check accuracy
# Evaluation mode: BatchNorm layers normalise with their running statistics
# instead of the statistics of the current batch.
bn_model.eval()
# Send each batch to whichever device the model's parameters live on instead
# of assuming CUDA.
device = next(bn_model.parameters()).device
n_correct = 0   # plain int, so the cell also works if the loader yields no batches
with torch.no_grad():    # gradients are not needed for inference
    for data, target in test_loader:
        # flatten each image to a 784-vector, as in training
        X_test = data.view(-1, 28 * 28).to(device)
        Y_test = target.to(device)

        prediction = bn_model(X_test)
        # predicted class = index of the largest output score
        correct_prediction = torch.argmax(prediction, 1) == Y_test
        n_correct += correct_prediction.sum().item()

accuracy = n_correct / len(test_loader.dataset)
print('Accuracy:', accuracy)

Accuracy: 0.9706


## L2 Regularization

In [13]:
# Small MLP for flattened 28x28 inputs: 784 -> 32 -> 32 -> 10.
# The generator is consumed left to right, so the layers are created in the
# same order as their names.
linear1, linear2, linear3 = (
    torch.nn.Linear(n_in, n_out, bias=True)
    for n_in, n_out in [(784, 32), (32, 32), (32, 10)]
)

relu = torch.nn.ReLU()

In [14]:
# model
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell does not crash on a runtime without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Two hidden Linear -> ReLU blocks followed by the output layer.
model = torch.nn.Sequential(linear1, relu,
                            linear2, relu,
                            linear3).to(device)

In [15]:
# Cross-entropy loss; SGD with momentum, where weight_decay supplies the
# L2 regularisation term.
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    params=model.parameters(),
    lr=1e-2,
    momentum=0.9,
    weight_decay=1e-5,
)

In [16]:
total_batch = len(train_loader)
# Send each batch to whichever device the model's parameters live on instead
# of assuming CUDA.
device = next(model.parameters()).device
model.train()    # set the model to train mode
for epoch in range(20):
    avg_cost = 0.0

    for X, Y in train_loader:
        # reshape input image into [batch_size by 784]
        # label is not one-hot encoded
        X = X.view(-1, 28 * 28).to(device)
        Y = Y.to(device)

        optimizer.zero_grad()
        hypothesis = model(X)
        cost = loss(hypothesis, Y)
        cost.backward()
        optimizer.step()

        # .item() extracts a plain Python float; accumulating the tensor itself
        # would keep every batch's autograd history alive until the epoch ends.
        avg_cost += cost.item() / total_batch

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost))

print('Learning finished')

Epoch: 0001 cost = 0.746400595
Epoch: 0002 cost = 0.281020582
Epoch: 0003 cost = 0.218953595
Epoch: 0004 cost = 0.179661870
Epoch: 0005 cost = 0.154709116
Epoch: 0006 cost = 0.135046378
Epoch: 0007 cost = 0.123610385
Epoch: 0008 cost = 0.111430608
Epoch: 0009 cost = 0.101981118
Epoch: 0010 cost = 0.096400365
Epoch: 0011 cost = 0.090471856
Epoch: 0012 cost = 0.085287340
Epoch: 0013 cost = 0.079083249
Epoch: 0014 cost = 0.075178340
Epoch: 0015 cost = 0.071025617
Epoch: 0016 cost = 0.066618495
Epoch: 0017 cost = 0.064508028
Epoch: 0018 cost = 0.061680585
Epoch: 0019 cost = 0.058119863
Epoch: 0020 cost = 0.054921046
Learning finished


In [17]:
# Test model and check accuracy
model.eval()    # set the model to evaluation mode
# Send each batch to whichever device the model's parameters live on instead
# of assuming CUDA.
device = next(model.parameters()).device
n_correct = 0   # plain int, so the cell also works if the loader yields no batches
with torch.no_grad():    # gradients are not needed for inference
    for data, target in test_loader:
        # flatten each image to a 784-vector, as in training
        X_test = data.view(-1, 28 * 28).to(device)
        Y_test = target.to(device)

        prediction = model(X_test)
        # predicted class = index of the largest output score
        correct_prediction = torch.argmax(prediction, 1) == Y_test
        n_correct += correct_prediction.sum().item()

accuracy = n_correct / len(test_loader.dataset)
print('Accuracy:', accuracy)

Accuracy: 0.9709
