<a href="https://colab.research.google.com/github/Steve-YJ/Assignment_Standalone_DL/blob/master/%5BPractice%5D_Cifar10_Refactoring_Optimize_Clean_Up!.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lec3. Practice
# CIFAR-10 Classifier: Hyperparameter Tunning!!
* Training with PyTorch
* 20.09.04.Fri. pm 9:00
* Review >>> 20.09.07.mon. pm 6:25
* Again >>> 20.09.22.Ture. pm 4:00
<br><br>
* Reference: https://github.com/heartcored98/Standalone-DeepLearning/blob/master/Lec3/Lab4_write_pretty_DL_code.ipynb


> <code>학습 Flow</code> -20.09.16.wed-<br>
> Import Library<br> 
> Data Preparation<br>
> Model Architecture<br>
> Define Exp<br>
> Experiment!<br>


# Import Library

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import argparse
import numpy as np

## Data Preparation

* Train, Val, Test Loader를 따로 구현한다

In [2]:
transforms = transforms.Compose([
                                 transforms.ToTensor(),
                                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize a tensor image with mean and standard deviation
     ])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transforms)
trainset, valset = torch.utils.data.random_split(trainset, [40000, 10000])
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transforms)

# what is partition?
partition = {'train': trainset, 'val':valset, 'test':testset}

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


## Inspect Dataset

In [3]:
import matplotlib.pyplot as plt
import numpy as np

# functions to show an image


def imshow(img):
    img = img / 2 + 0.5     # unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()


# get some random training images
dataiter = iter(trainloader)
images, labels = dataiter.next()

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(4)))

print(type(images), images.shape)
print(type(labels), labels.shape, labels)  # 각각의 클래스



NameError: ignored

# Model Architecture
* dropout, batch-norm, xavier_initialization등을 추가해준다

In [3]:
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self, in_dim, out_dim, hid_dim, n_layer, act, dropout, use_bn, use_xavier):
        super(MLP, self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.hid_dim = hid_dim
        self.n_layer = n_layer
        self.act = act
        # add dropout, use_bn, use_xavier
        self.dropout = dropout
        self.use_bn = use_bn
        self.use_xavier = use_xavier
        
        # ===== Create Linear Layers ==== #
        self.fc1 = nn.Linear(self.in_dim, self.hid_dim)
        self.linears = nn.ModuleList()  # 여러개의 hid_layer 구성. nn.ModuleList()
        self.bns = nn.ModuleList()
        for i in range(self.n_layer-1):
            self.linears.append(nn.Linear(self.hid_dim, self.hid_dim))
            if self.use_bn:
                self.bns.append(nn.BatchNorm1d(self.hid_dim))  # Applies Batch Normalization over a 2D or 3D input (a mini-batch of 1D inputs with optional additional channel dimension) as described in the paper

        self.fc2 = nn.Linear(self.hid_dim, self.out_dim)

        # ===== Create Activation Funciton ====== #
        if self.act == 'relu':
            self.act = nn.ReLU() 
        elif self.act == 'tanh':
            self.act = nn.Tanh()
        elif self.act == 'sigmoid':
            self.act = nn.Sigmoid()
        else:
            raise ValueError('no valid activation function selected!')

        # ===== Create Regularization Layer ===== #
        self.dropout = nn.Dropout(self.dropout)
        if self.use_xavier:
            self.xavier_init()

    def forward(self, x):
        x = self.act(self.fc1(x))
        for i in range(len(self.linears)):
            x = self.act(self.linears[i](x))
            x = self.bns[i](x)
            x = self.dropout(x)
        x = self.fc2(x)
        return x

    def xavier_init(self):
        for linear in self.linears:
            nn.init.xavier_normal_(linear.weight)
            linear.bias.data.fill_(0.01)

net = MLP(3072, 10, 100, 4, 'relu', 0.1, True, True)
print(net)

MLP(
  (fc1): Linear(in_features=3072, out_features=100, bias=True)
  (linears): ModuleList(
    (0): Linear(in_features=100, out_features=100, bias=True)
    (1): Linear(in_features=100, out_features=100, bias=True)
    (2): Linear(in_features=100, out_features=100, bias=True)
  )
  (bns): ModuleList(
    (0): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (fc2): Linear(in_features=100, out_features=10, bias=True)
  (act): ReLU()
  (dropout): Dropout(p=0.1, inplace=False)
)


# Train, Validate, Test and Experiment

In [4]:
def train(net, partition, optimizer, criterion, args):
    # train 함수 내에 trainloader 정의
    trainloader = torch.utils.data.DataLoader(partition['train'],
                                              batch_size = args.train_batch_size,
                                              shuffle=True,
                                              num_workers=2)
    
    net.train()
    optimizer.zero_grad()

    correct = 0
    total = 0
    train_loss = 0.0

    for i, data in enumerate(trainloader, 0):  # data loader를 돌면서...!
        # get the inputs
        inputs, labels = data
        inputs = inputs.view(-1, 3072)  # 입력값의 형상(shape)을 입력 레이어의 형상과 같게 만들어준다
        inputs = inputs.cuda()  # GPU에 데이터를 올려준다
        labels = labels.cuda()
        outputs = net(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        # print('print predicted: ', predicted.shape, predicted)
        # print('ptin labels: ', labels)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = train_loss / len(trainloader)
    train_acc = 100 * correct / total
    return net, train_loss, train_acc

In [9]:
def validate(net, partition, criterion, args):
    valloader = torch.utils.data.DataLoader(partition['val'],
                                            batch_size=args.test_batch_size,
                                            shuffle=False,
                                            num_workers=2)
    net.eval()

    correct = 0
    total = 0
    val_loss = 0

    with torch.no_grad():
        for data in valloader:
            images, labels = data
            images = images.view(-1, 3072)
            images = images.cuda()
            labels = labels.cuda()

            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        val_loss = val_loss / len(valloader)
        val_acc = 100 * correct / total
    return val_loss, val_acc

In [13]:
def test(net, partition, args):
    testloader = torch.utils.data.DataLoader(partition['test'],
                                            batch_size=args.test_batch_size,
                                            shuffle=False,
                                            num_workers=2)
    net.eval()

    correct = 0
    total = 0
    test_loss = 0
    with torch.no_grad():
        for data, labels in testloader:
            images = data
            images = images.view(-1, 3072)
            images = images.cuda()
            labels = labels.cuda()
            outputs = net(images)

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        test_acc = 100 * correct / total
    return test_loss, test_acc

In [7]:
def experiment(partition, args):
    import time
    net = MLP(args.in_dim, args.out_dim, args.hid_dim, args.n_layer, args.act, args.dropout, args.use_bn, args.use_xavier)
    net.cuda()

    criterion = nn.CrossEntropyLoss()
    if args.optim == 'SGD':
        optimizer = optim.SGD(net.parameters(), lr=args.lr, weight_decay=args.l2)
    elif args.optim == 'RMSprop':
        optimizer = optim.RMSprop(net.parameters(), lr=args.lr, weight_decay=args.l2)
    elif args.optim == 'Adam':
        optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.l2)
    else:
        raise ValueError('In-valid optimizer choice')

    for epoch in range(args.epoch):  # loop over the dataset multiple times
        ts = time.time()
        net, train_loss, train_acc = train(net, partition, optimizer, criterion, args)
        val_loss, val_acc = validate(net, partition, criterion, args)
        te = time.time()
        print('Epoch {}, Acc(train/val): {:2.2f}/{:2.2f}, Loss(train/val) {:2.2f}/{:2.2f}, Took {:2.2f} sec'.format(epoch, train_acc, val_acc, train_loss, val_loss, te-ts))
    test_acc = test(net, partition, args)
    return train_loss, val_loss, train_acc, val_acc, test_acc

# Experiment

In [None]:
import argparse

# ===== Random Seed initialization ===== #
seed = 123
np.random.seed(seed)
torch.manual_seed(seed)

parser = argparse.ArgumentParser()
args = parser.parse_args("")

# ===== Model Capacity ===== #
args.in_dim = 3072
args.out_dim = 10
args.hid_dim = 100
args.act = 'relu'

# ===== Regularization ===== #
args.dropout = 0.1
args.use_bn = True
args.l2 = 0.00001
args.use_xavier = True

# ===== Opitmizer & Training ===== #
args.optim = 'SGD'  # RMSprop, SGD, RMSprop, ADAM....
args.lr = 0.00005
args.epoch = 20

args.train_batch_size = 256
args.test_batch_size = 1024

# ===== Experiment Variable ===== #
name_var1 = 'n_layer'
name_var2 = 'hid_dim'
list_var1 = [3, 5, 10, 20, 50, 100]
list_var2 = [300, 500, 700, 1000]

for var1 in list_var1:
    for var2 in list_var2:
        setattr(args, name_var1, var1)
        setattr(args, name_var2, var2)
        print(args)
        result = experiment(partition, args)

Namespace(act='relu', dropout=0.1, epoch=20, hid_dim=300, in_dim=3072, l2=1e-05, lr=5e-05, n_layer=3, optim='SGD', out_dim=10, test_batch_size=1024, train_batch_size=256, use_bn=True, use_xavier=True)
Epoch 0, Acc(train/val): 25.57/35.13, Loss(train/val) 2.08/0.00, Took 5.99 sec
Epoch 1, Acc(train/val): 36.35/39.99, Loss(train/val) 1.80/0.00, Took 5.95 sec
Epoch 2, Acc(train/val): 39.95/42.07, Loss(train/val) 1.70/0.00, Took 5.97 sec
Epoch 3, Acc(train/val): 42.40/43.29, Loss(train/val) 1.64/0.00, Took 5.97 sec
Epoch 4, Acc(train/val): 44.38/44.08, Loss(train/val) 1.58/0.00, Took 5.91 sec
Epoch 5, Acc(train/val): 46.01/45.90, Loss(train/val) 1.54/0.00, Took 5.98 sec
Epoch 6, Acc(train/val): 47.54/46.25, Loss(train/val) 1.50/0.00, Took 5.94 sec
Epoch 7, Acc(train/val): 49.10/46.78, Loss(train/val) 1.46/0.00, Took 5.95 sec
Epoch 8, Acc(train/val): 49.98/47.29, Loss(train/val) 1.43/0.00, Took 5.96 sec
Epoch 9, Acc(train/val): 51.03/47.86, Loss(train/val) 1.40/0.00, Took 6.07 sec
Epoch 10,