이 [링크](https://colab.research.google.com/drive/1ZG5eD25M8ZI-uKn4jY1olZFMzCLGQU9s)로 들어가 파일->드라이브에 사본 저장으로 드라이브에 똑같은 주피터 노트북을 복사해주세요.

### 1. Importing required packages and fixing random seed

In [0]:
import torch.nn as nn
import torch.optim
import torch.utils.data
import torchvision
import os
import argparse
import random
import numpy as np
import matplotlib.pyplot as plt
from copy import deepcopy
import time
from itertools import product

import zlib

# Derive the seed from a stable checksum of the tag string. The built-in
# str hash is salted per interpreter process (see PYTHONHASHSEED), so it
# would yield a different "fixed" seed in every session.
manual_seed = zlib.crc32("aiming".encode("utf-8")) % (2 ** 32)
print("Random Seed: ", manual_seed)
random.seed(manual_seed)
torch.manual_seed(manual_seed)
np.random.seed(manual_seed)

# Directory where experiment results are written. Unlike the `!mkdir`
# shell escape this is plain Python and is a no-op when the directory
# already exists.
os.makedirs("results", exist_ok=True)

### 2. Downloading Dataset
For the first example, we will use the MNIST dataset. It is an image dataset consisting of handwritten digits, 0-9. Each sample is a $28\times 28$ pixel grayscale image.

In [0]:
# Preprocessing applied to every image: conversion to a tensor followed by
# a Normalize step configured with mean 0 and std 1.
# Need to add normalize
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0,), (1,)),
])

# Training split. This contains total 60,000 data.
trainset = torchvision.datasets.MNIST(
    root='../data', train=True, download=True, transform=transform)
# Test split. This contains total 10,000 data. You SHOULD NOT train with these data.
testset = torchvision.datasets.MNIST(
    root='../data', train=False, download=True, transform=transform)

# Hold out 10,000 of the training samples as a validation set.
trainset, valset = torch.utils.data.random_split(trainset, [50000, 10000])
partition = dict(train=trainset, val=valset, test=testset)

# Report the size of each split, one number per line.
print(*(len(partition[name]) for name in ('train', 'val', 'test')), sep='\n')

### 3. Model Construction

In [0]:
class MLP(nn.Module):
    """Fully connected classifier with optional batch norm and dropout.

    The network consists of an ``in_dim -> hid_dim`` layer, followed by
    ``n_layer - 1`` layers of size ``hid_dim -> hid_dim`` and a final
    ``hid_dim -> out_dim`` layer. Every hidden layer is ordered
    Linear -> BatchNorm -> activation -> Dropout.

    Args:
        in_dim: Number of input features; inputs are reshaped to
            ``(-1, in_dim)`` in ``forward``.
        out_dim: Number of output units.
        hid_dim: Width of every hidden layer.
        n_layer: Number of hidden layers, counting the first one.
        act: Activation name, one of "sigmoid", "tanh" or "relu"
            (case-insensitive).
        dropout_rate: Dropout probability; 0.0 disables dropout.
        batchnorm: Whether batch normalization follows each hidden linear
            layer.
        init: "normal" or "uniform" selects Kaiming (for ReLU) or Xavier
            (otherwise) initialization of the hidden linear layers. Any
            other value keeps the default initialization of ``nn.Linear``.

    Raises:
        ValueError: If ``act`` is not a supported activation name.
    """

    def __init__(self, in_dim, out_dim, hid_dim, n_layer, act, dropout_rate, batchnorm, init):
        super().__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.hid_dim = hid_dim
        self.n_layer = n_layer
        self.act = act
        self.dropout_rate = dropout_rate
        self.batchnorm = batchnorm
        self.init = init

        # ====== Create Linear Layers ====== #
        self.lin_first = nn.Linear(self.in_dim, self.hid_dim)
        self.linears = nn.ModuleList()
        for _ in range(self.n_layer - 1):
            self.linears.append(nn.Linear(self.hid_dim, self.hid_dim))
        self.lin_last = nn.Linear(self.hid_dim, self.out_dim)

        # ====== Create Activation Function ====== #
        if self.act.lower() == "sigmoid":
            self.activation = nn.Sigmoid()
        elif self.act.lower() == "tanh":
            self.activation = nn.Tanh()
        elif self.act.lower() == "relu":
            self.activation = nn.ReLU()
        else:
            raise ValueError("Not a valid activation function argument")

        # ====== Create BatchNorm, Dropout Layers ====== #
        self.bn_first = nn.BatchNorm1d(self.hid_dim)
        self.batchnorms = nn.ModuleList()
        if self.batchnorm:
            for _ in range(self.n_layer - 1):
                self.batchnorms.append(nn.BatchNorm1d(self.hid_dim))

        self.dropout = nn.Dropout(self.dropout_rate)

        # ====== Initialize Weights ====== #
        if self.init == "normal":
            if self.act.lower() == "relu":
                self.weight_init(nn.init.kaiming_normal_)
            else:
                self.weight_init(nn.init.xavier_normal_)
        elif self.init == "uniform":
            if self.act.lower() == "relu":
                self.weight_init(nn.init.kaiming_uniform_)
            else:
                self.weight_init(nn.init.xavier_uniform_)

    def forward(self, x, is_train=None):
        """Compute the network output for a batch.

        Args:
            x: Input tensor; it is reshaped to ``(-1, in_dim)``.
            is_train: Whether dropout should be applied. When omitted, the
                module's own mode (``self.training``, toggled by
                ``train()`` / ``eval()``) decides, so ``net(x)`` works.

        Returns:
            Tensor produced by the last linear layer (no activation applied).
        """
        # Ordering : FC -> BatchNorm -> act -> dropout
        # I referred this stackoverflow answer for ordering.
        # https://stackoverflow.com/a/40295999
        if is_train is None:
            is_train = self.training
        use_dropout = self.dropout_rate != 0.0 and is_train

        x = x.view(-1, self.in_dim)
        x = self.lin_first(x)
        if self.batchnorm:
            x = self.bn_first(x)
        x = self.activation(x)
        if use_dropout:
            x = self.dropout(x)
        for i, linear in enumerate(self.linears):
            # Each hidden block starts with its own linear layer.
            x = linear(x)
            if self.batchnorm:
                x = self.batchnorms[i](x)
            x = self.activation(x)
            if use_dropout:
                x = self.dropout(x)
        return self.lin_last(x)

    def weight_init(self, initializer):
        """Apply ``initializer`` to the hidden linear weights and zero their biases.

        Only ``lin_first`` and the layers in ``linears`` are touched;
        ``lin_last`` keeps its default initialization.
        """
        initializer(self.lin_first.weight)
        self.lin_first.bias.data.fill_(0.0)
        for linear in self.linears:
            initializer(linear.weight)
            linear.bias.data.fill_(0.0)

# Build one sample network to check that the constructor runs.
test_net = MLP(
    in_dim=784,
    out_dim=10,
    hid_dim=300,
    n_layer=3,
    act="relu",
    dropout_rate=0.2,
    batchnorm=True,
    init="normal",
)

### 4. Defining Hyperparameters

In [0]:
# An argparse Namespace is used as a plain container for hyperparameters.
# Parsing an explicit empty argument list means the notebook kernel's own
# command line is never consulted.
parser = argparse.ArgumentParser()
args = parser.parse_args([])

# Training related
args.batch_size = 128
args.test_batch_size = 1000
args.learning_rate = .01
args.epoch = 5
args.weight_decay = 0.1

# Model related
args.n_layer = 3
args.hid_dim = 100
args.act = "relu"

# Fixed parameter
args.in_dim = 784
args.out_dim = 10

# Regularization related (these belong on `args`; there is no `self` at
# module level)
args.dropout_rate = 0.1
args.batchnorm = True
args.init = "normal"

print(args)

### 5. Defining helper function

In [0]:
def train(net, partition, optimizer, criterion, args):
    """Run one training epoch over ``partition['train']``.

    Args:
        net: Model to train; it is called as ``net(inputs)``.
        partition: Mapping that holds the training dataset under 'train'.
        optimizer: Optimizer updating the parameters of ``net``.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        args: Namespace providing ``batch_size`` and ``in_dim``. An
            optional ``num_workers`` attribute overrides the default of 2
            data-loading worker processes.

    Returns:
        Tuple ``(net, train_loss, train_acc)`` where ``train_loss`` is the
        mean of the per-batch losses and ``train_acc`` is the accuracy in
        percent.
    """
    trainloader = torch.utils.data.DataLoader(
        partition['train'],
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=getattr(args, 'num_workers', 2))
    net.train()

    correct = 0
    total = 0
    train_loss = 0.0
    for inputs, labels in trainloader:
        inputs = inputs.view(-1, args.in_dim)

        # Gradients must be cleared for every batch; otherwise backward()
        # keeps adding to the gradients of all previous batches.
        optimizer.zero_grad()
        outputs = net(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.detach(), 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_loss = train_loss / len(trainloader)
    train_acc = 100 * correct / total
    return net, train_loss, train_acc

In [0]:
def validate(net, partition, criterion, args):
    """Evaluate ``net`` on ``partition['val']`` without computing gradients.

    Args:
        net: Model to evaluate; it is switched to eval mode and called as
            ``net(images)``.
        partition: Mapping that holds the validation dataset under 'val'.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        args: Namespace providing ``test_batch_size`` and ``in_dim``. An
            optional ``num_workers`` attribute overrides the default of 2
            data-loading worker processes.

    Returns:
        Tuple ``(val_loss, val_acc)``: the mean of the per-batch losses and
        the accuracy in percent.
    """
    valloader = torch.utils.data.DataLoader(
        partition['val'],
        batch_size=args.test_batch_size,
        shuffle=False,
        num_workers=getattr(args, 'num_workers', 2))
    net.eval()

    correct = 0
    total = 0
    val_loss = 0
    with torch.no_grad():
        for images, labels in valloader:
            images = images.view(-1, args.in_dim)
            outputs = net(images)

            val_loss += criterion(outputs, labels).item()
            # Index of the largest output per row is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        val_loss = val_loss / len(valloader)
        val_acc = 100 * correct / total
    return val_loss, val_acc

In [0]:
def test(net, partition, args):
    testloader = torch.utils.data.DataLoader(partition['test'], 
                                             batch_size=args.test_batch_size, 
                                             shuffle=False, num_workers=2)
    net.eval()
    
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = images.view(-1, args.in_dim)

            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        test_acc = 100 * correct / total
    return test_acc

In [0]:
def experiment(partition, args):
    """Train a fresh MLP for ``args.epoch`` epochs, then test it.

    Args:
        partition: Mapping with the datasets under 'train', 'val' and 'test'.
        args: Namespace holding the model and training hyperparameters.

    Returns:
        Tuple ``(vars(args), result)``. ``result`` maps 'train_losses',
        'train_accs', 'val_losses' and 'val_accs' to per-epoch lists and
        'test_acc' to the final test accuracy.
    """
    model = MLP(args.in_dim, args.out_dim, args.hid_dim, args.n_layer,
                args.act, args.dropout_rate, args.batchnorm, args.init)
    loss_fn = nn.CrossEntropyLoss()
    sgd = torch.optim.SGD(model.parameters(),
                          lr=args.learning_rate,
                          weight_decay=args.weight_decay)

    print(args)

    # Per-epoch curves; the key order is the order written to disk later.
    history = {
        'train_losses': [],
        'train_accs': [],
        'val_losses': [],
        'val_accs': [],
    }
    report = 'Epoch {}, Acc(train/val) : {:2.2f}/{:2.2f}, Loss(train/val): {:2.2f}/{:2.2f}. Took {:2.2f} sec.'

    for epoch in range(args.epoch):
        started = time.time()
        model, train_loss, train_acc = train(model, partition, sgd, loss_fn, args)
        val_loss, val_acc = validate(model, partition, loss_fn, args)
        finished = time.time()

        print(report.format(epoch, train_acc, val_acc, train_loss, val_loss, finished - started))

        history['train_losses'].append(train_loss)
        history['val_losses'].append(val_loss)
        history['train_accs'].append(train_acc)
        history['val_accs'].append(val_acc)

    # The test set is evaluated once, after all epochs are done.
    history['test_acc'] = test(model, partition, args)

    return vars(args), history

In [0]:
import hashlib
import json
from os import listdir
from os.path import isfile, join
import pandas as pd

def save_exp_result(setting, result):
    """Write one experiment's settings and results to ``./results`` as JSON.

    The file is named ``<exp_name>-<hash>.json`` where the hash is the
    first six hex digits of the SHA-1 of the settings, ignoring 'epoch'
    and 'test_batch_size'. Those two keys are also left out of the file.
    Neither ``setting`` nor ``result`` is modified.

    Args:
        setting: Dict of hyperparameters; must contain 'exp_name'.
        result: Dict of JSON-serializable results.

    Raises:
        KeyError: If ``setting`` has no 'exp_name' entry.
    """
    exp_name = setting['exp_name']
    # Work on a filtered copy so the caller's dict keeps all of its keys.
    excluded = ('epoch', 'test_batch_size')
    identity = {key: value for key, value in setting.items() if key not in excluded}

    hash_key = hashlib.sha1(str(identity).encode()).hexdigest()[:6]
    os.makedirs('./results', exist_ok=True)
    filename = './results/{}-{}.json'.format(exp_name, hash_key)
    # Settings override results on key clashes, as dict.update would.
    record = {**result, **identity}
    with open(filename, 'w') as f:
        json.dump(record, f)
    
def load_exp_result(exp_name):
    """Load all saved results of one experiment into a DataFrame.

    Only files in ``./results`` named ``<exp_name>-<suffix>.json`` are
    read, so loading "exp1" does not pick up files of "exp10". Files are
    read in sorted name order.

    Args:
        exp_name: Experiment name used when the results were saved.

    Returns:
        DataFrame with one row per result file; empty when nothing matches.
    """
    dir_path = './results'
    extension = '.json'
    list_result = []
    for filename in sorted(listdir(dir_path)):
        path = join(dir_path, filename)
        if not (isfile(path) and filename.endswith(extension)):
            continue
        # Everything before the last '-' of the stem is the experiment name.
        stem = filename[:-len(extension)]
        if stem.rpartition('-')[0] != exp_name:
            continue
        with open(path, 'r') as infile:
            list_result.append(json.load(infile))
    return pd.DataFrame(list_result)

In [0]:
from google.colab import files

def download_local(exp_name):
    """Send the saved result files of an experiment to the local machine.

    Every regular file in ``./results`` whose name contains both
    ``exp_name`` and '.json' is passed to ``files.download``.

    Args:
        exp_name: Text that must appear in the file name.
    """
    dir_path = './results'

    def _is_wanted(entry):
        return isfile(join(dir_path, entry)) and exp_name in entry and '.json' in entry

    # Collect the matches first, then trigger the downloads.
    selected = list(filter(_is_wanted, listdir(dir_path)))
    for entry in selected:
        files.download(dir_path + "/" + entry)

In [0]:
def data_to_dict(args, name_var1, name_var2, list_var1, list_var2):
    """Collect the saved loss and accuracy curves for a grid of settings.

    Args:
        args: Namespace providing ``exp_name`` and ``epoch``.
        name_var1: Column name of the first varied hyperparameter.
        name_var2: Column name of the second varied hyperparameter.
        list_var1: Values of the first hyperparameter to look up.
        list_var2: Values of the second hyperparameter to look up.

    Returns:
        Tuple ``(loss_dict, acc_dict)``. Both map
        ``(var1, var2, 'train' | 'val')`` to a pair of the epoch indices
        ``0..args.epoch-1`` and the list of curves from the matching rows.
    """
    df = load_exp_result(args.exp_name)
    loss_dict = {}
    acc_dict = {}

    # (split label, loss column, accuracy column)
    columns = (
        ('train', 'train_losses', 'train_accs'),
        ('val', 'val_losses', 'val_accs'),
    )

    for var1 in list_var1:
        for var2 in list_var2:
            # Rows whose two hyperparameter columns match this grid cell.
            matches = df.loc[(df[name_var1] == var1) & (df[name_var2] == var2)]
            for split, loss_col, acc_col in columns:
                key = (var1, var2, split)
                loss_dict[key] = (list(range(args.epoch)), list(matches[loss_col]))
                acc_dict[key] = (list(range(args.epoch)), list(matches[acc_col]))

    return loss_dict, acc_dict

In [0]:
def dict_to_plot(args, data_dict, name_var1, name_var2, list_var1, list_var2):
    """Plot train (blue) and validation (red) curves on a grid of subplots.

    Rows correspond to ``list_var1`` and columns to ``list_var2``. For each
    cell the first stored curve of ``data_dict[(var1, var2, split)]`` is
    drawn against the epoch indices.

    Args:
        args: Namespace providing ``epoch``.
        data_dict: Mapping from ``(var1, var2, 'train' | 'val')`` to a
            pair whose second item is a list of curves.
        name_var1: Name shown on the y-label of the middle row.
        name_var2: Name shown on the x-label of the middle column.
        list_var1: Values of the first hyperparameter (one per row).
        list_var2: Values of the second hyperparameter (one per column).

    Returns:
        Tuple ``(f, axes)``: the figure and a 2-D array of its axes.
    """
    n_rows, n_cols = len(list_var1), len(list_var2)
    # squeeze=False keeps `axes` two-dimensional even with a single row or
    # column, so axes[i][j] is always valid.
    f, axes = plt.subplots(n_rows, n_cols, sharey=True, squeeze=False)
    epochs = list(range(args.epoch))

    for i, var1 in enumerate(list_var1):
        for j, var2 in enumerate(list_var2):
            axes[i][j].plot(epochs, data_dict[var1, var2, 'train'][1][0], '-b')
            axes[i][j].plot(epochs, data_dict[var1, var2, 'val'][1][0], '-r')

    # Label the first column with the row values; the middle row also
    # carries the hyperparameter name.
    for i, var1 in enumerate(list_var1):
        if n_rows // 2 == i:
            axes[i][0].set_ylabel(name_var1 + "\n" + str(var1))
        else:
            axes[i][0].set_ylabel(str(var1))
    # Label the last row with the column values; the middle column also
    # carries the hyperparameter name.
    for j, var2 in enumerate(list_var2):
        if n_cols // 2 == j:
            axes[n_rows - 1][j].set_xlabel(str(var2) + "\n" + name_var2)
        else:
            axes[n_rows - 1][j].set_xlabel(str(var2))
    return f, axes

### 6. Experiment

In [0]:
# Name under which every run of this grid is saved.
args.exp_name = "exp1"

# The two hyperparameters to vary and the values to try for each.
name_var1, name_var2 = "n_layer", "hid_dim"
list_var1, list_var2 = [1, 2], [100, 200]

for value1, value2 in product(list_var1, list_var2):
    setattr(args, name_var1, value1)
    setattr(args, name_var2, value2)
    # Each run receives its own copy of the hyperparameters.
    setting, result = experiment(partition, deepcopy(args))
    save_exp_result(setting, result)

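# download_local("exp1")
# loss_dict, acc_dict = data_to_dict(args, name_var1, name_var2, list_var1, list_var2)
# f1, axes1 = dict_to_plot(args, loss_dict, name_var1, name_var2, list_var1, list_var2)
# f2, axes2 = dict_to_plot(args, acc_dict, name_var1, name_var2, list_var1, list_var2)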