Dacon 월간 데이콘 1 반도체 박막 두께 분석
Overfit
2020년 02월 03일 (제출날짜)
모델링 코드 작성방법
A 코드 관련

1) 입상자는 코드 제출 필수. 제출 코드는 예측 결과를 리더보드 점수로 복원할 수 있어야 함

2) 코드 제출 시 확장자는 R user의 경우 .R 또는 .rmd, Python user의 경우 .py 또는 .ipynb

3) 코드에 ‘/data’ 데이터 입/출력 경로 포함 제출 or R의 경우 setwd(" "), python의 경우 os.chdir을 활용하여 경로 통일

4) 전체 프로세스를 일목요연하게 정리하여 주석을 포함하여 하나의 파일로 제출

5) 모든 코드는 오류 없이 실행되어야 함(라이브러리 로딩 코드 포함되어야 함).

6) 코드와 주석의 인코딩은 모두 UTF-8을 사용하여야 함

B 외부 데이터 관련

1) 외부 공공 데이터 (날씨 정보 등) 사용이 가능하나, 코드 제출 시 함께 제출

2) 공공 데이터 외의 외부 데이터는 법적인 제약이 없는 경우에만 사용 가능

3) 외부 데이터를 크롤링할 경우, 크롤링 코드도 함께 제출

1. 라이브러리 및 데이터
Library & Data

In [None]:
# 시스템 환경은 다음과 같음.

# OS : Ubuntu 18.04.3 LTS
# CPU : Intel(R) Xeon(R) CPU @ 2.30GHz (4 CPUs.)
# GPU : Nvidia Tesla T4 (1 GPU.)

# Python Version : 3.7.6
# Nvidia Driver Version : 440.33.01
# CUDA Version (PyTorch) : 10.1.243
# cuDNN Version (PyTorch) : 7.6.3

In [None]:
import time
import random
import numpy as np  # 1.18.1
from numpy.random import shuffle
import pandas as pd  # 0.25.3
import torch  # 1.4.0
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset
from torch.utils.data.sampler import Sampler, SequentialSampler
from torch.backends import cudnn

#실행 파일과 같은 directory에 'train.csv'와 'test.csv'를 포함하는 'Data' 폴더 필요함.

2. 데이터 전처리
Data Cleansing & Pre-Processing

In [None]:
# 주어진 데이터를 전부 활용하였으며 전처리 하지 않음.

3. 탐색적 자료분석
Exploratory Data Analysis


In [None]:
# pandas를 이용하여 correlation matrix를 계산하고 시각화함.
# (예측 결과를 복원하는 것과는 무관하므로 code는 생략함.)

4. 변수 선택 및 모델 구축
Feature Engineering & Initial Modeling

In [None]:
# fully-connected NN(layer 5개 미만)을 이용한 초기 실험에서 under-fitting되는 문제를 확인함.
# depth를 키운 fully-connected NN을 기본적으로 이용하였고 이를 ensemble하여 성능을 개선함.
# (예측 결과를 복원하는 것과는 무관하므로 code는 생략함.)

5. 모델 학습 및 검증
Model Tuning & Evaluation

In [None]:
# 학습 부분에 대한 code 설명은 다음과 같음.

# ContinuousBatchSampler는 mini-batch의 크기가 항상 일정하게 유지되도록 하는 BatchSampler임.
# 대회 목표와 같은 L1Loss를 이용함.
# SGD optimizer를 이용함.
# CosineAnnealingWarmRestarts를 이용하여 learning rate를 조절함.

In [None]:
# 학습 1

# Make the run repeatable: pin cuDNN to deterministic kernels and switch off
# its autotuner, then seed every random number generator used below.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)


class ContinuousBatchSampler(Sampler):
    """Batch sampler that carries an epoch's leftover indices into the next one.

    Each pass over the sampler shuffles its indices (with
    ``numpy.random.shuffle``) and yields them as lists of ``batch_size``
    indices.  Indices that do not fill a whole batch at the end of a pass are
    either remembered and placed at the head of the next pass's first batch
    (``drop_last=False``) or discarded (``drop_last=True``).

    Args:
        sampler: iterable of dataset indices; must also support ``len()``.
        batch_size: number of indices per batch.
        drop_last: if true, the trailing partial batch is discarded instead
            of being carried over to the next pass.
    """

    def __init__(self, sampler, batch_size, drop_last):
        self.sampler = sampler
        self.batch_size = batch_size
        self.drop_last = drop_last
        # Tail of the previous pass that did not fill a complete batch.
        self.from_last_epoch = []

    def __iter__(self):
        carried = self.from_last_epoch
        carried_set = set(carried)

        # Indices not already carried over, in a fresh random order.
        fresh = sorted(set(self.sampler) - carried_set)
        shuffle(fresh)

        # The first batch starts with the carried indices and is topped up
        # with fresh ones.
        n_fill = self.batch_size - len(carried)
        yield carried + fresh[:n_fill]

        # Everything else, including the carried indices once more, is
        # reshuffled and emitted in full batches.
        pending = sorted(fresh[n_fill:] + list(carried_set))
        shuffle(pending)

        chunk = []
        for index in pending:
            chunk.append(index)
            if len(chunk) != self.batch_size:
                continue
            yield chunk
            chunk = []

        if not self.drop_last:
            self.from_last_epoch = chunk.copy()

    def __len__(self):
        available = len(self.sampler)
        if not self.drop_last:
            available += len(self.from_last_epoch)
        return available // self.batch_size


# ---------------------------------------------------------------------------
# Run 1: ensemble of 20 fully-connected networks.
# Each network applies BatchNorm + ReLU to the raw 226 inputs, followed by six
# hidden Linear layers of width 768 and a 4-unit linear output layer.
# ---------------------------------------------------------------------------
device = torch.device('cuda:0')
num_epochs = 1000
batch_size = 2048
initial_learning_rate = 0.2
loader_params = {'num_workers': 8, 'pin_memory': True}

# Columns 0-3 of train.csv are used as the four regression targets and the
# remaining columns as the network inputs.
train_data = np.array(pd.read_csv('./Data/train.csv'), dtype=np.float32)
X_train = torch.tensor(train_data[:, 4:], dtype=torch.float32)
y_train = torch.tensor(train_data[:, :4], dtype=torch.float32)

dataset = TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(dataset, batch_sampler=ContinuousBatchSampler(
    sampler=SequentialSampler(range(len(dataset))), batch_size=batch_size, drop_last=False), **loader_params)

# The test inputs are the same for every model, so they are read and moved to
# the device once, before any training starts (a missing file now fails fast).
# The first column of test.csv is dropped (presumably the sample id - confirm).
X_test = torch.tensor(np.array(pd.read_csv('./Data/test.csv'), dtype=np.float32))[:, 1:].to(device)

model_names = ['Model_{:02d}'.format(i) for i in range(1, 21)]
# Equal ensemble weights; evaluates to 0.05 for the 20 models of this run.
ensemble_weight = 1 / len(model_names)

prediction = np.zeros((X_test.shape[0], 4), dtype=np.float32)
logging_term = 1000
# Total number of optimisation steps per model, used only in the log line.
logging_total = int(len(dataset) * num_epochs / batch_size)

for model_no in model_names:

    # Modules are created in the same order as a hand-written Sequential, so
    # the weight initialisation consumes the RNG identically.
    layers = [nn.BatchNorm1d(226), nn.ReLU(), nn.Linear(226, 768)]
    for _ in range(5):
        layers += [nn.BatchNorm1d(768), nn.ReLU(), nn.Linear(768, 768)]
    layers += [nn.BatchNorm1d(768), nn.ReLU(), nn.Linear(768, 4)]
    net = nn.Sequential(*layers)

    model = net.to(device)
    running_loss = 0.
    running_counter = 0

    criterion = torch.nn.L1Loss()
    optimizer = optim.SGD(model.parameters(), lr=initial_learning_rate, momentum=0.9)
    # The scheduler is stepped once per epoch, so the cosine cycle restarts
    # every 250 epochs and anneals the learning rate down to 0.005.
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=250, T_mult=1, eta_min=0.005,
                                                               last_epoch=-1)

    for epoch in range(num_epochs):

        model.train()
        for xx, yy in train_loader:

            xx, yy = xx.to(device), yy.to(device)
            # Input augmentation: add zero-mean Gaussian noise (std 0.003).
            with torch.no_grad():
                xx += torch.randn(xx.shape, device=device) * 0.003

            out = model(xx)
            loss = criterion(out, yy)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_counter += 1
            running_loss += loss.item()

            if running_counter % logging_term == 0:
                print(model_no + ' (iter {:6d}/{:6d}) {:.4f}'.format(running_counter, logging_total,
                                                                     running_loss / logging_term))
                running_loss = 0.

        scheduler.step()

    # Inference needs no autograd graph; no_grad leaves the outputs unchanged
    # and avoids storing activations for the whole test set.
    model.eval()
    with torch.no_grad():
        output = model(X_test)
    output = output.cpu().numpy()
    prediction += output * ensemble_weight

# Intermediate result of this run; combined with the other runs later on.
prediction = pd.DataFrame({'id': np.arange(len(prediction), dtype=np.int32),
                           'layer_1': prediction[:, 0],
                           'layer_2': prediction[:, 1],
                           'layer_3': prediction[:, 2],
                           'layer_4': prediction[:, 3]})
prediction.to_csv('./prediction_result_{}.csv'.format(time.strftime('%y%m%d%H%M%S')), index=False)

In [None]:
# 학습 2

# Make the run repeatable: pin cuDNN to deterministic kernels and switch off
# its autotuner, then seed every random number generator used below.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)


class ContinuousBatchSampler(Sampler):
    """Batch sampler that carries an epoch's leftover indices into the next one.

    Each pass over the sampler shuffles its indices (with
    ``numpy.random.shuffle``) and yields them as lists of ``batch_size``
    indices.  Indices that do not fill a whole batch at the end of a pass are
    either remembered and placed at the head of the next pass's first batch
    (``drop_last=False``) or discarded (``drop_last=True``).

    Args:
        sampler: iterable of dataset indices; must also support ``len()``.
        batch_size: number of indices per batch.
        drop_last: if true, the trailing partial batch is discarded instead
            of being carried over to the next pass.
    """

    def __init__(self, sampler, batch_size, drop_last):
        self.sampler = sampler
        self.batch_size = batch_size
        self.drop_last = drop_last
        # Tail of the previous pass that did not fill a complete batch.
        self.from_last_epoch = []

    def __iter__(self):
        carried = self.from_last_epoch
        carried_set = set(carried)

        # Indices not already carried over, in a fresh random order.
        fresh = sorted(set(self.sampler) - carried_set)
        shuffle(fresh)

        # The first batch starts with the carried indices and is topped up
        # with fresh ones.
        n_fill = self.batch_size - len(carried)
        yield carried + fresh[:n_fill]

        # Everything else, including the carried indices once more, is
        # reshuffled and emitted in full batches.
        pending = sorted(fresh[n_fill:] + list(carried_set))
        shuffle(pending)

        chunk = []
        for index in pending:
            chunk.append(index)
            if len(chunk) != self.batch_size:
                continue
            yield chunk
            chunk = []

        if not self.drop_last:
            self.from_last_epoch = chunk.copy()

    def __len__(self):
        available = len(self.sampler)
        if not self.drop_last:
            available += len(self.from_last_epoch)
        return available // self.batch_size


# ---------------------------------------------------------------------------
# Run 2: ensemble of 7 fully-connected networks.
# Each network has eight Linear -> BatchNorm -> ReLU stages of width 768 and a
# 4-unit linear output layer; SGD uses a small weight decay.
# ---------------------------------------------------------------------------
device = torch.device('cuda:0')
num_epochs = 1000
batch_size = 2048
initial_learning_rate = 0.16
loader_params = {'num_workers': 8, 'pin_memory': True}

# Columns 0-3 of train.csv are used as the four regression targets and the
# remaining columns as the network inputs.
train_data = np.array(pd.read_csv('./Data/train.csv'), dtype=np.float32)
X_train = torch.tensor(train_data[:, 4:], dtype=torch.float32)
y_train = torch.tensor(train_data[:, :4], dtype=torch.float32)

dataset = TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(dataset, batch_sampler=ContinuousBatchSampler(
    sampler=SequentialSampler(range(len(dataset))), batch_size=batch_size, drop_last=False), **loader_params)

# The test inputs are the same for every model, so they are read and moved to
# the device once, before any training starts (a missing file now fails fast).
# The first column of test.csv is dropped (presumably the sample id - confirm).
X_test = torch.tensor(np.array(pd.read_csv('./Data/test.csv'), dtype=np.float32))[:, 1:].to(device)

model_names = ['Model_{:02d}'.format(i) for i in range(1, 8)]

prediction = np.zeros((X_test.shape[0], 4), dtype=np.float32)
logging_term = 1000
# Total number of optimisation steps per model, used only in the log line.
logging_total = int(len(dataset) * num_epochs / batch_size)

for model_no in model_names:

    # Modules are created in the same order as a hand-written Sequential, so
    # the weight initialisation consumes the RNG identically.
    layers = [nn.Linear(226, 768), nn.BatchNorm1d(768), nn.ReLU()]
    for _ in range(7):
        layers += [nn.Linear(768, 768), nn.BatchNorm1d(768), nn.ReLU()]
    layers.append(nn.Linear(768, 4))
    net = nn.Sequential(*layers)

    model = net.to(device)
    running_loss = 0.
    running_counter = 0

    criterion = torch.nn.L1Loss()
    optimizer = optim.SGD(model.parameters(), lr=initial_learning_rate, momentum=0.9, weight_decay=2e-6)
    # The scheduler is stepped once per epoch, so the cosine cycle restarts
    # every 250 epochs and anneals the learning rate down to 0.005.
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=250, T_mult=1, eta_min=0.005,
                                                               last_epoch=-1)

    for epoch in range(num_epochs):

        model.train()
        for xx, yy in train_loader:

            xx, yy = xx.to(device), yy.to(device)
            # Input augmentation: add zero-mean Gaussian noise (std 0.00325).
            with torch.no_grad():
                xx += torch.randn(xx.shape, device=device) * 0.00325

            out = model(xx)
            loss = criterion(out, yy)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_counter += 1
            running_loss += loss.item()

            if running_counter % logging_term == 0:
                print(model_no + ' (iter {:6d}/{:6d}) {:.4f}'.format(running_counter, logging_total,
                                                                     running_loss / logging_term))
                running_loss = 0.

        scheduler.step()

    # Inference needs no autograd graph; no_grad leaves the outputs unchanged
    # and avoids storing activations for the whole test set.
    model.eval()
    with torch.no_grad():
        output = model(X_test)
    output = output.cpu().numpy()
    # Equal ensemble weights: divide by the number of models (7 in this run).
    prediction += output / len(model_names)

# Intermediate result of this run; combined with the other runs later on.
prediction = pd.DataFrame({'id': np.arange(len(prediction), dtype=np.int32),
                           'layer_1': prediction[:, 0],
                           'layer_2': prediction[:, 1],
                           'layer_3': prediction[:, 2],
                           'layer_4': prediction[:, 3]})
prediction.to_csv('./prediction_result_{}.csv'.format(time.strftime('%y%m%d%H%M%S')), index=False)

In [None]:
# 학습 3

# Make the run repeatable: pin cuDNN to deterministic kernels and switch off
# its autotuner, then seed every random number generator used below.
# This run uses seed 1.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.manual_seed(1)
np.random.seed(1)
random.seed(1)


class ContinuousBatchSampler(Sampler):
    """Batch sampler that carries an epoch's leftover indices into the next one.

    Each pass over the sampler shuffles its indices (with
    ``numpy.random.shuffle``) and yields them as lists of ``batch_size``
    indices.  Indices that do not fill a whole batch at the end of a pass are
    either remembered and placed at the head of the next pass's first batch
    (``drop_last=False``) or discarded (``drop_last=True``).

    Args:
        sampler: iterable of dataset indices; must also support ``len()``.
        batch_size: number of indices per batch.
        drop_last: if true, the trailing partial batch is discarded instead
            of being carried over to the next pass.
    """

    def __init__(self, sampler, batch_size, drop_last):
        self.sampler = sampler
        self.batch_size = batch_size
        self.drop_last = drop_last
        # Tail of the previous pass that did not fill a complete batch.
        self.from_last_epoch = []

    def __iter__(self):
        carried = self.from_last_epoch
        carried_set = set(carried)

        # Indices not already carried over, in a fresh random order.
        fresh = sorted(set(self.sampler) - carried_set)
        shuffle(fresh)

        # The first batch starts with the carried indices and is topped up
        # with fresh ones.
        n_fill = self.batch_size - len(carried)
        yield carried + fresh[:n_fill]

        # Everything else, including the carried indices once more, is
        # reshuffled and emitted in full batches.
        pending = sorted(fresh[n_fill:] + list(carried_set))
        shuffle(pending)

        chunk = []
        for index in pending:
            chunk.append(index)
            if len(chunk) != self.batch_size:
                continue
            yield chunk
            chunk = []

        if not self.drop_last:
            self.from_last_epoch = chunk.copy()

    def __len__(self):
        available = len(self.sampler)
        if not self.drop_last:
            available += len(self.from_last_epoch)
        return available // self.batch_size


# ---------------------------------------------------------------------------
# Run 3: ensemble of 7 fully-connected networks.
# Each network has eight Linear -> BatchNorm -> ReLU stages of width 768 and a
# 4-unit linear output layer; SGD uses a small weight decay.
# ---------------------------------------------------------------------------
device = torch.device('cuda:0')
num_epochs = 1000
batch_size = 2048
initial_learning_rate = 0.16
loader_params = {'num_workers': 8, 'pin_memory': True}

# Columns 0-3 of train.csv are used as the four regression targets and the
# remaining columns as the network inputs.
train_data = np.array(pd.read_csv('./Data/train.csv'), dtype=np.float32)
X_train = torch.tensor(train_data[:, 4:], dtype=torch.float32)
y_train = torch.tensor(train_data[:, :4], dtype=torch.float32)

dataset = TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(dataset, batch_sampler=ContinuousBatchSampler(
    sampler=SequentialSampler(range(len(dataset))), batch_size=batch_size, drop_last=False), **loader_params)

# The test inputs are the same for every model, so they are read and moved to
# the device once, before any training starts (a missing file now fails fast).
# The first column of test.csv is dropped (presumably the sample id - confirm).
X_test = torch.tensor(np.array(pd.read_csv('./Data/test.csv'), dtype=np.float32))[:, 1:].to(device)

model_names = ['Model_{:02d}'.format(i) for i in range(1, 8)]

prediction = np.zeros((X_test.shape[0], 4), dtype=np.float32)
logging_term = 1000
# Total number of optimisation steps per model, used only in the log line.
logging_total = int(len(dataset) * num_epochs / batch_size)

for model_no in model_names:

    # Modules are created in the same order as a hand-written Sequential, so
    # the weight initialisation consumes the RNG identically.
    layers = [nn.Linear(226, 768), nn.BatchNorm1d(768), nn.ReLU()]
    for _ in range(7):
        layers += [nn.Linear(768, 768), nn.BatchNorm1d(768), nn.ReLU()]
    layers.append(nn.Linear(768, 4))
    net = nn.Sequential(*layers)

    model = net.to(device)
    running_loss = 0.
    running_counter = 0

    criterion = torch.nn.L1Loss()
    optimizer = optim.SGD(model.parameters(), lr=initial_learning_rate, momentum=0.9, weight_decay=2e-6)
    # The scheduler is stepped once per epoch, so the cosine cycle restarts
    # every 250 epochs and anneals the learning rate down to 0.005.
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=250, T_mult=1, eta_min=0.005,
                                                               last_epoch=-1)

    for epoch in range(num_epochs):

        model.train()
        for xx, yy in train_loader:

            xx, yy = xx.to(device), yy.to(device)
            # Input augmentation: add zero-mean Gaussian noise (std 0.00325).
            with torch.no_grad():
                xx += torch.randn(xx.shape, device=device) * 0.00325

            out = model(xx)
            loss = criterion(out, yy)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_counter += 1
            running_loss += loss.item()

            if running_counter % logging_term == 0:
                print(model_no + ' (iter {:6d}/{:6d}) {:.4f}'.format(running_counter, logging_total,
                                                                     running_loss / logging_term))
                running_loss = 0.

        scheduler.step()

    # Inference needs no autograd graph; no_grad leaves the outputs unchanged
    # and avoids storing activations for the whole test set.
    model.eval()
    with torch.no_grad():
        output = model(X_test)
    output = output.cpu().numpy()
    # Equal ensemble weights: divide by the number of models (7 in this run).
    prediction += output / len(model_names)

# Intermediate result of this run; combined with the other runs later on.
prediction = pd.DataFrame({'id': np.arange(len(prediction), dtype=np.int32),
                           'layer_1': prediction[:, 0],
                           'layer_2': prediction[:, 1],
                           'layer_3': prediction[:, 2],
                           'layer_4': prediction[:, 3]})
prediction.to_csv('./prediction_result_{}.csv'.format(time.strftime('%y%m%d%H%M%S')), index=False)

In [None]:
# 학습 4

# Make the run repeatable: pin cuDNN to deterministic kernels and switch off
# its autotuner, then seed every random number generator used below.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True
torch.manual_seed(0)
np.random.seed(0)
random.seed(0)


class ContinuousBatchSampler(Sampler):
    """Batch sampler that carries an epoch's leftover indices into the next one.

    Each pass over the sampler shuffles its indices (with
    ``numpy.random.shuffle``) and yields them as lists of ``batch_size``
    indices.  Indices that do not fill a whole batch at the end of a pass are
    either remembered and placed at the head of the next pass's first batch
    (``drop_last=False``) or discarded (``drop_last=True``).

    Args:
        sampler: iterable of dataset indices; must also support ``len()``.
        batch_size: number of indices per batch.
        drop_last: if true, the trailing partial batch is discarded instead
            of being carried over to the next pass.
    """

    def __init__(self, sampler, batch_size, drop_last):
        self.sampler = sampler
        self.batch_size = batch_size
        self.drop_last = drop_last
        # Tail of the previous pass that did not fill a complete batch.
        self.from_last_epoch = []

    def __iter__(self):
        carried = self.from_last_epoch
        carried_set = set(carried)

        # Indices not already carried over, in a fresh random order.
        fresh = sorted(set(self.sampler) - carried_set)
        shuffle(fresh)

        # The first batch starts with the carried indices and is topped up
        # with fresh ones.
        n_fill = self.batch_size - len(carried)
        yield carried + fresh[:n_fill]

        # Everything else, including the carried indices once more, is
        # reshuffled and emitted in full batches.
        pending = sorted(fresh[n_fill:] + list(carried_set))
        shuffle(pending)

        chunk = []
        for index in pending:
            chunk.append(index)
            if len(chunk) != self.batch_size:
                continue
            yield chunk
            chunk = []

        if not self.drop_last:
            self.from_last_epoch = chunk.copy()

    def __len__(self):
        available = len(self.sampler)
        if not self.drop_last:
            available += len(self.from_last_epoch)
        return available // self.batch_size


# ---------------------------------------------------------------------------
# Run 4 configuration and training data.
# ---------------------------------------------------------------------------
device = torch.device('cuda:0')
num_epochs = 1000
batch_size = 2048
initial_learning_rate = 0.16
loader_params = {'num_workers': 8, 'pin_memory': True}

# Columns 0-3 of train.csv are used as the four regression targets and the
# remaining columns as the network inputs.
train_data = np.array(pd.read_csv('./Data/train.csv'), dtype=np.float32)
X_train = torch.tensor(train_data[:, 4:], dtype=torch.float32)
y_train = torch.tensor(train_data[:, :4], dtype=torch.float32)

dataset = TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(dataset, batch_sampler=ContinuousBatchSampler(
    sampler=SequentialSampler(range(len(dataset))), batch_size=batch_size, drop_last=False), **loader_params)

# Accumulator for the averaged test predictions (one row per test sample,
# one column per predicted layer).
prediction = np.zeros((10000, 4), dtype=np.float32)
logging_term = 1000
# Total number of optimisation steps per model, used only in the log line.
# Derived from the dataset that was actually loaded rather than a fixed count.
logging_total = int(len(dataset) * num_epochs / batch_size)


class net(nn.Module):
    """Fully-connected regressor applied to an average-pooled input vector.

    The ``(batch, features)`` input is average-pooled along the feature axis
    (window 3, stride 3, padding 1) and the pooled vector is passed through
    eight Linear -> BatchNorm1d -> ReLU stages of width 768, followed by a
    4-unit linear output layer.  The first Linear layer expects 76 pooled
    features.
    """

    def __init__(self):
        super().__init__()
        self.avg = nn.AvgPool1d(kernel_size=3, stride=3, padding=1)

        width = 768
        stages = []
        # Build the stages front to back so that parameter initialisation
        # draws from the RNG in layer order.
        for fan_in in (76,) + (width,) * 7:
            stages.extend((nn.Linear(fan_in, width), nn.BatchNorm1d(width), nn.ReLU()))
        stages.append(nn.Linear(width, 4))
        self.linear = nn.Sequential(*stages)

    def forward(self, x):
        n_rows = x.shape[0]
        # AvgPool1d wants (batch, channels, length): add a single channel,
        # pool, then flatten back to (batch, pooled_features).
        pooled = self.avg(x.view(n_rows, 1, -1))
        return self.linear(pooled.view(n_rows, -1))


# ---------------------------------------------------------------------------
# Run 4: train 7 instances of `net` and average their test predictions.
# ---------------------------------------------------------------------------

# The test inputs are the same for every model, so they are read and moved to
# the device once, before any training starts (a missing file now fails fast).
# The first column of test.csv is dropped (presumably the sample id - confirm).
X_test = torch.tensor(np.array(pd.read_csv('./Data/test.csv'), dtype=np.float32))[:, 1:].to(device)

model_names = ['Model_{:02d}'.format(i) for i in range(1, 8)]

for model_no in model_names:

    model = net().to(device)
    running_loss = 0.
    running_counter = 0

    criterion = torch.nn.L1Loss()
    optimizer = optim.SGD(model.parameters(), lr=initial_learning_rate, momentum=0.9, weight_decay=2e-6)
    # The scheduler is stepped once per epoch, so the cosine cycle restarts
    # every 250 epochs and anneals the learning rate down to 0.005.
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=250, T_mult=1, eta_min=0.005,
                                                               last_epoch=-1)

    for epoch in range(num_epochs):

        model.train()
        for xx, yy in train_loader:

            xx, yy = xx.to(device), yy.to(device)
            # Input augmentation: add zero-mean Gaussian noise (std 0.005).
            with torch.no_grad():
                xx += torch.randn(xx.shape, device=device) * 0.005

            out = model(xx)
            loss = criterion(out, yy)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_counter += 1
            running_loss += loss.item()

            if running_counter % logging_term == 0:
                print(model_no + ' (iter {:6d}/{:6d}) {:.4f}'.format(running_counter, logging_total,
                                                                     running_loss / logging_term))
                running_loss = 0.

        scheduler.step()

    # Inference needs no autograd graph; no_grad leaves the outputs unchanged
    # and avoids storing activations for the whole test set.
    model.eval()
    with torch.no_grad():
        output = model(X_test)
    output = output.cpu().numpy()
    # Equal ensemble weights: divide by the number of models (7 in this run).
    prediction += output / len(model_names)

# Intermediate result of this run; combined with the other runs later on.
prediction = pd.DataFrame({'id': np.arange(len(prediction), dtype=np.int32),
                           'layer_1': prediction[:, 0],
                           'layer_2': prediction[:, 1],
                           'layer_3': prediction[:, 2],
                           'layer_4': prediction[:, 3]})
prediction.to_csv('./prediction_result_{}.csv'.format(time.strftime('%y%m%d%H%M%S')), index=False)

In [None]:
# Running the four training cells above writes four intermediate prediction
# files into the working directory.  This cell averages them into the final
# ensemble, which is the result that was submitted.
from glob import glob  # not among the imports at the top of the notebook

# Match only the intermediate files written by the training cells, so that an
# unrelated CSV in the working directory cannot end up in the ensemble.
file_list = np.sort(glob('./prediction_result_*.csv'))
if len(file_list) != 4:
    raise RuntimeError('expected exactly 4 intermediate prediction files, found {}: {}'.format(
        len(file_list), list(file_list)))

# Drop the leading 'id' column and keep the four layer columns.
file_0 = pd.read_csv(file_list[0]).to_numpy()[:, 1:]
file_1 = pd.read_csv(file_list[1]).to_numpy()[:, 1:]
file_2 = pd.read_csv(file_list[2]).to_numpy()[:, 1:]
file_3 = pd.read_csv(file_list[3]).to_numpy()[:, 1:]

# Unweighted mean of the four runs.
prediction = (file_0 + file_1 + file_2 + file_3) / 4

prediction = pd.DataFrame({'id': np.arange(len(prediction), dtype=np.int32),
                           'layer_1': prediction[:, 0],
                           'layer_2': prediction[:, 1],
                           'layer_3': prediction[:, 2],
                           'layer_4': prediction[:, 3]})
# The final file goes to the parent directory, so it does not match the
# './' glob pattern above if this cell is run again.
prediction.to_csv('../prediction_result_{}.csv'.format(time.strftime('%y%m%d%H%M%S')), index=False)

6. 결과 및 결언
Conclusion & Discussion

In [None]:
# 많은 수의 model을 학습하여 ensemble함으로써 확률적으로(stochastically)
# under-fitting 문제를 개선하고 안정적인 model을 학습할 수 있었음.

# 시간 문제로 많은 수의 실험을 진행하지는 못했지만 model의 구조(depth, width 등)를
# 더 넓은 범위에서 찾으면 존재하는 under-fitting 문제를 해결할 수 있을 것으로 사료됨.
# 또한, 최근 연구되고 있는 AutoML 등의 기법을 이용하는 것을 고려할 수 있음.