# Ablation Study
Q&A 모델로부터 나온 확률값 중 [CLS] 토큰이 의미하는 것은 답이 없을 확률입니다. 따라서 Training 데이터에서 답이 있을 경우 이 [CLS] 토큰의 확률이 0에 수렴하는지 테스트하고, Regularization이 필요한지 여부에 대해 생각해보겠습니다.

## 1. Load Config & Model & Tokenizer

In [2]:
import sys
## Add the parent directory to the import path so the root-level .py modules can be imported
sys.path.append("../")

In [37]:
import torch
import os
import logging
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers.data.processors.squad import SquadExample, SquadResult
from transformers.data.processors.squad import SquadV1Processor, SquadV2Processor, squad_convert_examples_to_features
from tqdm import tqdm
import json
from korquad import korquadExample, korquad_convert_examples_to_features
from korquad_metrics import compute_predictions_logits
from kobert_transformers.tokenization_kobert import KoBertTokenizer
import time

from transformers import (
    AlbertConfig,
    AlbertForQuestionAnswering,
    AlbertTokenizer,
    BertConfig,
    BertForQuestionAnswering,
    BertTokenizer,
    DistilBertConfig,
    DistilBertForQuestionAnswering,
    DistilBertTokenizer,
    RobertaConfig,
    RobertaForQuestionAnswering,
    RobertaTokenizer,
    XLMConfig,
    XLMForQuestionAnswering,
    XLMTokenizer,
    XLNetConfig,
    XLNetForQuestionAnswering,
    XLNetTokenizer,
)
from tokenization_hanbert import HanBertTokenizer

# Registry mapping a model-type key to its (config class, question-answering
# model class, tokenizer class) triple.
MODEL_CLASSES = {
    # Architectures paired with their own tokenizer class.
    "bert": (BertConfig, BertForQuestionAnswering, BertTokenizer),
    "roberta": (RobertaConfig, RobertaForQuestionAnswering, RobertaTokenizer),
    "xlnet": (XLNetConfig, XLNetForQuestionAnswering, XLNetTokenizer),
    "xlm": (XLMConfig, XLMForQuestionAnswering, XLMTokenizer),
    "distilbert": (
        DistilBertConfig,
        DistilBertForQuestionAnswering,
        DistilBertTokenizer,
    ),
    "albert": (AlbertConfig, AlbertForQuestionAnswering, AlbertTokenizer),
    # Variants that reuse the BERT / DistilBERT classes with a different tokenizer.
    "kobert": (BertConfig, BertForQuestionAnswering, KoBertTokenizer),
    "hanbert": (BertConfig, BertForQuestionAnswering, HanBertTokenizer),
    "distilkobert": (
        DistilBertConfig,
        DistilBertForQuestionAnswering,
        KoBertTokenizer,
    ),
}

# NOTE(review): not referenced by any cell visible in this notebook section.
QUESTION_ID = "QA01"

In [7]:
# Directory holding the fine-tuned checkpoint (weights, config, tokenizer files
# and training_args.bin are all loaded from here).
model_name_or_path = '../aihub_model/bert'
# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# machine without CUDA instead of failing when the model is moved to the device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [10]:
## Restore the settings object that was saved with the checkpoint; later cells
## read its attributes (model_type, max_seq_length, doc_stride, ...).
## NOTE(review): torch.load unpickles the file, so only use trusted checkpoints.
args = torch.load(os.path.join(model_name_or_path, "training_args.bin"))
training_args = args

In [12]:
## Look up the (config, model, tokenizer) classes registered for the model type
## stored in the training settings, then load each one from the checkpoint folder.
config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
config = config_class.from_pretrained(model_name_or_path, cache_dir=None)
tokenizer = tokenizer_class.from_pretrained(
    model_name_or_path, do_lower_case=args.do_lower_case, cache_dir=None
)
model = model_class.from_pretrained(model_name_or_path, config=config, cache_dir=None)

In [13]:
# Move the model to the selected device and switch it to evaluation mode;
# both calls return the module itself, so the cell still displays the model.
model.to(device).eval()

BertForQuestionAnswering(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(119547, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise

## 2. Load Dataset

In [26]:
# Location of the sample JSON file used as input for this study.
data_path = '../resource/korquad1/train_sample.json'

In [34]:
# Processor used below to read the training examples from the JSON file.
processor = SquadV1Processor()

In [35]:
# Derive the directory and file name from `data_path` so the sample location is
# defined in a single place rather than being repeated as separate literals.
examples = processor.get_train_examples(
    os.path.dirname(data_path),
    filename=os.path.basename(data_path),
)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1001.74it/s]


In [45]:
# Convert the loaded examples into features plus a PyTorch dataset, using the
# sequence-length settings stored in the training arguments.
# is_training=True: presumably keeps the answer positions in the dataset
# — TODO confirm against the transformers version in use.
features, dataset = squad_convert_examples_to_features(
    examples=examples,
    tokenizer=tokenizer,
    max_seq_length=args.max_seq_length,
    doc_stride=args.doc_stride,
    max_query_length=args.max_query_length,
    is_training=True,
    return_dataset="pt",
    threads=args.threads,
)

convert squad examples to features: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 71.50it/s]
add example index and unique id: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<?, ?it/s]


## 3. Run Model

In [80]:
from torch.nn import Softmax


def to_list(tensor):
    """Detach ``tensor``, copy it to the CPU and return its values as Python lists."""
    host_copy = tensor.detach().cpu()
    return host_copy.tolist()


# Softmax over dimension 1 of its input.
softmax = Softmax(dim=1)

In [52]:
# Index of the dataset item to run through the model.
target = 0

In [53]:
# Build the model inputs from the first three tensors of the selected dataset
# item; unsqueeze(0) adds a leading dimension of size 1 before the tensor is
# moved to the target device.
inputs = {
    key: tensor.unsqueeze(0).to(device)
    for key, tensor in zip(
        ("input_ids", "attention_mask", "token_type_ids"),
        dataset[target],
    )
}

In [55]:
## Compute the model outputs with gradient tracking disabled
with torch.no_grad():
    outputs = model(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        token_type_ids=inputs["token_type_ids"],
    )

In [83]:
# Despite the `_logits` suffix, both names hold the softmax-normalised values of
# the first two model outputs, converted to plain Python lists.
# NOTE(review): assumes outputs[0] / outputs[1] are the start / end scores — confirm.
start_logits, end_logits = (to_list(softmax(outputs[i])) for i in (0, 1))