# Install

In [None]:
! pip install attrdict

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting attrdict
  Downloading attrdict-2.0.1-py2.py3-none-any.whl (9.9 kB)
Installing collected packages: attrdict
Successfully installed attrdict-2.0.1


In [None]:
! pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.24.0-py3-none-any.whl (5.5 MB)
[K     |████████████████████████████████| 5.5 MB 4.8 MB/s 
[?25hCollecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.0-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 87.4 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 79.0 MB/s 
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.0 tokenizers-0.13.2 transformers-4.24.0


In [None]:
! pip install seqeval

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting seqeval
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[K     |████████████████████████████████| 43 kB 1.6 MB/s 
Building wheels for collected packages: seqeval
  Building wheel for seqeval (setup.py) ... [?25l[?25hdone
  Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16182 sha256=a17e85ef5c952776af83268033414b833c8bad32c3e9bb6ebc082736d7ee46a1
  Stored in directory: /root/.cache/pip/wheels/05/96/ee/7cac4e74f3b19e3158dce26a20a1c86b3533c43ec72a549fd7
Successfully built seqeval
Installing collected packages: seqeval
Successfully installed seqeval-1.2.2


# Libraries

In [1]:
from collections import defaultdict
import argparse
import json
import glob
import os
import random
import timeit
import numpy as np
import torch
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from torch.utils.data.distributed import DistributedSampler
from fastprogress.fastprogress import master_bar, progress_bar
from attrdict import AttrDict
from transformers import (
    AdamW,
    get_linear_schedule_with_warmup,
    squad_convert_examples_to_features
)
from transformers.data.metrics.squad_metrics import (
    compute_predictions_logits,
    squad_evaluate,
)
from transformers.data.processors.squad import SquadResult, SquadV1Processor, SquadV2Processor

def to_list(tensor):
  return tensor.detach().cpu().tolist()

# utils

In [2]:
import random
from numpy.lib.function_base import average

import torch
import numpy as np

from scipy.stats import pearsonr, spearmanr
from seqeval import metrics as seqeval_metrics
from sklearn import metrics as sklearn_metrics
from transformers import (
    AlbertConfig,
    DistilBertConfig,
    ElectraConfig,
    BertTokenizer,
    AlbertTokenizer,
    ElectraTokenizer,
    AlbertForSequenceClassification,
    DistilBertForSequenceClassification,
    ElectraForSequenceClassification,
    AlbertForTokenClassification,
    DistilBertForTokenClassification,
    ElectraForTokenClassification,
    AlbertForQuestionAnswering,
    DistilBertForQuestionAnswering,
    ElectraForQuestionAnswering,
)

CONFIG_CLASSES = {
    "albert": AlbertConfig,
    "distilkobert": DistilBertConfig,
    "koelectra-base": ElectraConfig,
    "koelectra-small": ElectraConfig,
    "koelectra-base-v2": ElectraConfig,
    "koelectra-base-v3": ElectraConfig,
    "koelectra-small-v2": ElectraConfig,
    "koelectra-small-v3": ElectraConfig,
}

TOKENIZER_CLASSES = {
    "albert": BertTokenizer,
    "distilkobert": BertTokenizer,
    "koelectra-base": ElectraTokenizer,
    "koelectra-small": ElectraTokenizer,
    "koelectra-base-v2": ElectraTokenizer,
    "koelectra-base-v3": ElectraTokenizer,
    "koelectra-small-v2": ElectraTokenizer,
    "koelectra-small-v3": ElectraTokenizer,
}

MODEL_FOR_SEQUENCE_CLASSIFICATION = {
    "albert": AlbertForSequenceClassification,
    "distilkobert": DistilBertForSequenceClassification,
    "koelectra-base": ElectraForSequenceClassification,
    "koelectra-small": ElectraForSequenceClassification,
    "koelectra-base-v2": ElectraForSequenceClassification,
    "koelectra-base-v3": ElectraForSequenceClassification,
    "koelectra-small-v2": ElectraForSequenceClassification,
    "koelectra-small-v3": ElectraForSequenceClassification,
}

MODEL_FOR_TOKEN_CLASSIFICATION = {
    "albert": AlbertForTokenClassification,
    "distilkobert": DistilBertForTokenClassification,
    "koelectra-base": ElectraForTokenClassification,
    "koelectra-small": ElectraForTokenClassification,
    "koelectra-base-v2": ElectraForTokenClassification,
    "koelectra-base-v3": ElectraForTokenClassification,
    "koelectra-small-v2": ElectraForTokenClassification,
    "koelectra-small-v3": ElectraForTokenClassification,
    "koelectra-small-v3-51000": ElectraForTokenClassification,
}

MODEL_FOR_QUESTION_ANSWERING = {
    "albert": AlbertForQuestionAnswering,
    "distilkobert": DistilBertForQuestionAnswering,
    "koelectra-base": ElectraForQuestionAnswering,
    "koelectra-small": ElectraForQuestionAnswering,
    "koelectra-base-v2": ElectraForQuestionAnswering,
    "koelectra-base-v3": ElectraForQuestionAnswering,
    "koelectra-small-v2": ElectraForQuestionAnswering,
    "koelectra-small-v3": ElectraForQuestionAnswering,
}


def set_seed(args):
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if not args.no_cuda and torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.seed)


In [3]:
def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False):
    # Load data features from cache or dataset file
    input_dir = args.data_dir if args.data_dir else "."
    cached_features_file = os.path.join(
        input_dir,
        "cached_{}_{}_{}".format(
            "dev" if evaluate else "train",
            list(filter(None, args.model_name_or_path.split("/"))).pop(),
            str(args.max_seq_length),
        ),
    )

    # Init features and dataset from cache if it exists
    if os.path.exists(cached_features_file):
        print("Loading features from cached file %s"% cached_features_file)
        features_and_dataset = torch.load(cached_features_file)
        features, dataset, examples = (
            features_and_dataset["features"],
            features_and_dataset["dataset"],
            features_and_dataset["examples"],
        )
    else:
        print("Creating features from dataset file at %s"%input_dir)

        if not args.data_dir and ((evaluate and not args.predict_file) or (not evaluate and not args.train_file)):
            try:
                import tensorflow_datasets as tfds
            except ImportError:
                raise ImportError("If data_dir is not specified, tensorflow_datasets needs to be installed.")

            if args.version_2_with_negative:
                print("tensorflow_datasets does not handle version 2 of SQuAD.")

            tfds_examples = tfds.load("squad")
            examples = SquadV1Processor().get_examples_from_dataset(tfds_examples, evaluate=evaluate)
        else:
            processor = SquadV2Processor() if args.version_2_with_negative else SquadV1Processor()
            if evaluate:
                examples = processor.get_dev_examples(os.path.join(args.data_dir, args.task),
                                                      filename=args.predict_file)
            else:
                examples = processor.get_train_examples(os.path.join(args.data_dir, args.task),
                                                        filename=args.train_file)

        features, dataset = squad_convert_examples_to_features(
            examples=examples,
            tokenizer=tokenizer,
            max_seq_length=args.max_seq_length,
            doc_stride=args.doc_stride,
            max_query_length=args.max_query_length,
            is_training=not evaluate,
            return_dataset="pt",
            threads=args.threads,
        )

        print("Saving features into cached file %s"% cached_features_file)
        torch.save({"features": features, "dataset": dataset, "examples": examples}, cached_features_file)

    if output_examples:
        return dataset, examples, features
    return dataset

# Sketchy Reading Module

## Train

In [4]:
def train(args, train_dataset, model, tokenizer):
  train_sampler = RandomSampler(train_dataset)
  train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size = args.train_batch_size)

  if args.max_steps > 0:
    t_total = args.max_steps
    args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
  else:
    t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs

  no_decay = ["bias", "LayerNorm.weight"]
  optimizer_grouped_parameters = [
      {
          "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
          "weight_decay": args.weight_decay,
      },
      {"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0},
  ]
  optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
  scheduler = get_linear_schedule_with_warmup(
      optimizer, num_warmup_steps=int(t_total * args.warmup_proportion), num_training_steps=t_total
  )

  # Check if saved optimizer or scheduler states exist
  if os.path.isfile(os.path.join(args.model_name_or_path, "optimizer.pt")) and os.path.isfile(
          os.path.join(args.model_name_or_path, "scheduler.pt")
  ):
      # Load in optimizer and scheduler states
      optimizer.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "optimizer.pt")))
      scheduler.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "scheduler.pt")))

  # Train
  print("***** Running training *****")
  print("  Num examples = %d" % len(train_dataset))
  print("  Num Epochs = %d" % args.num_train_epochs)
  print("  Train batch size per GPU = %d"% args.train_batch_size)
  print("  Total train batch size (w. parallel, distributed & accumulation) = %d"% (args.train_batch_size * args.gradient_accumulation_steps))
  print("  Gradient Accumulation steps = %d"% args.gradient_accumulation_steps)
  print("  Total optimization steps = %d"% t_total)

  global_step = 1
  steps_trained_in_current_epoch = 0
  # Check if continuing training from a checkpoint
  if os.path.exists(args.model_name_or_path):
      try:
          # set global_step to global_step of last saved checkpoint from model path
          checkpoint_suffix = args.model_name_or_path.split("-")[-1].split("/")[0]
          global_step = int(checkpoint_suffix)
          epochs_trained = global_step // (len(train_dataloader) // args.gradient_accumulation_steps)
          steps_trained_in_current_epoch = global_step % (len(train_dataloader) // args.gradient_accumulation_steps)

          print("  Continuing training from checkpoint, will skip to saved global_step")
          print("  Continuing training from epoch %d"% epochs_trained)
          print("  Continuing training from global step %d"% global_step)
          print("  Will skip the first %d steps in the first epoch"% steps_trained_in_current_epoch)
      except ValueError:
          print("  Starting fine-tuning.")

  tr_loss, logging_loss = 0.0, 0.0
  model.zero_grad()
  mb = master_bar(range(int(args.num_train_epochs)))
  # Added here for reproductibility
  set_seed(args)

  for epoch in mb:
      epoch_iterator = progress_bar(train_dataloader, parent=mb)
      for step, batch in enumerate(epoch_iterator):
          # Skip past any already trained steps if resuming training
          if steps_trained_in_current_epoch > 0:
              steps_trained_in_current_epoch -= 1
              continue

          model.train()
          batch = tuple(t.to(args.device, dtype=torch.long) for t in batch)

          inputs = {
              "input_ids": batch[0],
              "attention_mask": batch[1],
              "token_type_ids": batch[2],
              "labels": batch[7],
          }


          outputs = model(**inputs)
          # model outputs are always tuple in transformers (see doc)
          loss = outputs[0]

          if args.gradient_accumulation_steps > 1:
              loss = loss / args.gradient_accumulation_steps

          loss.backward()

          tr_loss += loss.item()
          if (step + 1) % args.gradient_accumulation_steps == 0:
              torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

              optimizer.step()
              scheduler.step()  # Update learning rate schedule
              model.zero_grad()
              global_step += 1
              

              # Save model checkpoint
              if args.save_steps > 0 and global_step % args.save_steps == 0:
                  output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
                  if not os.path.exists(output_dir):
                      os.makedirs(output_dir)
                  # Take care of distributed/parallel training
                  model_to_save = model.module if hasattr(model, "module") else model
                  model_to_save.save_pretrained(output_dir)
                  tokenizer.save_pretrained(output_dir)

                  torch.save(args, os.path.join(output_dir, "training_args.bin"))
                  print("Saving model checkpoint to %s"% output_dir)

                  if args.save_optimizer:
                      torch.save(optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
                      torch.save(scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))
                      print("Saving optimizer and scheduler states to %s"% output_dir)

          if args.max_steps > 0 and global_step > args.max_steps:
              break

      mb.write("Epoch {} done".format(epoch+1))

      if args.max_steps > 0 and global_step > args.max_steps:
          break

  return global_step, tr_loss / global_step

## Evaluate

In [5]:
from sklearn.metrics import f1_score
def evaluate(args, model, tokenizer, global_step=None):
    dataset, examples, features = load_and_cache_examples(args, tokenizer, evaluate=True, output_examples=True)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)


    # Eval!
    print("***** Running evaluation {} *****".format(global_step))
    print("  Num examples = %d"% len(examples))
    print("  Num features = %d"% len(features))
    print("  Batch size = %d"% args.eval_batch_size)

    start_time = timeit.default_timer()

    results = {}
    num_id = 0
    preds = None
    out_label_ids = None
    key_map = {}
    cnt_map = {}

    id_map = [feature.qas_id for feature in features]
    # if args.write_qas_id:
    #     id_map = [feature.qas_id for feature in features]
    # else:
    #     id_map = [feature.example_index for feature in features]

    out_label_ids = [feature.is_impossible for feature in features]

    example_idx_to_label = defaultdict(int)

    for idx, example in enumerate(examples):
        example_idx_to_label[idx] = example.is_impossible

    for batch in progress_bar(eval_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)

        with torch.no_grad():
            inputs = {'input_ids': batch[0],
                    'attention_mask': batch[1],
                    'token_type_ids': batch[2]
                    }

            logits = model(**inputs)
            logits = logits[0].detach().cpu().numpy()

        # batch내의 모든 예측값에 대해서
        for logit in logits:
            qas_id = id_map[num_id]
            
            if qas_id in key_map:
                logit_list = key_map[qas_id]
                logit_list[0] += logit[0]
                logit_list[1] += logit[1]
                cnt_map[qas_id] += 1

            else:
                cnt_map[qas_id] = 1
                key_map[qas_id] = [logit[0], logit[1]]
            
            num_id += 1
    
        if preds is None:
            preds = logits

        else:
            preds = np.append(preds, logits, axis=0)
    
    preds = np.argmax(preds, axis=1).tolist()



    evalTime = timeit.default_timer() - start_time
    print("  Evaluation done in total %f secs (%f sec per example)"% (evalTime, evalTime / len(dataset)))

    result = {"f1": f1_score(y_true=out_label_ids, y_pred=preds)}
    results.update(result)

    final_map = {}
    for idx, key in enumerate(key_map):
        key_list = key_map[key]
        key_list[0] = key_list[0] / cnt_map[key]
        key_list[1] = key_list[1] / cnt_map[key]
        final_map[key] = key_list[1] - key_list[0]

    with open(os.path.join(args.output_dir, "cls_score.json"), "w") as writer:
        writer.write(json.dumps(final_map, indent=4, ensure_ascii=False) + "\n")

    output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
    with open(output_eval_file, "a") as writer:
        print("***** Eval results *****")
        writer.write("***** Eval results *****")
        for key in sorted(result.keys()):
            print("  %s = %s"% (key, str(result[key])))
            writer.write("%s = %s\n" % (key, str(result[key])))


    return results

## RUN

In [6]:
def main(cli_args):
    # Read from config file and make args
    with open(os.path.join(cli_args.config_dir, cli_args.task, cli_args.config_file)) as f:
        args = AttrDict(json.load(f))
    print("Training/evaluation parameters {}".format(args))

    args.output_dir = os.path.join(args.ckpt_dir, args.output_dir)

    if args.doc_stride >= args.max_seq_length - args.max_query_length:
        print(
            "WARNING - You've set a doc stride which may be superior to the document length in some "
            "examples. This could result in errors when building features from the examples. Please reduce the doc "
            "stride or increase the maximum length to ensure the features are correctly built."
        )
    set_seed(args)

    # Load pretrained model and tokenizer
    config = CONFIG_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path,
    )
    tokenizer = TOKENIZER_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path,
        do_lower_case=args.do_lower_case,
    )
    model = MODEL_FOR_SEQUENCE_CLASSIFICATION[args.model_type].from_pretrained(
        args.model_name_or_path,
        config=config,
    )
    # GPU or CPU
    args.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
    model.to(args.device)

    print("Training/evaluation parameters %s"% args)

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        print(" global_step = %s, average loss = %s"% (global_step, tr_loss))

    # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
    results = {}

    if args.do_eval:
        checkpoints = list(
            os.path.dirname(c)
            for c in sorted(glob.glob("/content/drive/MyDrive/Colab Notebooks/KB/ckpt/SketchReadingModule" + "/**/" + "pytorch_model.bin", recursive=True))
        )

        print("Evaluate the following checkpoints: %s"% checkpoints)

        for checkpoint in checkpoints:
            # Reload the model
            global_step = checkpoint.split("-")[-1]
            model = MODEL_FOR_SEQUENCE_CLASSIFICATION["albert"].from_pretrained(checkpoint)
            model.to(args.device)
            args.output_dir = os.path.join(args.output_dir, checkpoint)
            result = evaluate(args, model, tokenizer, global_step=global_step)
            result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items())
            results.update(result)

        output_eval_file = os.path.join(args.output_dir, "eval_results_sketchy.txt")
        with open(output_eval_file, "w") as f_w:
            for key in sorted(results.keys()):
                f_w.write("{} = {}\n".format(key, str(results[key])))

In [None]:
import gc

gc.collect()

torch.cuda.empty_cache()

In [8]:
import easydict 
cli_args = easydict.EasyDict({
    "task": "news",
    "config_dir": "/content/drive/MyDrive/Colab Notebooks/KB/config",
    "config_file": "SketchReadingModule_train_evaluate.json"

})

main(cli_args)

Some weights of the model checkpoint at /content/drive/MyDrive/Colab Notebooks/KB/model/kb-albert-char-base-v2 were not used when initializing AlbertForSequenceClassification: ['predictions.decoder.weight', 'predictions.dense.bias', 'predictions.LayerNorm.bias', 'predictions.LayerNorm.weight', 'predictions.bias', 'predictions.decoder.bias', 'sop_classifier.classifier.weight', 'predictions.dense.weight', 'sop_classifier.classifier.bias']
- This IS expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForSequenceClassification were not initi

Training/evaluation parameters AttrDict({'task': 'news', 'data_dir': '/content/drive/MyDrive/Colab Notebooks/KB/data', 'ckpt_dir': '/content/drive/MyDrive/Colab Notebooks/KB/ckpt', 'train_file': 'TL.json', 'predict_file': 'VL.json', 'threads': 20, 'version_2_with_negative': True, 'null_score_diff_threshold': 0.0, 'max_seq_length': 512, 'doc_stride': 400, 'max_query_length': 100, 'max_answer_length': 100, 'n_best_size': 20, 'verbose_logging': True, 'overwrite_output_dir': True, 'evaluate_during_training': True, 'eval_all_checkpoints': True, 'select_checkpoint': 'checkpoint-51000', 'save_optimizer': False, 'do_lower_case': False, 'do_train': True, 'do_eval': True, 'n_gpu': 1, 'num_train_epochs': 3, 'weight_decay': 0.0, 'gradient_accumulation_steps': 1, 'adam_epsilon': 1e-08, 'warmup_proportion': 0, 'max_steps': -1, 'max_grad_norm': 1.0, 'no_cuda': False, 'model_type': 'albert', 'model_name_or_path': '/content/drive/MyDrive/Colab Notebooks/KB/model/kb-albert-char-base-v2', 'output_dir': '

100%|██████████| 46486/46486 [00:16<00:00, 2763.71it/s]
convert squad examples to features: 100%|██████████| 92980/92980 [01:28<00:00, 1046.06it/s]
add example index and unique id: 100%|██████████| 92980/92980 [00:00<00:00, 893279.98it/s]


Saving features into cached file /content/drive/MyDrive/Colab Notebooks/KB/data/cached_train_kb-albert-char-base-v2_512
***** Running training *****
  Num examples = 92980
  Num Epochs = 3
  Train batch size per GPU = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 17436
  Starting fine-tuning.


Saving model checkpoint to /content/drive/MyDrive/Colab Notebooks/KB/ckpt/SketchReadingModule/checkpoint-4000
Saving model checkpoint to /content/drive/MyDrive/Colab Notebooks/KB/ckpt/SketchReadingModule/checkpoint-8000
Saving model checkpoint to /content/drive/MyDrive/Colab Notebooks/KB/ckpt/SketchReadingModule/checkpoint-12000
Saving model checkpoint to /content/drive/MyDrive/Colab Notebooks/KB/ckpt/SketchReadingModule/checkpoint-16000
 global_step = 17437, average loss = 0.2082437890372223
Evaluate the following checkpoints: ['/content/drive/MyDrive/Colab Notebooks/KB/ckpt/SketchReadingModule/checkpoint-12000', '/content/drive/MyDrive/Colab Notebooks/KB/ckpt/SketchReadingModule/checkpoint-16000', '/content/drive/MyDrive/Colab Notebooks/KB/ckpt/SketchReadingModule/checkpoint-4000', '/content/drive/MyDrive/Colab Notebooks/KB/ckpt/SketchReadingModule/checkpoint-8000']
Creating features from dataset file at /content/drive/MyDrive/Colab Notebooks/KB/data


100%|██████████| 13519/13519 [00:05<00:00, 2465.45it/s]
convert squad examples to features: 100%|██████████| 27038/27038 [00:26<00:00, 1007.47it/s]
add example index and unique id: 100%|██████████| 27038/27038 [00:00<00:00, 775391.04it/s]


Saving features into cached file /content/drive/MyDrive/Colab Notebooks/KB/data/cached_dev_kb-albert-char-base-v2_512
***** Running evaluation 12000 *****
  Num examples = 27038
  Num features = 27038
  Batch size = 128


  Evaluation done in total 182.102888 secs (0.006735 sec per example)
***** Eval results *****
  f1 = 0.9310478271756698
Loading features from cached file /content/drive/MyDrive/Colab Notebooks/KB/data/cached_dev_kb-albert-char-base-v2_512
***** Running evaluation 16000 *****
  Num examples = 27038
  Num features = 27038
  Batch size = 128


  Evaluation done in total 180.606842 secs (0.006680 sec per example)
***** Eval results *****
  f1 = 0.9298181485881385
Loading features from cached file /content/drive/MyDrive/Colab Notebooks/KB/data/cached_dev_kb-albert-char-base-v2_512
***** Running evaluation 4000 *****
  Num examples = 27038
  Num features = 27038
  Batch size = 128


  Evaluation done in total 180.635217 secs (0.006681 sec per example)
***** Eval results *****
  f1 = 0.9118766830717802
Loading features from cached file /content/drive/MyDrive/Colab Notebooks/KB/data/cached_dev_kb-albert-char-base-v2_512
***** Running evaluation 8000 *****
  Num examples = 27038
  Num features = 27038
  Batch size = 128


  Evaluation done in total 180.593479 secs (0.006679 sec per example)
***** Eval results *****
  f1 = 0.9282514209294549


# Intensive Reading Module

## Train

In [9]:
def train(args, train_dataset, model, tokenizer):
    """ Train the model """
    train_sampler = RandomSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=args.train_batch_size)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
            "weight_decay": args.weight_decay,
        },
        {"params": [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], "weight_decay": 0.0},
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
    scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=int(t_total * args.warmup_proportion), num_training_steps=t_total
    )

    # Check if saved optimizer or scheduler states exist
    if os.path.isfile(os.path.join(args.model_name_or_path, "optimizer.pt")) and os.path.isfile(
            os.path.join(args.model_name_or_path, "scheduler.pt")
    ):
        # Load in optimizer and scheduler states
        optimizer.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "optimizer.pt")))
        scheduler.load_state_dict(torch.load(os.path.join(args.model_name_or_path, "scheduler.pt")))

    # Train!
    print("***** Running training *****")
    print("  Num examples = %d"% len(train_dataset))
    print("  Num Epochs = %d"% args.num_train_epochs)
    print("  Train batch size per GPU = %d"% args.train_batch_size)
    print(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d"% args.train_batch_size * args.gradient_accumulation_steps)
    print("  Gradient Accumulation steps = %d"% args.gradient_accumulation_steps)
    print("  Total optimization steps = %d"% t_total)

    global_step = 1
    epochs_trained = 0
    steps_trained_in_current_epoch = 0
    # Check if continuing training from a checkpoint
    if os.path.exists(args.model_name_or_path):
        try:
            # set global_step to gobal_step of last saved checkpoint from model path
            checkpoint_suffix = args.model_name_or_path.split("-")[-1].split("/")[0]
            global_step = int(checkpoint_suffix)
            epochs_trained = global_step // (len(train_dataloader) // args.gradient_accumulation_steps)
            steps_trained_in_current_epoch = global_step % (len(train_dataloader) // args.gradient_accumulation_steps)

            print("  Continuing training from checkpoint, will skip to saved global_step")
            print("  Continuing training from epoch %d"% epochs_trained)
            print("  Continuing training from global step %d"% global_step)
            print("  Will skip the first %d steps in the first epoch"% steps_trained_in_current_epoch)
        except ValueError:
            print("  Starting fine-tuning.")

    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    mb = master_bar(range(int(args.num_train_epochs)))
    # Added here for reproductibility
    set_seed(args)

    for epoch in mb:
        epoch_iterator = progress_bar(train_dataloader, parent=mb)
        for step, batch in enumerate(epoch_iterator):
            # Skip past any already trained steps if resuming training
            if steps_trained_in_current_epoch > 0:
                steps_trained_in_current_epoch -= 1
                continue

            model.train()
            batch = tuple(t.to(args.device) for t in batch)

            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
                "start_positions": batch[3],
                "end_positions": batch[4],
            }

            if args.model_type in ["xlm", "roberta", "distilbert", "distilkobert", "xlm-roberta"]:
                del inputs["token_type_ids"]

            outputs = model(**inputs)
            # model outputs are always tuple in transformers (see doc)
            loss = outputs[0]

            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            loss.backward()

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

                optimizer.step()
                scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                # Save model checkpoint
                if args.save_steps > 0 and global_step % args.save_steps == 0:
                    output_dir = os.path.join(args.output_dir, "checkpoint-{}".format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    # Take care of distributed/parallel training
                    model_to_save = model.module if hasattr(model, "module") else model
                    model_to_save.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)

                    torch.save(args, os.path.join(output_dir, "training_args.bin"))
                    print("Saving model checkpoint to %s"% output_dir)

                    if args.save_optimizer:
                        torch.save(optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
                        torch.save(scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))
                        print("Saving optimizer and scheduler states to %s"% output_dir)

            if args.max_steps > 0 and global_step > args.max_steps:
                break

        mb.write("Epoch {} done".format(epoch+1))

        if args.max_steps > 0 and global_step > args.max_steps:
            break

    return global_step, tr_loss / global_step

## Evaluate

In [10]:
def evaluate(args, model, tokenizer, global_step=None):
    dataset, examples, features = load_and_cache_examples(args, tokenizer, evaluate=True, output_examples=True)

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(dataset)
    eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=args.eval_batch_size)

    # Eval!
    print("***** Running evaluation {} *****".format(global_step))
    print("  Num examples = %d"% len(examples))
    print("  Num features = %d"% len(features))
    print("  Batch size = %d"% args.eval_batch_size)

    all_results = []
    start_time = timeit.default_timer()

    for batch in progress_bar(eval_dataloader):
        model.eval()
        batch = tuple(t.to(args.device) for t in batch)

        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "token_type_ids": batch[2],
            }

            if args.model_type in ["xlm", "roberta", "distilbert", "distilkobert", "xlm-roberta"]:
                del inputs["token_type_ids"]

            example_indices = batch[3]

            outputs = model(**inputs)

        for i, example_index in enumerate(example_indices):
            eval_feature = features[example_index.item()]
            unique_id = int(eval_feature.unique_id)

            output = [to_list(output[i]) for output in outputs.values()]

            start_logits, end_logits = output
            result = SquadResult(unique_id, start_logits, end_logits)

            all_results.append(result)

    evalTime = timeit.default_timer() - start_time
    print("  Evaluation done in total %f secs (%f sec per example)"% (evalTime, evalTime / len(dataset)))

    # Compute predictions
    output_prediction_file = os.path.join(args.output_dir, "predictions_{}.json".format(global_step))
    output_nbest_file = os.path.join(args.output_dir, "nbest_predictions_{}.json".format(global_step))

    if args.version_2_with_negative:
        output_null_log_odds_file = os.path.join(args.output_dir, "null_odds_{}.json".format(global_step))
    else:
        output_null_log_odds_file = None

    predictions = compute_predictions_logits(
        examples,
        features,
        all_results,
        args.n_best_size,
        args.max_answer_length,
        args.do_lower_case,
        output_prediction_file,
        output_nbest_file,
        output_null_log_odds_file,
        args.verbose_logging,
        args.version_2_with_negative,
        args.null_score_diff_threshold,
        tokenizer,
    )

    # Compute the F1 and exact scores.
    results = squad_evaluate(examples, predictions)
    # Write the result
    # Write the evaluation result on file
    output_dir = os.path.join(args.output_dir, 'eval')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    output_eval_file = os.path.join(output_dir, "eval_result_{}_{}.txt".format(list(filter(None, args.model_name_or_path.split("/"))).pop(),
                                                                               global_step))

    with open(output_eval_file, "w", encoding='utf-8') as f:
        json.dump(results, f, ensure_ascii=False)

    return results

## Run

In [11]:
def main(cli_args):
    # Read from config file and make args
    with open(os.path.join(cli_args.config_dir, cli_args.task, cli_args.config_file)) as f:
        args = AttrDict(json.load(f))
    print("Training/evaluation parameters {}".format(args))

    args.output_dir = os.path.join(args.ckpt_dir, args.output_dir)

    if args.doc_stride >= args.max_seq_length - args.max_query_length:
        print(
            "WARNING - You've set a doc stride which may be superior to the document length in some "
            "examples. This could result in errors when building features from the examples. Please reduce the doc "
            "stride or increase the maximum length to ensure the features are correctly built."
        )

    set_seed(args)

  
    # Load pretrained model and tokenizer
    config = CONFIG_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path,
    )
    tokenizer = TOKENIZER_CLASSES[args.model_type].from_pretrained(
        args.model_name_or_path,
        do_lower_case=args.do_lower_case,
    )
    model = MODEL_FOR_QUESTION_ANSWERING[args.model_type].from_pretrained(
        args.model_name_or_path,
        config=config,
    )
    # GPU or CPU
    args.device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
    model.to(args.device)

    print("Training/evaluation parameters %s"% args)

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        print(" global_step = %s, average loss = %s"% (global_step, tr_loss))

    # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
    results = {}
    if args.do_eval:
        checkpoints = list(
            os.path.dirname(c)
            for c in sorted(glob.glob(args.output_dir + "/**/" + "pytorch_model.bin", recursive=True))
        )
        if not args.eval_all_checkpoints:
            checkpoints = [checkpoint for checkpoint in checkpoints if checkpoint.find(args.select_checkpoint) != -1]

        print("Evaluate the following checkpoints: %s"% checkpoints)

        for checkpoint in checkpoints:
            # Reload the model
            global_step = checkpoint.split("-")[-1]
            model = MODEL_FOR_QUESTION_ANSWERING[args.model_type].from_pretrained(checkpoint)
            model.to(args.device)
            args.output_dir = os.path.join(args.output_dir, checkpoint)
            result = evaluate(args, model, tokenizer, global_step=global_step)
            result = dict((k + ("_{}".format(global_step) if global_step else ""), v) for k, v in result.items())
            results.update(result)

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as f_w:
            for key in sorted(results.keys()):
                f_w.write("{} = {}\n".format(key, str(results[key])))

In [None]:
import gc

gc.collect()

torch.cuda.empty_cache()

In [21]:
import easydict 
cli_args = easydict.EasyDict({
    "task": "news",
    "config_dir": "/content/drive/MyDrive/Colab Notebooks/KB/config",
    "config_file": "IntensiveReadingModule_train_evaluate.json"

})

main(cli_args)

Some weights of the model checkpoint at /content/drive/MyDrive/Colab Notebooks/KB/model/kb-albert-char-base-v2 were not used when initializing AlbertForQuestionAnswering: ['predictions.decoder.weight', 'predictions.dense.bias', 'predictions.LayerNorm.weight', 'predictions.LayerNorm.bias', 'predictions.bias', 'predictions.decoder.bias', 'sop_classifier.classifier.weight', 'predictions.dense.weight', 'sop_classifier.classifier.bias']
- This IS expected if you are initializing AlbertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing AlbertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of AlbertForQuestionAnswering were not initialized from the mode

Training/evaluation parameters AttrDict({'task': 'news', 'data_dir': '/content/drive/MyDrive/Colab Notebooks/KB/data', 'ckpt_dir': '/content/drive/MyDrive/Colab Notebooks/KB/ckpt', 'train_file': 'TL.json', 'predict_file': 'VL.json', 'threads': 20, 'version_2_with_negative': True, 'null_score_diff_threshold': 0.2, 'max_seq_length': 512, 'doc_stride': 400, 'max_query_length': 100, 'max_answer_length': 100, 'n_best_size': 20, 'verbose_logging': True, 'overwrite_output_dir': True, 'evaluate_during_training': True, 'eval_all_checkpoints': True, 'select_checkpoint': 'checkpoint-51000', 'save_optimizer': False, 'do_lower_case': False, 'do_train': False, 'do_eval': True, 'n_gpu': 1, 'num_train_epochs': 3, 'weight_decay': 0.0, 'gradient_accumulation_steps': 1, 'adam_epsilon': 1e-08, 'warmup_proportion': 0, 'max_steps': -1, 'max_grad_norm': 1.0, 'no_cuda': False, 'model_type': 'albert', 'model_name_or_path': '/content/drive/MyDrive/Colab Notebooks/KB/model/kb-albert-char-base-v2', 'output_dir': 

  Evaluation done in total 185.482622 secs (0.006860 sec per example)
Loading features from cached file /content/drive/MyDrive/Colab Notebooks/KB/data/cached_dev_kb-albert-char-base-v2_512
***** Running evaluation 17436 *****
  Num examples = 27038
  Num features = 27038
  Batch size = 128


  Evaluation done in total 185.249944 secs (0.006851 sec per example)
Loading features from cached file /content/drive/MyDrive/Colab Notebooks/KB/data/cached_dev_kb-albert-char-base-v2_512
***** Running evaluation 5812 *****
  Num examples = 27038
  Num features = 27038
  Batch size = 128


  Evaluation done in total 185.305591 secs (0.006854 sec per example)


# Inference

In [22]:
import collections

def get_score1(args):
  tokenizer = TOKENIZER_CLASSES[args.model_type].from_pretrained(
      "/content/drive/MyDrive/Colab Notebooks/KB/ckpt/IntensiveReadingModule/checkpoint-11624",
      do_lower_case=False,
  )
  dataset, examples, features = load_and_cache_examples(args, tokenizer, evaluate=True, output_examples=True)
  cof = [1, 1]
  best_cof = [1]
  all_scores = collections.OrderedDict()
  idx = 0
  for input_file in args.input_null_files.split(","):
    with open(input_file, 'r') as reader:
      input_data = json.load(reader, strict=False)
      for (key, score) in input_data.items():
        if key not in all_scores:
          all_scores[key] = []
        all_scores[key].append(cof[idx] * score)
    idx += 1
  output_scores = {}
  for (key, scores) in all_scores.items():
    mean_score = 0.0
    for score in scores:
      mean_score += score
    mean_score /= float(len(scores))
    output_scores[key] = mean_score

  all_nbest = collections.OrderedDict()

  with open(args.input_nbest_files, 'r') as reader:
    input_data = json.load(reader, strict=False)
    for (key, entries) in input_data.items():
      if key not in all_nbest:
        all_nbest[key] = collections.defaultdict(float)
      for entry in entries:
        all_nbest[key][entry["text"]] += best_cof[0] * entry['probability']
  output_predictions = {}
  for (key, entry_map) in all_nbest.items():
    # sorted_texts = sorted(
    #     entry_map.keys(), key=lambda x: entry[x], reverse=True)
    best_text = list(entry_map.keys())[0]
    output_predictions[key] = best_text
  
  best_th = args.thresh

  for qid in output_predictions.keys():
    if output_scores[qid] > best_th:
      output_predictions[qid] = ""
  

  output_prediction_file = "/content/drive/MyDrive/Colab Notebooks/KB/final_predictions.json"
  with open(output_prediction_file, "w") as writer:
    writer.write(json.dumps(output_predictions, indent=4, ensure_ascii=False) + "\n")

  for example in examples:
    example.qas_id = str(example.qas_id)
  results = squad_evaluate(examples, output_predictions)

  output_eval_file = "/content/drive/MyDrive/Colab Notebooks/KB/final_results.json"

  with open(output_eval_file, "w", encoding='utf-8') as f:
      json.dump(results, f, ensure_ascii=False)

In [23]:
import easydict 
args = easydict.EasyDict({
    'input_null_files': "/content/drive/MyDrive/Colab Notebooks/KB/ckpt/SketchReadingModule/checkpoint-12000/cls_score.json,/content/drive/MyDrive/Colab Notebooks/KB/ckpt/IntensiveReadingModule/checkpoint-11624/null_odds_11624.json",
    'input_nbest_files': "/content/drive/MyDrive/Colab Notebooks/KB/ckpt/IntensiveReadingModule/checkpoint-11624/nbest_predictions_11624.json",
    'thresh': 0.2,
    "data_dir": "/content/drive/MyDrive/Colab Notebooks/KB/data",
    "model_name_or_path": "/content/drive/MyDrive/Colab Notebooks/KB/model/kb-albert-char-base-v2",
    "max_seq_length": 512,
    "model_type": "albert"
})

get_score1(args)

Loading features from cached file /content/drive/MyDrive/Colab Notebooks/KB/data/cached_dev_kb-albert-char-base-v2_512


# DEPLOY

## Install

In [None]:
! pip install ratsnlp

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ratsnlp
  Downloading ratsnlp-1.0.52-py3-none-any.whl (42 kB)
[K     |████████████████████████████████| 42 kB 978 kB/s 
[?25hCollecting flask-cors>=3.0.10
  Downloading Flask_Cors-3.0.10-py2.py3-none-any.whl (14 kB)
Collecting transformers==4.10.0
  Downloading transformers-4.10.0-py3-none-any.whl (2.8 MB)
[K     |████████████████████████████████| 2.8 MB 8.4 MB/s 
[?25hCollecting flask-ngrok>=0.0.25
  Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Collecting pytorch-lightning==1.6.1
  Downloading pytorch_lightning-1.6.1-py3-none-any.whl (582 kB)
[K     |████████████████████████████████| 582 kB 82.8 MB/s 
[?25hCollecting Korpora>=0.2.0
  Downloading Korpora-0.2.0-py3-none-any.whl (57 kB)
[K     |████████████████████████████████| 57 kB 6.8 MB/s 
Collecting pyDeprecate<0.4.0,>=0.3.1
  Downloading pyDeprecate-0.3.2-py3-none-any.whl (10 kB)
Collecting torchmetrics>=

In [None]:
def _is_whitespace(c):
    if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
        return True
    return False
    
class SquadExample:
    """
    A single training/test example for the Squad dataset, as loaded from disk.

    Args:
        qas_id: The example's unique identifier
        question_text: The question string
        context_text: The context string
        answer_text: The answer string
        start_position_character: The character position of the start of the answer
        title: The title of the example
        answers: None by default, this is used during evaluation. Holds answers as well as their start positions.
        is_impossible: False by default, set to True if the example has no possible answer.
    """

    def __init__(
        self,
        qas_id,
        question_text,
        context_text,
        answer_text,
        start_position_character,
        title,
        answers=[],
        is_impossible=False,
    ):
        self.qas_id = qas_id
        self.question_text = question_text
        self.context_text = context_text
        self.answer_text = answer_text
        self.title = title
        self.is_impossible = is_impossible
        self.answers = answers

        self.start_position, self.end_position = 0, 0

        doc_tokens = []
        char_to_word_offset = []
        prev_is_whitespace = True

        # Split on whitespace so that different tokens may be attributed to their original position.
        for c in self.context_text:
            if _is_whitespace(c):
                prev_is_whitespace = True
            else:
                if prev_is_whitespace:
                    doc_tokens.append(c)
                else:
                    doc_tokens[-1] += c
                prev_is_whitespace = False
            char_to_word_offset.append(len(doc_tokens) - 1)

        self.doc_tokens = doc_tokens
        self.char_to_word_offset = char_to_word_offset

        # Start and end positions only has a value during evaluation.
        if start_position_character is not None and not is_impossible:
            self.start_position = char_to_word_offset[start_position_character]
            self.end_position = char_to_word_offset[
                min(start_position_character + len(answer_text) - 1, len(char_to_word_offset) - 1)
            ]

## Inference

In [None]:
import easydict
import collections
def inference(question, context):
  args = easydict.EasyDict({
    'input_null_files': "/content/drive/MyDrive/Colab Notebooks/KB/inference/cls_score.json,/content/drive/MyDrive/Colab Notebooks/KB/inference/null_odds.json",
    'input_nbest_files':  "/content/drive/MyDrive/Colab Notebooks/KB/inference/nbest_predictions.json",
    'thresh': 0,
    "task": "news",
    "threads": 20,
    "version_2_with_negative": True,
    "null_score_diff_threshold": 0.0,
    "max_seq_length": 512,
    "doc_stride": 400,
    "max_query_length": 100,
    "max_answer_length": 100,
    "n_best_size": 20,
    "verbose_logging": True,
    "overwrite_output_dir": True,
    "evaluate_during_training": True,
    "eval_all_checkpoints": True,
    "save_optimizer": False,
    "do_lower_case": False,
    "n_gpu": 1,
    "adam_epsilon": 1e-8,
    "warmup_proportion": 0,
    "max_steps": -1,
    "max_grad_norm": 1.0,
    "no_cuda": False,
    "model_type": "albert",
    "model_name_or_path": "/content/drive/MyDrive/Colab Notebooks/KB/model/kb-albert-char-base-v2",
    "output_dir": "/content/drive/MyDrive/Colab Notebooks/KB/inference",
    "seed": 42,
    "train_batch_size": 16,
    "eval_batch_size": 128,
    "logging_steps": 4000,
    "save_steps": 4000,
    "learning_rate": 5e-5,    
    })
  args.device = "cpu"
  processor = SquadV2Processor()
  tokenizer = TOKENIZER_CLASSES[args.model_type].from_pretrained("/content/drive/MyDrive/Colab Notebooks/KB/ckpt/IntensiveReadingModule/checkpoint-11624"
                                                                 , do_lower_case=args.do_lower_case,
      )

  examples = []
  title = "title"
  context_text = context
  qas_id = 0
  question_text = question
  start_position_character = None
  answer_text = None
  answers = [{
      "text": "",
      "answer_start": 0,
      "clue_start": 0,
      "clue_text": 0,
      "options": 0
      }]
  is_impossible = False

  example = SquadExample(
      qas_id=qas_id,
      question_text=question_text,
      context_text=context_text,
      answer_text=answer_text,
      start_position_character=start_position_character,
      title=title,
      is_impossible=is_impossible,
      answers=answers,
  )
  examples.append(example)

  features, dataset = squad_convert_examples_to_features(
      examples=examples,
      tokenizer=tokenizer,
      max_seq_length=args.max_seq_length,
      doc_stride=args.doc_stride,
      max_query_length=args.max_query_length,
      is_training=False,
      return_dataset="pt",
      threads=args.threads,
  )

  ## SKETCHY MODULE
  config = CONFIG_CLASSES[args.model_type].from_pretrained("/content/drive/MyDrive/Colab Notebooks/KB/ckpt/SketchReadingModule/checkpoint-12000")

  model = MODEL_FOR_SEQUENCE_CLASSIFICATION[args.model_type].from_pretrained("/content/drive/MyDrive/Colab Notebooks/KB/ckpt/SketchReadingModule/checkpoint-12000", config = config)

  eval_sampler = SequentialSampler(dataset)
  eval_dataloader = DataLoader(dataset, sampler=eval_sampler, batch_size=1)


  # Inference
  print("***** Running Sketchy *****")

  num_id = 0
  preds = None
  key_map = {}
  cnt_map = {}

  id_map = [feature.qas_id for feature in features]


  for batch in progress_bar(eval_dataloader):
      model.eval()
      batch = tuple(t.to(args.device) for t in batch)

      with torch.no_grad():
          inputs = {'input_ids': batch[0],
                  'attention_mask': batch[1],
                  'token_type_ids': batch[2]
                  }

          logits = model(**inputs)
          logits = logits[0].detach().cpu().numpy()

      # batch내의 모든 예측값에 대해서
      for logit in logits:
          qas_id = id_map[num_id]
          
          if qas_id in key_map:
              logit_list = key_map[qas_id]
              logit_list[0] += logit[0]
              logit_list[1] += logit[1]
              cnt_map[qas_id] += 1

          else:
              cnt_map[qas_id] = 1
              key_map[qas_id] = [logit[0], logit[1]]
          
          num_id += 1


  final_map = {}
  for idx, key in enumerate(key_map):
      key_list = key_map[key]
      key_list[0] = key_list[0] / cnt_map[key]
      key_list[1] = key_list[1] / cnt_map[key]
      final_map[key] = key_list[1] - key_list[0]

  with open(os.path.join(args.output_dir, "cls_score.json"), "w") as writer:
      writer.write(json.dumps(final_map, indent=4, ensure_ascii=False) + "\n")

  ## INTENSIVE MODULE

  config = CONFIG_CLASSES[args.model_type].from_pretrained("/content/drive/MyDrive/Colab Notebooks/KB/ckpt/IntensiveReadingModule/checkpoint-11624")
  model = MODEL_FOR_QUESTION_ANSWERING[args.model_type].from_pretrained("/content/drive/MyDrive/Colab Notebooks/KB/ckpt/IntensiveReadingModule/checkpoint-11624", config=config)


  # Eval!
  print("***** Running INTENSIVE *****")

  all_results = []

  for batch in progress_bar(eval_dataloader):
      model.eval()
      batch = tuple(t.to(args.device) for t in batch)

      with torch.no_grad():
          inputs = {
              "input_ids": batch[0],
              "attention_mask": batch[1],
              "token_type_ids": batch[2],
          }

          if args.model_type in ["xlm", "roberta", "distilbert", "distilkobert", "xlm-roberta"]:
              del inputs["token_type_ids"]

          example_indices = batch[3]

          outputs = model(**inputs)

      for i, example_index in enumerate(example_indices):
          eval_feature = features[example_index.item()]
          unique_id = int(eval_feature.unique_id)

          output = [to_list(output[i]) for output in outputs.values()]

          start_logits, end_logits = output
          result = SquadResult(unique_id, start_logits, end_logits)

          all_results.append(result)

  # Compute predictions
  output_prediction_file = os.path.join(args.output_dir, "predictions.json")
  output_nbest_file = os.path.join(args.output_dir, "nbest_predictions.json")

  output_null_log_odds_file = os.path.join(args.output_dir, "null_odds.json")

  predictions = compute_predictions_logits(
      examples,
      features,
      all_results,
      args.n_best_size,
      args.max_answer_length,
      args.do_lower_case,
      output_prediction_file,
      output_nbest_file,
      output_null_log_odds_file,
      args.verbose_logging,
      args.version_2_with_negative,
      args.null_score_diff_threshold,
      tokenizer,
  )


  # REAR VERIFICATION

  cof = [1, 1]
  best_cof = [1]
  all_scores = collections.OrderedDict()
  idx = 0
  for input_file in args.input_null_files.split(","):
    with open(input_file, 'r') as reader:
      input_data = json.load(reader, strict=False)
      for (key, score) in input_data.items():
        if key not in all_scores:
          all_scores[key] = []
        all_scores[key].append(cof[idx] * score)
    idx += 1
  output_scores = {}
  for (key, scores) in all_scores.items():
    mean_score = 0.0
    for score in scores:
      mean_score += score
    mean_score /= float(len(scores))
    output_scores[key] = mean_score

  all_nbest = collections.OrderedDict()

  with open(args.input_nbest_files, 'r') as reader:
    input_data = json.load(reader, strict=False)
    for (key, entries) in input_data.items():
      if key not in all_nbest:
        all_nbest[key] = collections.defaultdict(float)
      for entry in entries:
        all_nbest[key][entry["text"]] += best_cof[0] * entry['probability']
  output_predictions = {}
  for (key, entry_map) in all_nbest.items():
    best_text = list(entry_map.keys())[0]
    output_predictions[key] = best_text
  
  best_th = args.thresh

  for qid in output_predictions.keys():
    if output_scores[qid] > best_th:
      output_predictions[qid] = ""
  answer = "".join(output_predictions.values())
  return {
      'question': question,
      'context': context,
      'answer': answer
  }

## test

In [None]:
question = '서로의 채무와 채권을 같은 액수만큼 소멸시키는 것을 무엇이라 하나요'
context = "상계란 채무자와 은행이 서로에 대해 금전 채무와 채권을 가지는 경우에, 일방적 의사표시로  서로의 채무와 채권을 같은 액수만큼 소멸시키는 것을 말합니다. 은행은 대출 등 채무의 변제기가 도래하였거나 채무자가 기한의 이익을 상실한 경우, 채무자의 대출 등 그 채무와 채무자의 은행에 대한 예금 기타의 채권을 그채권의 기한이 도래하지 않았어도 서면통지에 의하여 상계할 수 있습니다."
inference(question, context)

convert squad examples to features: 100%|██████████| 1/1 [00:00<00:00, 210.72it/s]
add example index and unique id: 100%|██████████| 1/1 [00:00<00:00, 12865.96it/s]


***** Running Sketchy *****


***** Running INTENSIVE *****


{'question': '서로의 채무와 채권을 같은 액수만큼 소멸시키는 것을 무엇이라 하나요',
 'context': '상계란 채무자와 은행이 서로에 대해 금전 채무와 채권을 가지는 경우에, 일방적 의사표시로  서로의 채무와 채권을 같은 액수만큼 소멸시키는 것을 말합니다. 은행은 대출 등 채무의 변제기가 도래하였거나 채무자가 기한의 이익을 상실한 경우, 채무자의 대출 등 그 채무와 채무자의 은행에 대한 예금 기타의 채권을 그채권의 기한이 도래하지 않았어도 서면통지에 의하여 상계할 수 있습니다.',
 'answer': '상계'}

## WEB

In [None]:
!mkdir /root/.ngrok2 && echo "authtoken: 2HgLSGNA1ZMv6lFdhE3Wvt92Qts_QDnBDxTkFsMVe1DEigU" > /root/.ngrok2/ngrok.yml

mkdir: cannot create directory ‘/root/.ngrok2’: File exists


In [None]:
from ratsnlp.nlpbook.qa import get_web_service_app

app = get_web_service_app(inference)
app.run()

 * Serving Flask app "ratsnlp.nlpbook.qa.deploy" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


INFO:werkzeug: * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


 * Running on http://8157-34-173-25-172.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


INFO:werkzeug:127.0.0.1 - - [17/Nov/2022 20:17:50] "[37mGET / HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [17/Nov/2022 20:17:50] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
convert squad examples to features: 100%|██████████| 1/1 [00:00<00:00, 99.52it/s]
add example index and unique id: 100%|██████████| 1/1 [00:00<00:00, 12520.31it/s]

***** Running Sketchy *****





***** Running INTENSIVE *****


INFO:werkzeug:127.0.0.1 - - [17/Nov/2022 20:18:56] "[37mPOST /api HTTP/1.1[0m" 200 -
convert squad examples to features: 100%|██████████| 1/1 [00:00<00:00, 94.63it/s]
add example index and unique id: 100%|██████████| 1/1 [00:00<00:00, 13315.25it/s]

***** Running Sketchy *****





***** Running INTENSIVE *****


INFO:werkzeug:127.0.0.1 - - [17/Nov/2022 20:20:11] "[37mPOST /api HTTP/1.1[0m" 200 -
