In [1]:
pip install ratsnlp

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ratsnlp
  Downloading ratsnlp-1.0.52-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.3/42.3 KB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pytorch-lightning==1.6.1
  Downloading pytorch_lightning-1.6.1-py3-none-any.whl (582 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m582.5/582.5 KB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting flask-cors>=3.0.10
  Downloading Flask_Cors-3.0.10-py2.py3-none-any.whl (14 kB)
Collecting transformers==4.10.0
  Downloading transformers-4.10.0-py3-none-any.whl (2.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m40.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting Korpora>=0.2.0
  Downloading Korpora-0.2.0-py3-none-any.whl (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.8/57.8 KB[0m [31m3.

In [2]:
from ratsnlp.nlpbook.ner import NERDeployArguments

In [3]:
args = NERDeployArguments(
    pretrained_model_name="beomi/kcbert-base",
    downstream_model_dir="/content/drive/MyDrive/class/language/checkpoint-ner",
    max_seq_length=64
)

downstream_model_checkpoint_fpath: /content/drive/MyDrive/class/language/checkpoint-ner/epoch=1-val_loss=0.20.ckpt
downstream_model_labelmap_fpath: /content/drive/MyDrive/class/language/checkpoint-ner/label_map.txt


In [4]:
from transformers import BertTokenizer

In [5]:
tokenizer = BertTokenizer.from_pretrained(
    args.pretrained_model_name,
    do_lower_case=False
)

Downloading:   0%|          | 0.00/250k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/619 [00:00<?, ?B/s]

In [6]:
import torch

In [7]:
fine_tuned_model_ckpt = torch.load(
    args.downstream_model_checkpoint_fpath,
    map_location = torch.device('cpu')
)

In [8]:
from transformers import BertConfig

In [9]:
pretrained_model_config = BertConfig.from_pretrained(
    args.pretrained_model_name,
    num_labels = fine_tuned_model_ckpt['state_dict']['model.classifier.bias'].shape.numel()
)

In [13]:
from transformers import BertForTokenClassification


In [14]:
model = BertForTokenClassification(pretrained_model_config)

In [15]:
model.load_state_dict({k.replace("model.","") : v for k,v in fine_tuned_model_ckpt['state_dict'].items()})

<All keys matched successfully>

In [None]:
fine_tuned_model_ckpt['state_dict'].items()

In [None]:
model.eval()

In [20]:
labels = [label.strip() for label in open(args.downstream_model_labelmap_fpath, 'r').readlines()]

In [21]:
id_to_label={}

In [22]:
labels

['[CLS]',
 '[SEP]',
 '[PAD]',
 '[MASK]',
 'O',
 'B-PER',
 'B-NOH',
 'B-POH',
 'B-ORG',
 'B-DAT',
 'B-LOC',
 'B-MNY',
 'B-PNT',
 'B-TIM',
 'B-DUR',
 'I-PER',
 'I-NOH',
 'I-POH',
 'I-ORG',
 'I-DAT',
 'I-LOC',
 'I-MNY',
 'I-PNT',
 'I-TIM',
 'I-DUR']

In [23]:
for idx, label in enumerate(labels):
  if "PER" in label:
      label = "인명"
  elif "LOC" in label:
      label = "지명"
  elif "ORG" in label:
      label = "기관명"
  elif "DAT" in label:
      label = "날짜"
  elif "TIM" in label:
      label = "시간"
  elif "DUR" in label:
      label = "기간"
  elif "MNY" in label:
      label = "통화"
  elif "PNT" in label:
      label = "비율"
  elif "NOH" in label:
      label = "기타 수량표현"
  elif "POH" in label:
      label = "기타"
  else:
      label = label
  id_to_label[idx] = label

In [60]:
def inference_fn(sentence) :
  inputs = tokenizer(
      [sentence],
      max_length = args.max_seq_length,
      padding="max_length",
      truncation = True,
      return_tensors = "pt"
  )

  with torch.no_grad() :
    outputs = model(**inputs)
    probs = outputs.logits.softmax(dim=-1)
    top_probs, preds =(torch.topk(probs, dim=-1, k=1))
    predicted_tags=[id_to_label[pred.item()] for pred in preds[0]]
    tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
    result=[]
    for token, predicted_tag, top_prob in zip(tokens, predicted_tags, top_probs[0]) :
      if token not in [tokenizer.pad_token, tokenizer.cls_token, tokenizer.sep_token] :
        token_result = {"token":token, 
                        "predicted_tag":predicted_tag, 
                        "top_prob":round(top_prob.item(), 4)}
        result.append(token_result)

  return {
     "sentence" : sentence,
     "result" : result,
  }

In [61]:
inference_fn('나는 이찬웅이다')

{'sentence': '나는 이찬웅이다',
 'result': [{'token': '나는', 'predicted_tag': 'O', 'top_prob': 0.9996},
  {'token': '이찬', 'predicted_tag': '인명', 'top_prob': 0.9973},
  {'token': '##웅', 'predicted_tag': '인명', 'top_prob': 0.9989},
  {'token': '##이다', 'predicted_tag': 'O', 'top_prob': 0.9993}]}

In [62]:
!mkdir /root/.ngrok2 && echo "authtoken: 2KUKWK7BGUqKWYtcmI6OA1BQHR3_73E9abYcCoNbPq4kqDvJb" > /root/.ngrok2/ngrok.yml

mkdir: cannot create directory ‘/root/.ngrok2’: File exists


In [64]:
from ratsnlp.nlpbook.ner import get_web_service_app
app = get_web_service_app(inference_fn)
app.run()


 * Serving Flask app "ratsnlp.nlpbook.ner.deploy" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


INFO:werkzeug: * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


 * Running on http://65d1-35-196-19-117.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


INFO:werkzeug:127.0.0.1 - - [19/Jan/2023 07:32:07] "[37mGET / HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [19/Jan/2023 07:32:07] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [19/Jan/2023 07:32:22] "[37mPOST /api HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [19/Jan/2023 07:32:37] "[37mPOST /api HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [19/Jan/2023 07:33:14] "[37mPOST /api HTTP/1.1[0m" 200 -
