In [13]:
# Execute packaging.py inside this notebook's namespace.
# NOTE(review): `create_package`, which the cells below call, is not defined
# anywhere in this notebook; presumably it is defined by this script — confirm.
%run  packaging.py

# date_ruler

In [16]:
import spacy
from bedoner.lang.mecab import Japanese
from bedoner.entity_rulers.date import DateRuler
import shutil
# Pipeline name stored in the model meta.
name = "date_ruler"

# Build a MeCab-based Japanese pipeline whose only component is the date ruler.
# The meta reuses `name` instead of repeating the literal, matching how the
# other sections of this notebook build their meta dictionaries.
nlp = Japanese(meta={"name": name, "requirements": ["mecab-python3", "regex"]})
nlp.add_pipe(DateRuler(nlp))

# Sample sentence; the later sections of this notebook reuse `text`.
text = "2019年11月8日に高松隆と東京タワーに行った"
# Entities found by the in-memory pipeline: the baseline for the
# package-and-reload check in the next cell.
expected = nlp(text).ents

In [17]:
# Package the in-memory pipeline, reload it from the temporary directory and
# check that the reloaded pipeline finds the same entities as before.
# NOTE(review): `create_package` is not defined in this notebook; presumably it
# comes from packaging.py (run in the first cell) — confirm.
pkgd, tmpd = create_package(nlp)
try:
    nlp = spacy.load(tmpd.name)
finally:
    # Remove the temporary directory even when loading the model fails.
    tmpd.cleanup()
assert nlp(text).ents == expected

[38;5;2m✔ Loaded meta.json from file[0m
/var/folders/vc/4qw043p150b0gtkbm6rhqw9w0000gp/T/tmp1gqhqedk/meta.json
[38;5;2m✔ Successfully created package 'mecab_date_ruler-0.0.0'[0m
/Users/yohei_tamura/work/bedore-ner/scripts/../pkgs/mecab_date_ruler-0.0.0
To build the package, run `python setup.py sdist` in this directory.


# person_ruler

- mecabのユーザ辞書を含める必要がありちょっと面倒

In [18]:
import os 
import spacy
from bedoner.lang.mecab import Japanese
from bedoner.entity_rulers.person import create_person_ruler
from pathlib import Path
from spacy.cli import package
from shutil import copy

# Pipeline name stored in the model meta.
name = "person_ruler"
# MeCab user dictionary, resolved under the current user's home directory.
user_dic = os.path.expanduser("~/.bedoner/user.dic")

# MeCab-based Japanese pipeline; the tokenizer settings are passed through meta.
nlp = Japanese(
    meta={
        "tokenizer": {"userdic": user_dic, "assets": "./jinmei/"},
        "name": name,
        "requirements": ["mecab-python3", "regex"],
    }
)
nlp.add_pipe(create_person_ruler(nlp))

# `text` is the sample sentence defined in the date_ruler section.
# The entities found here are the baseline for the reload check in the next cell.
expected = nlp(text).ents

In [19]:
# Package the in-memory pipeline, reload it from the temporary directory and
# check that the reloaded pipeline finds the same entities as before.
# NOTE(review): `create_package` is not defined in this notebook; presumably it
# comes from packaging.py (run in the first cell) — confirm.
pkgd, tmpd = create_package(nlp)
try:
    nlp = spacy.load(tmpd.name)
finally:
    # Remove the temporary directory even when loading the model fails.
    tmpd.cleanup()
assert nlp(text).ents == expected

[38;5;2m✔ Loaded meta.json from file[0m
/var/folders/vc/4qw043p150b0gtkbm6rhqw9w0000gp/T/tmpn0sko71z/meta.json
[38;5;2m✔ Successfully created package 'mecab_person_ruler-0.0.0'[0m
/Users/yohei_tamura/work/bedore-ner/scripts/../pkgs/mecab_person_ruler-0.0.0
To build the package, run `python setup.py sdist` in this directory.


# entity_ruler

- 上の二つの組み合わせ

In [20]:
import spacy
from bedoner.entity_rulers.person import create_person_ruler
from bedoner.entity_rulers.date import DateRuler

# Pipeline name stored in the model meta.
name = "entity_ruler"

# Combine the date ruler and the person ruler in one MeCab-based pipeline.
# `Japanese` (imported from bedoner.lang.mecab) and `user_dic` both come from
# the person_ruler section above; this cell does not define them itself.
nlp = Japanese(
    meta={
        "tokenizer": {"userdic": user_dic, "assets": "./jinmei/"},
        "name": name,
        "requirements": ["mecab-python3", "regex"],
    }
)
nlp.add_pipe(DateRuler(nlp))
nlp.add_pipe(create_person_ruler(nlp))

# `text` is the sample sentence defined in the date_ruler section.
expected = nlp(text).ents

In [21]:
# Package the in-memory pipeline, reload it from the temporary directory and
# check that the reloaded pipeline finds the same entities as before.
# NOTE(review): `create_package` is not defined in this notebook; presumably it
# comes from packaging.py (run in the first cell) — confirm.
pkgd, tmpd = create_package(nlp)
try:
    nlp = spacy.load(tmpd.name)
finally:
    # Remove the temporary directory even when loading the model fails.
    tmpd.cleanup()
assert nlp(text).ents == expected

[38;5;2m✔ Loaded meta.json from file[0m
/var/folders/vc/4qw043p150b0gtkbm6rhqw9w0000gp/T/tmp0h6t85_r/meta.json
[38;5;2m✔ Successfully created package 'mecab_entity_ruler-0.0.0'[0m
/Users/yohei_tamura/work/bedore-ner/scripts/../pkgs/mecab_entity_ruler-0.0.0
To build the package, run `python setup.py sdist` in this directory.


# knp entity extractor

In [22]:
import spacy
from bedoner.lang.knp import Japanese
from bedoner.entity_extractors.knp import KnpEntityExtractor

# Pipeline name stored in the model meta.
name = "entity_extractor"

# KNP-based Japanese pipeline (`Japanese` is re-imported from bedoner.lang.knp
# at the top of this cell, replacing the MeCab class used in earlier sections).
nlp = Japanese(
    meta={
        "name": name,
        "requirements": ["regex", "pyknp"],
    }
)

# The component is created by factory name rather than by instantiating
# KnpEntityExtractor directly.
# NOTE(review): presumably importing bedoner.entity_extractors.knp is what
# registers the "knp_entity_extractor" factory — confirm.
p = nlp.create_pipe("knp_entity_extractor")
nlp.add_pipe(p)

# `text` is the sample sentence defined in the date_ruler section.
expected = nlp(text).ents

In [23]:
# Package the in-memory pipeline, reload it from the temporary directory and
# check that the reloaded pipeline finds the same entities as before.
# NOTE(review): `create_package` is not defined in this notebook; presumably it
# comes from packaging.py (run in the first cell) — confirm.
pkgd, tmpd = create_package(nlp)
try:
    nlp = spacy.load(tmpd.name)
finally:
    # Remove the temporary directory even when loading the model fails.
    tmpd.cleanup()
assert nlp(text).ents == expected

[38;5;2m✔ Loaded meta.json from file[0m
/var/folders/vc/4qw043p150b0gtkbm6rhqw9w0000gp/T/tmp14n_3cqe/meta.json
[38;5;2m✔ Successfully created package 'knp_entity_extractor-0.0.0'[0m
/Users/yohei_tamura/work/bedore-ner/scripts/../pkgs/knp_entity_extractor-0.0.0
To build the package, run `python setup.py sdist` in this directory.


# bert

In [1]:
import spacy
from bedoner.entity_extractors.bert_ner import BertEntityExtractor, create_estimator
from pathlib import Path
import pickle
from spacy.tokens import Doc
from bedoner.lang.juman import Japanese as Juman
from spacy.strings import StringStore
from spacy.vocab import Vocab
from spacy_pytorch_transformers.pipeline.wordpiecer import PyTT_WordPiecer
from bedoner.lang.mecab import Japanese
from bedoner.wordpiecer import BertWordPiecer
import json
from pathlib import Path 
import shutil
from spacy.cli import package
import tempfile

In [2]:
# Pipeline name; a later cell passes it as the "name" entry of the model meta.
name = "bert_ner"

In [4]:
# Base directory for the relative data paths built in the following cells.
# Path(".").parent is itself Path("."), so this is the current working
# directory of the kernel, not the parent directory.
__dir__ = Path(".").parent

In [5]:
# Vocabulary file of the pretrained Japanese BERT model, one token per line.
vocabfile = __dir__ / "../data/Japanese_L-12_H-768_A-12_E-30_BPE/vocab.txt"

# Read the tokens with an explicit encoding so the result does not depend on
# the platform default (assumes the vocabulary file is UTF-8 — confirm).
# Only the line terminator is stripped: slicing off the last character would
# truncate the final token whenever the file does not end with a newline.
with vocabfile.open(encoding="utf-8") as f:
    vs = [line.rstrip("\n") for line in f]

# Seed the spaCy vocabulary with the BERT tokens.
s = StringStore(vs)
v = Vocab(strings=s)

# Juman-based Japanese pipeline that shares this vocabulary.
nlp = Juman(v, meta={"name": name})

# Word piecer built on the same vocabulary; its model is constructed from the
# vocab file path that the component keeps in its cfg.
w = BertWordPiecer(v, vocab_file=str(vocabfile))
w.model = w.Model(w.cfg["vocab_file"])
nlp.add_pipe(w)

In [6]:
# Locations of the pretrained BERT model and of the NER model directory.
bert_dir = __dir__ / "../data/Japanese_L-12_H-768_A-12_E-30_BPE"
model_dir = __dir__ / "../data/bert_result_ene_0/"
init_checkpoint = str(bert_dir / "bert_model.ckpt")

# Label-to-id mapping stored in the NER model directory.
with (model_dir / "label2id.json").open() as f:
    label2id = json.load(f)

# Settings shared by the entity extractor component and the estimator.
bert_cfg = {
    "bert_dir": str(bert_dir),
    "model_dir": str(model_dir),
    # One more than the number of entries in label2id.
    # NOTE(review): presumably reserves an extra id (e.g. padding) — confirm.
    "num_labels": len(label2id) + 1,
    "init_checkpoint": init_checkpoint,
    "use_one_hot_embeddings": None,
    "max_seq_length": 128,
    "batch_size": 10,
}

# Build the component, attach the estimator, then run the component's own
# initialisation steps in the order the original cell used before adding it
# to the pipeline.
ee = BertEntityExtractor.from_nlp(nlp, label2id=label2id, **bert_cfg)
ee.model = create_estimator(**bert_cfg)
ee.set_values()
ee.create_predictor()
nlp.add_pipe(ee)

W0911 18:09:40.200305 4382737856 deprecation_wrapper.py:119] From /Users/yohei_tamura/work/bedore-ner/bedoner/entity_extractors/bert_modeling.py:93: The name tf.gfile.GFile is deprecated. Please use tf.io.gfile.GFile instead.

W0911 18:09:40.203735 4382737856 deprecation_wrapper.py:119] From /Users/yohei_tamura/work/bedore-ner/bedoner/entity_extractors/bert_ner.py:168: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0911 18:09:40.208847 4382737856 deprecation_wrapper.py:119] From /Users/yohei_tamura/work/bedore-ner/bedoner/entity_extractors/bert_ner.py:218: The name tf.logging.info is deprecated. Please use tf.compat.v1.logging.info instead.

W0911 18:09:40.215194 4382737856 deprecation_wrapper.py:119] From /Users/yohei_tamura/work/bedore-ner/bedoner/entity_extractors/bert_modeling.py:171: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

W0911 18:09:40.217982 4382737856 deprecation_wrapper.py:119] From /Use

In [9]:
# Baseline entities from the in-memory BERT pipeline.
# `text` is the sample sentence defined in the date_ruler section.
expected = nlp(text).ents
# Packaging is left to the following cell, which already calls create_package.
# Calling it here as well would build the package twice and leave the first
# temporary directory without an explicit cleanup.

In [23]:
# Package the in-memory pipeline, reload it from the temporary directory and
# check that the reloaded pipeline finds the same entities as before.
# NOTE(review): `create_package` is not defined in this notebook; presumably it
# comes from packaging.py (run in the first cell) — confirm.
pkgd, tmpd = create_package(nlp)
try:
    nlp = spacy.load(tmpd.name)
finally:
    # Remove the temporary directory even when loading the model fails.
    tmpd.cleanup()
assert nlp(text).ents == expected

[38;5;2m✔ Loaded meta.json from file[0m
/var/folders/vc/4qw043p150b0gtkbm6rhqw9w0000gp/T/tmp14n_3cqe/meta.json
[38;5;2m✔ Successfully created package 'knp_entity_extractor-0.0.0'[0m
/Users/yohei_tamura/work/bedore-ner/scripts/../pkgs/knp_entity_extractor-0.0.0
To build the package, run `python setup.py sdist` in this directory.
