In [18]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [15]:
from pathlib import Path 
import shutil

In [25]:
def create_package(nlp):
    meta=nlp.meta
    pkgs = Path("../pkgs")
    tmpd = Path("/tmp") / meta["name"]
    if tmpd.exists(): shutil.rmtree(tmpd)
    nlp.to_disk(tmpd)
    
    package(tmpd, pkgs, force=True)
    model_name  =  meta["lang"] + "_" + meta["name"]
    pkgd = pkgs / (model_name+ "-" + meta["version"])
    return pkgd, tmpd

# date_ruler

In [17]:
import spacy
from bedoner.lang.mecab import Japanese
name="date_ruler"
nlp = Japanese(meta={"name": "date_ruler"})
p = nlp.create_pipe(name)
nlp.add_pipe(p)
pkgd, tmpd = create_package(nlp)

nlp = spacy.load(tmpd)
nlp("2019年11月8日に高松隆と東京タワーに行った").ents

[38;5;2m✔ Loaded meta.json from file[0m
/tmp/date_ruler/meta.json
[38;5;2m✔ Successfully created package 'mecab_date_ruler-0.0.0'[0m
../pkgs/mecab_date_ruler-0.0.0
To build the package, run `python setup.py sdist` in this directory.


(2019年11月8日,)

# person_ruler

- mecabのユーザ辞書を含める必要がありちょっと面倒

In [19]:
import os 
import spacy
from bedoner.lang.mecab import Japanese
from bedoner.entity_rulers.person import create_person_ruler
from pathlib import Path
from spacy.cli import package
from shutil import copy

name="person_ruler"
user_dic = os.path.expanduser("~/.bedoner/user.dic")
nlp = Japanese(meta={"tokenizer": {"userdic": user_dic,"assets": "./jinmei/"}, "name":"person_ruler", })
nlp.add_pipe(create_person_ruler(nlp))
pkgd, tmpd = create_package(nlp)

nlp=spacy.load(tmpd)
nlp("2019年11月8日に高松隆と東京タワーに行った").ents

[38;5;2m✔ Loaded meta.json from file[0m
/tmp/person_ruler/meta.json
[38;5;2m✔ Successfully created package 'mecab_person_ruler-0.0.0'[0m
../pkgs/mecab_person_ruler-0.0.0
To build the package, run `python setup.py sdist` in this directory.


(高松隆,)

# entity_ruler

- 上の二つの組みわせ

In [23]:
import spacy
from bedoner.entity_rulers.person import create_person_ruler
from bedoner.entity_rulers.date import DateRuler

name="entity_ruler"
nlp = Japanese(meta={"tokenizer": {"userdic": user_dic,"assets": "./jinmei/"}, "name":"person_ruler", })
nlp.add_pipe(DateRuler(nlp))
nlp.add_pipe(create_person_ruler(nlp))
create_package(nlp)

nlp = spacy.load(tmpd) 
nlp("2019年11月8日に高松隆と海に行った").ents

[38;5;2m✔ Loaded meta.json from file[0m
/tmp/person_ruler/meta.json
[38;5;2m✔ Successfully created package 'mecab_person_ruler-0.0.0'[0m
../pkgs/mecab_person_ruler-0.0.0
To build the package, run `python setup.py sdist` in this directory.


(2019年11月8日, 高松隆)

# knp entity extractor

In [29]:
import spacy
from bedoner.lang.knp import Japanese
from bedoner.entity_extractors.knp import KnpEntityExtractor

name = "knp_entity_extractor"
nlp = Japanese(meta={"name": name})
p = nlp.create_pipe("knp_entity_extractor")
nlp.add_pipe(p)
pkgd, tmpd=create_package(nlp)

nlp = spacy.load(tmpd)
nlp("2019年11月8日に高松隆と東京タワーに行った").ents

[38;5;2m✔ Loaded meta.json from file[0m
/tmp/knp_entity_extractor/meta.json
[38;5;2m✔ Successfully created package 'knp_knp_entity_extractor-0.0.0'[0m
../pkgs/knp_knp_entity_extractor-0.0.0
To build the package, run `python setup.py sdist` in this directory.


(2019年11月8日, 高松隆, 東京タワー)