In [1]:
import spacy
import random
from spacy.tokens import Doc
from spacy.training import Example

nlp = spacy.blank('zh')  # 英文为 en
nlp = spacy.load('zh_core_web_sm')
if 'ner' not in nlp.pipe_names:
    ner = nlp.create_pipe('ner')
    nlp.add_pipe(ner, last=True)
else:
    ner = nlp.get_pipe("ner")
    
# 添加实体标签
LABELS = ["WEAPON", "LOC", "COUNTRY", "EVENT"]
for label in LABELS:
    ner.add_label(label)

# 训练数据
train_data = [
    ("P-8A海上巡逻机（P-8A Poseidon），是美国波音公司设计生产的新一代海上巡逻机。", {"entities": [(0, 8, "WEAPON"), (10, 22, "WEAPON"), (26, 27, "COUNTRY")]}),
    ("美军方面则有濒海战斗舰吉佛兹号和一架P8-A海神式海上巡逻机参与联合巡逻。", {"entities": [(6, 14, "WEAPON"), (18, 29, "WEAPON"), (32, 35, "EVENT")]}),
    ("菲美联合海空巡逻周四结束，菲律宾战机和舰艇在过去3天巡逻近台湾的巴丹群岛，并在南海进行通行与战术演习，以强化菲美区域合作与联合行动力。", {"entities": [(2, 7, "EVENT"), (13, 15, "COUNTRY"), (39, 35, "LOC"), (39, 40, "LOC"), (43, 49, "EVENT")]}),
    ("菲律宾军方表示，3天的菲美海上合作活动从巴丹群岛附近启动，于南海结束演练。", {"entities": [(0, 2, "COUNTRY"), (21, 24, "LOC"), (31, 32, "LOC")]}),
    ("菲律宾空军派出2架FA-50战机和一架超级巨嘴鸟攻击机。", {"entities": [(0, 2, "COUNTRY"), (10, 16, "WEAPON"), (20, 27, "WEAPON")]}),
]

# 训练模型
for iteration in range(10):
    # 打乱数据
    random.shuffle(train_data)
    # 创建训练实例
    for text, annotations in train_data:
        doc = nlp.make_doc(text)
        example = Example.from_dict(doc, annotations)
        # 训练模型
        nlp.update([example], losses={})

# 测试模型
doc = nlp("菲律宾军方表示，3天的菲美海上合作活动从巴丹群岛附近启动，于南海结束演练。")
print([(ent.text, ent.label_) for ent in doc.ents])

  from .autonotebook import tqdm as notebook_tqdm


[('菲律宾军方', 'FAC'), ('南海', 'LOC')]
