In [1]:
import spacy
import json

### Load model

In [2]:
# Load the best checkpoint of the trained NER + morphologizer pipeline from disk.
nlp = spacy.load(
    '/src/ia2/ia2/models/ner_mor/model-best'
)

In [3]:
# Show the component names of the freshly loaded pipeline, in execution order.
nlp.pipe_names

['tok2vec', 'morphologizer', 'ner']

### Add EntityRuler to the NER pipeline

In [4]:
from ia2.pipeline.entity_ruler import fetch_ruler_patterns_by_tag

# Tag passed to the project helper to choose which rule patterns to fetch.
# "todas" is Spanish for "all" -- presumably selects every pattern group;
# confirm in ia2.pipeline.entity_ruler.
pipelines_tag = "todas"

# Make the cell safe to re-run: spaCy raises if a component named
# "entity_ruler" is already in the pipeline, so drop a previous instance
# (left over from an earlier execution of this cell) before adding a new one.
if "entity_ruler" in nlp.pipe_names:
    nlp.remove_pipe("entity_ruler")

# Insert the rule-based matcher right after the statistical "ner" component.
# NOTE(review): per the spaCy docs the ruler's default is not to overwrite
# entities already set upstream, so "ner" predictions win on overlaps --
# confirm this is the intended precedence.
ruler = nlp.add_pipe("entity_ruler", after='ner')
patterns = fetch_ruler_patterns_by_tag(pipelines_tag)
ruler.add_patterns(patterns)

In [5]:
# Show the component names again to confirm "entity_ruler" now follows "ner".
nlp.pipe_names

['tok2vec', 'morphologizer', 'ner', 'entity_ruler']

In [6]:
# Labels the statistical "ner" component can predict.
nlp.get_pipe('ner').labels

('ARTÍCULO', 'DIRECCIÓN', 'LOC', 'PER')

In [7]:
# Labels covered by the patterns that were added to the "entity_ruler" component.
nlp.get_pipe('entity_ruler').labels

('BANCO',
 'CBU',
 'CORREO_ELECTRÓNICO',
 'ESTUDIOS',
 'FECHA',
 'FECHA_NUMÉRICA',
 'LEY',
 'LINK',
 'MARCA_AUTOMÓVIL',
 'NACIONALIDAD',
 'NOMBRE_ARCHIVO',
 'NUM_CUIJ',
 'NUM_CUIT_CUIL',
 'NUM_DNI',
 'NUM_IP',
 'NUM_TELÉFONO',
 'PASAPORTE',
 'PATENTE_DOMINIO',
 'USUARIX')

### Save custom model

In [8]:
# Serialize the combined pipeline (model + entity ruler) to its own directory;
# the `spacy evaluate` and `spacy package` cells below read from this path.
nlp.to_disk('/src/ia2/ia2/models/ner_mor_enti/')

### Evaluate custom model

In [9]:
# Score the saved pipeline against the held-out DocBin test set with the spaCy CLI.
# The command writes the metrics to a JSON file (--output), loads the project's
# entity_ruler module as extra code (--code), and writes displaCy renderings,
# capped at 15 (--displacy-path / --displacy-limit).
# NOTE(review): the output directory is spelled "ouputs" -- confirm it matches
# the real directory name before changing it.
! python -m spacy evaluate /src/ia2/ia2/models/ner_mor_enti/ /resources/datasets/docbin/test.spacy --output /resources/ouputs/metrics/ner_mor_ent.json --gold-preproc --code /src/ia2/ia2/pipeline/entity_ruler.py --displacy-path /resources/ouputs/displacy/ner_mor_ent --displacy-limit 15

[38;5;4mℹ Using CPU[0m
[38;5;4mℹ To switch to GPU 0, use the option: --gpu-id 0[0m
[1m

TOK     100.00
POS     -     
MORPH   -     
NER P   41.03 
NER R   81.05 
NER F   54.48 
SPEED   5991  

[1m

                         P       R       F
ARTÍCULO             70.46   86.09   77.50
LEY                   0.00    0.00    0.00
NUM_CUIJ              0.00    0.00    0.00
LOC                  67.66   81.29   73.86
FECHA                 0.00    0.00    0.00
DIRECCIÓN            72.93   88.99   80.17
PER                  50.63   64.02   56.54
CORREO_ELECTRÓNICO    0.00    0.00    0.00
NUM_DNI               0.00    0.00    0.00
NUM_TELÉFONO          0.00    0.00    0.00
FECHA_NUMÉRICA        0.00    0.00    0.00
ESTUDIOS              0.00    0.00    0.00
BANCO                 0.00    0.00    0.00
LINK                  0.00    0.00    0.00
NOMBRE_ARCHIVO        0.00    0.00    0.00
PATENTE_DOMINIO       0.00    0.00    0.00
MARCA_AUTOMÓVIL       0.00    0.00    0.00

[38;5;2m✔ Generated

In [10]:
# Load the unified JSON test set used for the manual spot-check below.
path_data_testing = '/resources/datasets/unified/test.json'
# Decode explicitly as UTF-8: the default encoding of open() depends on the
# locale, and this dataset holds Spanish text with non-ASCII characters.
with open(path_data_testing, encoding='utf-8') as f:
    testing_data = json.load(f)

In [11]:
# Take element 0 of the second test record and run the full pipeline on it.
# Presumably each record is a (text, annotations) pair -- verify against the
# code that produced test.json.
text = testing_data[1][0]
doc = nlp(text)

In [None]:
# Entity spans the pipeline assigned to the sample document.
doc.ents

In [None]:
# Number of entity spans found in the sample document.
len(doc.ents)

In [None]:
# Render the sample document inline in the notebook with its entities highlighted.
spacy.displacy.render(doc, style='ent', jupyter=True)

### Deploy custom model

In [None]:
# Print details of the spaCy installation in this environment before packaging.
! python -m spacy info 

In [15]:
# Build an installable package (version 1.0.0) from the saved pipeline directory
# into the deploy folder, bundling the project's entity_ruler module as custom code.
! python -m spacy package /src/ia2/ia2/models/ner_mor_enti/ /src/ia2/ia2/deploy --code /src/ia2/ia2/pipeline/entity_ruler.py -v 1.0.0

[38;5;4mℹ Building package artifacts: sdist[0m
[38;5;2m✔ Including 1 Python module(s) with custom code[0m
[38;5;2m✔ Loaded meta.json from file[0m
/src/ia2/ia2/models/ner_mor_enti/meta.json
[38;5;2m✔ Generated README.md from meta.json[0m
[38;5;2m✔ Successfully created package directory 'es_pipeline-1.0.0'[0m
/src/ia2/ia2/deploy/es_pipeline-1.0.0
running sdist
running egg_info
creating es_pipeline.egg-info
writing es_pipeline.egg-info/PKG-INFO
writing dependency_links to es_pipeline.egg-info/dependency_links.txt
writing entry points to es_pipeline.egg-info/entry_points.txt
writing requirements to es_pipeline.egg-info/requires.txt
writing top-level names to es_pipeline.egg-info/top_level.txt
writing manifest file 'es_pipeline.egg-info/SOURCES.txt'
reading manifest file 'es_pipeline.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'
writing manifest file 'es_pipeline.egg-info/SOURCES.txt'
running check
creating es_pipeline-1.0.0
creating es_pipeline-1.0.0/es_pipeline
c