In [1]:
import spacy
import json

### Load models

In [2]:
# Load the best checkpoint of the NER-only pipeline; later cells extend this object in place.
nlp_ner = spacy.load('/src/ia2/ia2/models/ner/model-best')

In [3]:
# Components that ship with the NER-only model.
nlp_ner.pipe_names

['tok2vec', 'ner']

In [4]:
# Load the full pipeline; it is used below as the source of the morphologizer and parser.
nlp_pipe = spacy.load('/src/ia2/ia2/models/all_pipe_t2v_ner/model-best')

In [5]:
# Components available in the full pipeline.
nlp_pipe.pipe_names

['tok2vec', 'morphologizer', 'parser', 'ner', 'attribute_ruler', 'lemmatizer']

### Add morphologizer and parser to the NER pipeline

In [6]:
# Copy the trained morphologizer from nlp_pipe into the NER pipeline, right after tok2vec.
# NOTE(review): if this component listens to nlp_pipe's shared tok2vec, it would presumably be
# fed by nlp_ner's tok2vec after sourcing — confirm whether nlp_pipe.replace_listeners(...) is needed.
nlp_ner.add_pipe('morphologizer', after='tok2vec', source=nlp_pipe)

<spacy.pipeline.morphologizer.Morphologizer at 0x7f2a61a1b1c0>

In [7]:
# Check the component order after inserting the morphologizer.
nlp_ner.pipe_names

['tok2vec', 'morphologizer', 'ner']

In [8]:
# Copy the trained dependency parser from nlp_pipe, placed right after the morphologizer.
# NOTE(review): if the parser listens to nlp_pipe's shared tok2vec, confirm it still receives
# compatible embeddings once it runs inside nlp_ner.
nlp_ner.add_pipe('parser', after='morphologizer', source=nlp_pipe)

<spacy.pipeline.dep_parser.DependencyParser at 0x7f2a5e509e70>

### Add EntityRuler to the NER pipeline

In [9]:
# NOTE(review): EntityRuler is imported but not referenced in the visible cells; the component
# is created below through its registered string name "entity_ruler".
from spacy.pipeline import EntityRuler
from ia2.pipeline.entity_ruler import fetch_ruler_patterns_by_tag

# Tag passed to fetch_ruler_patterns_by_tag to select the rule patterns to load.
pipelines_tag = "todas"

# Insert a new, empty entity_ruler before the statistical NER component, then load
# the patterns fetched for the chosen tag into it.
ruler = nlp_ner.add_pipe("entity_ruler", before='ner')
ruler.add_patterns(fetch_ruler_patterns_by_tag(pipelines_tag))

In [10]:
# Check that entity_ruler sits before ner.
nlp_ner.pipe_names

['tok2vec', 'morphologizer', 'parser', 'entity_ruler', 'ner']

In [11]:
# Entity labels known to the statistical NER component.
nlp_ner.get_pipe('ner').labels

('ARTÍCULO', 'DIRECCIÓN', 'LOC', 'PER')

In [12]:
# Entity labels covered by the patterns loaded into the entity_ruler.
nlp_ner.get_pipe('entity_ruler').labels

('BANCO',
 'CBU',
 'CORREO_ELECTRÓNICO',
 'ESTUDIOS',
 'FECHA',
 'FECHA_NUMÉRICA',
 'LEY',
 'LINK',
 'MARCA_AUTOMÓVIL',
 'NACIONALIDAD',
 'NOMBRE_ARCHIVO',
 'NUM_CUIJ',
 'NUM_CUIT_CUIL',
 'NUM_DNI',
 'NUM_IP',
 'NUM_TELÉFONO',
 'PASAPORTE',
 'PATENTE_DOMINIO',
 'USUARIX')

### Add entity_matcher to the NER pipeline

In [13]:
# The imported names are not used by active code in the visible cells; the import presumably
# registers the "matcher" factory as a side effect — TODO confirm.
from ia2.pipeline.entity_matcher import (EntityMatcher, matcher_patterns, fetch_cb_by_tag)

# Append the project's "matcher" component at the end of the pipeline.
entity_matcher = nlp_ner.add_pipe("matcher")

In [14]:
# Disabled alternative kept for reference: builds EntityMatcher by hand instead of
# going through nlp_ner.add_pipe("matcher").
# from ia2.pipeline.entity_matcher import (EntityMatcher, matcher_patterns, fetch_cb_by_tag)

# entity_matcher = EntityMatcher(
#             nlp_ner,
#             matcher_patterns,
#             after_callbacks=[cb(nlp_ner) for cb in fetch_cb_by_tag(pipelines_tag)],
#         )

In [15]:
# Check that matcher was appended after ner.
nlp_ner.pipe_names

['tok2vec', 'morphologizer', 'parser', 'entity_ruler', 'ner', 'matcher']

### Add entity_custom to the NER pipeline

In [16]:
# EntityCustom is not referenced by active code in the visible cells; the import presumably
# registers the "matcher_custom" factory used by add_pipe — TODO confirm.
from ia2.pipeline.entity_custom import EntityCustom

# Disabled direct construction, kept for reference.
# entity_custom = EntityCustom(nlp_ner, pipelines_tag)

In [17]:
# Append the project's "matcher_custom" component as the last pipeline step.
nlp_ner.add_pipe("matcher_custom")

<ia2.pipeline.entity_custom.EntityCustom at 0x7f2bf4735d20>

In [18]:
# Final component order of the assembled pipeline.
nlp_ner.pipe_names

['tok2vec',
 'morphologizer',
 'parser',
 'entity_ruler',
 'ner',
 'matcher',
 'matcher_custom']

### Save custom model

In [19]:
# Persist the assembled pipeline so the CLI commands below can load it by path.
nlp_ner.to_disk('/src/ia2/ia2/models/custom_model')

### Evaluate custom model

In [20]:
# Score the saved pipeline on the test DocBin: metrics go to the --output JSON and up to 15
# displaCy renders go to --displacy-path. --code imports the ia2 pipeline module first
# (presumably so the custom "matcher"/"matcher_custom" factories are registered — confirm).
# NOTE(review): "ouputs" and "metics" look like typos of "outputs"/"metrics"; they are left
# unchanged because they are runtime paths that other tooling may depend on.
! python -m spacy evaluate /src/ia2/ia2/models/custom_model  /resources/datasets/docbin/test.spacy --output /resources/ouputs/metrics/custom_model/metics_v2.json --gold-preproc --code /src/ia2/ia2/pipeline/__init__.py --displacy-path /resources/ouputs/displacy/custom_model --displacy-limit 15

[38;5;4mℹ Using CPU[0m
[38;5;4mℹ To switch to GPU 0, use the option: --gpu-id 0[0m
[1m

TOK      100.00
POS      -     
MORPH    -     
UAS      -     
LAS      -     
NER P    11.53 
NER R    40.69 
NER F    17.96 
SENT P   -     
SENT R   -     
SENT F   -     
SPEED    4906  

[1m

                         P       R       F
NUM                   0.00    0.00    0.00
ARTÍCULO              9.38   11.52   10.34
LEY                   0.00    0.00    0.00
NUM_CUIJ              0.00    0.00    0.00
NUM_ACTUACIÓN         0.00    0.00    0.00
NUM_TELÉFONO          0.00    0.00    0.00
LOC                  64.86   69.06   66.90
FECHA                 0.00    0.00    0.00
DIRECCIÓN            66.67   80.73   73.03
PER                  48.12   67.72   56.26
PERIODO               0.00    0.00    0.00
CORREO_ELECTRÓNICO    0.00    0.00    0.00
NUM_DNI               0.00    0.00    0.00
FECHA_NUMÉRICA        0.00    0.00    0.00
FECHA_RESOLUCION      0.00    0.00    0.00
ESTUDIOS            

In [None]:
# Same evaluation as the cell above, written to metics_custom.json, but without --code.
# NOTE(review): the saved pipeline contains custom components; without --code this may fail to
# resolve their factories unless the installed ia2 package registers them — confirm.
! python -m spacy evaluate /src/ia2/ia2/models/custom_model  /resources/datasets/docbin/test.spacy --output /resources/ouputs/metrics/custom_model/metics_custom.json --gold-preproc --displacy-path /resources/ouputs/displacy/custom_model --displacy-limit 15

In [None]:
# Load the unified test set used for the manual spot checks below.
path_data_testing = '/resources/datasets/unified/test.json'
# Decode explicitly as UTF-8 instead of relying on the locale-dependent default of open(),
# so non-ASCII text in the dataset is read the same way on every machine.
with open(path_data_testing, encoding='utf-8') as f:
    testing_data = json.load(f)

In [None]:
# Run the assembled pipeline on a single sample: record index 3, first field.
# assumes the first element of each record is the raw text — TODO confirm against the dataset schema
text = testing_data[3][0]
doc = nlp_ner(text)

In [None]:
# Entity spans found in the sample document.
doc.ents

In [None]:
# Number of entities found in the sample document.
len(doc.ents)

In [None]:
# Render the detected entities inline in the notebook with displaCy.
spacy.displacy.render(doc, style='ent', jupyter=True)

### Deploy custom model

In [21]:
# Print the saved pipeline's metadata (language, version, spaCy compatibility, components).
! python -m spacy info /src/ia2/ia2/models/custom_model 

[1m

lang                es                            
name                pipeline                      
version             0.0.0                         
spacy_version       >=3.4.4,<3.5.0                
description                                       
author                                            
email                                             
url                                               
license                                           
spacy_git_version   Unknown                       
vectors             {'width': 300, 'vectors': 500000, 'keys': 500000, 'name': 'es_vectors', 'mode': 'default'}
pipeline            ['tok2vec', 'morphologizer', 'parser', 'entity_ruler', 'ner', 'matcher', 'matcher_custom']
components          ['tok2vec', 'morphologizer', 'parser', 'entity_ruler', 'ner', 'matcher', 'matcher_custom']
disabled            []                            
source              /src/ia2/ia2/models/custom_model



In [22]:
# Build an installable package from the saved pipeline into the deploy directory:
# --code bundles the ia2 pipeline module, -v sets the package version to 1.1.0,
# and -f overwrites any existing output.
! python -m spacy package /src/ia2/ia2/models/custom_model /src/ia2/ia2/deploy --code /src/ia2/ia2/pipeline/__init__.py -v 1.1.0 -f

[38;5;4mℹ Building package artifacts: sdist[0m
[38;5;2m✔ Including 1 Python module(s) with custom code[0m
[38;5;2m✔ Including 1 package requirement(s) from meta and config[0m
ia2>=1.1.0,<1.2.0
[38;5;2m✔ Loaded meta.json from file[0m
/src/ia2/ia2/models/custom_model/meta.json
[38;5;4mℹ Using existing README.md from pipeline directory[0m
[38;5;2m✔ Successfully created package directory 'es_pipeline-1.1.0'[0m
/src/ia2/ia2/deploy/es_pipeline-1.1.0
running sdist
running egg_info
writing es_pipeline.egg-info/PKG-INFO
writing dependency_links to es_pipeline.egg-info/dependency_links.txt
writing entry points to es_pipeline.egg-info/entry_points.txt
writing requirements to es_pipeline.egg-info/requires.txt
writing top-level names to es_pipeline.egg-info/top_level.txt
reading manifest file 'es_pipeline.egg-info/SOURCES.txt'
reading manifest template 'MANIFEST.in'
writing manifest file 'es_pipeline.egg-info/SOURCES.txt'
running check
creating es_pipeline-1.1.0
creating es_pipeline-1.1