In [1]:
import spacy
import json

## Train CPU: NER

#### Download base model

In [3]:
# !python -m spacy download es_core_news_lg

#### Create config to train only NER

In [None]:
# ! python -m spacy init config config_NER.cfg --lang es --pipeline ner --optimize accuracy

#### Train model

In [2]:
# Train the NER-only pipeline (tok2vec + ner) on CPU; no --gpu-id is passed.
# --paths.train / --paths.dev point the config at the train and validation DocBin files.
# The trained pipeline is written under --output; `model-best` is loaded from there later on.
# NOTE(review): config_NER.cfg and the es_core_news_lg vectors must already exist --
# the cells that create/download them are commented out, so a fresh
# "Restart & Run All" depends on artifacts left over from an earlier session.
! python -m spacy train config_NER.cfg --output /src/ia2/ia2/models/ner  --paths.train /resources/datasets/docbin/train.spacy --paths.dev /resources/datasets/docbin/val.spacy

[38;5;4mℹ Saving to output directory: /src/ia2/ia2/models/ner[0m
[38;5;4mℹ Using CPU[0m
[38;5;4mℹ To switch to GPU 0, use the option: --gpu-id 0[0m
[1m
[2023-01-04 18:03:51,046] [INFO] Set up nlp object from config
[2023-01-04 18:03:51,058] [INFO] Pipeline: ['tok2vec', 'ner']
[2023-01-04 18:03:51,061] [INFO] Created vocabulary
[2023-01-04 18:03:51,982] [INFO] Added vectors: es_core_news_lg
[2023-01-04 18:03:53,081] [INFO] Finished initializing nlp object
[2023-01-04 18:04:25,205] [INFO] Initialized pipeline components: ['tok2vec', 'ner']
[38;5;2m✔ Initialized pipeline[0m
[1m
[38;5;4mℹ Pipeline: ['tok2vec', 'ner'][0m
[38;5;4mℹ Initial learn rate: 0.001[0m
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00    716.22    0.00    0.00    0.00    0.00
  0     200        935.93  20105.38   53.34   47.64   60.60    0.53
  1     400        146.14   6236.62   60.65   68.08

## Train GPU: NER

#### Create config

In [None]:
# ! python -m spacy init config config_GPU.cfg --lang es --pipeline ner --gpu

#### Train model

In [None]:
# ! python -m spacy train config_GPU.cfg --output /src/ia2/ia2/models  --paths.train /resources/datasets/docbin/train.spacy  --paths.dev /resources/datasets/docbin/val.spacy --gpu-id 0

## Load Best Model: NER

In [3]:
# Load the best checkpoint written by the NER-only training run.
ner_model_dir = '/src/ia2/ia2/models/ner/model-best'
nlp_ner = spacy.load(ner_model_dir)

In [4]:
# Display the entity labels known to the trained NER component.
ner_component = nlp_ner.get_pipe('ner')
ner_component.labels

('ARTÍCULO', 'DIRECCIÓN', 'LOC', 'PER')

In [5]:
# Display the (name, component) pairs of the loaded NER-only pipeline.
nlp_ner.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x7f1c711a2140>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x7f1c71159cb0>)]

## Evaluate NER

In [6]:
# Score the NER-only `model-best` on the test DocBin and save the metrics as JSON to --output.
# --gold-preproc asks spaCy to use gold preprocessing (see the spaCy CLI docs);
# presumably this is why TOK is reported as 100.00 -- confirm.
# NOTE(review): "ouputs" and "metics.json" in the --output path look like typos of
# "outputs" / "metrics.json". Left unchanged because the path must match what exists
# on disk and what downstream tooling reads -- confirm before renaming.
! python -m spacy evaluate /src/ia2/ia2/models/ner/model-best  /resources/datasets/docbin/test.spacy --output /resources/ouputs/metrics/NER/metics.json --gold-preproc

[38;5;4mℹ Using CPU[0m
[38;5;4mℹ To switch to GPU 0, use the option: --gpu-id 0[0m
[1m

TOK     100.00
NER P   66.19 
NER R   77.93 
NER F   71.58 
SPEED   6804  

[1m

                P       R       F
ARTÍCULO    72.83   83.91   77.98
LOC         72.18   69.06   70.59
DIRECCIÓN   70.40   80.73   75.21
PER         48.13   68.25   56.46

[38;5;2m✔ Saved results to /resources/ouputs/metrics/NER/metics.json[0m


# Train pipe

### Train CPU

Train tok2vec (T2V) and NER, and freeze the other components because the morphologizer is still needed.

In [4]:
# Train the full pipeline described by config_PIPE.cfg on CPU; no --gpu-id is passed.
# Per the recorded log, only tok2vec and ner are initialized for training, while
# morphologizer, parser, attribute_ruler and lemmatizer are listed as frozen components.
# The trained pipeline is written under --output; `model-best` is loaded from there later on.
# NOTE(review): no visible cell creates config_PIPE.cfg -- it must already exist
# next to the notebook; document or add the step that generates it.
! python -m spacy train config_PIPE.cfg --output /src/ia2/ia2/models/all_pipe_t2v_ner  --paths.train /resources/datasets/docbin/train.spacy --paths.dev /resources/datasets/docbin/val.spacy

[38;5;2m✔ Created output directory: /src/ia2/ia2/models/all_pipe_t2v_ner[0m
[38;5;4mℹ Saving to output directory: /src/ia2/ia2/models/all_pipe_t2v_ner[0m
[38;5;4mℹ Using CPU[0m
[38;5;4mℹ To switch to GPU 0, use the option: --gpu-id 0[0m
[1m
[2023-01-05 15:07:20,524] [INFO] Set up nlp object from config
[2023-01-05 15:07:20,534] [INFO] Pipeline: ['tok2vec', 'morphologizer', 'parser', 'ner', 'attribute_ruler', 'lemmatizer']
[2023-01-05 15:07:20,536] [INFO] Created vocabulary
[2023-01-05 15:07:21,604] [INFO] Added vectors: es_core_news_lg
[2023-01-05 15:07:22,522] [INFO] Finished initializing nlp object
[2023-01-05 15:07:35,046] [INFO] Initialized pipeline components: ['tok2vec', 'ner']
[38;5;2m✔ Initialized pipeline[0m
[1m
[38;5;4mℹ Pipeline: ['tok2vec', 'morphologizer', 'parser', 'ner',
'attribute_ruler', 'lemmatizer'][0m
[38;5;4mℹ Frozen components: ['morphologizer', 'parser', 'attribute_ruler',
'lemmatizer'][0m
[38;5;4mℹ Initial learn rate: 0.001[0m
E    #       LOSS

## Load Best Model

In [5]:
# Load the best checkpoint of the full pipeline (tok2vec and ner trained, other components frozen).
pipe_model_dir = '/src/ia2/ia2/models/all_pipe_t2v_ner/model-best'
nlp_pipe = spacy.load(pipe_model_dir)

In [6]:
# Display the component names of the loaded full pipeline, in order.
nlp_pipe.pipe_names

['tok2vec', 'morphologizer', 'parser', 'ner', 'attribute_ruler', 'lemmatizer']

## Evaluate

In [7]:
# Score the full-pipeline `model-best` on the test DocBin and save the metrics as JSON to --output.
# --gold-preproc asks spaCy to use gold preprocessing (see the spaCy CLI docs).
# NOTE(review): "ouputs" and "metics.json" in the --output path look like typos of
# "outputs" / "metrics.json". Left unchanged because the path must match what exists
# on disk and what downstream tooling reads -- confirm before renaming.
# NOTE(review): in the recorded output, NER F is 33.18 here versus 71.58 for the
# NER-only model (ARTÍCULO F 7.93 versus 77.98). This drop is surprising and should
# be investigated before this pipeline is used.
! python -m spacy evaluate /src/ia2/ia2/models/all_pipe_t2v_ner/model-best  /resources/datasets/docbin/test.spacy --output /resources/ouputs/metrics/pipe/metics.json --gold-preproc

[38;5;4mℹ Using CPU[0m
[38;5;4mℹ To switch to GPU 0, use the option: --gpu-id 0[0m
[1m

TOK      100.00
TAG      -     
POS      -     
MORPH    -     
LEMMA    -     
UAS      -     
LAS      -     
NER P    30.52 
NER R    36.34 
NER F    33.18 
SENT P   -     
SENT R   -     
SENT F   -     
SPEED    5055  

[1m

                P       R       F
ARTÍCULO     7.44    8.48    7.93
LOC         55.28   64.03   59.33
DIRECCIÓN   64.71   80.73   71.84
PER         44.53   58.20   50.46

[38;5;2m✔ Saved results to /resources/ouputs/metrics/pipe/metics.json[0m
