## Span Labelling

In [191]:
import spacy

In [192]:
nlp = spacy.blank("en")
source_nlp = spacy.load("en_core_web_sm")

In [193]:
nlp.tokenizer.from_bytes(source_nlp.tokenizer.to_bytes())
nlp.add_pipe("tagger", source=source_nlp)

<spacy.pipeline.tagger.Tagger at 0x7fe648653dc0>

In [194]:
nlp.pipe_names

['tagger']

In [195]:
nlp.tokenizer.from_bytes(source_nlp.tokenizer.to_bytes())
nlp.add_pipe("attribute_ruler", source=source_nlp)

<spacy.pipeline.attributeruler.AttributeRuler at 0x7fe659d7e180>

In [196]:
nlp.pipe_names

['tagger', 'attribute_ruler']

In [197]:
nlp.add_pipe("lemmatizer", source=source_nlp)

<spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x7fe659d89b40>

In [198]:
from spacy.pipeline.spancat import DEFAULT_SPANCAT_MODEL
config = {
    "threshold": 0.5,
    "spans_key": "labeled_spans",
    "max_positive": None,
    "model": DEFAULT_SPANCAT_MODEL,
    "suggester": {"@misc": "spacy.ngram_suggester.v1", "sizes": [1, 2, 3]},
}
spancat = nlp.add_pipe("spancat", config=config)

In [199]:
nlp.pipe_names

['tagger', 'attribute_ruler', 'lemmatizer', 'spancat']

In [200]:
optimizer = nlp.create_optimizer()
print(optimizer)

<thinc.optimizers.Optimizer object at 0x7fe6593a4220>


In [201]:
spancat.add_label("SPANCAT")

1

In [202]:
nlp.analyze_pipes()

{'summary': {'tagger': {'assigns': ['token.tag'],
   'requires': [],
   'scores': ['tag_acc'],
   'retokenizes': False},
  'attribute_ruler': {'assigns': [],
   'requires': [],
   'scores': [],
   'retokenizes': False},
  'lemmatizer': {'assigns': ['token.lemma'],
   'requires': [],
   'scores': ['lemma_acc'],
   'retokenizes': False},
  'spancat': {'assigns': ['doc.spans'],
   'requires': [],
   'scores': ['spans_sc_f', 'spans_sc_p', 'spans_sc_r'],
   'retokenizes': False}},
 'problems': {'tagger': [],
  'attribute_ruler': [],
  'lemmatizer': [],
  'spancat': []},
 'attrs': {'doc.spans': {'assigns': ['spancat'], 'requires': []},
  'token.tag': {'assigns': ['tagger'], 'requires': []},
  'token.lemma': {'assigns': ['lemmatizer'], 'requires': []}}}

`spancat_bytes = spancat.to_bytes()`

In [203]:
optimizer = nlp.initialize()

ValueError: [E955] Can't find table(s) lemma_rules for language 'en' in spacy-lookups-data. Make sure you have the package installed or provide your own lookup tables if no default lookups are available for your language.

In [None]:
docs = nlp("Welcome to the Bank of China.")

In [None]:
docs.spans

In [None]:
from spacy.tokens import Span

In [None]:
print(docs.spans["labeled_spans"])

In [None]:
docs.spans["sc"] = docs.spans["labeled_spans"]
print(docs.spans["sc"])

In [None]:
from spacy import displacy

In [None]:
displacy.serve(docs, style="span")

# SanpGroup  

A group of arbitrary, potentially overlapping Span objects that all belong to the same Doc object

In [None]:
import spacy

In [None]:
nlp = spacy.blank("en")

In [None]:
doc = nlp("Their goi ng home")
spans = [doc[0:1], doc[1:3]]

In [None]:
from spacy.tokens import SpanGroup

In [None]:
group = SpanGroup(doc, name="errors", spans=spans, attrs={"annotator": "matt"})
doc.spans["errors"] = group

In [None]:
print(doc.spans)

In [None]:
print(type(doc.spans))

In [None]:
displacy.serve(docs, style="span")

## Displaying Labels

In [None]:
import spacy
from spacy import displacy

text = "Welcome to the Bank of China."

nlp = spacy.load("en_core_web_sm")
doc = nlp(text)

displacy.serve(doc, style="ent")

In [None]:
import spacy
from spacy import displacy
from spacy.tokens import Span

In [None]:
text = "Welcome to the Bank of China."

In [None]:
nlp = spacy.blank("en")
doc = nlp(text)

In [None]:
doc.spans["sc"] = [
    Span(doc, 3, 6, "ORG"), 
    Span(doc, 5, 6, "GPE"),
]

In [None]:
displacy.serve(doc, style="span")