In [1]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [2]:
# funkcja do wyświetlania podstawowych etykiet
def show_ents(doc):
    if doc.ents:
        for ent in doc.ents:
            print(ent.text+' - '+ent.label_+' - '+str(spacy.explain(ent.label_)))
    else:
        print('No named entities found.')

In [3]:
doc = nlp(u'May I go to Washington, DC next May to see the Washington Monument?')

show_ents(doc)

Washington, DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [4]:
doc = nlp(u'Can I please borrow 500 dollars from you to buy some Microsoft stock?')

for ent in doc.ents:
    print(ent.text, ent.start, ent.end, ent.start_char, ent.end_char, ent.label_)

500 dollars 4 6 20 31 MONEY
Microsoft 11 12 53 62 ORG


## Podstawowe etykiety
<table>
<tr><td>`ent.text`</td><td>Oruginalny teks</td></tr>
<tr><td>`ent.label`</td><td>Wartość hashowa</td></tr>
<tr><td>`ent.label_`</td><td>Opis</td></tr>
<tr><td>`ent.start`</td><td>Początkowy indeks</td></tr>
<tr><td>`ent.end`</td><td>Końcowy index</td></tr>
<tr><td>`ent.start_char`</td><td>Znak początkowy</td></tr>
<tr><td>`ent.end_char`</td><td>Znak końcowy</td></tr>
</table>


___
## Dodawanie nazwy

In [11]:
doc = nlp(u'Honda to build a U.K. factory for $6 million')

show_ents(doc)

Honda - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [12]:
from spacy.tokens import Span

# Get the hash value of the ORG entity label
ORG = doc.vocab.strings[u'ORG']  

# Create a Span for the new entity
new_ent = Span(doc, 0, 1, label=ORG)

# Add the entity to the existing Doc object
doc.ents = list(doc.ents) + [new_ent]

ValueError: [E103] Trying to set conflicting doc.ents: '(0, 1, 'ORG')' and '(0, 1, 'ORG')'. A token can only be part of one entity, so make sure the entities you're setting don't overlap.

## Frazy połączone z rzeczownikiem

In [14]:
doc = nlp(u"Autonomous cars shift insurance liability toward manufacturers.")

for chunk in doc.noun_chunks:
    print(chunk.text+' - '+chunk.root.text+' - '+chunk.root.dep_+' - '+chunk.root.head.text)

Autonomous cars - cars - nsubj - shift
insurance liability - liability - dobj - shift
manufacturers - manufacturers - pobj - toward
