<a href="https://colab.research.google.com/github/LxYuan0420/nlp/blob/main/notebooks/Quickstart_with_zshot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install zshot

In [None]:
!spacy download en_core_web_lg

In [1]:
import spacy
from zshot import PipelineConfig, displacy
from zshot.linker import LinkerRegen, LinkerSMXM
from zshot.mentions_extractor import MentionsExtractorSpacy
from zshot.utils.data_models import Entity

nlp = spacy.load('en_core_web_lg')

In [2]:
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [13]:
nlp.remove_pipe("zshot")

('zshot', <zshot.zshot.Zshot at 0x7f988b2c4f90>)

In [14]:
# zero shot definition of entities
nlp_config = PipelineConfig(
    entities=[
        Entity(name="person", description="A given name is the part of a personal name that identifies a person, potentially with a middle name as well, and differentiates that person from the other members of a group who have a common surname"),
        Entity(name="company", description="The name of a company"),
        Entity(name="location", description="A physical location"),
        Entity(name="chemical compound", description="Any of a large class of chemical compounds in which one or more atoms of carbon are covalently linked to atoms of other elements, most commonly hydrogen, oxygen, or nitrogen"),
        Entity(name="country", description="A country is a distinct part of the world, such as a state, nation, or other political entity"),
        Entity(name="city", description="A capital or capital city is the municipality holding primary status in a country, state, province, department, or other subnational entity, usually as its seat of the government"),
        Entity(name="election", description="An election is a formal group decision-making process by which a population chooses an individual or multiple individuals to hold public office")
    ], 
    linker=LinkerSMXM()
)
# add zshot spaCy component to spaCy pipeline
nlp.add_pipe('zshot', config=nlp_config, last=True)

text = 'Madrid is the capital of Spain'

doc = nlp(text)


In [15]:
displacy.render(doc, style='ent', jupyter=True)

In [16]:
text2 = """KUALA LUMPUR: Former youth and sports minister Syed Saddiq Syed Abdul Rahman was ordered by the High Court to enter his defence on four charges of misappropriation of assets, money laundering and abetting in criminal breach of trust (CBT). The Malaysian Insight reported on Friday that MUDA’s supreme council is expected to meet soon to decide whether Syed Saddiq should stand in GE15."""
doc = nlp(text2)

displacy.render(doc, style='ent', jupyter=True)

In [21]:
text3 = ("""\
TAIPEI: Chinese veterinary experts have been invited to Taiwan, zoo officials said on Friday (Oct 28), for a rare visit between the two sides after a male panda that symbolised an era of warmer ties was moved into end-of-life care. \
Relations between China and Taiwan have been on ice since 2016 with Beijing severing official communications and government visits between the two sides scrapped. \
But Taiwan has made an exception after Tuan Tuan, a male panda that was gifted to the island by Beijing in 2008, fell ill in recent weeks and looks to be entering his twilight days.\
""")

In [23]:
doc = nlp(text3)
displacy.render(doc, style="ent", jupyter=True)