In [4]:
from flair.models import SequenceTagger
from flair.data import Sentence
tagger = SequenceTagger.load('ner')
text = "Apple Inc. is a technology company headquartered in California."
sentence = Sentence(text)
tagger.predict(sentence)
for entity in sentence.get_spans('ner'):
    print(f"Entity: {entity.text}, Type: {entity.tag}")

2023-07-28 10:12:28,612 SequenceTagger predicts: Dictionary with 20 tags: <unk>, O, S-ORG, S-MISC, B-PER, E-PER, S-LOC, B-ORG, E-ORG, I-PER, S-PER, B-MISC, I-MISC, E-MISC, I-ORG, B-LOC, E-LOC, I-LOC, <START>, <STOP>
Entity: Apple Inc., Type: ORG
Entity: California, Type: LOC


In [22]:
import spacy
from spacy.pipeline import EntityRuler
# Load the transformer-based language model
nlp = spacy.load("en_core_web_trf")
patterns = [
    {"label": "LOCATION", "pattern": [{"lower": {"in": ["city", "town", "village"]}}]},
    {"label": "PRICE", "pattern": [{"lower": {"in": ["price", "cost", "budget"]}}]},
    {"label": "BHK", "pattern": [{"lower": {"in": ["bhk", "bedroom", "hall", "kitchen"]}}]},
    {"label": "PROPERTY_TYPE", "pattern": [{"lower": {"in": ["house", "flat", "apartment"]}}]},
    {"label": "PERSON", "pattern": [{"ent_type": "PERSON"}]},
    {"label": "NORP", "pattern": [{"ent_type": "NORP"}]},
    {"label": "FAC", "pattern": [{"ent_type": "FAC"}]},
    {"label": "ORG", "pattern": [{"ent_type": "ORG"}]},
    {"label": "GPE", "pattern": [{"ent_type": "GPE"}]},
    {"label": "LOC", "pattern": [{"ent_type": "LOC"}]},
    {"label": "PRODUCT", "pattern": [{"ent_type": "PRODUCT"}]},
    {"label": "LANGUAGE", "pattern": [{"ent_type": "LANGUAGE"}]},
    {"label": "PERCENT", "pattern": [{"ent_type": "PERCENT"}]},
    {"label": "MONEY", "pattern": [{"ent_type": "MONEY"}]},
    {"label": "QUANTITY", "pattern": [{"ent_type": "QUANTITY"}]},
    {"label": "ORDINAL", "pattern": [{"ent_type": "ORDINAL"}]},
    {"label": "CARDINAL", "pattern": [{"ent_type": "CARDINAL"}]}
]
ruler = EntityRuler(nlp)
ruler.add_patterns(patterns)
nlp.add_pipe('entity_ruler', before="ner", config={"overwrite_ents": True})
nlp.get_pipe("entity_ruler").add_patterns(patterns)
text = """7 BHK Independent House for sale in Sagarpur, Delhi. This 7 BHK unit is available in Sagarpur and offers a premium lifestyle at the best price. This property is posted by owner and there is no brokerage involved. Contact now, for details. It is a desired purchase for any homebuyer in Sagarpur. The price of this Independent House is Rs 55.0 L. Residents in this project also pay monthly maintenance charges of Rs 0. The built-up area is 1500 Square feet. There are 7 bedrooms and 4 bathroom. This property is at a walking distance from Deen Dayal Upadhyay Hospital, Mata Chanan Devi Hospital, and Sa..."""
doc = nlp(text)
entities_dict = {}
for ent in doc.ents:
    label = ent.label_
    if label not in entities_dict:
        entities_dict[label] = []
    entities_dict[label].append(ent.text)
for label, entities in entities_dict.items():
    print(f"{label}: {(entities)}")

BHK: ['BHK', 'BHK']
PROPERTY_TYPE: ['House', 'House']
GPE: ['Sagarpur', 'Delhi', 'Sagarpur', 'Sagarpur']
CARDINAL: ['7', '7', '4']
PRICE: ['price', 'price']
MONEY: ['Rs 55.0 L.', '0']
DATE: ['monthly']
QUANTITY: ['1500 Square feet']
ORG: ['Deen Dayal Upadhyay Hospital', 'Mata Chanan Devi Hospital']


In [7]:
import spacy

nlp = spacy.load('en_core_web_sm')
text = """7 BHK Independent House for sale in Sagarpur, Delhi. This 7 BHK unit is available in Sagarpur and offers a premium lifestyle at the best price. This property is posted by owner and there is no brokerage involved. Contact now, for details. It is a desired purchase for any homebuyer in Sagarpur. The price of this Independent House is Rs 55.0 L. Residents in this project also pay monthly maintenance charges of Rs 0. The built-up area is 1500 Square feet. There are 7 bedrooms and 4 bathroom. This property is at a walking distance from Deen Dayal Upadhyay Hospital, Mata Chanan Devi Hospital, and Sa..."""

doc = nlp(text)
entities = []
for ent in doc.ents:
    entities.append((ent.text, ent.label_))
for entity, label in entities:
    print(f"Entity: {entity}, Label: {label}")

Entity: 7, Label: CARDINAL
Entity: BHK Independent House, Label: ORG
Entity: Sagarpur, Label: GPE
Entity: Delhi, Label: GPE
Entity: BHK, Label: ORG
Entity: Sagarpur, Label: GPE
Entity: Sagarpur, Label: GPE
Entity: this Independent House, Label: ORG
Entity: monthly, Label: DATE
Entity: 1500 Square feet, Label: QUANTITY
Entity: 7, Label: CARDINAL
Entity: 4, Label: CARDINAL
Entity: Deen Dayal Upadhyay Hospital, Label: ORG
Entity: Mata Chanan Devi Hospital, Label: ORG


In [None]:
custom_patterns = [
    # Location names
    {"label": "LOCATION", "pattern": [{"lower": city.lower()} for city in ["New York", "London", "Mumbai"]]},
    {"label": "LOCATION", "pattern": [{"lower": state.lower()} for state in ["California", "Texas", "Ontario"]]},
    {"label": "LOCATION", "pattern": [{"lower": country.lower()} for country in ["United States", "United Kingdom", "India"]]},
    {"label": "LOCATION", "pattern": [{"lower": neighborhood.lower()} for neighborhood in ["Downtown", "Chelsea", "Beverly Hills"]]},
    # Property Types
    {"label": "PROPERTY_TYPE", "pattern": "Flat"},
    {"label": "PROPERTY_TYPE", "pattern": "House"},
    {"label": "PROPERTY_TYPE", "pattern": "Apartment"},
    {"label": "PROPERTY_TYPE", "pattern": "Condo"},
    {"label": "PROPERTY_TYPE", "pattern": "Villa"},
    {"label": "PROPERTY_TYPE", "pattern": "Townhouse"},
    {"label": "PROPERTY_TYPE", "pattern": "Bungalow"},
    # BHK patterns converted into a list of dictionaries
    {"label": "BHK", "pattern": [{"lower": "1 bhk"}]},
    {"label": "BHK", "pattern": [{"lower": "2 bhk"}]},
    {"label": "BHK", "pattern": [{"lower": "3 bhk"}]},
    {"label": "BHK", "pattern": [{"lower": "4 bhk"}]},
    {"label": "BHK", "pattern": [{"lower": "5 bhk"}]},
    {"label": "BHK", "pattern": [{"lower": {"REGEX": r"\d+\sBHK"}}]},
    # For other BHK configurations like "6 BHK", "7 BHK", etc.
    {"label": "PRICE", "pattern": [{"lower": term.lower()} for term in ["Price", "Cost", "Rent", "Selling Price", "Asking Price", "Budget"]]},
    {"label": "SQFT", "pattern": [{"lower": term.lower()} for term in ["Area", "Size", "Square Footage", "Built-up Area", "Carpet Area"]]},
    # Descriptions
    {"label": "DESCRIPTION", "pattern": [{"lower": term.lower()} for term in ["Description", "Details", "Features", "Amenities", "Specifications"]]}
]

In [21]:
import spacy
from spacy.pipeline import EntityRuler

nlp = spacy.load("en_core_web_sm")

# Remove existing "custom_ner" component if it exists
if "custom_ner" in nlp.pipe_names:
    nlp.remove_pipe("custom_ner")

ruler = EntityRuler(nlp)

patterns = [
    {"label": "LOCATION", "pattern": [{"lower": {"in": ["city", "town", "village"]}}]},
    {"label": "PRICE", "pattern": [{"lower": {"in": ["price", "cost", "budget"]}}]},
    {"label": "BHK", "pattern": [{"lower": {"in": ["bhk", "bedroom", "hall", "kitchen"]}}]},
    {"label": "PROPERTY_TYPE", "pattern": [{"lower": {"in": ["house", "flat", "apartment"]}}]},
    {"label": "PERSON", "pattern": [{"ent_type": "PERSON"}]},
    {"label": "NORP", "pattern": [{"ent_type": "NORP"}]},
    {"label": "FAC", "pattern": [{"ent_type": "FAC"}]},
    {"label": "ORG", "pattern": [{"ent_type": "ORG"}]},
    {"label": "GPE", "pattern": [{"ent_type": "GPE"}]},
    {"label": "LOC", "pattern": [{"ent_type": "LOC"}]},
    {"label": "PRODUCT", "pattern": [{"ent_type": "PRODUCT"}]},
    {"label": "LANGUAGE", "pattern": [{"ent_type": "LANGUAGE"}]},
    {"label": "PERCENT", "pattern": [{"ent_type": "PERCENT"}]},
    {"label": "MONEY", "pattern": [{"ent_type": "MONEY"}]},
    {"label": "QUANTITY", "pattern": [{"ent_type": "QUANTITY"}]},
    {"label": "ORDINAL", "pattern": [{"ent_type": "ORDINAL"}]},
    {"label": "CARDINAL", "pattern": [{"ent_type": "CARDINAL"}]}
]
ruler.add_patterns(custom_patterns)

@spacy.Language.factory("custom_ner")
def custom_ner_component(nlp, name):
    return ruler

nlp.add_pipe("custom_ner", before="ner")

text = """7 BHK Independent House for sale in Sagarpur, Delhi. This 7 BHK unit is available in Sagarpur and offers a premium lifestyle at the best price. This property is posted by owner and there is no brokerage involved. Contact now, for details. It is a desired purchase for any homebuyer in Sagarpur. The price of this Independent House is Rs 55.0 L. Residents in this project also pay monthly maintenance charges of Rs 0. The built-up area is 1500 Square feet. There are 7 bedrooms and 4 bathroom. This property is at a walking distance from Deen Dayal Upadhyay Hospital, Mata Chanan Devi Hospital, and Sa..."""

doc = nlp(text)

for ent in doc.ents:
    print(f"Entity: {ent.text}, Label: {ent.label_}")


ValueError: [E004] Can't set up pipeline component: a factory for 'custom_ner' already exists. Existing factory: <function custom_ner_component at 0x7f6b3d321900>. New factory: <function custom_ner_component at 0x7f6b79e50700>

In [6]:
import spacy
from spacy.tokens import Doc
from spacy.training import Example

nlp = spacy.load("en_core_web_sm")
doc = Doc(nlp.vocab, words=["I", "like", "stuff"])
gold_dict = {"tags": ["NOUN", "VERB", "NOUN"]}
example = Example.from_dict(doc, gold_dict)
print(example)

{'doc_annotation': {'cats': {}, 'entities': ['O', 'O', 'O'], 'spans': {}, 'links': {}}, 'token_annotation': {'ORTH': ['I', 'like', 'stuff'], 'SPACY': [True, True, True], 'TAG': ['NOUN', 'VERB', 'NOUN'], 'LEMMA': ['', '', ''], 'POS': ['', '', ''], 'MORPH': ['', '', ''], 'HEAD': [0, 1, 2], 'DEP': ['', '', ''], 'SENT_START': [1, 0, 0]}}


In [11]:
doc = nlp("Laura flew to Silicon Valley.")
gold_dict = {"entities": ["U-PERS", "O", "O", "B-LOC", "L-LOC"]}
example = Example.from_dict(doc, gold_dict)
print(example)

{'doc_annotation': {'cats': {}, 'entities': ['U-PERS', 'O', 'O', 'B-LOC', 'L-LOC', 'O'], 'spans': {}, 'links': {}}, 'token_annotation': {'ORTH': ['Laura', 'flew', 'to', 'Silicon', 'Valley', '.'], 'SPACY': [True, True, True, True, False, False], 'TAG': ['', '', '', '', '', ''], 'LEMMA': ['', '', '', '', '', ''], 'POS': ['', '', '', '', '', ''], 'MORPH': ['', '', '', '', '', ''], 'HEAD': [0, 1, 2, 3, 4, 5], 'DEP': ['', '', '', '', '', ''], 'SENT_START': [1, 0, 0, 0, 0, 0]}}


In [13]:
doc = nlp("Laura flew to Silicon Valley.")
gold_dict = {"entities": [(0, 5, "PERSON"), (14, 28, "LOC")]}
example = Example.from_dict(doc, gold_dict)
print(example)

{'doc_annotation': {'cats': {}, 'entities': ['U-PERSON', 'O', 'O', 'B-LOC', 'L-LOC', 'O'], 'spans': {}, 'links': {}}, 'token_annotation': {'ORTH': ['Laura', 'flew', 'to', 'Silicon', 'Valley', '.'], 'SPACY': [True, True, True, True, False, False], 'TAG': ['', '', '', '', '', ''], 'LEMMA': ['', '', '', '', '', ''], 'POS': ['', '', '', '', '', ''], 'MORPH': ['', '', '', '', '', ''], 'HEAD': [0, 1, 2, 3, 4, 5], 'DEP': ['', '', '', '', '', ''], 'SENT_START': [1, 0, 0, 0, 0, 0]}}


In [15]:
doc = nlp("I'm pretty happy about that!")
gold_dict = {"cats": {"POSITIVE": 1.0, "NEGATIVE": 0.0}}
example = Example.from_dict(doc, gold_dict)
print(example)

{'doc_annotation': {'cats': {'POSITIVE': 1.0, 'NEGATIVE': 0.0}, 'entities': ['O', 'O', 'O', 'O', 'O', 'O', 'O'], 'spans': {}, 'links': {}}, 'token_annotation': {'ORTH': ['I', "'m", 'pretty', 'happy', 'about', 'that', '!'], 'SPACY': [False, True, True, True, True, False, False], 'TAG': ['', '', '', '', '', '', ''], 'LEMMA': ['', '', '', '', '', '', ''], 'POS': ['', '', '', '', '', '', ''], 'MORPH': ['', '', '', '', '', '', ''], 'HEAD': [0, 1, 2, 3, 4, 5, 6], 'DEP': ['', '', '', '', '', '', ''], 'SENT_START': [1, 0, 0, 0, 0, 0, 0]}}


In [17]:
doc = nlp("Russ Cochran his reprints include EC Comics.")
gold_dict = {"entities": [(0, 12, "PERSON")],
             "links": {(0, 12): {"Q7381115": 1.0, "Q2146908": 0.0}},
             "sent_starts": [1, -1, -1, -1, -1, -1, -1, -1]}
example = Example.from_dict(doc, gold_dict)
print(example)

{'doc_annotation': {'cats': {}, 'entities': ['B-PERSON', 'L-PERSON', 'O', 'O', 'O', 'O', 'O', 'O'], 'spans': {}, 'links': {(0, 12): {'Q7381115': 1.0}}}, 'token_annotation': {'ORTH': ['Russ', 'Cochran', 'his', 'reprints', 'include', 'EC', 'Comics', '.'], 'SPACY': [True, True, True, True, True, True, False, False], 'TAG': ['', '', '', '', '', '', '', ''], 'LEMMA': ['', '', '', '', '', '', '', ''], 'POS': ['', '', '', '', '', '', '', ''], 'MORPH': ['', '', '', '', '', '', '', ''], 'HEAD': [0, 1, 2, 3, 4, 5, 6, 7], 'DEP': ['', '', '', '', '', '', '', ''], 'SENT_START': [1, 0, 0, 0, 0, 0, 0, 0]}}


In [18]:
import spacy
nlp = spacy.load("en_core_web_lg")  
text = "Laura flew to Silicon Valley. Elon Musk is the CEO of Tesla."
doc = nlp(text)
entities = [(ent.text, ent.label_) for ent in doc.ents]
for entity, label in entities:
    print(f"Entity: {entity}, Label: {label}")




Entity: Laura, Label: PERSON
Entity: Silicon Valley, Label: LOC
Entity: Elon Musk, Label: PERSON
Entity: Tesla, Label: ORG


In [19]:
import spacy
nlp = spacy.load("en_core_web_sm")
text = "Laura flew to Silicon Valley. Elon Musk is the CEO of Tesla."
doc = nlp(text)
entities = [(ent.text, ent.label_) for ent in doc.ents]
for entity, label in entities:
    print(f"Entity: {entity}, Label: {label}")

Entity: Laura, Label: PERSON
Entity: Silicon Valley, Label: LOC
Entity: Elon Musk, Label: PERSON
Entity: Tesla, Label: ORG


In [21]:
import spacy
import spacy_transformers
nlp = spacy_transformers.load_model("bert-base-uncased")
text = "Give me the interaction of the ner models using this methods."
doc = nlp(text)
for token in doc:
    print(token.text, token.vector[:5])

AttributeError: module 'spacy_transformers' has no attribute 'load_model'