In [None]:
import pandas as pd
import spacy
from spacy.pipeline import EntityRuler

# Load the transformer-based language model
nlp = spacy.load("en_core_web_trf")

# Token patterns for the EntityRuler: each pattern matches a single token whose
# lowercase form is one of the listed keywords and assigns it the given label.
patterns = [
    {"label": "LOCATION", "pattern": [{"lower": {"in": ["city", "town", "village"]}}]},
    {"label": "PRICE", "pattern": [{"lower": {"in": ["price", "cost", "budget"]}}]},
    {"label": "BHK", "pattern": [{"lower": {"in": ["bhk", "bedroom", "hall", "kitchen","bath","balcony"]}}]},
    {"label": "PROPERTY_TYPE", "pattern": [{"lower": {"in": ["house", "flat", "apartment"]}}]},
]

# spaCy v3 registers pipeline components by their string factory name; handing
# add_pipe() an EntityRuler instance raises a ValueError. add_pipe() returns the
# component it created, so the patterns are loaded into that instance.
# The ruler is inserted before 'ner' so its rule-based spans are set first.
ruler = nlp.add_pipe("entity_ruler", before="ner")
ruler.add_patterns(patterns)

# Texts to be analyzed. The loop below reads a 'Description' column, so `data`
# has to be a DataFrame; a plain list cannot be indexed by column name.
# NOTE(review): placeholder rows — replace with the real dataset load.
data = pd.DataFrame(
    {
        "Description": [
            "2 BHK flat for sale in the city at the best price.",
            "Spacious house with 3 bedroom and a balcony, within budget.",
        ]
    }
)

# Maps each entity label to the list of entity texts found across all rows
entities_dict = {}

# Drop missing descriptions and coerce the rest to str, since the pipeline
# only accepts text input.
descriptions = data["Description"].dropna().astype(str)

# nlp.pipe() streams the texts through the pipeline in batches instead of
# making one nlp() call per row.
for doc in nlp.pipe(descriptions):
    for ent in doc.ents:
        entities_dict.setdefault(ent.label_, []).append(ent.text)

# Upper bounds (inclusive for MODERATE_MAX, exclusive for CHEAP_MAX) of the
# price bands used below.
CHEAP_MAX = 150000
MODERATE_MAX = 300000


def _parse_price(raw):
    """Return `raw` as a float after removing '$' and thousands separators.

    Returns None when the remaining text is not a number, so that non-numeric
    entity texts can be skipped instead of aborting the cell.
    """
    try:
        return float(raw.strip('$').replace(',', ''))
    except ValueError:
        return None


# Parse every PRICE entity exactly once and keep only those that are numeric.
# NOTE(review): the PRICE patterns given to the EntityRuler in this notebook
# match keywords ("price", "cost", "budget"), which are not numeric and are
# therefore skipped here — confirm where actual amounts should come from.
numeric_prices = [
    (price, price_value)
    for price, price_value in (
        (price, _parse_price(price)) for price in entities_dict.get("PRICE", [])
    )
    if price_value is not None
]

# Extract and categorize prices
price_categories = {
    "Cheap": [],
    "Moderate": [],
    "Expensive": []
}

for price, price_value in numeric_prices:
    if price_value < CHEAP_MAX:
        price_categories["Cheap"].append(price)
    elif price_value <= MODERATE_MAX:
        price_categories["Moderate"].append(price)
    else:
        price_categories["Expensive"].append(price)

# Print the extracted entities and price categories
print("Extracted Entities:")
for label, entities in entities_dict.items():
    print(f"{label}: {', '.join(entities)}")

print("\nPrice Categories:")
for category, prices in price_categories.items():
    print(f"{category}: {', '.join(prices)}")

# The average is only defined when at least one numeric price was found;
# guarding here avoids a ZeroDivisionError on an empty list.
all_prices = [price_value for _, price_value in numeric_prices]
if all_prices:
    average_price = sum(all_prices) / len(all_prices)
    print(f"\nAverage Price: ${average_price:.2f}")
else:
    average_price = None
    print("\nAverage Price: N/A (no numeric prices found)")


In [1]:
import spacy
from spacy.pipeline import EntityRuler

# Load the transformer-based language model
nlp = spacy.load("en_core_web_trf")

# Token patterns for the EntityRuler. The first four match a single token by
# its lowercase form; the remaining ones match a token by its entity type.
# NOTE(review): this ruler is inserted before 'ner', so tokens presumably have
# no ent_type yet when it runs and the ent_type patterns likely never match —
# confirm whether they are needed.
patterns = [
    {"label": "LOCATION", "pattern": [{"lower": {"in": ["city", "town", "village"]}}]},
    {"label": "PRICE", "pattern": [{"lower": {"in": ["price", "cost", "budget"]}}]},
    {"label": "BHK", "pattern": [{"lower": {"in": ["bhk", "bedroom", "hall", "kitchen"]}}]},
    {"label": "PROPERTY_TYPE", "pattern": [{"lower": {"in": ["house", "flat", "apartment"]}}]},
    {"label": "PERSON", "pattern": [{"ent_type": "PERSON"}]},
    {"label": "NORP", "pattern": [{"ent_type": "NORP"}]},
    {"label": "FAC", "pattern": [{"ent_type": "FAC"}]},
    {"label": "ORG", "pattern": [{"ent_type": "ORG"}]},
    {"label": "GPE", "pattern": [{"ent_type": "GPE"}]},
    {"label": "LOC", "pattern": [{"ent_type": "LOC"}]},
    {"label": "PRODUCT", "pattern": [{"ent_type": "PRODUCT"}]},
    {"label": "LANGUAGE", "pattern": [{"ent_type": "LANGUAGE"}]},
    {"label": "PERCENT", "pattern": [{"ent_type": "PERCENT"}]},
    {"label": "MONEY", "pattern": [{"ent_type": "MONEY"}]},
    {"label": "QUANTITY", "pattern": [{"ent_type": "QUANTITY"}]},
    {"label": "ORDINAL", "pattern": [{"ent_type": "ORDINAL"}]},
    {"label": "CARDINAL", "pattern": [{"ent_type": "CARDINAL"}]}
]

# add_pipe() creates the ruler from its factory name and returns it, so the
# patterns are registered once, directly on the component that is actually in
# the pipeline (a separately constructed EntityRuler would never be run).
ruler = nlp.add_pipe('entity_ruler', before="ner", config={"overwrite_ents": True})
ruler.add_patterns(patterns)

# Listing description to run through the pipeline
text = """7 BHK Independent House for sale in Sagarpur, Delhi. This 7 BHK unit is available in Sagarpur and offers a premium lifestyle at the best price. This property is posted by owner and there is no brokerage involved. Contact now, for details. It is a desired purchase for any homebuyer in Sagarpur. The price of this Independent House is Rs 55.0 L. Residents in this project also pay monthly maintenance charges of Rs 0. The built-up area is 1500 Square feet. There are 7 bedrooms and 4 bathroom. This property is at a walking distance from Deen Dayal Upadhyay Hospital, Mata Chanan Devi Hospital, and Sa..."""

# Run the full pipeline (ruler + statistical NER) on the text
doc = nlp(text)

# Group entity texts by label, in order of first appearance
entities_dict = {}
for span in doc.ents:
    entities_dict.setdefault(span.label_, []).append(span.text)

# Show each label followed by the list of texts found for it
for label, texts in entities_dict.items():
    print(f"{label}: {texts}")


  return torch._C._cuda_getDeviceCount() if nvml_count < 0 else nvml_count
2023-07-25 18:14:53.793001: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


BHK: ['BHK', 'BHK']
PROPERTY_TYPE: ['House', 'House']
GPE: ['Sagarpur', 'Delhi', 'Sagarpur', 'Sagarpur']
CARDINAL: ['7', '7', '4']
PRICE: ['price', 'price']
MONEY: ['Rs 55.0 L.', '0']
DATE: ['monthly']
QUANTITY: ['1500 Square feet']
ORG: ['Deen Dayal Upadhyay Hospital', 'Mata Chanan Devi Hospital']
