In [None]:
import spacy
from spacy.matcher import PhraseMatcher
from spacy.tokens import Span
import json
# Load spaCy's pre-trained small English pipeline; it supplies the tokenizer
# and the shared vocabulary used by the matcher below.
nlp = spacy.load("en_core_web_sm")

# Domain knowledge base: a JSON object whose keys are category names and whose
# values are iterables of keyword strings (that is how it is consumed below).
# JSON is UTF-8 by specification, so decode explicitly rather than relying on
# the platform's locale encoding (which is not UTF-8 on every system).
with open("domain_knowledge.json", "r", encoding="utf-8") as file:
    domain_knowledge = json.load(file)

# Create a PhraseMatcher that compares tokens on their LOWER attribute, so a
# keyword matches regardless of the casing used in the input text.
matcher = PhraseMatcher(nlp.vocab, attr="LOWER")

# Register one rule per category; the category name becomes the match key
# that is later recovered from the match id.
for category, keywords in domain_knowledge.items():
    # make_doc only tokenizes (no tagger/parser/NER), which is all the
    # PhraseMatcher needs for LOWER-based patterns.
    patterns = [nlp.make_doc(keyword) for keyword in keywords]
    matcher.add(category, patterns)

# Function to extract entities using spaCy
def extract_entities(text):
    """Return ``(matched_text, category)`` tuples for domain phrases in *text*.

    The text is processed with the module-level ``nlp`` pipeline and the
    resulting doc is scanned with the module-level ``matcher``. Every match
    reported by the matcher yields one tuple: the text of the matched token
    span and the rule key, resolved from the match id through
    ``nlp.vocab.strings``. Tuples appear in the order the matcher returns
    them; nothing is de-duplicated or filtered.
    """
    parsed = nlp(text)
    return [
        (parsed[begin:stop].text, nlp.vocab.strings[key_hash])
        for key_hash, begin, stop in matcher(parsed)
    ]

# Demo: run the extractor on a sample sentence and print what it finds.
text = (
    "We’re expanding into North America and need a platform with strong analytics. "
    "CompetitorX offers a better discount."
)
entities = extract_entities(text)
print("Extracted Entities:", entities)

Extracted Entities: [('North America', 'regions'), ('analytics', 'features'), ('CompetitorX', 'competitors'), ('discount', 'pricing_keywords')]
