In [15]:
# !pip install spacy
# !pip install scikit-learn
# !pip install numpy

!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.8.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')


In [16]:
import spacy
from spacy.pipeline import EntityRuler
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import numpy as np

# Load SpaCy model
nlp = spacy.load("en_core_web_sm")


In [17]:

# Define intents and training data
intent_training_data = {
    "get_payment": [
        "What is the payment of tenant John Doe?",
        "How much does tenant 123 owe?",
        "Show me the payment details for tenant 456."
    ],
    "landlord_creation_date": [
        "When was landlord Smith created?",
        "Tell me the creation date of landlord 789.",
        "How old is landlord 123?"
    ]
}
intents = list(intent_training_data.keys())

# Prepare training data for intent recognition
vectorizer = CountVectorizer()
X_train = vectorizer.fit_transform(
    [sentence for examples in intent_training_data.values() for sentence in examples]
)
y_train = np.array(
    [intent for intent, examples in intent_training_data.items() for _ in examples]
)

# Train a classifier (e.g., Naive Bayes)
classifier = MultinomialNB()
classifier.fit(X_train, y_train)

# Add custom entity extraction rules with EntityRuler
ruler = nlp.add_pipe("entity_ruler", before="ner")
patterns = [
    {"label": "TENANT_NAME", "pattern": [{"LOWER": "tenant"}, {"IS_TITLE": True}]},
    {"label": "TENANT_ID", "pattern": [{"LOWER": "tenant"}, {"IS_DIGIT": True}]},
    {"label": "LANDLORD_NAME", "pattern": [{"LOWER": "landlord"}, {"IS_TITLE": True}]},
    {"label": "LANDLORD_ID", "pattern": [{"LOWER": "landlord"}, {"IS_DIGIT": True}]},
]
ruler.add_patterns(patterns)


In [18]:
# Function to process user input
def process_input(text):
    # Predict intent
    X_test = vectorizer.transform([text])
    predicted_intent = classifier.predict(X_test)[0]

    # Extract entities
    doc = nlp(text)
    entities = {ent.label_: ent.text for ent in doc.ents}

    return {"intent": predicted_intent, "entities": entities}



In [19]:
# Example usage
user_input = "What is the payment of tenant 123?"
result = process_input(user_input)
print(result)

{'intent': np.str_('get_payment'), 'entities': {'TENANT_ID': 'tenant 123'}}
