In [3]:
import numpy
import spacy

# Load the "en_core_web_sm" spaCy pipeline once when this cell runs; extract_price_range_dynamic()
# below calls this shared `nlp` object on every input string.
nlp = spacy.load("en_core_web_sm")

In [6]:
def merge_currency(doc):
    """
    Fuse each currency symbol ($, €, £) with the number-like token right after it.

    The doc is modified in place through its retokenizer, so a pair such as
    "$" followed by "20,000" ends up as a single token. Nothing is returned.
    """
    currency_symbols = ("$", "€", "£")
    with doc.retokenize() as retokenizer:
        # Gather every symbol+number pair first, then register the merges in order.
        pairs = [
            doc[symbol.i : symbol.i + 2]
            for position, symbol in enumerate(doc[:-1])  # the last token has no successor
            if symbol.text in currency_symbols and doc[position + 1].like_num
        ]
        for pair in pairs:
            retokenizer.merge(pair)
# Currency symbols removed before parsing; the same symbols merge_currency() glues onto numbers.
_CURRENCY_SYMBOLS = ("$", "€", "£")
# Words that join the two ends of a range, as in "20k to 50k".
_RANGE_CONNECTORS = ("to", "and")
# Fallback bounds for qualitative wording when the user gives no explicit number.
_CHEAP_MAX_PRICE = 20000
_LUXURY_MIN_PRICE = 50000


def _parse_price(text):
    """Return the price written in ``text`` as a float, or None when it is not a number."""
    cleaned = text.lower()
    for junk in _CURRENCY_SYMBOLS + (",", " "):
        cleaned = cleaned.replace(junk, "")
    cleaned = cleaned.strip("?.!")

    multiplier = 1.0
    if cleaned.endswith("k"):
        # Only a trailing "k" means "thousand"; scaling (instead of appending "000")
        # keeps decimals such as "1.5k" correct.
        cleaned = cleaned[:-1]
        multiplier = 1000.0

    # Require a digit: float() also accepts words like "inf"/"nan", and a bare "k"
    # would otherwise be read as zero.
    if not any(ch.isdigit() for ch in cleaned):
        return None
    try:
        return float(cleaned) * multiplier
    except ValueError:
        return None


def _nearest_word_before(doc, index):
    """Return (position, lower-cased text) of the closest token before ``index``
    that is not a bare currency symbol, or (-1, "") when there is none."""
    position = index - 1
    while position >= 0 and doc[position].text in _CURRENCY_SYMBOLS:
        position -= 1
    if position < 0:
        return -1, ""
    return position, doc[position].text.lower()


def _follows_price_connector(doc, index):
    """Tell whether the token at ``index`` comes right after "<price> to" or "<price> and"."""
    link_position, link_word = _nearest_word_before(doc, index)
    if link_word not in _RANGE_CONNECTORS:
        return False
    _, left_word = _nearest_word_before(doc, link_position)
    return _parse_price(left_word) is not None


def extract_price_range_dynamic(user_input):
    """
    Extract a price range from a free-text request.

    The text is run through the module-level ``nlp`` pipeline, currency symbols are
    merged into the following number by ``merge_currency``, and every numeric token
    is then classified from its surroundings:

    * "more/above/over ... than" among the token's ancestors -> lower bound;
      "less/below/under ... than" -> upper bound.
    * The first two numbers after "between" -> lower bound, then upper bound
      (they replace any bound set earlier in the sentence).
    * Syntactic head "under"/"below"/"less" -> upper bound;
      "over"/"above"/"more" -> lower bound.
    * A number directly preceded by "<number> to" or "<number> and" fills the
      lower bound if it is still empty, otherwise the upper bound.
    * Any other number (including those governed by "for"/"within"/"around")
      is taken as an upper bound.

    "cheap"/"affordable" and "luxury"/"expensive" only supply default bounds
    (20000 max / 50000 min) for a side that no explicit number has filled.
    Finally the bounds are swapped if the lower one exceeds the upper one.

    Args:
        user_input: The user's request as a string.

    Returns:
        dict: ``{"MinPrice": <number or None>, "MaxPrice": <number or None>}``.
    """
    doc = nlp(user_input)
    merge_currency(doc)

    lower_bound = None
    upper_bound = None
    range_slots_left = 0  # numbers still expected after "between": 2 -> lower, 1 -> upper
    saw_cheap = False
    saw_luxury = False

    for token in doc:
        if token.text.lower() == "between":
            range_slots_left = 2
            continue

        value = _parse_price(token.text)
        if value is None:
            # Qualitative words are only remembered here; they are applied after the
            # loop so that word order cannot make them overwrite an explicit number.
            if token.lemma_ in ("cheap", "affordable"):
                saw_cheap = True
            elif token.lemma_ in ("luxury", "expensive"):
                saw_luxury = True
            continue

        head = token.head.text.lower()
        ancestors = [ancestor.text.lower() for ancestor in token.ancestors]

        # "more than X" / "less than X": both words sit above the number in the parse.
        if ("more" in ancestors or "above" in ancestors or "over" in ancestors) and "than" in ancestors:
            lower_bound = value
        elif ("less" in ancestors or "below" in ancestors or "under" in ancestors) and "than" in ancestors:
            upper_bound = value

        # Explicit "between X and Y" range.
        elif range_slots_left:
            if range_slots_left == 2:
                lower_bound = value
            else:
                upper_bound = value
            range_slots_left -= 1

        # Standalone comparison keywords governing the number.
        elif head in ("under", "below", "less"):
            upper_bound = value
        elif head in ("over", "above", "more"):
            lower_bound = value

        # "X to Y" / "X and Y": the final swap below puts the two ends in order.
        elif _follows_price_connector(doc, token.i):
            if lower_bound is None:
                lower_bound = value
            else:
                upper_bound = value

        # "for"/"within"/"around" X, or a number with no recognised context:
        # treat it as the most the user wants to pay.
        else:
            upper_bound = value

    if saw_cheap and upper_bound is None:
        upper_bound = _CHEAP_MAX_PRICE
    if saw_luxury and lower_bound is None:
        lower_bound = _LUXURY_MIN_PRICE

    # Compare against None explicitly so that a bound of 0 still takes part in the swap.
    if lower_bound is not None and upper_bound is not None and lower_bound > upper_bound:
        lower_bound, upper_bound = upper_bound, lower_bound

    return {"MinPrice": lower_bound, "MaxPrice": upper_bound}


In [7]:
# Sample requests covering explicit bounds, ranges, trailing currency symbols and
# purely qualitative wording.
examples = [
    "I'm looking for a car under $40,000.",
    "Do you have anything between $20,000 and $50,000?",
    "I want an affordable car.",
    "Show me luxury cars over $70,000.",
    "Give me a cheap vehicle under $15k.",
    "I need a car below $25,000 but above $10,000.",
    "I’m searching for something costing more than 50k.",
    "What can I get for less than $30,000?",
    "Can I get something for $40,000?",
    "Is $20k to $50k within my range?",
    "I am looking for a car that costs less than 50000$?",
    "My budget is 20,000$",
    "I prefer below 15,000",
    "Show me cars costing between $25,000 and $30,000.",
    "Cars over $50k and below $100k are my preference.",
]

# Run the extractor on each sample and show the input next to the parsed range,
# followed by a 50-dash separator line.
for example in examples:
    result = extract_price_range_dynamic(example)
    print(
        f"Input: {example}",
        f"Extracted Price Range: {result}",
        "-" * 50,
        sep="\n",
    )

Input: I'm looking for a car under $40,000.
Extracted Price Range: {'MinPrice': None, 'MaxPrice': 40000.0}
--------------------------------------------------
Input: Do you have anything between $20,000 and $50,000?
Extracted Price Range: {'MinPrice': 20000.0, 'MaxPrice': 50000.0}
--------------------------------------------------
Input: I want an affordable car.
Extracted Price Range: {'MinPrice': None, 'MaxPrice': 20000}
--------------------------------------------------
Input: Show me luxury cars over $70,000.
Extracted Price Range: {'MinPrice': 70000.0, 'MaxPrice': None}
--------------------------------------------------
Input: Give me a cheap vehicle under $15k.
Extracted Price Range: {'MinPrice': None, 'MaxPrice': 15000.0}
--------------------------------------------------
Input: I need a car below $25,000 but above $10,000.
Extracted Price Range: {'MinPrice': 10000.0, 'MaxPrice': 25000.0}
--------------------------------------------------
Input: I’m searching for something costin