In [2]:
import spacy

nlp = spacy.load("en_core_web_sm")

def parse_query(query):
    doc = nlp(query)

    column_name = None
    comparison_operator = None
    value = None
    for token in doc:
        if token.pos_ == "NOUN":
            column_name = token.text
        elif token.pos_ == "ADJ":
            comparison_operator = token.text
        elif token.pos_ == "NUM":
            value = float(token.text)

    return column_name, comparison_operator, value

query = "I want all employees with salary greater than 1000"
column, comparison, value = parse_query(query)
print("Column:", column)
print("Comparison:", comparison)
print("Value:", value)


Column: salary
Comparison: greater
Value: 1000.0


In [4]:
import spacy

nlp = spacy.load("en_core_web_sm")

def parse_query(query):
    doc = nlp(query)

    conditions = []
    current_condition = {'column': None, 'comparison': None, 'value': None}
    comparison_operators = {'greater': '>', 'less': '<', 'equal': '=', 'starts': 'starts_with'}
    ignore_words = ['with', 'than', 'and', 'or']  # Words to ignore

    for token in doc:
        if token.text.lower() in ['and', 'or']:
            if current_condition['column'] is not None:
                conditions.append(current_condition.copy())
                current_condition = {'column': None, 'comparison': None, 'value': None}
        elif token.pos_ == "NOUN" and token.text.lower() not in ignore_words:
            current_condition['column'] = token.text
        elif token.pos_ == "ADJ":
            if token.text.lower() in comparison_operators:
                current_condition['comparison'] = comparison_operators[token.text.lower()]
        elif token.pos_ == "NUM":
            current_condition['value'] = float(token.text)
        elif token.pos_ == "PROPN" and current_condition['column'] == "name":
            if current_condition['value']:
                current_condition['value'] += " " + token.text
            else:
                current_condition['value'] = token.text

    if current_condition['column'] is not None:
        conditions.append(current_condition)

    return conditions

# Example usage
queries = [
    "I want all employees with salary greater than 1000 and age less than 50",
    "I want employee whose name starts with S",
    "Show me the employees with age equal to 30 or salary less than 2000"
]

for query in queries:
    print("Query:", query)
    conditions = parse_query(query)
    for condition in conditions:
        print("Column:", condition['column'])
        print("Comparison:", condition['comparison'])
        print("Value:", condition['value'])
        print()
    print()


Query: I want all employees with salary greater than 1000 and age less than 50
Column: salary
Comparison: >
Value: 1000.0

Column: age
Comparison: <
Value: 50.0


Query: I want employee whose name starts with S
Column: name
Comparison: None
Value: S


Query: Show me the employees with age equal to 30 or salary less than 2000
Column: age
Comparison: =
Value: 30.0

Column: salary
Comparison: <
Value: 2000.0




In [5]:

queries = [
    "I want employee whose name starts with S and age greater than 100"
]

for query in queries:
    print("Query:", query)
    conditions = parse_query(query)
    for condition in conditions:
        print("Column:", condition['column'])
        print("Comparison:", condition['comparison'])
        print("Value:", condition['value'])
        print()
    print()

Query: I want employee whose name starts with S and age greater than 100
Column: name
Comparison: None
Value: S

Column: age
Comparison: >
Value: 100.0


