In [1]:
import sys
# Make the project root importable so `src.*` resolves from this notebook.
# NOTE(review): '..' is a relative entry, so this assumes the kernel's working
# directory is the notebook's own folder — TODO confirm for other launch setups.
# The membership check keeps the cell idempotent: re-running it no longer
# stacks duplicate '..' entries onto sys.path.
if '..' not in sys.path:
    sys.path.append('..')

In [2]:
from src.query_parser import ParseQuery, Query_parser
import pandas as pd

# Single shared parser instance; the cells below all call `parser.parse(...)`.
parser = Query_parser()
print("Query parser initialized")

Query parser initialized


In [3]:
# Fully spelled-out natural-language query (the misspelling inside the text is
# part of the test input and is left untouched).
query1 = "46 year old male, knee surgery in Pune, 3 months old insurannce policy"
parsed1 = parser.parse(query1)

print(f"Query: {query1}\n")
print("Results:")

# (display label, attribute on the parsed result, text appended after the value)
for label, attr, suffix in (
    ("Age", "age", ""),
    ("Gender", "gender", ""),
    ("Procedure", "procedure", ""),
    ("Location", "location", ""),
    ("Policy Duration", "policy_duration_months", " months"),
    ("Emergency", "is_emergency", ""),
):
    print(f"  {label}: {getattr(parsed1, attr)}{suffix}")

Query: 46 year old male, knee surgery in Pune, 3 months old insurannce policy

Results:
  Age: 46
  Gender: male
  Procedure: knee surgery
  Location: Pune
  Policy Duration: 3 months
  Emergency: False


In [4]:
# Compact shorthand variant of the same request.
query2 = "46M knee surgery Pune 3 month policy"
parsed2 = parser.parse(query2)

# NOTE(review): the recorded output of this cell reports a policy duration of
# 46 months although the query says "3 month policy". It looks like the "46M"
# token is being read as a month count — verify in src.query_parser.
print(f"Query: {query2}\n")
print("Results:")

# (display label, attribute on the parsed result, text appended after the value)
for label, attr, suffix in (
    ("Age", "age", ""),
    ("Gender", "gender", ""),
    ("Procedure", "procedure", ""),
    ("Location", "location", ""),
    ("Policy Duration", "policy_duration_months", " months"),
):
    print(f"  {label}: {getattr(parsed2, attr)}{suffix}")


Query: 46M knee surgery Pune 3 month policy

Results:
  Age: 46
  Gender: male
  Procedure: knee surgery
  Location: Pune
  Policy Duration: 46 months


In [5]:
# Testing various formats
# `test_queries` is reused by later cells (validation, report, JSON export).
test_queries = [
    "25F hip replacement Mumbai 8 month policy",
    "60 year old cardiac procedure Bangalore 1 month policy",
    "35 years male emergency knee surgery Delhi",
    "woman aged 42 cataract surgery Chennai policy 12 months",
    "28M accident spine surgery Hyderabad 6 month insurance",
    "50 year old female, cardiac surgery in Pune, policy of 4 months",
]

print("Testing multiple query formats:\n")

# One summary row per query; the query text is clipped to 50 characters so the
# printed table stays narrow.
# NOTE(review): the recorded output contains values that do not match the
# query text (Age 12 for "woman aged 42 ...", Duration 28 for "28M ... 6 month
# insurance") — these look like parser defects; verify in src.query_parser.
results = [
    {
        'Query': text[:50],
        'Age': parsed_query.age,
        'Gender': parsed_query.gender,
        'Procedure': parsed_query.procedure,
        'Location': parsed_query.location,
        'Duration': parsed_query.policy_duration_months,
        'Emergency': parsed_query.is_emergency,
    }
    for text, parsed_query in ((q, parser.parse(q)) for q in test_queries)
]

df = pd.DataFrame(results)
print(df.to_string(index=False))


Testing multiple query formats:

                                             Query  Age Gender       Procedure  Location  Duration  Emergency
         25F hip replacement Mumbai 8 month policy   25 female hip replacement    Mumbai       8.0      False
60 year old cardiac procedure Bangalore 1 month po   60   None         cardiac Bangalore       1.0      False
        35 years male emergency knee surgery Delhi   35   male    knee surgery     Delhi       NaN       True
woman aged 42 cataract surgery Chennai policy 12 m   12 female        cataract   Chennai      12.0      False
28M accident spine surgery Hyderabad 6 month insur   28   male          spinal Hyderabad      28.0       True
50 year old female, cardiac surgery in Pune, polic   50 female         cardiac      Pune       4.0      False


In [6]:
# Test Validation
banner = "=" * 80
print("\n" + banner)
print("VALIDATION TESTS")
print(banner + "\n")

# Only the first three sample queries are checked here.
for query in test_queries[:3]:
    parsed = parser.parse(query)
    validation = parser.validate_parsed_query(parsed)
    missing = parser.get_missing_fields(parsed)

    # A single print with newline separators; the trailing empty string yields
    # the blank line between entries.
    print(
        f"Query: {query}",
        f"  Complete: {validation['is_complete']}",
        f"  Missing fields: {missing or 'None'}",
        "",
        sep="\n",
    )


VALIDATION TESTS

Query: 25F hip replacement Mumbai 8 month policy
  Complete: True
  Missing fields: None

Query: 60 year old cardiac procedure Bangalore 1 month policy
  Complete: True
  Missing fields: None

Query: 35 years male emergency knee surgery Delhi
  Complete: False
  Missing fields: ['policy_duration']



In [7]:
# Edge cases
# `edge_cases` is reused by the report cell further down.
edge_cases = [
    "knee surgery",  # Missing age, location, duration
    "46 years old",  # Missing procedure
    "surgery in Mumbai",  # Missing age, procedure type
    "3 month policy",  # Missing age, procedure, location
    "emergency",  # Only emergency keyword
]

separator = "=" * 80
print(separator)
print("EDGE CASE TESTS")
print(separator + "\n")

# NOTE(review): in the recorded output "3 month policy" yields age=3 and
# "emergency" yields procedure=emergency — these look like false positives in
# the parser; verify in src.query_parser.
for query in edge_cases:
    parsed = parser.parse(query)
    missing = parser.get_missing_fields(parsed)

    extracted = ", ".join(
        f"{name}={value}"
        for name, value in (
            ("age", parsed.age),
            ("procedure", parsed.procedure),
            ("location", parsed.location),
            ("duration", parsed.policy_duration_months),
        )
    )
    if missing:
        missing_text = ', '.join(missing)
    else:
        missing_text = 'None'

    print(f"Query: '{query}'")
    print(f"  Extracted: {extracted}")
    print(f"  Missing: {missing_text}")
    print()

EDGE CASE TESTS

Query: 'knee surgery'
  Extracted: age=None, procedure=knee surgery, location=None, duration=None
  Missing: age, location, policy_duration

Query: '46 years old'
  Extracted: age=46, procedure=None, location=None, duration=None
  Missing: procedure, location, policy_duration

Query: 'surgery in Mumbai'
  Extracted: age=None, procedure=None, location=Mumbai, duration=None
  Missing: age, procedure, policy_duration

Query: '3 month policy'
  Extracted: age=3, procedure=None, location=None, duration=3
  Missing: procedure, location

Query: 'emergency'
  Extracted: age=None, procedure=emergency, location=None, duration=None
  Missing: age, location, policy_duration



In [8]:
# Test procedure variants
# Different wordings for knee, hip and cardiac procedures; each line of output
# shows the procedure label the parser assigns to the wording.
procedure_queries = [
    "knee surgery",
    "knee operation",
    "knee replacement",
    "knee arthroscopy",
    "hip replacement",
    "hip surgery",
    "cardiac procedure",
    "heart surgery",
    "bypass surgery",
]

print(f"\n{'=' * 80}")
print("PROCEDURE VARIANT TESTS")
print(f"{'=' * 80}\n")

for query in procedure_queries:
    parsed = parser.parse(query)
    procedure_label = parsed.procedure
    print(f"'{query}' → Procedure: {procedure_label}")



PROCEDURE VARIANT TESTS

'knee surgery' → Procedure: knee surgery
'knee operation' → Procedure: knee surgery
'knee replacement' → Procedure: knee surgery
'knee arthroscopy' → Procedure: knee surgery
'hip replacement' → Procedure: hip replacement
'hip surgery' → Procedure: hip replacement
'cardiac procedure' → Procedure: cardiac
'heart surgery' → Procedure: cardiac
'bypass surgery' → Procedure: cardiac


In [9]:
# Test Location Recognition
# City names with different prepositions, with none, and in leading position.
location_queries = [
    "surgery in Pune",
    "surgery at Mumbai",
    "surgery from Bangalore",
    "surgery Delhi",  # Without preposition
    "Hyderabad surgery",
]

print(f"\n{'=' * 80}")
print("LOCATION TESTS")
print(f"{'=' * 80}\n")

for query in location_queries:
    parsed = parser.parse(query)
    detected_location = parsed.location
    print(f"'{query}' → Location: {detected_location}")


LOCATION TESTS

'surgery in Pune' → Location: Pune
'surgery at Mumbai' → Location: Mumbai
'surgery from Bangalore' → Location: Bangalore
'surgery Delhi' → Location: Delhi
'Hyderabad surgery' → Location: Hyderabad


In [10]:
# Test Emergency Detection
# Five queries carrying an urgency keyword, plus one plain query as a control.
emergency_queries = [
    "emergency knee surgery",
    "urgent cardiac procedure",
    "accident spine surgery",
    "critical hip replacement",
    "immediate surgery needed",
    "knee surgery",  # Non-emergency
]

print(f"\n{'=' * 80}")
print("EMERGENCY DETECTION TESTS")
print(f"{'=' * 80}\n")

for query in emergency_queries:
    parsed = parser.parse(query)
    emergency_flag = parsed.is_emergency
    print(f"'{query}' → Emergency: {emergency_flag}")



EMERGENCY DETECTION TESTS

'emergency knee surgery' → Emergency: True
'urgent cardiac procedure' → Emergency: True
'accident spine surgery' → Emergency: True
'critical hip replacement' → Emergency: True
'immediate surgery needed' → Emergency: True
'knee surgery' → Emergency: False


In [11]:
# Create comprehensive test report
print("\n" + "="*80)
print("COMPREHENSIVE TEST REPORT")
print("="*80 + "\n")

all_test_queries = test_queries + edge_cases

# Parse every query exactly once and reuse the results for all four metrics
# (previously each metric re-parsed the whole list).
parsed_queries = [parser.parse(q) for q in all_test_queries]


def _format_rate(hits, total_count):
    """Return 'hits/total (xx.x%)'; reports 0.0% when there are no queries."""
    percent = hits / total_count * 100 if total_count else 0.0
    return f"{hits}/{total_count} ({percent:.1f}%)"


# NOTE: these rates only count fields that came back non-None; they do not
# check that the value is correct (e.g. the recorded edge-case output shows
# age=3 for "3 month policy", which still counts as a successful extraction).
total = len(parsed_queries)
successful_age = sum(p.age is not None for p in parsed_queries)
successful_procedure = sum(p.procedure is not None for p in parsed_queries)
successful_location = sum(p.location is not None for p in parsed_queries)
successful_duration = sum(p.policy_duration_months is not None for p in parsed_queries)

print(f"Total queries tested: {total}")
print(f"Age extraction rate: {_format_rate(successful_age, total)}")
print(f"Procedure extraction rate: {_format_rate(successful_procedure, total)}")
print(f"Location extraction rate: {_format_rate(successful_location, total)}")
print(f"Duration extraction rate: {_format_rate(successful_duration, total)}")



COMPREHENSIVE TEST REPORT

Total queries tested: 11
Age extraction rate: 8/11 (72.7%)
Procedure extraction rate: 8/11 (72.7%)
Location extraction rate: 7/11 (63.6%)
Duration extraction rate: 6/11 (54.5%)


In [12]:
# Save sample parsed queries
import json
from pathlib import Path

# The path is relative to the notebook's working directory.
output_path = Path("../data/processed/sample_parsed_queries.json")
# Create the target folder if it is missing so the write does not fail with
# FileNotFoundError when the folder has not been created yet.
output_path.parent.mkdir(parents=True, exist_ok=True)

# One plain dict per sample query, in the same order as `test_queries`.
sample_results = [parser.parse(query).model_dump() for query in test_queries]

# Explicit encoding keeps the file identical across platforms.
with output_path.open("w", encoding="utf-8") as f:
    json.dump(sample_results, f, indent=2)

# Report the path that was actually written (the old message named
# "/data/processed/...", which is not where the file goes).
print(f"\nSample queries saved to {output_path}")





Sample queries saved to /data/processed/sample_parsed_queries.json
