In [1]:
import sys
sys.path.append("..")

In [2]:
%pip install rapidfuzz 

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [3]:
from src.query_parser import Query_parser
import pandas as pd


In [4]:
parser=Query_parser()
print("Enhanced query parser initialized.")
print(f"Known locations: {len(parser.known_locations)}")
print(f"Known procedures: {len(parser.known_procedures)}")

Enhanced query parser initialized.
Known locations: 47
Known procedures: 9


In [5]:
typo_queries = [
    "46M knee surgery Pume 3 month",  # Pume → Pune
    "35F hip surgery Mumbay 6 month",  # Mumbay → Mumbai
    "40M cardiac Bangalor 4 month",  # Bangalor → Bangalore
    "28F cataract Chenai 8 month",  # Chenai → Chennai
    "50M spine surgery Deli 2 month",  # Deli → Delhi
]

print("\n" + "="*80)
print("FUZZY MATCHING TEST - Location Typos")
print("="*80 + "\n")

for query in typo_queries:
    parsed = parser.parse(query)
    print(f"Query: '{query}'")
    print(f"  → Extracted location: {parsed.location}")
    print()


FUZZY MATCHING TEST - Location Typos

Query: '46M knee surgery Pume 3 month'
  → Extracted location: None

Query: '35F hip surgery Mumbay 6 month'
  → Extracted location: None

Query: '40M cardiac Bangalor 4 month'
  → Extracted location: None

Query: '28F cataract Chenai 8 month'
  → Extracted location: None

Query: '50M spine surgery Deli 2 month'
  → Extracted location: None



In [6]:
# Cell 3: Test Fuzzy Matching - Typos in Procedures
procedure_typos = [
    "46M nee surgery Pune 3 month",  # nee → knee
    "35F hip replcement Mumbai 6 month",  # replcement → replacement
    "40M cardac surgery Bangalore 4 month",  # cardac → cardiac
]

print("="*80)
print("FUZZY MATCHING TEST - Procedure Typos")
print("="*80 + "\n")

for query in procedure_typos:
    parsed = parser.parse(query)
    print(f"Query: '{query}'")
    print(f"  → Extracted procedure: {parsed.procedure}")
    print()

FUZZY MATCHING TEST - Procedure Typos

Query: '46M nee surgery Pune 3 month'
  → Extracted procedure: spinal

Query: '35F hip replcement Mumbai 6 month'
  → Extracted procedure: hip replacement

Query: '40M cardac surgery Bangalore 4 month'
  → Extracted procedure: cardiac



In [7]:
# Cell 4: Test New Procedures
new_procedure_queries = [
    "55M appendix surgery Delhi 5 month policy",
    "32F hernia repair Mumbai 4 month policy",
    "28F maternity delivery Pune 12 month policy",
    "45M gallbladder removal Bangalore 6 month policy",
    "38M ACL surgery Chennai 3 month policy",
]

print("="*80)
print("NEW PROCEDURES TEST")
print("="*80 + "\n")

results = []
for query in new_procedure_queries:
    parsed = parser.parse(query)
    results.append({
        'Query': query[:45] + '...',
        'Age': parsed.age,
        'Procedure': parsed.procedure,
        'Location': parsed.location,
        'Duration': parsed.policy_duration_months
    })

df = pd.DataFrame(results)
print(df.to_string(index=False))

NEW PROCEDURES TEST

                                           Query  Age    Procedure  Location  Duration
    55M appendix surgery Delhi 5 month policy...   55     appendix     Delhi        55
      32F hernia repair Mumbai 4 month policy...   32       hernia    Mumbai         4
  28F maternity delivery Pune 12 month policy...   28    maternity      Pune        12
45M gallbladder removal Bangalore 6 month pol...   45  gallbladder Bangalore        45
       38M ACL surgery Chennai 3 month policy...   38 knee surgery   Chennai        38


In [8]:
new_location_queries = [
    "46M knee surgery Coimbatore 3 month",
    "35F cardiac Jaipur 6 month",
    "40M spine surgery Lucknow 4 month",
    "28F cataract Nagpur 8 month",
    "50M hip replacement Indore 2 month",
]

print("\n" + "="*80)
print("NEW LOCATIONS TEST")
print("="*80 + "\n")

for query in new_location_queries:
    parsed = parser.parse(query)
    print(f"'{query}' → Location: {parsed.location}")



NEW LOCATIONS TEST

'46M knee surgery Coimbatore 3 month' → Location: Coimbatore
'35F cardiac Jaipur 6 month' → Location: Jaipur
'40M spine surgery Lucknow 4 month' → Location: Lucknow
'28F cataract Nagpur 8 month' → Location: Nagpur
'50M hip replacement Indore 2 month' → Location: Indore


In [9]:
complex_queries = [
    "46 year old male patient needs emergency knee surgery in Pune, has 3 month old policy",
    "Female aged 35, urgent cardiac procedure, Mumbai location, insurance policy 6 months",
    "Emergency! 28M accident, spine operation needed in Delhi, policy active for 1 month",
    "Patient: 50F, planned hip replacement at Bangalore hospital, policy duration 8 months",
]

print("\n" + "="*80)
print("COMPLEX QUERY TEST")
print("="*80 + "\n")

for query in complex_queries:
    parsed = parser.parse(query)
    validation = parser.validate_parsed_query(parsed)
    print(f"Query: {query[:60]}...")
    print(f"  Age: {parsed.age}, Gender: {parsed.gender}")
    print(f"  Procedure: {parsed.procedure}, Location: {parsed.location}")
    print(f"  Duration: {parsed.policy_duration_months} months")
    print(f"  Emergency: {parsed.is_emergency}")
    print(f"  Complete: {validation['is_complete']}")
    print()


COMPLEX QUERY TEST

Query: 46 year old male patient needs emergency knee surgery in Pun...
  Age: 46, Gender: male
  Procedure: knee surgery, Location: Pune
  Duration: 3 months
  Emergency: True
  Complete: True

Query: Female aged 35, urgent cardiac procedure, Mumbai location, i...
  Age: 6, Gender: female
  Procedure: cardiac, Location: Mumbai
  Duration: 6 months
  Emergency: True
  Complete: True

Query: Emergency! 28M accident, spine operation needed in Delhi, po...
  Age: 28, Gender: male
  Procedure: cataract, Location: Delhi
  Duration: 28 months
  Emergency: True
  Complete: True

Query: Patient: 50F, planned hip replacement at Bangalore hospital,...
  Age: 50, Gender: female
  Procedure: hip replacement, Location: Bangalore
  Duration: 8 months
  Emergency: False
  Complete: True



In [10]:
import time

test_queries = typo_queries + new_procedure_queries + new_location_queries + complex_queries

print("="*80)
print("PERFORMANCE TEST")
print("="*80 + "\n")

start_time = time.time()
for query in test_queries:
    parser.parse(query)
end_time = time.time()

avg_time = (end_time - start_time) / len(test_queries) * 1000  # Convert to ms

print(f"Total queries: {len(test_queries)}")
print(f"Total time: {end_time - start_time:.3f} seconds")
print(f"Average time per query: {avg_time:.2f} ms")
print(f"Queries per second: {len(test_queries) / (end_time - start_time):.1f}")


PERFORMANCE TEST

Total queries: 19
Total time: 0.007 seconds
Average time per query: 0.39 ms
Queries per second: 2545.5


In [11]:
all_test_queries = typo_queries + new_procedure_queries + new_location_queries + complex_queries

print("\n" + "="*80)
print("COMPREHENSIVE ACCURACY REPORT")
print("="*80 + "\n")

total = len(all_test_queries)
metrics = {
    'age': 0,
    'gender': 0,
    'procedure': 0,
    'location': 0,
    'duration': 0,
    'complete': 0
}

for query in all_test_queries:
    parsed = parser.parse(query)
    validation = parser.validate_parsed_query(parsed)
    
    if parsed.age: metrics['age'] += 1
    if parsed.gender: metrics['gender'] += 1
    if parsed.procedure: metrics['procedure'] += 1
    if parsed.location: metrics['location'] += 1
    if parsed.policy_duration_months: metrics['duration'] += 1
    if validation['is_complete']: metrics['complete'] += 1

print(f"Total test queries: {total}\n")
for field, count in metrics.items():
    percentage = (count / total) * 100
    print(f"{field.capitalize():15} : {count:2}/{total} ({percentage:5.1f}%)")


COMPREHENSIVE ACCURACY REPORT

Total test queries: 19

Age             : 19/19 (100.0%)
Gender          : 19/19 (100.0%)
Procedure       : 19/19 (100.0%)
Location        : 14/19 ( 73.7%)
Duration        : 19/19 (100.0%)
Complete        : 14/19 ( 73.7%)
