In [1]:
import os
import json
import asyncio
import re
import datetime
import traceback
from typing import Dict, List, Optional, Any, TypedDict, Union
from enum import Enum
import importlib.util

# --- Dependency Checks & Setup ---
# Required: pip install httpx beautifulsoup4 langchain langchain-google-genai

import httpx
from bs4 import BeautifulSoup

In [14]:
import dotenv
# Load environment variables from the project-local config/.env file.
dotenv.load_dotenv('config/.env')
# OpenCage API key; when it is unset (None) _opencage_address_impl makes no
# network call and returns its offline "fallback" result instead.
OPENCAGE_API_KEY = os.getenv("OPENCAGE_API_KEY")
def _opencage_address_impl(address: str) -> dict:
    """Normalize an address via OpenCage: forward geocode, then reverse-verify.

    When ``OPENCAGE_API_KEY`` is not set no request is made; the address is
    only whitespace-collapsed and title-cased and returned with
    ``match_quality="fallback"``.

    Args:
        address: Free-form address text to look up.

    Returns:
        On success, a dict with:
          - ``normalized_address``: formatted address of the forward result
          - ``reverse_address``: formatted address found at the returned
            coordinates, or ``"Unknown"`` if the reverse lookup failed
          - ``lat`` / ``lng``: coordinates of the forward result
          - ``match_quality``: ``"confidence_<n>"`` from the forward result
          - ``raw``: forward result, reverse address and, if the reverse
            request itself failed, a ``reverse_error`` message
          - ``source``: ``"OPENCAGE_GEOCODER"``
        On failure, a dict with an ``error`` key:
          - ``"opencage_api_error: <code> <message>"`` when the API answers
            with a non-200 status (e.g. bad key or exhausted quota)
          - ``"address_not_found"`` when the API answers 200 with no results
          - ``"opencage_request_failed: <exc>"`` when the forward request or
            response parsing raises
    """
    if not OPENCAGE_API_KEY:
        # Fallback deterministic normalizer
        normalized = " ".join(address.strip().title().split())
        return {
            "normalized_address": normalized,
            "reverse_address": None,
            "lat": None, "lng": None,
            "match_quality": "fallback",
            "raw": {"fallback": True, "reason": "NO_OPENCAGE_KEY"}
        }

    base_url = "https://api.opencagedata.com/geocode/v1/json"

    try:
        # 1. Forward Geocoding: Address -> Lat/Lng
        params_fwd = {"q": address, "key": OPENCAGE_API_KEY, "limit": 1}
        resp_fwd = httpx.get(base_url, params=params_fwd, timeout=10.0)
        data_fwd = resp_fwd.json()

        # Keep "the API refused the request" separate from "the address does
        # not exist"; otherwise an invalid key or exhausted quota looks like a
        # bad address to the caller.
        status = data_fwd.get("status") or {}
        status_code = status.get("code")
        if status_code != 200:
            message = status.get("message", "")
            return {
                "error": f"opencage_api_error: {status_code} {message}".strip(),
                "raw": data_fwd,
            }

        if not data_fwd.get("results"):
            return {"error": "address_not_found", "raw": data_fwd}

        fwd_res = data_fwd["results"][0]
        lat = fwd_res["geometry"]["lat"]
        lng = fwd_res["geometry"]["lng"]
        fwd_addr = fwd_res["formatted"]
        confidence = fwd_res.get("confidence", 0)

        # 2. Reverse Geocoding: Lat/Lng -> Address
        # Best effort: a failed reverse lookup must not discard a good forward
        # result, so transport errors degrade to "Unknown" exactly like a
        # non-200 reverse status does.
        rev_addr = "Unknown"
        reverse_error = None
        try:
            params_rev = {"q": f"{lat},{lng}", "key": OPENCAGE_API_KEY}
            resp_rev = httpx.get(base_url, params=params_rev, timeout=10.0)
            data_rev = resp_rev.json()
            if data_rev.get("status", {}).get("code") == 200 and data_rev.get("results"):
                rev_addr = data_rev["results"][0]["formatted"]
        except Exception as e:
            reverse_error = str(e)

        raw = {"forward": fwd_res, "reverse_sample": rev_addr}
        if reverse_error is not None:
            raw["reverse_error"] = reverse_error

        return {
            "normalized_address": fwd_addr,      # from Forward
            "reverse_address": rev_addr,         # from Reverse
            "lat": lat,
            "lng": lng,
            "match_quality": f"confidence_{confidence}",
            "raw": raw,
            "source": "OPENCAGE_GEOCODER"
        }

    except Exception as e:
        return {"error": f"opencage_request_failed: {str(e)}"}


In [None]:
from playwright.sync_api import sync_playwright
import time
import json

def scrape_hospitals_by_coordinates(lat: float, lon: float, max_results: int = 10):
    """
    Searches for 'hospitals' at the specific latitude/longitude on Google Maps
    and extracts the top results.

    Args:
        lat: Latitude the map search is centred on.
        lon: Longitude the map search is centred on.
        max_results: Maximum number of result cards to extract (default 10).

    Returns:
        A list of dicts, one per result card, with keys ``rank`` (1-based),
        ``name``, ``rating_info`` (raw rating text or ``"N/A"``),
        ``google_maps_url`` and ``coordinates_used``. Returns ``[]`` when the
        results feed never appears.

    NOTE(review): this uses Playwright's *sync* API, which Playwright refuses
    to run inside an already-running asyncio loop (as in a Jupyter kernel) --
    confirm how this cell is meant to be executed.
    """
    # 1. Construct URL centered on coordinates with Zoom level 15
    # Format: https://www.google.com/maps/search/hospitals/@LAT,LON,15z
    url = f"https://www.google.com/maps/search/hospitals/@{lat},{lon},15z"
    feed_selector = 'div[role="feed"]'
    # Result cards are anchor tags that link to places.
    card_selector = 'a[href*="/maps/place/"]'

    results_data = []

    with sync_playwright() as p:
        # Headless=False is safer for Google Maps to avoid immediate bot detection
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context(
                viewport={"width": 1920, "height": 1080},
                locale="en-US", # Ensure English results
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
            )
            page = context.new_page()

            print(f"Navigating to coordinates: {lat}, {lon}")
            page.goto(url, timeout=60000)

            # 2. Wait for the results feed
            # Google Maps puts the list in a div with role="feed"
            try:
                page.wait_for_selector(feed_selector, timeout=20000)
                print("Results feed loaded.")
            except Exception as e:
                print(f"Error: Could not load results (likely no hospitals found nearby). {e}")
                return []

            # 3. Scroll (at most 5 times) until enough cards are loaded
            for _ in range(5):
                count = page.locator(card_selector).count()

                print(f"Found {count} hospitals...")
                if count >= max_results:
                    break

                # Scroll logic: Click feed to focus -> Press End
                page.locator(feed_selector).click()
                page.keyboard.press("End")
                # Playwright-native wait for lazy-loaded results (instead of
                # blocking the thread with time.sleep).
                page.wait_for_timeout(3000)

            # 4. Extract Data
            # Re-fetch cards to ensure we have the handles
            cards = page.locator(card_selector).all()

            print(f"Extracting details for top {min(len(cards), max_results)}...")

            for i, card in enumerate(cards[:max_results]):
                try:
                    full_text = card.inner_text()

                    # Name is usually in 'aria-label'; fall back to the first
                    # line of the card text.
                    name = card.get_attribute("aria-label") or full_text.split("\n")[0]

                    # Google Maps Link
                    link = card.get_attribute("href")

                    # Rating extraction (simple heuristic from card text):
                    # the first short line containing "(" and a digit,
                    # e.g. "4.2(100)".
                    rating = "N/A"
                    if "stars" in full_text or "(" in full_text:
                        for line in full_text.split('\n'):
                            if len(line) < 15 and "(" in line and any(c.isdigit() for c in line):
                                rating = line
                                break

                    results_data.append({
                        "rank": i + 1,
                        "name": name,
                        "rating_info": rating,
                        "google_maps_url": link,
                        "coordinates_used": f"{lat},{lon}"
                    })

                except Exception as e:
                    # One broken card should not abort the whole scrape.
                    print(f"Skipping card {i}: {e}")
        finally:
            # Always release the browser, including when navigation or
            # scrolling raises.
            browser.close()

    return results_data

# --- EXAMPLE USAGE ---
if __name__ == "__main__":
    # Demo run of the scraper with fixed sample coordinates.
    # NOTE(review): presumably this guard is also true when the cell is run in
    # a notebook kernel, so the browser launches on every execution -- confirm.
    # Example: Hospitals near Tirupati, India
    # Lat: 13.6288, Lon: 79.4192
    hospitals = scrape_hospitals_by_coordinates(13.6288, 79.4192)
    
    # Pretty-print the scraped result list as JSON.
    print(json.dumps(hospitals, indent=2))

In [17]:
# Manual check of the OpenCage forward/reverse lookup on a sample Kurnool address.
response=_opencage_address_impl("Door No 43-67/A, Opposite Kurnool Muncipal Corporation, NR Peta, Kurnool-518004, Andhra Pradesh")

In [None]:
def verify_location(address):
    """Ask the agent whether a medical facility operates at ``address``.

    Builds a verification prompt, hands it to ``run_agent`` and returns the
    agent's ``"output"`` value unchanged (an empty string if that key is
    absent). The output is not parsed here.
    """
    print(f"\nüîé PHASE 1: Verifying facility at address: {address}...")

    # Prompt aimed at the Supervisor/Planner agent.
    prompt = f"""
    I need to verify if a medical facility exists at this specific address: "{address}".
    
    Task:
    1. Use Tavily Search to check this address and identify if a hospital or clinic operates there.
    2. If a facility is found, find its official name and website.
    3. Return a JSON object with:
       - "exists": true/false
       - "facility_name": "Name of hospital" (or null)
       - "verification_details": "How it was verified"
    """

    agent_reply = run_agent(prompt)

    # The reply's "output" is a string (often containing JSON); it is passed
    # through as-is for the caller to interpret.
    return agent_reply.get("output", "")

{'normalized_address': 'Kurnool district, India',
 'reverse_address': 'Nandyal, - 518112, Andhra Pradesh, India',
 'lat': 15.58333,
 'lng': 78.33333,
 'match_quality': 'confidence_1',
 'raw': {'forward': {'annotations': {'DIGIPIN': '459-4MT-264F',
    'DMS': {'lat': "15¬∞ 34' 59.98800'' N", 'lng': "78¬∞ 19' 59.98800'' E"},
    'MGRS': '44PKC1401624635',
    'Maidenhead': 'MK95dn99xx',
    'Mercator': {'x': 8720026.408, 'y': 1745047.729},
    'OSM': {'note_url': 'https://www.openstreetmap.org/note/new#map=17/15.58333/78.33333&layers=N',
     'url': 'https://www.openstreetmap.org/?mlat=15.58333&mlon=78.33333#map=17/15.58333/78.33333'},
    'UN_M49': {'regions': {'ASIA': '142',
      'IN': '356',
      'SOUTHERN_ASIA': '034',
      'WORLD': '001'},
     'statistical_groupings': ['LEDC']},
    'callingcode': 91,
    'currency': {'alternate_symbols': ['Rs', '‡ß≥', '‡´±', '‡Øπ', '‡§∞‡•Å', '‚Ç®'],
     'decimal_mark': '.',
     'html_entity': '&#x20b9;',
     'iso_code': 'INR',
     'iso_nume

In [34]:
def verify_location_with_mappls(address, client_id, client_secret):
    """
    STAGE 1: Mappls API with Smart Filtering for Healthcare

    Runs a Mappls text search for ``address`` and returns the first result
    that looks like a medical facility (by category code or by name). If no
    result looks medical, the top search result is returned instead.

    Args:
        address: Free-form address / place text to search for.
        client_id: Mappls OAuth client id.
        client_secret: Mappls OAuth client secret.

    Returns:
        On a hit: ``{"verified": True, "source", "name", "type", "raw_data"}``
        where ``name`` is ``"<placeName>, <placeAddress>"`` and ``type`` is
        ``"Healthcare"`` when a medical match was found or
        ``"Address/Landmark"`` when the top result was used as a fallback.
        Otherwise ``{"verified": False, "source": "Mappls", "details": ...}``
        with details ``"Auth Failed"``, ``"No results found"`` or ``"Error"``.
    """
    print(f"\nüîé STAGE 1: Checking Mappls (MapmyIndia) API...")

    # 1. Authenticate
    token = get_mappls_token(client_id, client_secret)
    if not token:
        return {"verified": False, "source": "Mappls", "details": "Auth Failed"}

    # 2. Search
    url = "https://atlas.mappls.com/api/places/textsearch/json"
    params = {
        "query": address,
        # 'pod' filters for Point of Interest type, but text search is broader/safer
    }
    headers = {
        "Authorization": f"Bearer {token}",
        "User-Agent": "ValidationAgent/1.0"
    }

    try:
        # requests has no default timeout; without one a stalled connection
        # would block the notebook indefinitely.
        response = requests.get(url, params=params, headers=headers, timeout=15)
        response.raise_for_status()
        data = response.json()

        locations = data.get("suggestedLocations", [])
        if not locations:
            return {"verified": False, "source": "Mappls", "details": "No results found"}

        # --- SMART FILTERING LOGIC ---
        # We define keywords that indicate a medical facility
        medical_keywords = ["Hospital", "Clinic", "Medical", "Doctor", "Dr.", "Nursing", "Scan", "Lab", "Pharmacy","Multispeciality","Super Speciality","critical care","emergency"]

        # Mappls category codes for Health usually start with 'HLT' (e.g., HLTHSP, HLTCLI)
        # We will scan through ALL results to find the best match
        best_match = None

        print(f"   found {len(locations)} potential matches. Filtering for healthcare...")

        for loc in locations:
            # `or` guards against explicit nulls in the API payload.
            name = loc.get("placeName") or ""
            category_codes = loc.get("keywords") or []  # e.g., ['HLTHSP', 'GOVMJR']

            # 1. Check if it's explicitly a Health category (Mappls codes)
            is_health_code = any(code.startswith("HLT") or code in ["LABRAD", "HSPGEN"] for code in category_codes)

            # 2. Check if name contains medical terms
            name_is_medical = any(term.lower() in name.lower() for term in medical_keywords)

            # If it's a medical place, prioritize it immediately
            if is_health_code or name_is_medical:
                best_match = loc
                print(f"   ‚úì Match Found (Priority): {name} [{category_codes}]")
                break  # Stop at the first medical match

        # Remember whether the filter found a medical place *before* falling
        # back, so the "type" label reflects that rather than list position.
        is_healthcare = best_match is not None

        # Fallback: If no medical place found, take the top result (it might just be the address)
        if not is_healthcare:
            print("   ‚ö†Ô∏è No specific hospital identified in results. Using top result.")
            best_match = locations[0]

        # Return the winner
        name = best_match.get("placeName", "")
        loc_address = best_match.get("placeAddress", "")
        full_result = f"{name}, {loc_address}"

        return {
            "verified": True,
            "source": "Mappls (Smart Filter)",
            "name": full_result,
            "type": "Healthcare" if is_healthcare else "Address/Landmark",
            "raw_data": best_match
        }

    except Exception as e:
        print(f"   [Mappls Search Error]: {e}")

    return {"verified": False, "source": "Mappls", "details": "Error"}

In [3]:

def get_mappls_token(client_id, client_secret):
    """Request an OAuth client-credentials access token from Mappls.

    Args:
        client_id: Mappls OAuth client id.
        client_secret: Mappls OAuth client secret.

    Returns:
        The ``access_token`` string from the token response, or ``None`` when
        credentials are missing, the request fails, or the response carries
        no token. Failures are printed, never raised.
    """
    if not client_id or not client_secret:
        # Nothing useful can come back without credentials; skip the request.
        print("   [Mappls Auth Error]: missing client_id/client_secret")
        return None

    url = "https://outpost.mapmyindia.com/api/security/oauth/token"
    data = {"grant_type": "client_credentials", "client_id": client_id, "client_secret": client_secret}
    try:
        # requests has no default timeout; without one a stalled connection
        # would block the caller indefinitely.
        response = requests.post(url, data=data, timeout=15)
        response.raise_for_status()
        return response.json().get("access_token")
    except Exception as e:
        print(f"   [Mappls Auth Error]: {e}")
        return None

def strict_verify_address(user_input, result_name, result_address):
    """
    Decide whether an API result really describes the address the user typed,
    so that merely 'nearby' suggestions can be rejected.

    The check has two stages: if both texts contain a 6-digit pincode they
    must be equal; then the share of significant input words (longer than 3
    characters, not in the ignore list) that occur in the result text must
    reach 40%.

    Returns: (match_bool, confidence_score) with the score as a percentage.
    """
    needle = user_input.lower()
    haystack = f"{result_name} {result_address}".lower()

    # Stage 1: pincode comparison (only when both sides have one).
    pin_pattern = r'\b\d{6}\b'
    pin_in = re.search(pin_pattern, needle)
    pin_out = re.search(pin_pattern, haystack)
    if pin_in and pin_out and pin_in.group() != pin_out.group():
        print(f"   ‚ùå Pincode Mismatch! Input: {pin_in.group()} vs Found: {pin_out.group()}")
        return False, 0.0

    # Stage 2: keyword overlap. Generic words carry no locating information.
    ignore_words = {"hospital", "clinic", "road", "street", "st", "dr", "doctor", "lane", "opp", "near", "beside", "andhra", "pradesh", "india"}
    keywords = [
        word
        for word in re.split(r'\W+', needle)
        if len(word) > 3 and word not in ignore_words
    ]

    if not keywords:
        return True, 100.0 # Input was too short/generic to strict check, let it pass

    total = len(keywords)
    hits = sum(1 for word in keywords if word in haystack)

    match_percentage = (hits / total) * 100
    print(f"   üîç Match Confidence: {match_percentage:.1f}% ({hits}/{total} keywords found)")

    return match_percentage >= 40, match_percentage
def strict_verify_location_with_mappls(address, client_id, client_secret):
    """
    STAGE 1: Mappls API with STRICT Filtering

    Runs a Mappls text search for ``address`` and accepts the first result
    that both looks like a medical facility (category code or name) and
    passes ``strict_verify_address`` against the input. There is no fallback
    to non-medical or non-matching results.

    Args:
        address: Free-form address / place text to search for.
        client_id: Mappls OAuth client id.
        client_secret: Mappls OAuth client secret.

    Returns: location_data with address_confidence_score.
        On a hit: ``{"verified": True, "source", "name", "type", "raw_data",
        "address_confidence_score"}`` where ``name`` is
        ``"<placeName>, <placeAddress>"``. Otherwise ``{"verified": False,
        "source": "Mappls", "details": ..., "address_confidence_score": 0.0}``.
    """
    print(f"\nüîé STAGE 1: Checking Mappls (MapmyIndia) API...")

    # 1. Authenticate
    token = get_mappls_token(client_id, client_secret)
    if not token:
        return {"verified": False, "source": "Mappls", "details": "Auth Failed", "address_confidence_score": 0.0}

    # 2. Search
    url = "https://atlas.mappls.com/api/places/textsearch/json"
    params = { "query": address }
    headers = { "Authorization": f"Bearer {token}", "User-Agent": "ValidationAgent/1.0" }

    try:
        # requests has no default timeout; without one a stalled connection
        # would block the notebook indefinitely.
        response = requests.get(url, params=params, headers=headers, timeout=15)
        response.raise_for_status()
        data = response.json()

        locations = data.get("suggestedLocations", [])
        if not locations:
            return {"verified": False, "source": "Mappls", "details": "No results found", "address_confidence_score": 0.0}

        medical_keywords = ["Hospital", "Clinic", "Medical", "Doctor", "Dr.", "Nursing", "Scan", "Lab", "Pharmacy"]
        best_match = None
        confidence_score = 0.0

        print(f"   found {len(locations)} potential matches. Filtering for exact healthcare match...")

        for loc in locations:
            # `or` guards against explicit nulls in the API payload, which
            # would otherwise raise and turn a valid hit into a generic Error.
            name = loc.get("placeName") or ""
            addr = loc.get("placeAddress") or ""
            category_codes = loc.get("keywords") or []

            is_health_code = any(code.startswith("HLT") or code in ["LABRAD", "HSPGEN"] for code in category_codes)
            name_is_medical = any(term.lower() in name.lower() for term in medical_keywords)

            if is_health_code or name_is_medical:
                # Only accept a medical candidate whose text matches the input.
                is_match, conf_score = strict_verify_address(address, name, addr)

                if is_match:
                    best_match = loc
                    confidence_score = conf_score
                    print(f"   ‚úì Match Found & Verified: {name}")
                    break
                else:
                    print(f"   Rejecting nearby candidate: {name} (Address mismatch)")

        if not best_match:
            return {"verified": False, "source": "Mappls", "details": "Hospital not found at this exact location (Nearby results rejected)", "address_confidence_score": 0.0}

        name = best_match.get("placeName", "")
        loc_address = best_match.get("placeAddress", "")
        full_result = f"{name}, {loc_address}"

        return {
            "verified": True,
            "source": "Mappls (Strict)",
            "name": full_result,
            "type": "Healthcare",
            "raw_data": best_match,
            "address_confidence_score": confidence_score
        }

    except Exception as e:
        print(f"   [Mappls Search Error]: {e}")

    return {"verified": False, "source": "Mappls", "details": "Error", "address_confidence_score": 0.0}

In [12]:
# ... existing imports ...
# NOTE: within the visible notebook `requests` is imported only in this cell,
# yet get_mappls_token and both Mappls search functions call it; on a fresh
# kernel this cell has to run before any of them is invoked.
import os
import requests
import dotenv
import re
# Load environment variables (Mappls credentials are read in main()).
dotenv.load_dotenv('config/.env')
def main():
    """Run Stage 1 (strict Mappls verification) on a sample address and print the result.

    Mappls credentials are read from the environment. Both the lower-case
    names this function originally used (``mappls_client_id`` /
    ``mappls_client_secret``) and the upper-case names
    (``MAPPLS_CLIENT_ID`` / ``MAPPLS_CLIENT_SECRET``) are accepted; the
    lower-case value wins when both are set. Returns nothing.
    """
    print("="*60)
    print("üè• VALIDATION SYSTEM (Mappls -> Google Browser Fallback)")
    print("="*60)

    # Other sample addresses kept for manual testing (swap in as needed):
    #address ="Kims Hospital Park Square Complex, Opposite Income Tax Office, Muncipal Office Road, NR Peta, Kurnool-518004, Andhra Pradesh"
    #address="near New Ayyapa Swamy Temple, Mahalakshmi Nagar, Joharapuram, Kurnool, Andhra Pradesh 518004"
    #address="18-2-148, opp. Devendra (CS) Theatre, Ashok Nagar, Korlagunta, Tirupati, Andhra Pradesh 517501"
    #address="Russh Multi Speciality Hospital,10-14-576/6, 1st St, opp. Municipal Office Road, Reddy & Reddy's Colony, Reddy and Reddy's Colony, Tirupati, Andhra Pradesh 517501"
    #address="Sankalpa Super Speciality Hospital,Karakambadi Bazar St, Tata Nagar, Tirupati, Andhra Pradesh 517501"
    #address="Medicover Hospitals,NH -16, Door No- 1-1-83, New Venkojipalem MVP, near Hp Petrol Bunk, Sector- 6, MVP Colony, Visakhapatnam, Andhra Pradesh 530022"
    address="Harshitha Hospital,10-25-03-109 Tilak Road Opp Manohari Nursing Home Reddy and Reddy's Colony Tirupati, Dist, Andhra Pradesh 517501"

    # Get keys from the environment. Variable names are case-sensitive on
    # POSIX, so try both spellings instead of silently passing None.
    mappls_id = os.getenv("mappls_client_id") or os.getenv("MAPPLS_CLIENT_ID")
    mappls_secret = os.getenv("mappls_client_secret") or os.getenv("MAPPLS_CLIENT_SECRET")

    # --- STAGE 1: Mappls (Best for India) ---
    location_result = strict_verify_location_with_mappls(address, mappls_id, mappls_secret)
    print("\nMappls Result:", location_result)

In [13]:
# Run the Stage 1 Mappls validation demo.
main()

üè• VALIDATION SYSTEM (Mappls -> Google Browser Fallback)

üîé STAGE 1: Checking Mappls (MapmyIndia) API...
   found 10 potential matches. Filtering for exact healthcare match...
   üîç Match Confidence: 63.6% (7/11 keywords found)
   ‚úì Match Found & Verified: Harshitha Hospital

Mappls Result: {'verified': True, 'source': 'Mappls (Strict)', 'name': 'Harshitha Hospital, 10, 13, 568, Reddy & Reddy Colony, District Chittoor, Tirupati, Andhra Pradesh, 517501', 'type': 'Healthcare', 'raw_data': {'type': 'POI', 'placeAddress': '10, 13, 568, Reddy & Reddy Colony, District Chittoor, Tirupati, Andhra Pradesh, 517501', 'eLoc': 'G2T9BT', 'placeName': 'Harshitha Hospital', 'alternateName': '', 'keywords': ['HLTHSP'], 'distance': 0, 'orderIndex': 1, 'suggester': 'placeName'}, 'address_confidence_score': 63.63636363636363}


In [None]:
# TODO: In testing.py the hospital name is taken from the user's address like this: given_hospital_name = address.strip().split(",")[0].strip(). Next, compare it with the hospital name in the Mappls output["name"]; if the two names are not the same, return "human review needed".