# In [None]:
"""
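Load the required packages, configure a headless Chrome WebDriver, scrape the
Google search results page for a location, and ask a local Ollama model to
extract its district, city, country and continent as a JSON object.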
"""

import json
import time
from urllib.parse import quote_plus

import browser_cookie3
import numpy as np
import ollama
import pandas as pd
import torch
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Set up Chrome options to mimic a real (non-automated) desktop browser.
chrome_options = Options()
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
chrome_options.add_argument(f'--user-agent={user_agent}')
# Exclude Chrome's "enable-automation" switch and its automation extension.
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('--headless')
chrome_options.add_argument('--no-sandbox')
chrome_options.add_argument('--disable-dev-shm-usage')
chrome_options.add_argument('--disable-blink-features=AutomationControlled')

# Initialize driver
driver = webdriver.Chrome(options=chrome_options)

# Hide navigator.webdriver on every page the driver visits.
# execute_script() would only patch the document that is loaded right now
# (about:blank), and that patch disappears on the first navigation. Registering
# the snippet through the DevTools protocol makes Chrome evaluate it in each
# new document before the page's own scripts run.
driver.execute_cdp_cmd(
    "Page.addScriptToEvaluateOnNewDocument",
    {"source": "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"},
)

# Load the reference table of countries; `country_raw` keeps the full frame.
country_raw = pd.read_csv('countries.csv')

# Flatten the 'name' column into one comma-separated string for the LLM prompt.
# Only 'name' is needed, so it is selected directly instead of first dropping
# 'id'/'alpha2'/'alpha3' (which raised KeyError whenever the CSV lacked any of
# those unrelated columns). The values still go through numpy's astype(str),
# so every entry is rendered as text exactly as before.
country = ",".join(country_raw['name'].to_numpy().astype(str))



location = "New Delhi, India"

# Scrape page
# Alternative source (disabled): f"https://www.google.com/maps/place/{location}?hl=en"
# Percent-encode the location so spaces, commas, '&', '#', etc. cannot break or
# truncate the query string.
url = f"https://www.google.com/search?q={quote_plus(location)}&lr=lang_en"
selector = "body"

driver.get(url)

# Default to "no context"; only overwritten when the element is actually found.
extracted_text = None
if selector:
    wait = WebDriverWait(driver, 2)
    try:
        element = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, selector)))
    except TimeoutException:
        # A slow or blocked page should not abort the run: continue without
        # scraped context, the same state the empty-selector case produces.
        print(f"Timed out waiting for '{selector}' at {url}; continuing without page text")
    else:
        extracted_text = element.text
    
#print(extracted_text)

# Prepare the retrieval-augmented prompt for Ollama.
# The f-string interpolates three values defined earlier in this script:
#   - location:       the place name being resolved
#   - country:        comma-separated reference list of allowed country names
#   - extracted_text: text scraped from the search page (None if nothing was scraped)
# The model is told to answer with a single JSON object containing the keys
# "District", "City", "Country" and "Continent".
# NOTE: the leading indentation inside the literal is part of the prompt text.
# NOTE(review): the rules name two different placeholders ("NaN" for an
# undeterminable District/City, "Unknown" for unavailable information) --
# confirm which one downstream code expects.
prompt = f"""
        You are a precise geographical data extractor. Your task is to analyze the provided web search results and your 
        internal knowledge about the location '{location}' and return EXACTLY ONE valid JSON object with the following four keys:

        - "District": the smallest administrative division (e.g., county, borough, district, arrondissement). Use "NaN" if the location is a country or if no district-level division exists or can be determined.
        - "City": the city or municipality name. Use "NaN" if the location is a country or if it refers to a region/district rather than a specific city.
        - "Country": the full official English country name. You MUST choose the country name exclusively from the provided reference list below. Never invent or use a different spelling.
        - "Continent": one of ["Africa", "Antarctica", "Asia", "Australia", "Europe", "North America", "South America"]

        Reference country list (use exactly one of these strings for the "Country" field):
        {country}

        Rules:
        - If '{location}' is itself a country that appears in the reference list, set "District" = "NaN" and "City" = "NaN".
        - If the location is ambiguous, prioritize the most commonly known entity with that exact name.
        - Never return null, empty strings, or missing keys.
        - If information is not available or cannot be confidently determined, use "Unknown".
        - Output NOTHING except the JSON object. No explanations, no markdown, no ```json markers, no extra whitespace or newlines before/after the JSON.

        Context (web search results ):
        {extracted_text}

        Return only the JSON:
        """

# Use Ollama to perform RAG/generation
response = ollama.generate(model='gpt-oss:latest', prompt=prompt)
generated_json = response['response'].strip()

# Reasoning models may emit "<think>...</think>" before the answer. Keep only
# the text after the last closing tag; rpartition returns the whole string
# unchanged when the tag is absent, so plain models are handled too.
result = generated_json.rpartition("</think>")[2].strip()

# Models sometimes wrap the object in markdown fences or add stray prose despite
# the instructions, so narrow the text to the outermost {...} span when present.
obj_start = result.find("{")
obj_end = result.rfind("}")
if obj_start != -1 and obj_end > obj_start:
    result = result[obj_start:obj_end + 1]

# `flag` is True only when the reply parsed into a JSON object (dict). A valid
# JSON list or string would otherwise fail on the item assignment below.
try:
    data = json.loads(result)
except json.JSONDecodeError:
    data = None
    flag = False
else:
    flag = isinstance(data, dict)

if flag:
    # Print the JSON
    print(location + "========>" + result)
    # Record which input location this answer belongs to.
    data["locality"] = location
else:
    # Report the failure instead of skipping the location silently.
    print(location + "========> could not parse model output as a JSON object: " + repr(generated_json))

del response
# NOTE(review): this releases only memory cached by PyTorch in this process;
# presumably the Ollama model lives in a separate server process -- confirm
# whether this call is still needed.
torch.cuda.empty_cache()