In [15]:
import asyncio
import csv
import json
import os
import re
import time
import hashlib
from typing import List, Dict

from curl_cffi.requests import AsyncSession
from selectolax.parser import HTMLParser
from smolagents import OpenAIModel

# ==============================================================================
# CONFIGURATION
# ==============================================================================
# SECURITY(review): an API key is committed in source below. The environment
# variable TAVILY_API_KEY now takes precedence; the literal is kept only as a
# fallback so existing runs keep working. Rotate the key and drop the literal.
TAVILY_API_KEY = os.environ.get("TAVILY_API_KEY") or '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'
CACHE_FILE = "query_cache.json"  # JSON file used to persist previous answers
MAX_CHARS_PER_SITE = 800  # upper bound on cleaned text kept per search result
MAX_RESULTS = 3  # sent to the search API as "max_results"
SEARCH_DEPTH = "basic"  # sent to the search API as "search_depth"
NETWORK_TIMEOUT = 7.0  # timeout passed to the HTTP request (presumably seconds)

# ============================ DETERMINISTIC TOOLS ==============================

class QueryCache:
    """Persistent query -> result cache backed by a single JSON file.

    Queries are normalized (lowercased and stripped) before hashing, so
    'Inflation' and ' inflation ' map to the same entry. The whole cache is
    held in memory in ``self.cache`` and rewritten to disk on every ``set``.
    """

    def __init__(self, filename: str):
        """Remember *filename* and load any entries already stored there."""
        self.filename = filename
        self.cache = self._load()

    @staticmethod
    def _key(query: str) -> str:
        """Return the cache key for *query*: MD5 hex digest of the normalized text."""
        # MD5 is used purely as a compact, stable key (not for security); it is
        # kept so that cache files written by earlier runs remain readable.
        return hashlib.md5(query.lower().strip().encode()).hexdigest()

    def _load(self) -> dict:
        """Read the cache file, returning an empty dict if it is missing or unusable."""
        try:
            with open(self.filename, 'r', encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # Missing/unreadable file, invalid JSON or undecodable bytes:
            # start with an empty cache instead of crashing the caller.
            return {}
        # A valid JSON document that is not an object cannot serve as the cache.
        return data if isinstance(data, dict) else {}

    def get(self, query: str):
        """Return the stored entry for *query*, or ``None`` if there is none."""
        return self.cache.get(self._key(query))

    def set(self, query: str, data: dict):
        """Store *data* under *query* and persist the whole cache to disk.

        The file is written to a temporary sibling and then moved into place,
        so a failure while serializing does not truncate the existing cache.
        """
        self.cache[self._key(query)] = data
        tmp_path = f"{self.filename}.tmp"
        try:
            with open(tmp_path, 'w', encoding="utf-8") as f:
                # ensure_ascii=False keeps non-ASCII answers readable on disk.
                json.dump(self.cache, f, indent=2, ensure_ascii=False)
            os.replace(tmp_path, self.filename)
        finally:
            # Only present if the write or the replace failed part-way.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

class FastParser:
    """Groups HTML-to-plain-text helpers."""

    @staticmethod
    def clean_html(html_content: str) -> str:
        """Return the visible body text of *html_content* as one trimmed line.

        Script, style, navigation and similar non-content elements are removed,
        whitespace runs are collapsed to single spaces, and the result is cut
        to at most ``MAX_CHARS_PER_SITE`` characters. Falsy input, or markup
        for which the parser exposes no body, yields an empty string.
        """
        if not html_content:
            return ""

        document = HTMLParser(html_content)

        # Strip elements that carry no article text before extracting.
        for node in document.css('script, style, nav, footer, header, aside, form, svg'):
            node.decompose()

        if not document.body:
            return ""

        raw_text = document.body.text(separator=' ', strip=True)
        collapsed = re.sub(r'\s+', ' ', raw_text).strip()
        return collapsed[:MAX_CHARS_PER_SITE]

# ============================== CORE LOGIC ===================================

async def fast_search(query: str, session: AsyncSession) -> List[Dict]:
    """Query the Tavily search API and return cleaned results sorted by URL.

    Args:
        query: User question; " Bangladesh" is appended before searching.
        session: Open HTTP session used to POST the request.

    Returns:
        A list of ``{"url": ..., "content": ...}`` dicts ordered by URL, where
        ``content`` has been passed through ``FastParser.clean_html``. An empty
        list is returned when the request fails, the response is not HTTP 200,
        or the payload has no usable results. Failures are logged as warnings
        rather than raised, so the search stays best-effort.
    """
    import logging  # local import: keeps this block self-contained

    log = logging.getLogger(__name__)

    url = "https://api.tavily.com/search"
    payload = {
        "api_key": TAVILY_API_KEY,
        "query": f"{query} Bangladesh",
        "search_depth": SEARCH_DEPTH,
        "include_content": True,
        "max_results": MAX_RESULTS
    }

    try:
        resp = await session.post(url, json=payload, timeout=NETWORK_TIMEOUT)
        if resp.status_code != 200:
            log.warning("Search API returned HTTP %s", resp.status_code)
            return []
        data = resp.json()
    except Exception as exc:
        # Best-effort boundary: network and decoding errors must not crash the
        # caller, but they are reported instead of being silently discarded.
        log.warning("Search request failed: %r", exc)
        return []

    raw_results = data.get("results") if isinstance(data, dict) else None

    cleaned = []
    for item in raw_results or []:
        link = item.get("url") if isinstance(item, dict) else None
        if not isinstance(link, str):
            # Skip a malformed entry rather than losing every other result.
            continue
        cleaned.append(
            {"url": link, "content": FastParser.clean_html(item.get("content", ""))}
        )

    # DETERMINISM STEP 1: sort results by URL so the LLM always sees the
    # context in the same order regardless of search API jitter.
    return sorted(cleaned, key=lambda entry: entry["url"])

async def run_fast_rag(query: str):
    """Answer *query* from the cache or from a fresh search + LLM call, and print it.

    Flow:
        1. Look the query up in the JSON cache; on a hit, print the stored
           answer and return.
        2. Otherwise run ``fast_search``; if it yields nothing, print
           "No results found." and return.
        3. Join the result contents into a prompt, ask the model for a
           one-sentence answer, cache it with its source URLs, and print it
           together with the elapsed time.

    Args:
        query: The user question.

    Returns:
        None. All output goes to stdout.
    """
    cache = QueryCache(CACHE_FILE)

    # Check the cache first. An entry without an "output" field is treated as
    # a miss rather than raising KeyError.
    cached_res = cache.get(query)
    if isinstance(cached_res, dict) and "output" in cached_res:
        print(f"\n[CACHE HIT - 0.0s]\nAnswer: {cached_res['output']}")
        return

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    t_start = time.perf_counter()

    async with AsyncSession() as session:
        results = await fast_search(query, session)

    if not results:
        print("No results found.")
        return

    # DETERMINISM STEP 2: temperature 0 and a fixed seed are requested.
    # NOTE(review): whether the backend honors `seed` is not visible here.
    # The model is only built once there is context to send to it.
    model = OpenAIModel(
        model_id="cpatonn/Qwen3-30B-A3B-Instruct-2507-AWQ-4bit",
        api_base="http://localhost:5000/v1",
        api_key="no-key",
        temperature=0.0,
        seed=42
    )

    context = "\n---\n".join(r["content"] for r in results)
    prompt = f"Src:\n{context}\n\nQ: {query}\nAns (1 sentence):"

    # DETERMINISM STEP 3: greedy sampling (handled by temperature=0).
    # The model call is synchronous, so it runs in the default executor to
    # avoid blocking the event loop.
    loop = asyncio.get_running_loop()
    ans_res = await loop.run_in_executor(
        None, lambda: model([{"role": "user", "content": prompt}])
    )

    # Guard against a response whose content is None.
    output_text = (ans_res.content or "").strip()
    total_time = round(time.perf_counter() - t_start, 2)

    # Only cache real answers: an empty one would otherwise be replayed as a
    # cache hit on every later run.
    if output_text:
        final_data = {
            "query": query,
            "output": output_text,
            "sources": [r['url'] for r in results],
        }
        cache.set(query, final_data)

    print(f"\n[Time: {total_time}s]\nAnswer: {output_text}")



In [19]:
# Entry cell: runs the pipeline once for a sample question.
if __name__ == "__main__":
    # Sample query in romanized Bengali; it appears to ask for today's gold
    # price in Bangladesh and whether it rose or fell since last week.
    target_query = "Bangladesh e ajke gold price koto and last week theke price barse naki komse?"
    # NOTE: top-level `await` is only accepted by hosts that support it, such
    # as an IPython/Jupyter cell; a plain `python file.py` run rejects it as a
    # SyntaxError.
    await run_fast_rag(target_query)


[CACHE HIT - 0.0s]
Answer: আজকের 24ক্যারেট সোনার দাম বাংলাদেশে ৳19,465.93 টাকা প্রতি গ্রাম, যা গত সপ্তাহের তুলনায় ৳965.76 টাকা বেশি বা প্রায় 5.26% বৃদ্ধি পেয়েছে।
