In [1]:
# DOWNLOADING OF IMPORTANT PACAGES
import re
import math
import heapq
from collections import defaultdict, deque

In [2]:
# -------------------------------------------------------------------
# 1. "DATABASE": Small, relevant knowledge base (can be moved to JSON)
# -------------------------------------------------------------------
KNOWLEDGE_BASE = [
    # === DISASTER ===
    {
        "id": "D1",
        "domain": "disaster",
        "language": "en",
        "title": "Boiled water advisory after flood",
        "text": (
            "After a flood, always boil drinking water for at least 10 minutes "
            "or use certified bottled water until the local authority declares it safe."
        ),
    },
    {
        "id": "D2",
        "domain": "disaster",
        "language": "en",
        "title": "Compensation for house damage",
        "text": (
            "Household compensation for fully damaged houses will be credited directly "
            "to the beneficiary bank account within 30 to 60 days after survey completion."
        ),
    },
    {
        "id": "D3",
        "domain": "disaster",
        "language": "hi",
        "title": "पीने का पानी सुरक्षित है या नहीं",
        "text": (
            "अगर पानी का रंग बदला हुआ हो या बदबू आए तो उसे सीधे न पिएँ। "
            "उसे छान कर कम से कम 10 मिनट उबालें।"
        ),
    },

    # === AGRICULTURE ===
    {
        "id": "A1",
        "domain": "agriculture",
        "language": "en",
        "title": "Pest control in paddy",
        "text": (
            "For brown planthopper attack in paddy, avoid overuse of nitrogen fertilizer "
            "and maintain proper drainage. Use recommended biopesticides from the "
            "agriculture department only."
        ),
    },
    {
        "id": "A2",
        "domain": "agriculture",
        "language": "en",
        "title": "Crop insurance claim timeline",
        "text": (
            "Crop insurance claims are usually settled within two months from the date "
            "of loss intimation, subject to field assessment and approval."
        ),
    },

    # === HEALTH ===
    {
        "id": "H1",
        "domain": "health",
        "language": "en",
        "title": "Diarrhoea after flood",
        "text": (
            "If you have diarrhoea after a flood, drink ORS solution, avoid street food, "
            "and visit the nearest health centre if symptoms persist beyond two days "
            "or if there is blood in stool."
        ),
    },
    {
        "id": "H2",
        "domain": "health",
        "language": "en",
        "title": "Safe drinking water (general)",
        "text": (
            "Safe drinking water should be clear, without unusual smell, and from a "
            "treated source. If you are unsure, boil it or use chlorine tablets as per "
            "health department guidelines."
        ),
    },
]

In [3]:

# ------------------------------------------------------------
# 2. Basic text preprocessing + simple multilingual tokeniser
# ------------------------------------------------------------

def tokenize(text: str):
    """
    Lowercase and split on non-alphanumeric characters.
    Includes basic support for Devanagari (Hindi/Odia-like) characters.
    """
    text = text.lower()
    # Keep English letters, digits, and Devanagari range
    text = re.sub(r"[^a-z0-9\u0900-\u097F]+", " ", text)
    return [t for t in text.split() if t]

In [4]:

# -------------------------------------------
# 3. Build a graph over snippets (knowledge)
# -------------------------------------------

def build_graph(snippets, min_overlap: int = 2):
    """
    Build an undirected graph where:
    - Each node is a snippet id.
    - Edge exists if two snippets have at least `min_overlap`
      overlapping tokens in their text+title.
    """
    graph = defaultdict(list)
    tokens_by_id = {}

    for s in snippets:
        tokens_by_id[s["id"]] = set(tokenize(s["title"] + " " + s["text"]))

    ids = [s["id"] for s in snippets]
    for i in range(len(ids)):
        for j in range(i + 1, len(ids)):
            id1, id2 = ids[i], ids[j]
            overlap = tokens_by_id[id1] & tokens_by_id[id2]
            if len(overlap) >= min_overlap:
                graph[id1].append(id2)
                graph[id2].append(id1)

    return graph



In [5]:
# ----------------------------------------------------
# 4. Very simple "embedding" + cosine similarity
#    (embedding = bag-of-words vector over vocabulary)
# ----------------------------------------------------

def build_vocabulary(snippets):
    vocab = {}
    for s in snippets:
        for tok in tokenize(s["title"] + " " + s["text"]):
            if tok not in vocab:
                vocab[tok] = len(vocab)
    return vocab


def vectorize(tokens, vocab):
    vec = [0.0] * len(vocab)
    for t in tokens:
        if t in vocab:
            vec[vocab[t]] += 1.0
    return vec


def cosine_similarity(v1, v2):
    dot = sum(a * b for a, b in zip(v1, v2))
    n1 = math.sqrt(sum(a * a for a in v1))
    n2 = math.sqrt(sum(b * b for b in v2))
    if n1 == 0 or n2 == 0:
        return 0.0
    return dot / (n1 * n2)

In [6]:
# ------------------------------------------------
# 5. Uninformed search: BFS over the snippet graph
# ------------------------------------------------

def bfs_best_snippet(query, snippets, graph):
    """
    Uninformed search:
    - BFS does not know anything about similarity to the query;
      it just explores neighbours level by level.
    - While exploring, we compute similarity for each visited node
      and remember the best-scoring snippet.
    Returns (best_id, best_similarity, expansions, visit_order)
    """
    vocab = build_vocabulary(snippets)
    q_vec = vectorize(tokenize(query), vocab)
    id_to_snip = {s["id"]: s for s in snippets}

    # arbitrarily start from the first snippet
    start = snippets[0]["id"]

    visited = set([start])
    queue = deque([start])

    best_id = None
    best_sim = -1.0
    expansions = 0
    visit_order = []

    while queue:
        nid = queue.popleft()
        visit_order.append(nid)
        expansions += 1

        s = id_to_snip[nid]
        s_vec = vectorize(tokenize(s["title"] + " " + s["text"]), vocab)
        sim = cosine_similarity(q_vec, s_vec)

        if sim > best_sim:
            best_sim = sim
            best_id = nid

        for nb in graph[nid]:
            if nb not in visited:
                visited.add(nb)
                queue.append(nb)

    return best_id, best_sim, expansions, visit_order

In [7]:
# ------------------------------------------------------
# 6. Informed search: A* using similarity heuristic h(n)
# ------------------------------------------------------

def astar_best_snippet(query, snippets, graph):
    """
    Informed search (A*):
    - We define heuristic h(n) = 1 - cosine_similarity(emb(n), emb(query)).
      => If snippet is very similar to query, h(n) is small.
    - Edge cost is 1 for every step (unweighted graph).
    - f(n) = g(n) + h(n), where g(n) is distance from start.
    - Nodes with smaller f(n) are expanded first.
    Returns (best_id, best_similarity, expansions, visit_order)
    """
    vocab = build_vocabulary(snippets)
    q_vec = vectorize(tokenize(query), vocab)
    id_to_snip = {s["id"]: s for s in snippets}
    start = snippets[0]["id"]

    def heuristic(nid):
        s = id_to_snip[nid]
        s_vec = vectorize(tokenize(s["title"] + " " + s["text"]), vocab)
        sim = cosine_similarity(q_vec, s_vec)
        return 1.0 - sim  # smaller is better

    open_heap = []
    g_cost = {start: 0.0}
    heapq.heappush(open_heap, (heuristic(start), start))  # f = g + h, here g=0 initially

    visited = set()
    best_id = None
    best_sim = -1.0
    expansions = 0
    visit_order = []

    while open_heap:
        f, nid = heapq.heappop(open_heap)
        if nid in visited:
            continue
        visited.add(nid)
        visit_order.append(nid)
        expansions += 1

        # compute similarity at this node
        s = id_to_snip[nid]
        s_vec = vectorize(tokenize(s["title"] + " " + s["text"]), vocab)
        sim = cosine_similarity(q_vec, s_vec)
        if sim > best_sim:
            best_sim = sim
            best_id = nid

        # expand neighbours
        for nb in graph[nid]:
            tentative_g = g_cost[nid] + 1  # edge cost = 1
            if nb not in g_cost or tentative_g < g_cost[nb]:
                g_cost[nb] = tentative_g
                hn = heuristic(nb)
                fn = tentative_g + hn
                heapq.heappush(open_heap, (fn, nb))

    return best_id, best_sim, expansions, visit_order

In [8]:

# ---------------------------------------------------
# 7. Selecting final snippet & generating explanation
# ---------------------------------------------------

def select_final_snippet(query, snippets):
    """
    - Build graph
    - Run BFS (uninformed) and A* (informed)
    - Compare both and select final snippet.
    - If similarity is too low, ask for clarification.
    """
    graph = build_graph(snippets)

    bfs_id, bfs_sim, bfs_exp, bfs_order = bfs_best_snippet(query, snippets, graph)
    astar_id, astar_sim, astar_exp, astar_order = astar_best_snippet(query, snippets, graph)

    id_to_snip = {s["id"]: s for s in snippets}

    # Choose the method that gives higher similarity; break ties towards A*.
    if astar_sim >= bfs_sim:
        chosen_id = astar_id
        chosen_sim = astar_sim
        method = "A* (informed search)"
    else:
        chosen_id = bfs_id
        chosen_sim = bfs_sim
        method = "BFS (uninformed search)"

    chosen_snippet = id_to_snip[chosen_id] if chosen_id is not None else None

    # If similarity is very low, we treat it as ambiguous and ask clarification.
    clarification_needed = chosen_sim < 0.15

    reasoning = {
        "bfs": {
            "best_id": bfs_id,
            "best_similarity": bfs_sim,
            "expansions": bfs_exp,
            "visit_order": bfs_order,
        },
        "astar": {
            "best_id": astar_id,
            "best_similarity": astar_sim,
            "expansions": astar_exp,
            "visit_order": astar_order,
        },
        "final_choice": {
            "method": method,
            "chosen_id": chosen_id,
            "chosen_similarity": chosen_sim,
            "clarification_needed": clarification_needed,
        },
    }

    return chosen_snippet, clarification_needed, reasoning


def generate_answer(query, snippet, clarification_needed):
    """
    Simple, safe answer generator:
    - If unclear => ask user for more context.
    - Else => return guideline snippet + short explanation.
    """
    if snippet is None:
        return (
            "I could not find any relevant official guideline for your question. "
            "Please contact your local government helpdesk directly."
        )

    if clarification_needed:
        return (
            "I found some related information, but I am not fully confident it matches "
            "your situation.\n\n"
            f"Possible relevant guideline:\n"
            f"- [{snippet['domain'].upper()}] {snippet['title']}\n"
            f"{snippet['text']}\n\n"
            "To give a safer answer, please clarify your location and more details."
        )

    # Normal confident answer
    return (
        f"Your question: {query}\n\n"
        f"Based on the official/verified guideline I found:\n"
        f"- [{snippet['domain'].upper()}] {snippet['title']}\n"
        f"{snippet['text']}\n\n"
        "If you are still unsure or your situation is different from the example above, "
        "please contact your local authority or health worker for confirmation."
    )



In [None]:
# -------------------------------------------
# 8. Demo / main function for quick testing
# -------------------------------------------

def demo():
    print("=== AI-Driven Context-Aware QA Demo ===")
    print("Type 'exit' to quit.\n")

    while True:
        query = input("User query: ").strip()
        if not query or query.lower() == "exit":
            print("Exiting.")
            break

        snippet, clarification_needed, reasoning = select_final_snippet(query, KNOWLEDGE_BASE)
        answer_text = generate_answer(query, snippet, clarification_needed)

        print("\n--- ANSWER ---")
        print(answer_text)

        print("\n--- REASONING / COMPARISON ---")
        print("BFS (uninformed search):")
        print(f"  Best snippet id      : {reasoning['bfs']['best_id']}")
        print(f"  Similarity           : {reasoning['bfs']['best_similarity']:.3f}")
        print(f"  Nodes expanded       : {reasoning['bfs']['expansions']}")
        print(f"  Visit order          : {reasoning['bfs']['visit_order']}")

        print("\nA* (informed search with similarity heuristic):")
        print(f"  Best snippet id      : {reasoning['astar']['best_id']}")
        print(f"  Similarity           : {reasoning['astar']['best_similarity']:.3f}")
        print(f"  Nodes expanded       : {reasoning['astar']['expansions']}")
        print(f"  Visit order          : {reasoning['astar']['visit_order']}")

        final = reasoning["final_choice"]
        print("\nFinal selected snippet:")
        print(f"  Chosen by method     : {final['method']}")
        print(f"  Chosen snippet id    : {final['chosen_id']}")
        print(f"  Chosen similarity    : {final['chosen_similarity']:.3f}")
        print(f"  Clarification needed : {final['clarification_needed']}")
        print("=============================\n")



if __name__ == "__main__":
    demo()


=== AI-Driven Context-Aware QA Demo ===
Type 'exit' to quit.

User query: what to do in case of floods

--- ANSWER ---
Your question: what to do in case of floods

Based on the official/verified guideline I found:
- [AGRICULTURE] Pest control in paddy
For brown planthopper attack in paddy, avoid overuse of nitrogen fertilizer and maintain proper drainage. Use recommended biopesticides from the agriculture department only.

If you are still unsure or your situation is different from the example above, please contact your local authority or health worker for confirmation.

--- REASONING / COMPARISON ---
BFS (uninformed search):
  Best snippet id      : A1
  Similarity           : 0.311
  Nodes expanded       : 6
  Visit order          : ['D1', 'D2', 'A1', 'H1', 'H2', 'A2']

A* (informed search with similarity heuristic):
  Best snippet id      : A1
  Similarity           : 0.311
  Nodes expanded       : 6
  Visit order          : ['D1', 'A1', 'D2', 'H1', 'H2', 'A2']

Final selected snipp