### 1. IMPORT REQUIRED LIBRARIES

In [52]:
import re
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import sent_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from collections import defaultdict




In [53]:
from openai import OpenAI
client = OpenAI(api_key="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")

###   2. DOCUMENT LOADER FUNCTION
#####    Reads PDF, TXT, and JSON files - Converts all content into plain text

In [54]:
import os
import json
import pdfplumber

def load_documents(folder="inputfiles"):
    docs = []

    for filename in os.listdir(folder):
        filepath = os.path.join(folder, filename)

        # ---- PDF FILES ----
        if filename.lower().endswith(".pdf"):
            with pdfplumber.open(filepath) as pdf:
                text = ""
                for page in pdf.pages:
                    page_text = page.extract_text()
                    if page_text:
                        text += page_text + "\n"
                docs.append(text)

        # ---- TEXT FILES ----
        elif filename.lower().endswith(".txt"):
            with open(filepath, "r", encoding="utf-8") as f:
                docs.append(f.read())

        # ---- JSON FILES ----
        elif filename.lower().endswith(".json"):
            with open(filepath, "r", encoding="utf-8") as f:
                data = json.load(f)
                # Convert JSON to readable string
                docs.append(json.dumps(data, indent=2))

    return docs


documents = load_documents()


### 3.Section-Based Text Chunking Using Rule-Based Header Detection

##### The function scans the document line by line, identifies section headings, and groups all text under that heading until the next section appears.

In [55]:
import re

def section_chunk(text):
    sections = []
    current_title = None
    buffer = []

    for line in text.split("\n"):
        line = line.strip()
        if not line:
            continue

        # Detect top-level section headers
        is_header = (
            re.match(r"^PAGE\s+\d+", line, re.IGNORECASE) or
            re.match(r"^\d+\.\s+[A-Za-z]", line)  
        )

        if is_header:
            if current_title and buffer:
                sections.append({
                    "title": current_title,
                    "content": "\n".join(buffer)
                })

            current_title = line
            buffer = [line]   
        else:
            buffer.append(line)

    if current_title and buffer:
        sections.append({
            "title": current_title,
            "content": "\n".join(buffer)
        })

    return sections


### 4.Semantic Embedding of Document Sections

##### Each extracted section is transformed into a dense vector representation using the all-mpnet-base-v2 embedding model. These embeddings enable semantic similarity comparison and efficient information retrieval in downstream tasks.

In [56]:
embedder = SentenceTransformer("all-mpnet-base-v2")

sections = []
for doc in documents:
    sections.extend(section_chunk(doc))

section_texts = [s["title"] + " " + s["content"] for s in sections]
section_embeddings = embedder.encode(section_texts)


### Generic Term Identification using TF-IDF (Corpus-Level)

##### This function identifies common terms across document sections using TF-IDF vectorization. Terms that appear in a high percentage of sections are considered generic and filtered out during retrieval to enhance relevance.

In [57]:
def build_generic_terms(sections, top_percent=0.15):
    texts = [s["title"] + " " + s["content"] for s in sections]

    vectorizer = TfidfVectorizer(
        stop_words="english",
        max_df=0.95,     
        min_df=2
    )

    tfidf = vectorizer.fit_transform(texts)
    terms = np.array(vectorizer.get_feature_names_out())

    # Mean TF-IDF per term across corpus
    mean_scores = tfidf.mean(axis=0).A1

    # Lowest TF-IDF â†’ most generic
    cutoff = int(len(terms) * top_percent)
    generic_terms = set(terms[np.argsort(mean_scores)[:cutoff]])

    return generic_terms


### Learning using TF-IDF

##### This function learns the most informative unigrams and bigrams from section content using TF-IDF scoring.The highest-scoring terms represent domain-specific keywords that help guide query understanding and rule-based retriev

In [58]:
def learn_key_terms(sections, top_k=40):
    texts = [s["content"] for s in sections if s["content"]]

    vectorizer = TfidfVectorizer(
        stop_words="english",
        ngram_range=(1, 2),
        max_df=0.85
    )
    X = vectorizer.fit_transform(texts)

    terms = vectorizer.get_feature_names_out()
    scores = X.mean(axis=0).A1

    ranked = sorted(zip(terms, scores), key=lambda x: x[1], reverse=True)
    return [t for t, _ in ranked[:top_k]]

learned_terms = learn_key_terms(sections)


### Automatic Rule-Based Knowledge Graph Construction

##### This function automatically builds a lightweight, in-memory knowledge structure by linking learned key terms to relevant section lines.It captures descriptive statements and numeric constraints using simple pattern matching, enabling structured, explainable knowledge lookup without a graph database.

In [59]:

knowledge_graph = defaultdict(lambda: defaultdict(list))

def build_kg_automatically(sections, learned_terms):
    for sec in sections:
        content = sec["content"]
        if not content:
            continue

        for line in content.split("\n"):
            line_l = line.lower()

            matched_terms = [t for t in learned_terms if t in line_l]
            if not matched_terms:
                continue

            numbers = re.findall(r"\d+\s+(days|weeks|months)", line_l)

            for term in matched_terms:
                if numbers:
                    knowledge_graph[term]["limits"].append(line.strip())
                else:
                    knowledge_graph[term]["description"].append(line.strip())

build_kg_automatically(sections, learned_terms)


### Rule-Based Knowledge Graph Querying

##### This function performs a lightweight knowledge lookup by matching query terms against the in-memory knowledge structure.

In [60]:
def query_knowledge_graph(query):
    q = query.lower()
    collected = []

    for entity, facts in knowledge_graph.items():
        if entity in q:
            for v in facts.values():
                collected.extend(v)

    return collected if collected else None


### Corpus Construction for Semantic Retrieval
##### This function builds a corpus of section texts for semantic retrieval. It combines section titles and content into single text entries, which are then used for embedding and similarity search.

In [61]:
def build_corpus(sections):
    return [(s["title"] + " " + s["content"]).lower() for s in sections]

corpus = build_corpus(sections)


### Clustering Document Sections Using K-Means

##### This block groups semantically similar document sections into clusters using K-Means on their embeddings, enabling topic-based organization. Cluster centroids are then computed to represent the average semantic meaning of each cluster for efficient retrieval and matching.

In [62]:
NUM_CLUSTERS = 6
kmeans = KMeans(n_clusters=NUM_CLUSTERS, random_state=42)
cluster_ids = kmeans.fit_predict(section_embeddings)

cluster_map = {}
for i, cid in enumerate(cluster_ids):
    cluster_map.setdefault(cid, []).append(sections[i])

cluster_centroids = {
    cid: np.mean(
        embedder.encode([s["title"] + " " + s["content"] for s in sec]),
        axis=0
    )
    for cid, sec in cluster_map.items()
}


### Relevant Cluster Selection Using Semantic Similarity

##### This function converts the user query into an embedding and compares it with each cluster centroid using cosine similarity. The cluster with the highest similarity score is selected as the most relevant group of document sections for answering the query.

In [63]:
def select_cluster(query):
    q_emb = embedder.encode([query])[0]
    scores = {
        cid: cosine_similarity([q_emb], [centroid])[0][0]
        for cid, centroid in cluster_centroids.items()
    }
    return max(scores, key=scores.get)


### Learning Corpus-Wide Generic Terms

##### This step identifies frequently occurring, low-information terms across all document sections using TF-IDF statistics.

In [64]:
GENERIC_TERMS = build_generic_terms(sections)
print(f"Generic terms learned: {list(GENERIC_TERMS)}")


Generic terms learned: ['task', 'arrangement', 'download', 'attendance', 'insufficient', 'measurable', 'installation', 'learning', 'effectiveness', 'accrual', 'outside', 'chair', 'pro', 'submitted', '80', 'december', 'strategic', 'parents', 'dependent', 'july', 'does', 'activities', 'address', 'examples', 'updated', 'deadlines', 'possible', 'budget', 'dates', 'permanent', 'prepared', 'longer', 'timeline', 'standard', 'evaluated', 'bonuses', 'rated', 'tickets', 'guide', 'individual', 'ins', 'higher', 'pickup', 'messages', 'smart', 'maintaining', 'limit', 'peers', 'stipend', 'hour', 'emails', 'receipts', 'worked', 'innovation', 'overall', 'previous', 'preparation', 'arm', 'amounts', 'responsiveness', 'open', 'locations', 'answer', 'signed', 'matters', '401', 'public', 'online', 'payment', 'thinking', 'marketing', 'following', 'adjustment', 'march', 'camera', 'rate', 'sales', 'career', 'colleagues', 'acknowledgment', '5500', 'tied', 'clearly', 'participation', 'update', 'general', 'februa

### Query Normalization for Knowledge-Guided Retrieval

##### This function removes corpus-wide generic terms from the user query to emphasize informative keywords.The normalized query improves both rule-based knowledge lookup and semantic section retrieval accuracy.

In [65]:
def normalize_query_for_retrieval(query):
    tokens = query.lower().split()
    filtered = [t for t in tokens if t not in GENERIC_TERMS]
    return " ".join(filtered) if filtered else query


### Best Section Selection Using Similarity Threshold

##### This function identifies the most relevant section by comparing the query embedding with section title embeddings using cosine similarity. A similarity threshold ensures that only sufficiently relevant sections are selected, improving answer precision and reducing noise.

In [66]:
def select_best_section(sections, query, threshold=0.45):
    q_emb = embedder.encode([query])[0]

    # ---- TITLE MATCHING ----
    titles = [s["title"] for s in sections]
    title_embeddings = embedder.encode(titles)
    title_scores = cosine_similarity([q_emb], title_embeddings)[0]

    # ---- CONTENT MATCHING ----
    contents = [s["content"] for s in sections]
    content_embeddings = embedder.encode(contents)
    content_scores = cosine_similarity([q_emb], content_embeddings)[0]

    # ---- COMBINE SCORES (Weighted) ----
    combined_scores = [
        0.6 * title_scores[i] + 0.4 * content_scores[i]
        for i in range(len(sections))
    ]

    best_idx = int(np.argmax(combined_scores))
    #best_score = combined_scores[best_idx]

    return sections[best_idx]



### Section Relevance Scoring with Length Normalization

##### This function computes semantic similarity between a query and a document section using embeddings.A length-normalized score is applied to favor content-rich sections while filtering out weak matches using a relevance threshold.

In [67]:
def is_section_relevant(section, query, threshold=0.55):
    section_text = (section["title"] + " " + section["content"]).strip()

    sec_emb = embedder.encode(section_text)
    qry_emb = embedder.encode(query)

    score = cosine_similarity([sec_emb], [qry_emb])[0][0]

    # ðŸ”§ LENGTH NORMALIZATION (NO keywords)
    length_factor = min(len(section_text) / 300, 1.0)
    score = score * (0.7 + 0.3 * length_factor)

    return score >= threshold


### Knowledge-Guided Section Grouping and Selection

##### These functions group knowledge graphâ€“retrieved facts by their originating document sections and select the most relevant section using semantic similarity with lexical overlap.This enables precise, explainable section-level retrieval guided by lightweight structured knowledge.

In [68]:
def group_by_section(sections, candidate_lines):
    section_map = {}
    for sec in sections:
        sec_lines = sec["content"].split("\n")
        matched = [l for l in sec_lines if l in candidate_lines]
        if matched:
            section_map[sec["title"]] = matched
    return section_map


def select_best_kg_section(query, section_map):
    if not section_map:
        return None, None

    query_l = query.lower()
    query_terms = set(query_l.split())

    titles = list(section_map.keys())
    title_embs = embedder.encode(titles)
    q_emb = embedder.encode([query])[0]

    sims = cosine_similarity([q_emb], title_embs)[0]

    final_scores = []

    for i, title in enumerate(titles):
        title_l = title.lower()

        # lexical overlap bonus (NO hardcoding)
        overlap = sum(1 for t in query_terms if t in title_l)

        # final score
        score = sims[i] + (0.25 * overlap)
        final_scores.append(score)

    best_idx = max(range(len(final_scores)), key=lambda i: final_scores[i])

    return titles[best_idx], section_map[titles[best_idx]]

### Maximal Marginal Relevance (MMR) for Diverse Sentence Selection

##### This function selects the most relevant yet diverse sentences by balancing query relevance and redundancy using the MMR algorithm. It ensures that retrieved results are both highly related to the query and minimally repetitive, improving answer quality and coverage.

In [69]:
def mmr(query, sentences, k=6, lambda_param=0.7):
    #Gives 70% importance to relevance,Gives 30% importance to diversity
    sent_embs = embedder.encode(sentences)
    q_emb = embedder.encode([query])[0]

    selected = []
    used = set()

    for _ in range(min(k, len(sentences))):
        scores = []
        for i, emb in enumerate(sent_embs):
            if i in used:
                continue
            relevance = cosine_similarity([q_emb], [emb])[0][0]
            diversity = max(
                [cosine_similarity([emb], [sent_embs[j]])[0][0] for j in used],
                default=0
            )
            score = lambda_param * relevance - (1 - lambda_param) * diversity
            scores.append((score, i))

        if not scores:
            break

        best = max(scores)[1]
        used.add(best)
        selected.append(sentences[best])

    return selected


### Prompt Construction for Grounded LLM Responses

##### This function constructs a strict, context-bound prompt that limits the LLM to retrieved policy content only.It explicitly prevents hallucination by instructing the model to avoid external knowledge and unsupported generalizations.

In [70]:
def build_llm_prompt(query, retrieved_sentences):

    context = "\n".join(f"- {s}" for s in retrieved_sentences)

    prompt = f"""
You are an enterprise HR policy assistant.

Answer the question using ONLY the information provided in the context below.
Do NOT add external knowledge.
Do NOT generalize.
If a detail is not explicitly mentioned, say "Not specified in the policy".

Context:
{context}

Question:
{query}

Answer:
"""
    return prompt


### LLM-Based Answer Generation

##### This function invokes a language model to generate a response based strictly on the constructed prompt and retrieved context.A low temperature is used to ensure deterministic, policy-grounded answers suitable for enterprise use.

In [71]:
def generate_llm_answer(prompt):
    response = client.chat.completions.create(
        model="gpt-4o-mini",   # or gpt-3.5-turbo
        messages=[
            {"role": "system", "content": "You are a helpful enterprise policy assistant."},
            {"role": "user", "content": prompt}
        ],
        temperature=0.2
    )
    return response.choices[0].message.content.strip()


### Content Richness Validation for Sections

##### This function checks whether a document section contains sufficient meaningful text to be used for retrieval and answer generation.It prevents title-only or sparse sections from being selected, improving retrieval quality and response relevance.

In [72]:
def is_content_rich(section):
    return (
        len(section["content"].strip()) > 60 and
        len(section["content"].split()) > 10
    )


### End-to-End Query Answering with Knowledge-Guided RAG

##### This function answers user queries by first leveraging rule-based structured knowledge to identify relevant sections, followed by semantic fallback when needed.The selected content is refined using MMR and passed to an LLM to generate a grounded, context-aware response.

In [83]:
def answer_query(query):

    # STEP 1: Knowledge Graph candidate generation
    normalized_query = normalize_query_for_retrieval(query)
    kg_candidates = query_knowledge_graph(normalized_query)

    if kg_candidates:
        grouped = group_by_section(sections, kg_candidates)
        best_title, _ = select_best_kg_section(normalized_query, grouped)

        if best_title:
            candidate = next(s for s in sections if s["title"] == best_title)


            if not is_content_rich(candidate):
               print("Title-only section detected, falling back to content section")

                # fallback: closest section with real content
               content_sections = [s for s in sections if is_content_rich(s)]
               # reuse your existing semantic logic
               candidate = select_best_section(content_sections, normalized_query)

            full_section = candidate
            #print("SELECTED SECTION:", full_section["title"])
            #print("CONTENT PREVIEW:", full_section["content"][:300])

            #normalized_query = normalize_query_for_retrieval(query)
            sentences = sent_tokenize(full_section["content"])
            final = mmr(normalized_query, sentences)
            retrieved_indices = [sentences.index(s) for s in final if s in sentences]

            #print("MMR SENTENCES:", final)

            contextual_query = f"{full_section['title']} details"
            prompt = build_llm_prompt(contextual_query, final)

            try:
              llm_answer = " ".join(final)
            except Exception as e:
              llm_answer = "LLM unavailable due to quota limit. Showing retrieved context only."
              print("LLM Error:", e)            

            print("\n===== ANSWER (KG + RAG + LLM) =====\n")
            print(f" Section: {best_title}\n")
            print("LLM answer")
            print(llm_answer)
            return {
            "answer": llm_answer,
            "retrieved_indices": retrieved_indices,
            "sentences": sentences
            }

    # STEP 2: Semantic fallback
    cluster_id = select_cluster(normalized_query)
    cluster_sections = cluster_map[cluster_id]

    full_section = select_best_section(cluster_sections, normalized_query)

    if not full_section:
        print("\n===== ANSWER =====\n")
        print("The requested information is not available in the current knowledge base.")
        return

    #normalized_query = normalize_query_for_retrieval(query)
    sentences = sent_tokenize(full_section["content"])
    final = mmr(normalized_query, sentences)
    retrieved_indices = [sentences.index(s) for s in final if s in sentences]

    print("MMR SENTENCES:", final)

    contextual_query = f"{full_section['title']} details"
    prompt = build_llm_prompt(contextual_query, final)

    try:
       llm_answer =" ".join(final)
    except Exception as e:
       llm_answer = "LLM unavailable due to quota limit. Showing retrieved context only."
       print("LLM Error:", e)

    print("\n===== ANSWER (RAG + LLM) =====\n")
    print(f"Section: {full_section['title']}\n")
    print("LLM answer")
    print(llm_answer)
    return {
    "answer": llm_answer,
    "retrieved_indices": retrieved_indices,
    "sentences": sentences
    }


In [84]:
def precision_at_k(retrieved, relevant, k):
    """
    Precision@K = (# relevant retrieved in top K) / K
    retrieved: ranked list of sentence indices
    relevant: list of relevant sentence indices
    k: cutoff
    """
    if k == 0:
        return 0.0

    retrieved_k = retrieved[:k]
    rel_count = len(set(retrieved_k) & set(relevant))

    return rel_count / k

def recall_at_k(retrieved, relevant, k):
    """
    Recall@K = (# relevant retrieved in top K) / (# relevant)
    retrieved: ranked list of sentence indices
    relevant: list of relevant sentence indices
    k: cutoff
    """
    if not relevant:
        return 0.0

    retrieved_k = retrieved[:k]
    rel_count = len(set(retrieved_k) & set(relevant))

    return rel_count / len(relevant)


def mean_reciprocal_rank_single(retrieved, relevant):
    """
    Computes MRR for a single query
    retrieved: ranked list of sentence indices
    relevant: list of relevant sentence indices
    """
    for rank, idx in enumerate(retrieved, start=1):
        if idx in relevant:
            return 1 / rank
    return 0.0
def confidence_score_from_retrieval(retrieved, k=5):
    if not retrieved:
        return 0.0
    return round(min(len(retrieved), k) / k, 3)


In [85]:
result = answer_query("pregnancy leave policy")
retrieved = result["retrieved_indices"]
sentences = result["sentences"]




Title-only section detected, falling back to content section

===== ANSWER (KG + RAG + LLM) =====

 Section: PAGE 4 - MATERNITY LEAVE POLICY

LLM answer
MATERNITY LEAVE POLICY
4.1 Eligibility:
Female employees who have completed 6 months of continuous service are eligible
for maternity leave. 4.2 Leave Duration:
- Total maternity leave: 16 weeks (112 days)
- Can be taken up to 4 weeks before expected delivery date
- Minimum 12 weeks must be taken after delivery
4.3 Paid Leave:
- First 12 weeks: 100% of base salary
- Weeks 13-16: 50% of base salary
- Benefits continue during entire leave period
4.4 Application Process: 4.


In [86]:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# ---------------------------------
# Build corpus ONLY from the section
# ---------------------------------
corpus = sentences
query_text = "pregnancy leave policy"

vectorizer = TfidfVectorizer(stop_words="english")
tfidf_matrix = vectorizer.fit_transform(corpus)
query_vec = vectorizer.transform([query_text])

# ---------------------------------
# Compute relevance scores
# ---------------------------------
similarity_scores = (tfidf_matrix @ query_vec.T).toarray().ravel()

# ---------------------------------
# Define relevant sentences
# Rule: sentences with score >= mean score
# ---------------------------------
mean_score = np.mean(similarity_scores)
relevant = [
    i for i, score in enumerate(similarity_scores)
    if score >= mean_score
]

# ---------------------------------
# Metrics
# ---------------------------------
k = min(3, len(retrieved))

precision = precision_at_k(retrieved, relevant, k)
recall = recall_at_k(retrieved, relevant, k)
mrr = mean_reciprocal_rank_single(retrieved, relevant)

print(f"Precision@{k}: {precision:.3f}")
print(f"Recall@{k}: {recall:.3f}")
print(f"MRR: {mrr:.3f}")


Precision@3: 0.333
Recall@3: 1.000
MRR: 1.000


In [4]:
!pip install gradio


Collecting gradio
  Downloading gradio-6.5.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting brotli>=1.1.0 (from gradio)
  Downloading brotli-1.2.0-cp310-cp310-win_amd64.whl.metadata (6.3 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.128.0-py3-none-any.whl.metadata (30 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-1.0.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==2.0.3 (from gradio)
  Downloading gradio_client-2.0.3-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.11.6-cp310-cp310-win_amd64.whl.metadata (43 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipar


[notice] A new release of pip is available: 25.3 -> 26.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import gradio as gr
import traceback

def chat_interface(user_query):
    try:
        result = answer_query(user_query)
        print("DEBUG result:", result)  # shows in notebook output

        # CASE 1: result is dict
        if isinstance(result, dict):
            return result.get("answer", "No 'answer' key found")

        # CASE 2: result is already string
        if isinstance(result, str):
            return result

        return "Unexpected return type"

    except Exception as e:
        print("ERROR TRACEBACK:")
        traceback.print_exc()
        return f"Error occurred: {str(e)}"

gr.Interface(
    fn=chat_interface,
    inputs=gr.Textbox(label="Ask a question"),
    outputs=gr.Textbox(label="Answer"),
    title="Simple RAG Chatbot"
).launch(share=False)


* Running on local URL:  http://127.0.0.1:7864
* To create a public link, set `share=True` in `launch()`.




ERROR TRACEBACK:


Traceback (most recent call last):
  File "C:\Users\91979\AppData\Local\Temp\ipykernel_10860\631478515.py", line 6, in chat_interface
    result = answer_query(user_query)
NameError: name 'answer_query' is not defined. Did you mean: 'user_query'?


Created dataset file at: .gradio\flagged\dataset1.csv


: 

In [None]:
# %%writefile midsemcode.py

# # --------- COPY FROM HERE ---------

# # Paste ALL imports that answer_query depends on
# # Example (yours may have more):
# import re
# import numpy as np
# from 
#  import SentenceTransformer
# from sklearn.metrics.pairwise import cosine_similarity

# # Paste any global variables/models used by answer_query
# # Example:
# # model = SentenceTransformer("all-MiniLM-L6-v2")
# # sentences = [...]
# # embeddings = [...]

# def answer_query(query):
#     # â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡â¬‡
#     # COPY THE *ENTIRE CONTENT* OF YOUR
#     # EXISTING answer_query FUNCTION HERE
#     # EXACTLY AS IT IS
#     # â¬†â¬†â¬†â¬†â¬†â¬†â¬†â¬†â¬†â¬†â¬†â¬†â¬†â¬†â¬†â¬†â¬†â¬†â¬†â¬†

#     return result

# # --------- END COPY ---------


Writing midsemcode.py


In [None]:
def answer_query(query):
    # COPY EVERYTHING INSIDE YOUR EXISTING answer_query FUNCTION
    # EXACTLY AS IT IS
    return result
