In [2]:
import sys
import torch

print("Python:", sys.version)
print("Executable:", sys.executable)
print("Torch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())


Python: 3.10.19 | packaged by Anaconda, Inc. | (main, Oct 21 2025, 16:41:31) [MSC v.1929 64 bit (AMD64)]
Executable: c:\Users\AICOE\anaconda3\envs\rag_env\python.exe
Torch: 2.5.1+cu121
CUDA available: True


In [4]:
import os
import numpy as np
import faiss

from sentence_transformers import SentenceTransformer
from openai import OpenAI


In [5]:
import os
from dotenv import load_dotenv

load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

assert OPENAI_API_KEY is not None, "OPENAI_API_KEY is not set in environment variables."

In [6]:
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY")
)

assert os.environ.get("OPENAI_API_KEY") is not None, "OPENAI_API_KEY not set"


In [7]:
policy_chunks = [
    {
        "text": (
            "Grievances must be submitted within 90 days from the date of occurrence. "
            "Grievances submitted after 90 days shall not be entertained unless supported "
            "by valid documentary evidence."
        ),
        "metadata": {
            "policy_id": "HED_GRS",
            "section": "Time Limit for Raising Grievances",
            "clause_id": "4.1"
        }
    },
    {
        "text": (
            "A grievance submission must include applicant’s full name, registered mobile number "
            "and email ID, institution name, detailed description of the grievance, and supporting "
            "documents if applicable. Incomplete submissions may be rejected."
        ),
        "metadata": {
            "policy_id": "HED_GRS",
            "section": "Required Information",
            "clause_id": "4.2"
        }
    },
    {
        "text": "Scholarship-related grievances shall be resolved within 30 working days.",
        "metadata": {
            "policy_id": "HED_GRS",
            "section": "Resolution Timeline",
            "clause_id": "5.2-SCH"
        }
    },
    {
        "text": (
            "A grievance shall be rejected if it falls outside the defined scope, "
            "is submitted beyond the 90-day limit without justification, "
            "mandatory information is missing, or the matter is sub judice."
        ),
        "metadata": {
            "policy_id": "HED_GRS",
            "section": "Grounds for Rejection",
            "clause_id": "6.0"
        }
    }
]


In [8]:
device = "cuda" if torch.cuda.is_available() else "cpu"

embedder = SentenceTransformer(
    "all-MiniLM-L6-v2",
    device=device
)

print("Embedding model loaded on:", device)


Embedding model loaded on: cuda


In [9]:
policy_texts = [c["text"] for c in policy_chunks]

policy_embeddings = embedder.encode(
    policy_texts,
    normalize_embeddings=True
)

policy_embeddings.shape


(4, 384)

In [10]:
dim = policy_embeddings.shape[1]

index = faiss.IndexFlatIP(dim)  # cosine similarity
index.add(np.array(policy_embeddings))

print("Total policy vectors:", index.ntotal)


Total policy vectors: 4


In [19]:
SIMILARITY_THRESHOLD = 0.60
TOP_K = 5

def retrieve_policy_chunks(email_text):
    query_embedding = embedder.encode([email_text], normalize_embeddings=True)
    scores, indices = index.search(query_embedding, TOP_K)

    results = []
    for score, idx in zip(scores[0], indices[0]):
        if idx == -1:
            continue
        if score >= SIMILARITY_THRESHOLD:
            results.append({
                "text": policy_chunks[idx]["text"],
                "metadata": policy_chunks[idx]["metadata"],
                "score": float(score)
            })

    # Fallback: include top-1 if nothing passed threshold
    if not results and indices[0][0] != -1:
        idx = indices[0][0]
        results.append({
            "text": policy_chunks[idx]["text"],
            "metadata": policy_chunks[idx]["metadata"],
            "score": float(scores[0][0]),
            "fallback": True
        })

    return results



In [20]:
email_text = "I submitted my grievance after 5 months. Will it be accepted?"

retrieved_chunks = retrieve_policy_chunks(email_text)

for r in retrieved_chunks:
    print("Score:", r["score"])
    print("Clause:", r["metadata"]["clause_id"])
    print("Text:", r["text"])
    print("-" * 60)


Score: 0.7077915668487549
Clause: 4.2
Text: A grievance submission must include applicant’s full name, registered mobile number and email ID, institution name, detailed description of the grievance, and supporting documents if applicable. Incomplete submissions may be rejected.
------------------------------------------------------------
Score: 0.6974130272865295
Clause: 6.0
Text: A grievance shall be rejected if it falls outside the defined scope, is submitted beyond the 90-day limit without justification, mandatory information is missing, or the matter is sub judice.
------------------------------------------------------------
Score: 0.601677656173706
Clause: 4.1
Text: Grievances must be submitted within 90 days from the date of occurrence. Grievances submitted after 90 days shall not be entertained unless supported by valid documentary evidence.
------------------------------------------------------------


In [21]:
retrieved = retrieve_policy_chunks(email_text)

print("Retrieved chunks:", len(retrieved))

# TEMP: print raw scores for debugging
query_embedding = embedder.encode([email_text], normalize_embeddings=True)
scores, indices = index.search(query_embedding, 5)

for score, idx in zip(scores[0], indices[0]):
    print("Score:", score, "| Clause:", policy_chunks[idx]["metadata"]["clause_id"])


Retrieved chunks: 3
Score: 0.70779157 | Clause: 4.2
Score: 0.697413 | Clause: 6.0
Score: 0.60167766 | Clause: 4.1
Score: 0.4711115 | Clause: 5.2-SCH
Score: -3.4028235e+38 | Clause: 6.0


In [22]:
SYSTEM_PROMPT = """
You are a policy-bound compliance assistant for the Higher Education Department.

MANDATORY RULES:
- Use ONLY the provided policy excerpts.
- Do NOT use general or prior knowledge.
- Do NOT invent rules, timelines, or exceptions.
- If the policy does not explicitly cover the case, clearly say so.
- Cite the policy clause(s) used.
- Maintain a formal, official government tone.
"""


In [23]:
def build_user_prompt(email_text, policy_chunks):
    policy_context = "\n\n".join(
        f"[Clause {c['metadata']['clause_id']}] {c['text']}"
        for c in policy_chunks
    )

    return f"""
USER EMAIL:
{email_text}

APPLICABLE POLICY EXCERPTS:
{policy_context}

TASK:
Generate a suggested reply strictly based on the policy.
Include:
1. Decision
2. Policy justification
3. Formal email reply
"""


In [24]:
def call_openai(system_prompt, user_prompt):
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        temperature=0.1,  # VERY IMPORTANT
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )

    return response.choices[0].message.content


In [25]:
def generate_policy_reply(email_text):
    retrieved = retrieve_policy_chunks(email_text)

    if not retrieved:
        return "The submitted grievance is not covered under the current SOP."

    user_prompt = build_user_prompt(email_text, retrieved)

    return call_openai(SYSTEM_PROMPT, user_prompt)


In [27]:
final_reply = generate_policy_reply(
    "I submitted my grievance after 91 days. Will it be accepted?"
)

print(final_reply)


**Decision:** Your grievance will be rejected as it was submitted after the 90-day limit without justification.

**Policy Justification:** According to Clause 4.1, grievances must be submitted within 90 days from the date of occurrence. Grievances submitted after this period shall not be entertained unless supported by valid documentary evidence. Since your submission was made after 91 days and no justification was provided, it falls outside the defined scope as stated in Clause 6.0.

**Formal Email Reply:**

Subject: Grievance Submission Status

Dear [User's Name],

Thank you for your email regarding the status of your grievance submission.

After reviewing your case, I regret to inform you that your grievance will be rejected as it was submitted after the 90-day limit without the necessary justification. According to Clause 4.1 of the grievance policy, grievances must be submitted within 90 days from the date of occurrence. Furthermore, Clause 6.0 states that grievances submitted bey