In [1]:
import json
# Load the document collection that the rest of the notebook indexes and searches.
# JSON is UTF-8 by specification, so the encoding is pinned explicitly instead of
# being left to the platform default (which is not UTF-8 on every system).
with open('../data/database.json', 'rt', encoding='utf-8') as f_in:
    documents = json.load(f_in)

In [2]:
import minsearch

In [4]:
from sentence_transformers import SentenceTransformer

In [5]:
# Load the sentence-embedding model by name.
# NOTE(review): the only visible use of `model` (`model.encode(query)`) is
# commented out in the next cell, so the model looks unused in this part of the
# notebook — confirm before removing.
model_name = 'multi-qa-MiniLM-L6-cos-v1'
model = SentenceTransformer(model_name)

In [3]:
# Sample user question reused by the retrieval and RAG cells further down.
query = 'I am an upcoming Phd Student at NIT warangal, can you please let me know how to the registration and document uploading on SMILE ERP portal for Ph.D?'
# Disabled experiment: embed the query and inspect the vector length.
# qv = model.encode(query)
# len(qv)

In [4]:
from elasticsearch import Elasticsearch
# Client for the Elasticsearch node on localhost.
es_client = Elasticsearch('http://localhost:9200')

index_name = "course-questions"

# Single shard, no replicas; every document field is mapped as `text`.
index_settings = {
    "settings": {"number_of_shards": 1, "number_of_replicas": 0},
    "mappings": {
        "properties": {
            field: {"type": "text"} for field in ("url", "date", "title", "info")
        }
    },
}

# Drop any previous copy of the index (a missing index is not an error), then
# recreate it so the cell can be re-run from scratch.
es_client.indices.delete(index=index_name, ignore_unavailable=True)
es_client.indices.create(index=index_name, body=index_settings)


ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-questions'})

In [5]:
from tqdm.auto import tqdm
# Index the documents one request at a time; tqdm renders the progress bar.
for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

# Force a refresh so the documents just written are searchable immediately.
# Without it, a search issued right after this cell (e.g. under "Run All") can
# run before the index has refreshed and miss documents. The trailing
# semicolon suppresses the response echo in the notebook.
es_client.indices.refresh(index=index_name);

  0%|          | 0/89 [00:00<?, ?it/s]

In [6]:
def elastic_search(query):
    """Return the `_source` of the top five Elasticsearch hits for `query`.

    Issues a `best_fields` multi-match over `title` (boosted x2) and `info`
    against the `index_name` index through the notebook-level `es_client`.
    """
    multi_match = {
        "query": query,
        "fields": ["title^2", "info"],
        "type": "best_fields",
    }
    request_body = {
        "size": 5,
        "query": {"bool": {"must": {"multi_match": multi_match}}},
    }

    hits = es_client.search(index=index_name, body=request_body)['hits']['hits']
    return [hit['_source'] for hit in hits]

In [7]:
# Retrieve the top Elasticsearch hits for the sample query.
search_results = elastic_search(query=query)

In [8]:
# Display the retrieved documents as the cell output.
search_results

[{'url': 'https://nitw.ac.in/cdis/assets/images/docs/Admission_Steps_ERP.pdf',
  'date': '2025-05-26',
  'title': 'Ph.D./ MBA/ M.Tech. (Self-Financed) Admissions - 2025 Registration and Document Uploading on SMILE ERP Portal | Instructions for registration and document uploading on SMILE ERP portal for Ph.D./ MBA / M.Tech. (Self-Financed) students 2025.',
  'info': 'SMILE Instruction for Reporting \n\nStep 1 Go to:- https://erp.nitw.ac.in/ext/adm/login \n\nStep 2 Follow the below Steps \n\nContact Details: For Admissions related quiries : email id -> admissions@nitw.ac.in For Technical related quiries : email id -> erp_support@nitw.ac.in \n\nClick on \n\nEnter the Email, Mobile No, Full Name & Date of Birth. The data must be entered same as provided during application by the candidate. Then click on \n\nA Token and an OTP will be sent to your registered email as two separate mails. \n\nEnter the Token and an OTP sent to your registered email. create your own password and Confirm Passwo

In [1]:
def prompt_builder(query, search_results):
    """Build the grounded-answer prompt that is sent to the LLM.

    Args:
        query: The user's question; inserted verbatim into the prompt.
        search_results: Iterable of document dicts. Each document must provide
            the keys 'title', 'info' and 'url'.

    Returns:
        The formatted prompt string with surrounding whitespace stripped.

    Raises:
        KeyError: If a document lacks 'title', 'info' or 'url'.
    """
    # Function-scope import keeps this cell runnable on its own.
    from textwrap import dedent

    # dedent() strips the source-code indentation so the model receives a
    # flush-left prompt instead of one where every line carries leading spaces.
    prompt_template = dedent("""\
        You are an AI assistant designed to help students of NIT Warangal (NITW) by answering their questions accurately and responsibly.

        You are provided with CONTEXT retrieved from trusted NITW sources. Your job is to:

        1.  Base your answer *primarily* on the provided CONTEXT. Synthesize information from the context to address the user's query as accurately as possible.
        2.  Do NOT use any outside or prior knowledge. Your response must be derived *solely* from the provided context.
        3.  **Handling Insufficient or Loosely Related Context:**
            *   If the context does not contain a direct or complete answer to the query, do *not* invent information.
            *   Instead, summarize the most relevant information found in the context related to the query.
            *   If the context is only loosely related or minimal, acknowledge the query and provide the relevant context snippets or simply list the source URL(s) as the best available information based on the text provided.
            *   Do *not* use the phrase "I could not find a verified answer to that in the available information."
        4.  **Crucially:** Do NOT hallucinate. Only state facts or information that are explicitly mentioned or clearly inferable *from the provided context*.
        5.  ALWAYS cite the source(s) used by including the URL(s) at the end of your response.

        Now, answer the following question:{query}
        URL: {url}

        CONTEXT: {context}

        INSTRUCTIONS:
        - Only use facts and information derived *strictly* from the context.
        - Do not assume, generate, or state information not backed by the context.
        - If a direct answer isn't possible from the context, provide relevant summaries or snippets from the context instead.
        - Make your response clear and concise.
        - At the end of your answer, include a reference to the source like:
        (SOURCE: {url})""")

    # Materialise once so a one-shot iterator can feed both passes below.
    docs = list(search_results)

    context = "".join(
        f"title: {doc['title']}\ninfo: {doc['info']}\n\n" for doc in docs
    )

    # The template already supplies the "URL:" / "SOURCE:" labels, so only the
    # bare URLs are inserted. dict.fromkeys() drops repeated URLs while keeping
    # retrieval order, and the single-line join keeps "(SOURCE: ...)" intact.
    sources = ", ".join(dict.fromkeys(str(doc['url']) for doc in docs))

    return prompt_template.format(url=sources, context=context, query=query).strip()

Collecting genai
  Downloading genai-2.1.0-py3-none-any.whl.metadata (6.5 kB)
Collecting openai<0.28.0,>=0.27.0 (from genai)
  Downloading openai-0.27.10-py3-none-any.whl.metadata (13 kB)
Collecting tabulate<0.10.0,>=0.9.0 (from genai)
  Downloading tabulate-0.9.0-py3-none-any.whl.metadata (34 kB)
Collecting tiktoken<0.4.0,>=0.3.2 (from genai)
  Downloading tiktoken-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Downloading genai-2.1.0-py3-none-any.whl (16 kB)
Downloading openai-0.27.10-py3-none-any.whl (76 kB)
Downloading tabulate-0.9.0-py3-none-any.whl (35 kB)
Downloading tiktoken-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: tabulate, tiktoken, openai, genai
  Attempting uninstall: tiktoken
    Found existing installation: tiktoken 0.9.0
    Uninstalli

In [10]:
from openai import OpenAI

# OpenAI-compatible client pointed at the OpenRouter endpoint.
# NOTE(review): no api_key is passed here — presumably the SDK reads it from the
# environment; confirm the key is set before running.
client = OpenAI(base_url="https://openrouter.ai/api/v1")

In [11]:
def llm(prompt):
    """Send `prompt` as a single user message and return the reply text.

    Uses the notebook-level `client`; only the first returned choice is read.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model="mistralai/mixtral-8x7b-instruct",  # or any other model id from OpenRouter
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [12]:
def rag(query):
    """Answer `query` end to end: retrieve documents, build the prompt, call the LLM."""
    retrieved_docs = elastic_search(query)
    return llm(prompt_builder(query, retrieved_docs))

In [13]:
# Run the full RAG pipeline on the sample query and display the generated answer.
answer = rag(query)
answer

" To register and upload documents for Ph.D./ MBA/ M.Tech. (Self-Financed) admissions at NIT Warangal, follow these steps:\n\n1. Go to: https://erp.nitw.ac.in/ext/adm/login\n2. After entering your email, mobile number, full name, and date of birth, click on 'Generate OTP and Token'. A Token and OTP will be sent to your registered email.\n3. Enter the Token and OTP sent to your email, create your own password and Confirm Password, then click on 'Register'.\n4. After successful registration, sign in using your email ID and password.\n5. Select your concerned advertisement, click on 'Apply', select the program and fill in the required details.\n6. Upload all the required documents and pay the fees (if applicable).\n7. Once each section is completed, 'Freeze' the application.\n8. After verification of your application, take a printout of the biodata from your login and submit it during physical reporting.\n\n(SOURCE: url: https://nitw.ac.in/cdis/assets/images/docs/Admission_Steps_ERP.pdf\n

In [15]:
# minsearch index over the `title` and `info` text fields; no keyword fields
# are configured.
Index = minsearch.Index(text_fields=["title", "info"], keyword_fields=[])

In [16]:
# Fit the minsearch index on the loaded documents.
Index.fit(documents)

<minsearch.Index at 0x7fd20f3e1c30>

In [17]:
 def search(query):
     """Return the top five minsearch results for `query`.

     `title` and `info` are weighted equally (boost 1 each); the lookup goes
     through the notebook-level `Index`.
     """
     return Index.search(
         query=query,
         boost_dict={"title": 1, 'info': 1},
         num_results=5,
     )

In [18]:
# Query the minsearch index with the same sample question and display the results.
search(query)

[{'url': 'https://nitw.ac.in/cdis/assets/images/docs/Admission_Steps_ERP.pdf',
  'date': '2025-05-26',
  'title': 'Ph.D./ MBA/ M.Tech. (Self-Financed) Admissions - 2025 Registration and Document Uploading on SMILE ERP Portal | Instructions for registration and document uploading on SMILE ERP portal for Ph.D./ MBA / M.Tech. (Self-Financed) students 2025.',
  'info': 'SMILE Instruction for Reporting \n\nStep 1 Go to:- https://erp.nitw.ac.in/ext/adm/login \n\nStep 2 Follow the below Steps \n\nContact Details: For Admissions related quiries : email id -> admissions@nitw.ac.in For Technical related quiries : email id -> erp_support@nitw.ac.in \n\nClick on \n\nEnter the Email, Mobile No, Full Name & Date of Birth. The data must be entered same as provided during application by the candidate. Then click on \n\nA Token and an OTP will be sent to your registered email as two separate mails. \n\nEnter the Token and an OTP sent to your registered email. create your own password and Confirm Passwo