In [1]:
import json
##query_vector_database_tool (RAG)
import os
from openai import OpenAI

open_router_key = os.getenv("OPENROUTER_API_KEY", "")
BASE_URL = "https://openrouter.ai/api/v1"
MODEL = "google/gemini-3-flash-preview"
openrouter = OpenAI(api_key=open_router_key, base_url=BASE_URL)

def call_llm(MODEL, system_message, user_message):
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_message},
    ]
    response = openrouter.chat.completions.create(model=MODEL, messages=messages)
    return response.choices[0].message.content



## vector DB utilities for DHRP documents
import chromadb
from sentence_transformers import SentenceTransformer
from pathlib import Path
DB_PATH = r"C:\Users\rauna\projects\My Projects\IPO_Checker\ChromaDB"
chroma_client = chromadb.PersistentClient(path=DB_PATH)
COLLECTION_NAME = 'dhrp_embeddings_collection'
EMBEDDING_MODEL = 'multi-qa-mpnet-base-dot-v1'
embedding_model = SentenceTransformer(EMBEDDING_MODEL)
dhrp_doc_collection = chroma_client.get_collection(COLLECTION_NAME)



def get_relevant_docs(question, collection, top_k=5):
    print('reteriving sentence documents for company...')

    embedded_question = embedding_model.encode(question)
    print(f"Embedded question:")

    results = dhrp_doc_collection.query(
            query_embeddings=[embedded_question],
            n_results=top_k
            )
        # display(f"Top {top_k} relevant documents for question '{question}':{results}\n")
    return results


def build_context_json(documents):
    print('building context in JSON...')

    docs = documents.get("documents", [[]])[0]      
    metadatas = documents.get("metadatas", [[]])[0]

    context_list = []

    for doc, meta in zip(docs, metadatas):
        entry = {
            "source": meta.get("source", "unknown_source.txt").replace(".txt", ""),
            "page": meta.get("page", "Unknown page"),
            "content": doc.strip()
        }
        context_list.append(entry)

    return json.dumps({"context_results": context_list}, indent=4, ensure_ascii=False)

def rag_pipeline(parameters, collection):

    answers = {}
    for questions in parameters :

        for key, question in questions.items() :
            documents = get_relevant_docs(question, collection)
            context_json = build_context_json(documents)
        

            system_message = f'''You are a Senior IPO Investment Analyst and SEBI-registered Research Analyst equivalent. 
                Use Indian IPO Draft Red Herring Prospectus (DRHP) documents to answer the questions accurately.
                Summaralize and reference the relevant sections from the DRHP documents in your answers.
                Use the following context to answer the questions.\n\nContext:\n
                {context_json}'''
          
            # print(f'system_message prepared.{system_message}')

            answer = call_llm(MODEL,system_message,question)
            print('llm answers')
            answers[key] =answer

    return answers


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
eval_parameters = [
    {"Manufacturing Capacity Utilization": "Closer to 100% utilization (to witness economies of scale and reduce cost per unit)"},
    {"Raw Material Source Reliability": "Analyze potential trade disruptions if raw material comes from sensitive locations (e.g., China)"},
    {"Management Future Plans": "A clean and clear thought process on future plans and strategies is preferred"},

    {"GDP Growth Rate (Annual)": "Check the specific rate and comparative performance (e.g., India growing faster than China)"},
    {"Labor Cost": "Check relative costs (e.g., labor cost in India was much lower than China)"},
    {"Macro Indicators": "Review unemployment rate, CRR, balance of trade, and current account to GDP percentage"},
    {"Industry Growth Rate (CAGR)": "The overall industry should be performing well. Compare current growth rate to expected growth rate"},
    {"Expected Industry Growth Rate": "Prefer expected growth rate to be higher than the current growth rate (e.g., Indian specialty chemical industry expected at 10.8% vs 9.8% previously)"},

    {"Revenue, EBITDA, and Profit After Tax (PAT) Growth Rate": "Should ideally be higher than the industry growth rate (e.g., 24% company growth vs 14% industry growth)"},
    {"Debt to Equity Ratio": "A ratio up to 2:1 is generally considered okay"},
    {"Cash Flow from Operations (CFO)": "Must be positive and growing at a good rate"},

    {"P/E Ratio (Price to Earning Ratio)": "Compare against listed peers; caution if the P/E is significantly higher (Use P/S ratio if the company is in losses)"},
    {"Return on Net Worth (RONW)": "Compare against peers; higher RONW is preferable"},
    {"Comparable Transaction Multiple (CTM)": "IPO price should be justified if it is way higher than the price the company issued shares at recently"},
    {"Gray Market Premium (GMP)": "Provides guidance: IPO Price + GMP = Expected Listing Price (not guaranteed)"},

    {"Type of Issue": "Prefer a Fresh Issue over a major Offer For Sale (OFS)"},
    {"Use of Proceeds": "Prefer funds used for expansion purpose over general corporate purpose"},
    {"Litigation/Investigation": "Be cautious if serious agencies like CBI or SFIO are investigating the promoters"},
    {"Customer Concentration": "Avoid when a small number of clients account for a high percentage of revenue (e.g., top 10 customers = 90%)"},
]

In [15]:
eval_questions = [
    {"Manufacturing Capacity Utilization": "What is the current utilization level of the company's manufacturing capacity (to witness economies of scale and reduce cost per unit)?"},

    {"Raw Material Source Reliability": "How reliable are the company's raw material sources, and are there potential trade disruptions due to sourcing from sensitive locations such as China?"},

    {"Management Future Plans": "What are the company management's clearly stated future plans and strategies?"},

    {"GDP Growth Rate (Annual)": "What is the current GDP growth rate and how does it compare with other relevant economies (e.g., India vs China)?"},

    {"Labor Cost": "What is the comparative labor cost for the regions in which the company operates (e.g., India vs China)?"},

    {"Macro Indicators": "What do the key macro indicators—unemployment rate, CRR, balance of trade, and current account to GDP percentage—indicate about the economic environment?"},

    {"Industry Growth Rate (CAGR)": "What is the current CAGR of the industry, and is the overall industry performing well compared to historical or expected growth?"},

    {"Expected Industry Growth Rate": "What is the expected future industry growth rate, and is it higher than the current growth rate?"},

    {"Revenue, EBITDA, and Profit After Tax (PAT) Growth Rate": "What are the company's Revenue, EBITDA, and PAT growth rates, and are they higher than the industry growth rate?"},

    {"Debt to Equity Ratio": "What is the company's current debt-to-equity ratio, and is it within the generally acceptable threshold of up to 2:1?"},

    {"Cash Flow from Operations (CFO)": "Is the company's cash flow from operations positive and growing at a healthy rate?"},

    {"P/E Ratio (Price to Earning Ratio)": "How does the company's P/E ratio compare with listed peers, and is the valuation justified (or should P/S be used if the company is loss-making)?"},

    {"Return on Net Worth (RONW)": "What is the company's Return on Net Worth, and how does it compare to peer companies?"},

    {"Comparable Transaction Multiple (CTM)": "Is the IPO price justified when compared with recent share issuance prices based on Comparable Transaction Multiples?"},

    {"Gray Market Premium (GMP)": "What is the current Gray Market Premium (GMP), and what expected listing price does it imply (while noting it is not guaranteed)?"},

    {"Type of Issue": "Is the IPO structured as a Fresh Issue, an Offer For Sale (OFS), or a mix—and what is the proportion of each?"},

    {"Use of Proceeds": "How does the company intend to use the IPO proceeds, and what portion is allocated toward expansion versus general corporate purposes?"},

    {"Litigation/Investigation": "Are there any ongoing litigations or investigations involving the company or promoters, especially by serious agencies like CBI or SFIO?"},

    {"Customer Concentration": "What is the level of customer concentration, and do a small number of clients contribute disproportionately to revenue (e.g., top 10 customers = 90%)?"}
]


In [4]:
for i in range(len(eval_questions)) :
    topics = list(eval_questions[i].keys())
    print(topics)

['Manufacturing Capacity Utilization']
['Raw Material Source Reliability']


In [None]:
def ipo_rag_answerer(parameters,vector_collection = dhrp_doc_collection):
    results = []
    results.append(rag_pipeline(parameters, vector_collection))
    return results

In [None]:
# ipo_tester(eval_questions)

In [None]:
# def ipo_evaluator(company_questions, ideal_answers):
#     results = []
#     for i in range(len(company_questions)):
#         company_answer = ipo_rag_answerer([company_questions[i]])
#         # company_answer = company_questions[i]
#         ideal_answer = ideal_answers[i]
#         ipo_expert_prompt = '''You are an expert IPO analyst.
#                         Evaluate the company answer against the ideal answer and provide a score from 1 to 10, where 10 indicates a perfect match.  
#                         Consider accuracy, completeness, relevance, clarity, and depth of analysis in your evaluation.
#                         Only provide the score as a number from 1 to 10.'''
#         evaluation_prompt = f"Evaluate the following company answer against the ideal answer.\n\nCompany Answer: {company_answer}\n\nIdeal Answer: {ideal_answer}\n\nProvide a score from 1 to 10."
#         evaluation = call_llm(MODEL, ipo_expert_prompt, evaluation_prompt)
#         results.append(evaluation)
#     return results

In [None]:
def ipo_evaluator(company_questions, ideal_answers):

    results = []

    for i in range(len(company_questions)):

        topics = list(company_questions[i].keys())
        # questions = company_questions[i].values()

        company_answer = ipo_rag_answerer([company_questions[i]])[0].values()

        for topic, company_answer in zip(topics, company_answer):
            # print(f"Topic: {topic}, Answer: {company_answer}")
            results.append({topic: company_answer})


        ideal_answer = ideal_answers[i]
        print(ideal_answer)
        results.append({"ideal_answer": ideal_answer})

        ipo_expert_prompt = '''You are an expert IPO analyst.
                        Evaluate the company answer against the ideal answer and provide a score from 1 to 10, where 10 indicates a perfect match.  
                        Consider accuracy, completeness, relevance, clarity, and depth of analysis in your evaluation.
                        Provide only score and nothing else. 
                        '''
        
        evaluation_prompt = f'''Evaluate the following company answer against the ideal answer.\n\n
                            Topic: {topic}\n\n
                            Company Answer: {company_answer}\n\n
                            Ideal Answer: {ideal_answer}\n\n
                            Understand the Company Answer and give the result a score from 1 to 10.
                            Only provide the score as a number from 1 to 10.'''
        
        evaluation = call_llm(MODEL, ipo_expert_prompt, evaluation_prompt)
        results.append({"evaluation_score": evaluation})
    return results

In [8]:
output = ipo_evaluator(eval_questions, eval_parameters)

reteriving sentence documents for company...
Embedded question:
building context in JSON...
llm answers
{'Manufacturing Capacity Utilization': 'Closer to 100% utilization (to witness economies of scale and reduce cost per unit)'}


In [10]:

display(output)
len(output)

[{'Manufacturing Capacity Utilization': 'Based on the DRHP of KSH International Limited, the company\'s current utilization level and manufacturing capacity details are as follows:\n\n### **Manufacturing Capacity Utilization (Fiscal 2025)**\nFor the full **Fiscal Year 2025**, the company operated at the following levels:\n*   **Total Production Capacity:** 29,045 MT\n*   **Magnet Winding Wires Sales Volume:** 23,324 MT\n*   **Implied Utilization:** Based on sales volume against total capacity, the utilization for FY25 stands at approximately **80.30%**.\n\n### **Recent Performance (Q1 Fiscal 2026)**\nFor the three-month period ended **June 30, 2025**:\n*   **Total Production Capacity:** 29,045 MT (Annualized basis)\n*   **Magnet Winding Wires Sales Volume:** 6,114 MT\n*   **Implied Utilization:** Approximately **84.20%** (on an annualized quarterly basis).\n\n### **Historical Capacity & Growth**\nThe company has been increasing its capacity and sales volume consistently to drive econom

3

In [11]:
def avg_score(evaluation_result):
    total_score = 0
    count = 0

    for item in evaluation_result:
        if "evaluation_score" in item:
            try:
                score = int(item["evaluation_score"])
                total_score += score
                count += 1
            except ValueError:
                print(f"Invalid score format: {item['evaluation_score']}")

    average = total_score / count if count > 0 else 0
    return average  

In [30]:
def ipo_result(ipo_name,history):
    evaluation_result = ipo_evaluator(eval_questions, eval_parameters)
    average_score = avg_score(evaluation_result)
    result_summary = {
        "IPO Name": ipo_name,
        "Average Evaluation Score": average_score,
        "Detailed Evaluation": evaluation_result
    }
    return result_summary

In [20]:
final_result = ipo_result("KSH")

reteriving sentence documents for company...
Embedded question:
building context in JSON...
llm answers
{'Manufacturing Capacity Utilization': 'Closer to 100% utilization (to witness economies of scale and reduce cost per unit)'}
reteriving sentence documents for company...
Embedded question:
building context in JSON...
llm answers
{'Raw Material Source Reliability': 'Analyze potential trade disruptions if raw material comes from sensitive locations (e.g., China)'}
reteriving sentence documents for company...
Embedded question:
building context in JSON...
llm answers
{'Management Future Plans': 'A clean and clear thought process on future plans and strategies is preferred'}
reteriving sentence documents for company...
Embedded question:
building context in JSON...
llm answers
{'GDP Growth Rate (Annual)': 'Check the specific rate and comparative performance (e.g., India growing faster than China)'}
reteriving sentence documents for company...
Embedded question:
building context in JSON.

In [21]:
display(final_result)

{'IPO Name': 'KSH',
 'Average Evaluation Score': 8.263157894736842,
 'Detailed Evaluation': [{'Manufacturing Capacity Utilization': "Based on the DRHP for KSH International, the current capacity utilization for the most recent period (three-month period ended June 30, 2025) is approximately **84.20%**.\n\n### **Data Analysis (Derived from Operating KPIs):**\n| Metric | 3-Months Ended June 30, 2025 | Fiscal 2025 | Fiscal 2024 | Fiscal 2023 |\n| :--- | :--- | :--- | :--- | :--- |\n| **Production Capacity (MT)** | 29,045 (Annualized) | 29,045 | 28,436 | 25,265 |\n| **Sales Volume (MT)** | 6,114 | 23,324 | 21,495 | 17,645 |\n| **Calculated Utilization** | ~84.20%* | ~80.30% | ~75.59% | ~69.84% |\n\n*\\*Calculation Note: For the three-month period, the sales volume of 6,114 MT against an available quarterly capacity (approx. 7,261 MT) reflects an utilization of ~84.2%.*\n\n**Reference:** \n*   **Operating KPIs Table (Pages 129 & 360):** These sections provide cumulative production capacity 

In [23]:
import gradio as gr

In [32]:
chat = gr.ChatInterface(fn=ipo_evaluator)
chat.launch()


* Running on local URL:  http://127.0.0.1:7865
* To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "c:\Users\rauna\projects\My Projects\IPO_Checker\.venv_ipo\Lib\site-packages\gradio\queueing.py", line 763, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<5 lines>...
    )
    ^
  File "c:\Users\rauna\projects\My Projects\IPO_Checker\.venv_ipo\Lib\site-packages\gradio\route_utils.py", line 354, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<11 lines>...
    )
    ^
  File "c:\Users\rauna\projects\My Projects\IPO_Checker\.venv_ipo\Lib\site-packages\gradio\blocks.py", line 2125, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<8 lines>...
    )
    ^
  File "c:\Users\rauna\projects\My Projects\IPO_Checker\.venv_ipo\Lib\site-packages\gradio\blocks.py", line 1605, in call_function
    prediction = await fn(*processed_input)
               