In [1]:
# Required Libraries Installation
!pip install --upgrade langchain-google-genai
!pip install langchain-community
!pip install PyPDF2
!pip install faiss-cpu
!pip install tiktoken
!pip install scikit-learn

Collecting langchain-google-genai
  Downloading langchain_google_genai-2.0.4-py3-none-any.whl.metadata (3.8 kB)
Downloading langchain_google_genai-2.0.4-py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.8/41.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain-google-genai
Successfully installed langchain-google-genai-2.0.4
Collecting langchain-community
  Downloading langchain_community-0.3.7-py3-none-any.whl.metadata (2.9 kB)
Collecting SQLAlchemy<2.0.36,>=1.4 (from langchain-community)
  Downloading SQLAlchemy-2.0.35-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting langchain-core<0.4.0,>=0.3.17 (from lan

In [3]:
## Import Dependencies##
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from sklearn.metrics.pairwise import cosine_similarity
from langchain.docstore.document import Document
import os

# Initialize API Key
# Never store a real key in the notebook source: reuse GOOGLE_API_KEY when the
# environment already provides it, otherwise prompt for it without echoing.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass

    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Gemini API key: ")

# Load and Preprocess PDF
pdf_path = "/content/SJS Transcript Call.pdf"
pdf_reader = PdfReader(pdf_path)
# Extract each page exactly once; pages that yield no text are skipped below.
page_texts = [page.extract_text() for page in pdf_reader.pages]
# Join pages with a newline so the last line of one page is not fused with the
# first line of the next one.
raw_text = "\n".join(text for text in page_texts if text)

# Text Splitting
# Cut the transcript on line breaks into chunks of at most ~600 characters,
# with a 150-character overlap between consecutive chunks.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=600,
    chunk_overlap=150,
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

# Fail early with a clear message when nothing could be extracted (for example
# an image-only PDF) instead of letting the embedding / index build fail later.
if not texts:
    raise ValueError(
        f"No text could be extracted from {pdf_path!r}; cannot build the vector store."
    )

# Embedding Initialize
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Generate embeddings and store in FAISS Database
vector_store = FAISS.from_texts(texts, embedding_model)

# Predefined investor-oriented queries, grouped by theme, that are used to pull
# additional context out of the transcript.
# Each query is listed exactly once: a repeated entry only adds duplicate
# retrieval work and duplicate context.
financial_queries = [
    # Revenue and Profitability
    "What were the total consolidated revenues for the company in the last reported quarter?",
    "How much did the company’s revenue grow year-over-year?",
    "What are the projected revenue growth figures for the next fiscal year?",
    "What is the gross profit margin reported by the company?",
    "What was the revenue contribution by different segments like automotive, two-wheelers, and consumer appliances?",

    # Operating Expenses and Margins
    "What were the EBITDA margins for the company and how have they changed over the previous quarters?",
    "What are the operating expenses for the company in the last reported quarter?",
    "What is the sustainable EBITDA margin expected over the next three years?",
    "Did the company report any one-time expenses that impacted EBITDA?",

    # Profit After Tax (PAT)
    "What is the Profit After Tax (PAT) reported for the latest quarter?",
    "What PAT growth has management projected for the next year?",

    # Debt and Cash Flow
    "What is the current net debt of the company after recent acquisitions?",
    "How much cash and cash equivalents does the company hold post-acquisition?",
    "What cash flow was generated in the last quarter?",

    # Capital Expenditure (CAPEX) and Investments
    "What are the CAPEX plans for the company in the current fiscal year?",
    "How much capital expenditure is planned for expansion or acquisition?",
    "Are there any deferred or postponed capital expenditures?",

    # Segment Analysis and Contributions
    "What percentage of revenue does each segment, such as automotive, passenger vehicles, and consumer appliances, contribute?",
    "How has the two-wheeler segment performed for the company in comparison to the automotive industry?",
    "What is the revenue split between domestic and export markets?",

    # Export Growth and Market Expansion
    "How has the company's export revenue grown in the last quarter?",
    "What is the company’s strategy to grow exports in regions like South America and Europe?",
    "What percentage of total revenue currently comes from exports?",

    # New Client and Customer Expansion
    "Which new clients were added by the company in the last quarter?",
    "What is the revenue potential from new customers added recently?",
    "How does the company plan to grow its wallet share with key customers like Mahindra, Hyundai, and Honda?",

    # Acquisition Impact and Synergies
    "How will recent acquisitions impact the company’s growth and margins?",
    "What revenue synergies does the company anticipate from recent acquisitions and integrations?",
    "What percentage of capacity is currently utilized, and what is the revenue potential at full capacity?",

    # Cost Management and Efficiency
    "What cost efficiencies does the company expect to achieve from debottlenecking or deferring expansion?",
    "How does outsourcing certain processes impact the company's operating margins?",

    # Research, Development, and New Products
    "What are the new product initiatives in technology areas like in-mold electronics (IME) and optical plastics?",
    "What is the anticipated timeline for market introduction of products using new technologies?",

    # Market Position and Strategic Outlook
    "What growth guidance has management provided for the company’s key segments over the next few years?",
    "What are the projected market expansion plans for the company in North American and European markets?",
    "How does the company plan to maintain its market leadership in its core business?",

    # Environmental, Social, and Governance (ESG) Initiatives
    "What CSR initiatives has the company undertaken in recent quarters?",
    "What impact have the company’s CSR initiatives had on local communities or environmental goals?",

    # Advanced Projections and Strategic Planning
    "What are the expected earnings projections for the company in the next quarter?",
    "How has the management revised its growth and margin outlook following recent acquisitions?",
    "What are the key growth drivers identified by the company for the medium term?",
]

# Retrieve relevant financial information
def retrieve_financial_info(query, top_k=20):
    """Return the transcript chunks most similar to ``query``.

    The query is embedded with ``embedding_model`` and compared, by cosine
    similarity, against every vector reconstructed from ``vector_store``'s
    FAISS index.

    Args:
        query: Natural-language question to search for.
        top_k: Maximum number of chunks to return. Fewer are returned when the
            index holds fewer vectors; a non-positive value yields an empty
            list.

    Returns:
        A list of ``Document`` objects, best match first, whose
        ``page_content`` is the matching entry of the module-level ``texts``.
    """
    # Guard first: with top_k == 0 a ``[-top_k:]`` slice would select *every*
    # chunk instead of none, and the embedding call would be wasted anyway.
    if top_k <= 0:
        return []

    query_embedding = embedding_model.embed_query(query)
    stored_embeddings = vector_store.index.reconstruct_n(0, vector_store.index.ntotal)
    similarities = cosine_similarity([query_embedding], stored_embeddings).flatten()

    # Sort ascending, flip to descending, then keep the best ``top_k``.
    ranked_indices = similarities.argsort()[::-1][:top_k]

    # Assumes row i of the index corresponds to texts[i], i.e. that the store
    # was built from ``texts`` in this order and not modified afterwards.
    return [Document(page_content=texts[i]) for i in ranked_indices]

# Investor-focused prompt. The chain fills in the two placeholders:
# {context} with the retrieved transcript chunks and {question} with the query.
prompt_template = """




Analyze the context and respond in a clear, investor-focused manner, with emphasis on:

Analyze the Document and extract key elements such as future growth prospects, key changes in the business, key triggers, important information that might have a material effect on next year's earnings and growth.
and extract key information from the same for an investor looking to evaluate the company.

Instructions:
1. Format your response using **markdown** for readability, with formatting options such as headings, bold, italic, links, tables, lists, code blocks, and blockquotes.
2. Avoid explicitly mentioning markdown syntax in your response; apply it solely for readability.
3. Highlight essential sections in **bold**.
4. Use bullet points to structure longer responses for better clarity.
5. Provide a comprehensive and context-aligned answer, avoiding contradictions. Exclude irrelevant information if it does not directly apply to the question.

Context:
{context}

Question:
{question}

Answer:


"""

# Wrap the raw template, declaring the placeholders it expects.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Chat model that writes the answers (Gemini, temperature 0.3)
model = ChatGoogleGenerativeAI(
    temperature=0.3,
    model="gemini-pro",
)

# Question-answering chain of type "stuff" driven by the investor prompt
qa_chain = load_qa_chain(
    model,
    prompt=prompt,
    chain_type="stuff",
)

# Answer Investor-Related Questions Using Both PDF and Financial Queries as Context
def answer_investor_question(question, max_context_docs=20):
    """Answer ``question`` with the QA chain and print the result.

    The context handed to the chain is the chunks retrieved for ``question``
    itself, followed by the first ``max_context_docs`` chunks retrieved for
    the predefined ``financial_queries`` (taken in list order).

    Args:
        question: The investor's question.
        max_context_docs: Cap on the number of extra chunks contributed by
            ``financial_queries``.

    Returns:
        None. The answer is printed to standard output.
    """
    context_documents = []
    for query in financial_queries:
        # Only the first ``max_context_docs`` chunks are ever used, so stop
        # as soon as enough have been collected. Embedding and searching the
        # remaining queries would produce results that are discarded.
        if len(context_documents) >= max_context_docs:
            break
        context_documents.extend(retrieve_financial_info(query))

    # Chunks for the actual question come first, then the capped extra context.
    top_k_documents = retrieve_financial_info(question) + context_documents[:max_context_docs]

    # Generate answer with the QA chain
    answer = qa_chain.run({"input_documents": top_k_documents, "question": question})

    # Display the structured, investor-focused answer
    print("Answer:\n", answer)

# Example investor query: ask for the latest reported PAT
investor_question = " What is the Profit After Tax (PAT) reported for the latest quarter?"
answer_investor_question(investor_question)




Answer:
 The Profit After Tax (PAT) reported for the latest quarter is Rs.180 million.


In [4]:
# Answer Investor-Related Questions Using Both PDF and Financial Queries as Context
# NOTE: a function with this name is already defined in an earlier cell; once
# this cell runs, this definition is the one in effect. Consider keeping one.
def answer_investor_question(question, max_context_docs=20):
    """Answer ``question`` with the QA chain and print the result.

    The context handed to the chain is the chunks retrieved for ``question``
    itself, followed by the first ``max_context_docs`` chunks retrieved for
    the predefined ``financial_queries`` (taken in list order).

    Args:
        question: The investor's question.
        max_context_docs: Cap on the number of extra chunks contributed by
            ``financial_queries``.

    Returns:
        None. The answer is printed to standard output.
    """
    context_documents = []
    for query in financial_queries:
        # Only the first ``max_context_docs`` chunks are ever used, so stop
        # as soon as enough have been collected. Embedding and searching the
        # remaining queries would produce results that are discarded.
        if len(context_documents) >= max_context_docs:
            break
        context_documents.extend(retrieve_financial_info(query))

    # Chunks for the actual question come first, then the capped extra context.
    top_k_documents = retrieve_financial_info(question) + context_documents[:max_context_docs]

    # Generate answer with the QA chain
    answer = qa_chain.run({"input_documents": top_k_documents, "question": question})

    # Display the structured, investor-focused answer
    print("Answer:\n", answer)

# Example investor query: open-ended "why should I invest" question
investor_question = " I am Investor and I want to invest in this company So why I invest in this company what is my profit?"
answer_investor_question(investor_question)

Answer:
 **Key Elements:**

**Future Growth Prospects:**

* Acquisition of Walter Pack India (WPI) to expand into new markets and product lines.
* Cross-selling opportunities between SJS, Exotech, and WPI.
* Growing demand for Exotech's products.
* Recovery in consumer sector and export markets.

**Key Changes in the Business:**

* Acquisition of WPI, a leading manufacturer of automotive interior parts.
* Deferral of Exotech's capacity expansion plans to align with WPI synergies.
* Focus on organic growth through new product introductions and customer acquisitions.

**Key Triggers:**

* Successful integration of WPI into SJS's operations.
* Growth in automotive and export markets.
* New product launches and customer wins.

**Important Information with Potential Impact on Next Year's Earnings and Growth:**

* Pro forma revenue growth of 48.2% in Q1 FY24 with WPI acquisition.
* Pro forma EBITDA margin expansion of 120 bps in Q1 FY24.
* Post-acquisition net debt of Rs. 539.8 million.
* Gu