In [51]:
from dotenv import load_dotenv
import os   
from langchain_google_genai.chat_models import ChatGoogleGenerativeAI
from langchain_google_genai.embeddings import GoogleGenerativeAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain_core.prompts import PromptTemplate
import requests


In [52]:
# Read the local .env file so the API keys used below are available in os.environ.
load_dotenv()

# Gemini chat model that generates the final answers.
model = ChatGoogleGenerativeAI(model="gemini-2.0-flash")

In [53]:
# Download the source PDF into ./documents/tcs.pdf (relative to the current working directory).
URL = "https://www.tcs.com/content/dam/tcs/investor-relations/financial-statements/2023-24/ar/annual-report-2023-2024.pdf"
filename = os.path.join(os.getcwd(), "documents", "tcs.pdf")

# open() does not create missing parent folders, so make sure the target directory exists.
os.makedirs(os.path.dirname(filename), exist_ok=True)

# The timeout stops the cell from hanging forever on a stalled connection; the
# `with` block releases the streamed connection even when an error occurs.
with requests.get(URL, stream=True, timeout=60) as response:
    # Fail loudly instead of silently skipping the download: later cells would
    # otherwise run against a missing or stale file.
    if response.status_code != 200:
        raise RuntimeError(
            f"Download failed with HTTP status {response.status_code}: {URL}"
        )
    with open(filename, "wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:  # skip empty keep-alive chunks
                f.write(chunk)

print(f"File saved successfully as {filename}")



File saved successfully as c:\Users\user\Desktop\Financial_Analyzer\documents\tcs.pdf


In [54]:
def document_loader(directory_path):
    """Load the PDF files found in a directory.

    Args:
        directory_path: Path of the folder that holds the PDF files.

    Returns:
        Whatever ``PyPDFDirectoryLoader(directory_path).load()`` returns
        (the loaded documents).

    Raises:
        NotADirectoryError: If ``directory_path`` does not exist or is not a
            directory.
    """
    # Fail fast on a bad path so a typo is reported here rather than surfacing
    # later as an empty document set.
    # NOTE(review): the loader appears to return nothing for a missing folder
    # instead of raising - confirm against the installed langchain version.
    if not os.path.isdir(directory_path):
        raise NotADirectoryError(
            f"PDF directory does not exist or is not a directory: {directory_path}"
        )
    return PyPDFDirectoryLoader(directory_path).load()

In [55]:
# Load from the same cwd-relative "documents" folder the download cell writes to,
# instead of a machine-specific absolute path.
documents = document_loader(os.path.join(os.getcwd(), "documents"))

In [56]:
def chunk_splitter(documents, chunk_size=1000, chunk_overlap=200):
    """Split documents into overlapping chunks.

    Args:
        documents: Documents to split; handed to ``split_documents`` unchanged.
        chunk_size: ``chunk_size`` given to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` given to ``RecursiveCharacterTextSplitter``.

    Returns:
        The chunked documents produced by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )
    return splitter.split_documents(documents)

In [57]:
# Chunk the loaded documents using the default chunk size and overlap.
splitted_doc = chunk_splitter(documents)

In [58]:
# Sanity check: number of chunks produced by the splitter.
len(splitted_doc)

1453

In [59]:
# Embedding model used to vectorise both the document chunks and the queries.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
    google_api_key=os.environ["GOOGLE_API_KEY"],
)

In [60]:
# Import the Pinecone client and the serverless index spec
from pinecone import Pinecone, ServerlessSpec
load_dotenv()
# Initialize a Pinecone client with your API key
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Create a plain dense index (no integrated embedding model).
# The vectors stored in this index are computed client-side by `embeddings`
# (Google "models/embedding-001"), so the index dimension has to match that
# model's output size. An index created with an integrated embedding model
# (e.g. llama-text-embed-v2) is sized for that model instead and would reject
# these vectors on upsert.
index_name = "langchain"
EMBEDDING_DIMENSION = 768  # output size of models/embedding-001 (see Google embeddings docs)
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=EMBEDDING_DIMENSION,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

In [61]:
# Embed and upsert the chunks only when the index is still empty. Re-running an
# unconditional from_documents() call re-embeds every chunk and stores it again
# under fresh ids, which duplicates vectors and skews similarity search.
# To force a re-ingest (e.g. after changing the PDFs), clear or delete the index first.
# NOTE(review): index stats can lag slightly behind recent writes - confirm this is
# acceptable for your workflow.
if pc.Index(index_name).describe_index_stats().total_vector_count == 0:
    index = PineconeVectorStore.from_documents(
        documents=splitted_doc,
        embedding=embeddings,
        index_name=index_name,
    )
else:
    index = PineconeVectorStore.from_existing_index(
        index_name=index_name,
        embedding=embeddings,
    )

In [62]:
def retrive_query(query, k=2):
    """Run a similarity search against the module-level vector store ``index``.

    Args:
        query: Query passed to ``index.similarity_search``.
        k: Number of results requested from the search (default 2).

    Returns:
        The result of ``index.similarity_search(query=query, k=k)``.
    """
    return index.similarity_search(query=query, k=k)


In [134]:
# Prompt for the answering step. The template tells the model to answer only from
# the supplied context, prescribes an "Answer / Supporting Context" layout, and
# gives a fixed fallback sentence for questions the context cannot answer.
# `{context}` and `{question}` are the two placeholders filled in at invoke time.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a highly skilled financial analyst. 
Answer the question strictly using ONLY the information from the provided context. 
Do not use outside knowledge.

When answering:
- If the question is **numeric or factual** → Give the exact number with units.  
- If the question is **comparative** → Show side-by-side values and state which is higher/lower.  
- If the question is **analytical ("why" or "how")** → Extract and explain reasons from context.  
- If the question is **summary/overview** → Highlight revenue, profit, margins, and growth.  

Always structure your answer as follows:
**Answer:** <clear, direct response>  
**Supporting Context:** <quote or summarize the part of the context that justifies your answer>  

If the answer is not available in the context, respond with:  
"The context does not provide this information."

Context:
{context}

Question:
{question}
"""
)


In [135]:
# Manual (non-chain) retrieval: fetch the closest chunks and merge their text
# into a single newline-separated context string.
question = "What is the revenue of TCS in 2023-24 ?"
docs = retrive_query(question)
context = "\n".join(doc.page_content for doc in docs)


In [125]:
# Fill the template placeholders with the retrieved context and the question.
final_prompt = prompt.invoke({"context": context, "question": question})

In [136]:
# Send the rendered prompt to the chat model.
answer = model.invoke(final_prompt)


In [127]:
# Display the raw model response.
# NOTE(review): this cell's recorded execution count is older than the prompt and
# model-invoke cells above, so the saved output may come from an earlier prompt
# version - re-run the notebook top to bottom to refresh it.
answer

AIMessage(content='**Summary of Financial Performance:**\n- Revenue: ₹240,893 crore, 6.8% growth\n- EBIT: ₹59,311 crore, 24.6% margin\n- Profit After Tax: ₹46,585 crore, 10.5% growth\n- EPS: ₹127.74\n- Key Observations: Revenue and profit after tax increased, although revenue growth slowed compared to the previous year.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run--1c012fd0-0c3b-4b07-abd2-5a9248950db1-0', usage_metadata={'input_tokens': 873, 'output_tokens': 103, 'total_tokens': 976, 'input_token_details': {'cache_read': 0}})

### Using LangChain chains (LCEL)

In [137]:
from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [138]:
def format_docs(retrieved_docs):
    """Join retrieved items into one context string.

    Items that expose a ``page_content`` attribute contribute that attribute;
    anything else is converted with ``str()``. The pieces are separated by a
    blank line.

    Args:
        retrieved_docs: Iterable of document-like objects and/or plain values.

    Returns:
        A single string with the pieces joined by ``"\\n\\n"`` (empty string for
        empty input).
    """
    pieces = [
        item.page_content if hasattr(item, "page_content") else str(item)
        for item in retrieved_docs
    ]
    return "\n\n".join(pieces)


In [139]:
# Fan the incoming question out into the two prompt inputs:
#   context  -> retrieve matching chunks, then flatten them into one string
#   question -> forwarded unchanged
parallel_chain = RunnableParallel(
    context=RunnableLambda(retrive_query) | RunnableLambda(format_docs),
    question=RunnablePassthrough(),
)

In [140]:
# Output parser placed at the end of the chain, after the model.
parser = StrOutputParser()


In [141]:
# Full pipeline: build prompt inputs -> render prompt -> call model -> parse output.
chain = parallel_chain | prompt | model | parser


In [142]:
# Ask an analytical question through the chain; the result is shown as the cell output.
question = "How resilient was TCS’ performance compared to the previous year’s growth momentum?"
chain.invoke(question)

'**Answer:** TCS\' revenue growth in FY 2024 was 6.8%, while in the prior year it was 17.6%. This indicates a decrease in growth momentum. However, the company\'s performance showed remarkable resilience against the backdrop of macro uncertainty and geo-political volatility.\n\n**Supporting Context:** "On a reported basis, TCS’ revenue grew 6.8% in FY 2024, compared to 17.6% in the prior year . The company’s performance showed a remarkable resilience against the backdrop of macro uncertainty and geo-political volatility."'