In [45]:
import os, json, faiss
import pandas as pd
import numpy as np
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from PyPDF2 import PdfReader
from dotenv import load_dotenv
# Load environment variables from a local .env file (presumably where GOOGLE_API,
# read via os.getenv further down, is defined -- confirm against your setup).
load_dotenv()

True

In [46]:
def readPdf(path, start_page=0):
    """Extract and concatenate the text of a PDF's pages.

    Args:
        path: Location of the PDF; passed straight to ``PdfReader``.
        start_page: Zero-based index of the first page to read. Defaults to 0
            so that the first page is included. The previous implementation
            always started at index 1 and therefore silently dropped the
            first page; pass ``start_page=1`` to skip a cover page on purpose.

    Returns:
        str: The page texts joined in page order with no separator.

    Raises:
        ValueError: If ``start_page`` is negative.
    """
    if start_page < 0:
        raise ValueError("start_page must be a non-negative page index.")

    reader = PdfReader(path)
    # ``or ""`` keeps a page that yields no text (None or empty) from breaking
    # the join; joining once also avoids repeated string concatenation.
    page_texts = (
        reader.pages[i].extract_text() or ""
        for i in range(start_page, len(reader.pages))
    )
    return "".join(page_texts)

In [47]:
def generate_embedding(text, key, model):
    """Embed ``text`` through ``GoogleGenerativeAIEmbeddings``.

    Args:
        text: The string handed to ``embed_query``.
        key: Forwarded as ``google_api_key`` to the embeddings client.
        model: Forwarded as ``model`` to the embeddings client.

    Returns:
        Whatever ``embed_query`` returns for ``text`` (other code in this
        notebook converts it to a float32 NumPy array).
    """
    embedder = GoogleGenerativeAIEmbeddings(model=model, google_api_key=key)
    return embedder.embed_query(text)

In [48]:
def store_embeddings(embeddings, index_path="vectors.index"):
    """Build a flat L2 FAISS index from ``embeddings`` and write it to disk.

    Args:
        embeddings: A single vector (1-D) or a collection of equally sized
            vectors (2-D). Converted to a C-contiguous float32 array.
        index_path: File the index is written to with ``faiss.write_index``.

    Raises:
        ValueError: If ``embeddings`` is empty or is not 1-D/2-D. Previously
            an empty input produced a zero-dimensional index and other shapes
            failed with an unrelated error (or used the wrong axis).
    """
    # order="C" guarantees the row-major layout regardless of the input's layout.
    embeddings_array = np.array(embeddings, dtype='float32', order='C')

    # A lone vector is promoted to a one-row matrix.
    if embeddings_array.ndim == 1:
        embeddings_array = np.expand_dims(embeddings_array, axis=0)

    if embeddings_array.ndim != 2 or embeddings_array.size == 0:
        raise ValueError(
            "embeddings must be a non-empty vector or a non-empty 2-D "
            f"collection of vectors; got array of shape {embeddings_array.shape}."
        )

    dimension = embeddings_array.shape[1]

    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings_array)
    faiss.write_index(index, index_path)
    print(f"FAISS index stored locally at {index_path}.")

In [49]:
def retrieve_document_from_faiss(query, index_path="vectors.index", documents=None):
    """Return the document whose stored vector is nearest to ``query``.

    Args:
        query: Text to embed and look up.
        index_path: FAISS index file read with ``faiss.read_index``.
        documents: Sequence of documents; position ``i`` must correspond to
            the ``i``-th vector added to the index.

    Returns:
        The single entry of ``documents`` at the nearest neighbour's position.

    Raises:
        ValueError: If ``documents`` is missing or empty.
        IndexError: If the index reports no neighbour (-1) or a position that
            is outside ``documents``.
    """
    if documents is None or len(documents) == 0:
        raise ValueError("documents must be a non-empty sequence aligned with the FAISS index.")

    index = faiss.read_index(index_path)
    query_embedding = generate_embedding(query, key=os.getenv("GOOGLE_API"), model='models/embedding-001')

    # FAISS expects a (n_queries, dimension) float32 matrix.
    query_embedding = np.array(query_embedding, dtype='float32').reshape(1, -1)

    # search() returns a (distances, ids) pair. The earlier code indexed the
    # first element of that pair, i.e. it used the truncated *distance* as the
    # document position instead of the neighbour id.
    _distances, neighbor_ids = index.search(query_embedding, 1)
    best = int(neighbor_ids[0][0])

    # -1 is the id FAISS reports when it has no neighbour to return; do not let
    # it silently select the last document through negative indexing.
    if best < 0 or best >= len(documents):
        raise IndexError(
            f"FAISS returned neighbour id {best}, which does not match any of the "
            f"{len(documents)} supplied documents."
        )
    return documents[best]

In [50]:
def generate_response(query, retrieved_data, google_api_key=None, model_name="gemini-1.5-flash"):
    """Answer ``query`` with a Gemini chat model, using ``retrieved_data`` as context.

    Args:
        query: The user's question; placed after the context in the prompt.
        retrieved_data: Context text interpolated into the prompt.
        google_api_key: API key; when falsy, the ``GOOGLE_API`` environment
            variable is used instead.
        model_name: Model identifier passed to ``ChatGoogleGenerativeAI``.

    Returns:
        The ``content`` attribute of the model's reply.

    Raises:
        ValueError: If no API key is supplied and ``GOOGLE_API`` is unset.
        RuntimeError: If creating or invoking the model fails; the original
            exception is attached as ``__cause__``.
    """
    google_api_key = google_api_key or os.getenv("GOOGLE_API")
    if not google_api_key:
        raise ValueError("Google API key is required and should be set in environment variables.")

    prompt = (
        f"Context:\n{retrieved_data}\n\n"
        f"Question: {query}\n"
        "Answer:"
    )
    try:
        model = ChatGoogleGenerativeAI(google_api_key=google_api_key, model=model_name)
        response = model.invoke(prompt)
        return response.content
    except Exception as e:
        # Chain explicitly so the underlying failure stays attached as the cause.
        raise RuntimeError(f"Error generating response: {e}") from e

In [51]:
# Notebook configuration: source PDF, API credential, and embedding model name.
path = "Data.pdf"
key = os.getenv("GOOGLE_API")  # None when the variable is not set
model = "models/embedding-001"

In [57]:
# Pull the text out of the PDF, then embed that whole text as one vector.
# NOTE(review): the entire document goes into a single embedding request --
# confirm it fits the embedding model's input limit, otherwise split it into chunks.
text = readPdf(path)
embeddings = generate_embedding(text=text, key=key, model=model)

In [58]:
# Persist the document embedding as a FAISS index (default file: vectors.index).
store_embeddings(embeddings=embeddings)

FAISS index stored locally at vectors.index.


In [73]:
# Question 1: a single-quarter figure.
query = "What is the gross profit for Q3 2024?"
# The candidate list holds the one extracted document text.
top_chunks = retrieve_document_from_faiss(query=query, documents=[text])

In [74]:
# retrieve_document_from_faiss returns one document as a plain string. Joining a
# string with " " would insert a space between every character, so only join
# when a real collection of chunks is supplied.
combined_context = top_chunks if isinstance(top_chunks, str) else " ".join(top_chunks)
response = generate_response(query, combined_context)
print(response)

The gross profit for Q3 2024 (three months ended March 31, 2024) is ₹11,175 crore.


In [75]:
# Question 2: compare two figures for the same quarter.
query = "How do the net income and operating expenses compare for Q1 2024?"
# The candidate list holds the one extracted document text.
top_chunks = retrieve_document_from_faiss(query=query, documents=[text])

In [76]:
# retrieve_document_from_faiss returns one document as a plain string. Joining a
# string with " " would insert a space between every character, so only join
# when a real collection of chunks is supplied.
combined_context = top_chunks if isinstance(top_chunks, str) else " ".join(top_chunks)
response = generate_response(query, combined_context)
print(response)

For Q1 2024, Infosys Limited and Subsidiaries reported a net profit (profit for the period) of ₹7,975 crore.  Operating expenses totaled ₹30,412 crore.


In [77]:
# Question 3: a quarter from the prior year.
query = "What are the total expenses for Q2 2023?"
# The candidate list holds the one extracted document text.
top_chunks = retrieve_document_from_faiss(query=query, documents=[text])

In [78]:
# retrieve_document_from_faiss returns one document as a plain string. Joining a
# string with " " would insert a space between every character, so only join
# when a real collection of chunks is supplied.
combined_context = top_chunks if isinstance(top_chunks, str) else " ".join(top_chunks)
response = generate_response(query, combined_context)
print(response)


The provided text does not contain a breakdown of expenses by quarter.  It only shows the total expenses for the year ended March 31, 2023 (₹29,646 crore) and for the three months ended March 31, 2024 (₹30,412 crore).  Therefore, the total expenses for Q2 2023 cannot be determined from this document.


In [79]:
# Question 4: a metric over a multi-month window.
query = "Show the operating margin for the past 6 months"
# The candidate list holds the one extracted document text.
top_chunks = retrieve_document_from_faiss(query=query, documents=[text])

In [80]:
# retrieve_document_from_faiss returns one document as a plain string. Joining a
# string with " " would insert a space between every character, so only join
# when a real collection of chunks is supplied.
combined_context = top_chunks if isinstance(top_chunks, str) else " ".join(top_chunks)
response = generate_response(query, combined_context)
print(response)

The provided text does not contain a 6-month operating margin.  It only shows data for the three months ended March 31, 2024 and 2023, and the full fiscal years ended March 31, 2024 and 2023.  To calculate the operating margin for a six-month period, you would need data for two consecutive three-month periods.
