In [1]:
!pip install transformers sentence-transformers faiss-cpu python-docx python-pptx pymupdf pandas

Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0.post1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.0 kB)
Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Collecting python-pptx
  Downloading python_pptx-1.0.2-py3-none-any.whl.metadata (2.5 kB)
Collecting pymupdf
  Downloading pymupdf-1.26.3-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (3.4 kB)
Collecting XlsxWriter>=0.5.7 (from python-pptx)
  Downloading xlsxwriter-3.2.5-py3-none-any.whl.metadata (2.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Do

In [2]:
import os
import fitz
import docx
import pptx
import csv
import numpy as np
import pandas as pd

from sentence_transformers import SentenceTransformer
from transformers import pipeline
from google.colab import files


In [58]:
file_paths = [f for f in os.listdir() if os.path.isfile(f)]
print("Uploaded Files:", file_paths)

Uploaded Files: ['test_final.txt', 'test.txt', 'test2.txt']


In [60]:
#MCP Message Format
def create_mcp_message(sender, receiver, msg_type, trace_id, payload):
    return {
        "sender": sender,
        "receiver": receiver,
        "type": msg_type,
        "trace_id": trace_id,
        "payload": payload
    }

#File Parser
def parse_document(path):
    ext = os.path.splitext(path)[1].lower()
    if ext == ".pdf":
        return parse_pdf(path)
    elif ext == ".docx":
        return parse_docx(path)
    elif ext == ".pptx":
        return parse_pptx(path)
    elif ext == ".csv":
        return parse_csv(path)
    elif ext in [".txt", ".md"]:
        return parse_txt(path)
    return []

def parse_pdf(path):
    doc = fitz.open(path)
    return [page.get_text() for page in doc]

def parse_docx(path):
    doc = docx.Document(path)
    return [para.text for para in doc.paragraphs if para.text.strip()]

def parse_pptx(path):
    prs = pptx.Presentation(path)
    return [shape.text for slide in prs.slides for shape in slide.shapes if hasattr(shape, "text")]

def parse_csv(path):
    with open(path, newline='', encoding='utf-8') as f:
        reader = csv.reader(f)
        return [" | ".join(row) for row in reader]

def parse_txt(path):
    with open(path, encoding='utf-8') as f:
        return f.readlines()

#IngestionAgent
class IngestionAgent:
    def __init__(self, name="IngestionAgent"):
        self.name = name

    def handle(self, files, trace_id):
        all_chunks = []
        for file_path in files:
            chunks = parse_document(file_path)
            all_chunks.extend(chunks)
        return create_mcp_message(
            self.name, "RetrievalAgent", "INGESTION_RESULT", trace_id,
            {"chunks": all_chunks}
        )

In [61]:
import faiss

class RetrievalAgent:
    def __init__(self, name="RetrievalAgent"):
        self.name = name
        self.model = SentenceTransformer("all-MiniLM-L6-v2")
        self.index = None
        self.chunk_map = []

    def build_index(self, chunks):
        embeddings = self.model.encode(chunks)
        self.index = faiss.IndexFlatL2(embeddings.shape[1])
        self.index.add(np.array(embeddings))
        self.chunk_map = chunks

    def handle(self, message):
        if message["type"] == "INGESTION_RESULT":
            chunks = message["payload"]["chunks"]
            self.build_index(chunks)
            return "RetrievalAgent Ready"

        elif message["type"] == "QUERY":
            query = message["payload"]["query"]
            query_emb = self.model.encode([query])
            distances, indices = self.index.search(np.array(query_emb), 8)
            top_chunks = list(dict.fromkeys([self.chunk_map[i] for i in indices[0]]))
            return create_mcp_message(
                self.name,
                "LLMResponseAgent",
                "RETRIEVAL_RESULT",
                message["trace_id"],
                {"retrieved_context": top_chunks, "query": query}
            )

In [6]:
class LLMResponseAgent:
    def __init__(self, name="LLMResponseAgent"):
        self.name = name
        self.llm = pipeline("text2text-generation", model="google/flan-t5-base")

    def handle(self, message):
        context = "\n".join(message["payload"]["retrieved_context"])
        query = message["payload"]["query"]

        prompt = (
            f"You are a helpful assistant.Use the following context to answer the question.\n\n"
            f"Context:\n{context}\n\n"
            f"Question: {query}\n\n"
            f"Your complete, helpful answer:"
        )

        result = self.llm(prompt, max_length=256, do_sample=True)

        return {
            "answer": result[0]['generated_text'].strip(),
            "source_chunks": message["payload"]["retrieved_context"]
        }

In [7]:
#Step 1: Run IngestionAgent
trace_id = "test-001"
ingestor = IngestionAgent()
msg_ingested = ingestor.handle(file_paths, trace_id)

#Step 2: Run RetrievalAgent
retriever = RetrievalAgent()
retriever.handle(msg_ingested)

#Step 3: Ask question
user_question = input(" Ask a question based on uploaded files: ")

#Step 4: Run query
query_msg = create_mcp_message(
    "UI", "RetrievalAgent", "QUERY", trace_id, {"query": user_question}
)
retrieval_response = retriever.handle(query_msg)

#Step 5: Get response
responder = LLMResponseAgent()
final_answer = responder.handle(retrieval_response)

#Step 6: Output
print("\n Final Answer:\n", final_answer["answer"])
print("\n Source Chunks Used:\n")
for i, chunk in enumerate(final_answer["source_chunks"]):
    print(f" Chunk {i+1}:\n{chunk.strip()}\n---")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

 Ask a question based on uploaded files: How is the Q1 performance?


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0
Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



 Final Answer:
 sales improved

 Source Chunks Used:

 Chunk 1:
Q2 Performance:
---
 Chunk 2:
Q1 sales improved.
---
 Chunk 3:
Q3 Strategy:
---
 Chunk 4:
Q1 Highlights:
---
 Chunk 5:
Revenue growth in Q2.
---
 Chunk 6:
- Retention rate hit 87% in Q2
---
 Chunk 7:
- Customer satisfaction improved
---
 Chunk 8:
Customer Insights:
---


In [9]:
# Step 1: Run IngestionAgent
trace_id = "test-001"
ingestor = IngestionAgent()
msg_ingested = ingestor.handle(file_paths, trace_id)

# Step 2: Run RetrievalAgent
retriever = RetrievalAgent()
retriever.handle(msg_ingested)

# Step 3: Ask question
user_question = input(" Ask a question based on uploaded files: ")

# Step 4: Run query
query_msg = create_mcp_message(
    "UI", "RetrievalAgent", "QUERY", trace_id, {"query": user_question}
)
retrieval_response = retriever.handle(query_msg)

# Step 5: Get response
responder = LLMResponseAgent()
final_answer = responder.handle(retrieval_response)

# Step 6: Output
print("\n Final Answer:\n", final_answer["answer"])
print("\n Source Chunks Used:\n")
for i, chunk in enumerate(final_answer["source_chunks"]):
    print(f" Chunk {i+1}:\n{chunk.strip()}\n---")

 Ask a question based on uploaded files: How is the Q2 perfomance?


Device set to use cuda:0
Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



 Final Answer:
 Q3 Strategy: Revenue growth in Q2. Q1 sales improved. - Retention rate hit 87% in Q2 Q1 Highlights: - Customer satisfaction improved

 Source Chunks Used:

 Chunk 1:
Q2 Performance:
---
 Chunk 2:
Q3 Strategy:
---
 Chunk 3:
Revenue growth in Q2.
---
 Chunk 4:
Q1 sales improved.
---
 Chunk 5:
- Retention rate hit 87% in Q2
---
 Chunk 6:
Q1 Highlights:
---
 Chunk 7:
- Customer satisfaction improved
---
 Chunk 8:
Customer Insights:
---


In [10]:
file_paths = [f for f in os.listdir() if os.path.isfile(f)]
print(" Uploaded Files:", file_paths)

 Uploaded Files: ['test.txt', 'test2.txt']


In [11]:
file_paths = ["test2.txt"]

In [12]:
trace_id = "test-002"

# Ingest new file
ingestor = IngestionAgent()
msg_ingested = ingestor.handle(file_paths, trace_id)

# Build FAISS index
retriever = RetrievalAgent()
retriever.handle(msg_ingested)

user_question = input(" Ask a question based on uploaded test2.txt: ")
query_msg = create_mcp_message("UI", "RetrievalAgent", "QUERY", trace_id, {"query": user_question})
retrieval_response = retriever.handle(query_msg)

responder = LLMResponseAgent()
final_answer = responder.handle(retrieval_response)

# Display
print("\n Final Answer:\n", final_answer["answer"])
print("\n Source Chunks Used:\n")
for i, chunk in enumerate(final_answer["source_chunks"]):
    print(f" Chunk {i+1}:\n{chunk.strip()}\n---")

 Ask a question based on uploaded test2.txt: How is the Q2 performance?


Device set to use cuda:0
Both `max_new_tokens` (=256) and `max_length`(=256) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



 Final Answer:
 Retention rate hit 87% in Q2 Q1 Highlights: - Customer satisfaction improved - Most churn came from Tier-2 cities Customer Insights: - Operational cost reduced by 20%

 Source Chunks Used:

 Chunk 1:
Q2 Performance:
---
 Chunk 2:
Q3 Strategy:
---
 Chunk 3:
- Retention rate hit 87% in Q2
---
 Chunk 4:
Q1 Highlights:
---
 Chunk 5:
- Customer satisfaction improved
---
 Chunk 6:
- Most churn came from Tier-2 cities
---
 Chunk 7:
Customer Insights:
---
 Chunk 8:
- Operational cost reduced by 20%
---


Mistral model is used from now on(Downloaded from huggingface by login)

In [None]:
from huggingface_hub import login

login("hf_.................")
#token is hidden by me

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

model_id = "mistralai/Mistral-7B-Instruct-v0.1"
token = "hf_................."  # same token

tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    token=token
)

mistral_pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Device set to use cuda:0


In [39]:
class LLMResponseAgent:
    def __init__(self, name="LLMResponseAgent"):
        self.name = name
        self.llm = mistral_pipe

    def handle(self, message):
        context = "\n".join(message["payload"]["retrieved_context"])
        query = message["payload"]["query"]

        prompt = (
            f"You are an intelligent business analyst assistant.\n"
            f"Use ONLY the context below to answer the question clearly.\n"
            f"Do not invent new questions. Just provide a direct, helpful answer.\n\n"
            f"Context:\n{context}\n\n"
            f"Question: {query}\n\n"
            f"Answer:"
        )

        result = self.llm(prompt, max_new_tokens=300)
        return {
            "answer": result[0]['generated_text'].replace(prompt, "").strip(),
            "source_chunks": message["payload"]["retrieved_context"]
        }

**Now response will be genrated w.r.t Test_final(3rd test file)**

In [77]:
with open("test_final.txt", "r") as f:
    raw_text = f.read()

docs = [raw_text]
print("Raw text:\n", raw_text)


Raw text:
 
    Q1 Highlights:
    - Revenue increased by 40%
    - Market share grew in Tier-1 cities
    - Launched referral campaigns

    Q2 Performance:
    - Reduced churn by 10%
    - Improved delivery TAT by 25%
    - Introduced voice-based ordering

    Q3 Plans:
    - Launch in 2 new countries
    - Expand into B2B logistics

    Customer Feedback:
    - 90% satisfaction score
    - High praise for delivery speed
    - Referral program well received
    


In [78]:
def parse_txt(path):
    with open(path, encoding='utf-8') as f:
        lines = f.readlines()

    chunks = []
    chunk = ""
    for line in lines:
        # New section heading starts a new chunk (like "Q2 Performance:")
        if line.strip().endswith(":") and chunk:
            chunks.append(chunk.strip())
            chunk = line
        else:
            chunk += line

    if chunk:
        chunks.append(chunk.strip())
    return chunks

In [79]:
ingestor = IngestionAgent()
msg_ingested = ingestor.handle(["/content/test_final.txt"], "test-final")

In [80]:
retriever = RetrievalAgent()
retrieval_response = retriever.handle(msg_ingested)

In [81]:
query_msg = create_mcp_message("UI", "RetrievalAgent", "QUERY", "test-final", {"query": "Summarize the Q2 performance"})
final_answer = responder.handle(retriever.handle(query_msg))
print("Final Answer:\n", final_answer["answer"])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Final Answer:
 The Q2 performance of the business involved reducing churn by 10% and improving delivery TAT by 25%. Additionally, voice-based ordering was introduced.


In [82]:
#  Final Evaluation Test Loop

queries = [
    "What were the Q1 highlights?",
    "How did the company perform in Q2?",
    "What initiatives were taken to reduce churn in Q2?",
    "What operational improvements were introduced in Q2?",
    "Was any new feature launched in Q2?",
    "What are the company’s plans for Q3?",
    "Is the company planning international expansion?",
    "What kind of customer feedback was received?",
    "How did customers respond to the referral program?",
    "Summarize the key insights from Q2 performance and customer feedback."
]

responder = LLMResponseAgent()

for i, q in enumerate(queries, 1):
    print(f"\n Query {i}: {q}")

    query_msg = create_mcp_message("UI", "RetrievalAgent", "QUERY", f"final-test-{i}", {"query": q})
    retrieval_response = retriever.handle(query_msg)

    final_answer = responder.handle(retrieval_response)

    print(f"\n Answer:\n{final_answer['answer']}")

    print("\n Source Chunks:")
    for idx, chunk in enumerate(final_answer["source_chunks"]):
        print(f"\n Chunk {idx+1}:\n{chunk.strip()}")
    print("\n" + "-"*80)


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Query 1: What were the Q1 highlights?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Answer:
Q1 highlights include revenue increased by 40%, market share grew in Tier-1 cities, and referral campaigns were launched.

 Source Chunks:

 Chunk 1:
Q1 Highlights:
    - Revenue increased by 40%
    - Market share grew in Tier-1 cities
    - Launched referral campaigns

 Chunk 2:
Q2 Performance:
    - Reduced churn by 10%
    - Improved delivery TAT by 25%
    - Introduced voice-based ordering

 Chunk 3:
Q3 Plans:
    - Launch in 2 new countries
    - Expand into B2B logistics

 Chunk 4:


 Chunk 5:
Customer Feedback:
    - 90% satisfaction score
    - High praise for delivery speed
    - Referral program well received

--------------------------------------------------------------------------------

 Query 2: How did the company perform in Q2?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Answer:
The company performed well in Q2. They reduced churn by 10% and improved delivery TAT by 25%. Additionally, they introduced voice-based ordering.

 Source Chunks:

 Chunk 1:
Q2 Performance:
    - Reduced churn by 10%
    - Improved delivery TAT by 25%
    - Introduced voice-based ordering

 Chunk 2:
Q3 Plans:
    - Launch in 2 new countries
    - Expand into B2B logistics

 Chunk 3:
Q1 Highlights:
    - Revenue increased by 40%
    - Market share grew in Tier-1 cities
    - Launched referral campaigns

 Chunk 4:
Customer Feedback:
    - 90% satisfaction score
    - High praise for delivery speed
    - Referral program well received

 Chunk 5:


--------------------------------------------------------------------------------

 Query 3: What initiatives were taken to reduce churn in Q2?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Answer:
The company took an initiative to reduce churn in Q2 by reducing it by 10%.

 Source Chunks:

 Chunk 1:
Q2 Performance:
    - Reduced churn by 10%
    - Improved delivery TAT by 25%
    - Introduced voice-based ordering

 Chunk 2:
Q3 Plans:
    - Launch in 2 new countries
    - Expand into B2B logistics

 Chunk 3:
Q1 Highlights:
    - Revenue increased by 40%
    - Market share grew in Tier-1 cities
    - Launched referral campaigns

 Chunk 4:
Customer Feedback:
    - 90% satisfaction score
    - High praise for delivery speed
    - Referral program well received

 Chunk 5:


--------------------------------------------------------------------------------

 Query 4: What operational improvements were introduced in Q2?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Answer:
In Q2, the company introduced voice-based ordering.

 Source Chunks:

 Chunk 1:
Q3 Plans:
    - Launch in 2 new countries
    - Expand into B2B logistics

 Chunk 2:
Q2 Performance:
    - Reduced churn by 10%
    - Improved delivery TAT by 25%
    - Introduced voice-based ordering

 Chunk 3:
Q1 Highlights:
    - Revenue increased by 40%
    - Market share grew in Tier-1 cities
    - Launched referral campaigns

 Chunk 4:
Customer Feedback:
    - 90% satisfaction score
    - High praise for delivery speed
    - Referral program well received

 Chunk 5:


--------------------------------------------------------------------------------

 Query 5: Was any new feature launched in Q2?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Answer:
Yes, voice-based ordering was introduced in Q2.

 Source Chunks:

 Chunk 1:
Q3 Plans:
    - Launch in 2 new countries
    - Expand into B2B logistics

 Chunk 2:
Q2 Performance:
    - Reduced churn by 10%
    - Improved delivery TAT by 25%
    - Introduced voice-based ordering

 Chunk 3:
Q1 Highlights:
    - Revenue increased by 40%
    - Market share grew in Tier-1 cities
    - Launched referral campaigns

 Chunk 4:


 Chunk 5:
Customer Feedback:
    - 90% satisfaction score
    - High praise for delivery speed
    - Referral program well received

--------------------------------------------------------------------------------

 Query 6: What are the company’s plans for Q3?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Answer:
The company plans to launch in 2 new countries and expand into B2B logistics in Q3.

 Source Chunks:

 Chunk 1:
Q3 Plans:
    - Launch in 2 new countries
    - Expand into B2B logistics

 Chunk 2:
Q2 Performance:
    - Reduced churn by 10%
    - Improved delivery TAT by 25%
    - Introduced voice-based ordering

 Chunk 3:
Q1 Highlights:
    - Revenue increased by 40%
    - Market share grew in Tier-1 cities
    - Launched referral campaigns

 Chunk 4:
Customer Feedback:
    - 90% satisfaction score
    - High praise for delivery speed
    - Referral program well received

 Chunk 5:


--------------------------------------------------------------------------------

 Query 7: Is the company planning international expansion?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Answer:
Yes, the company plans to launch in 2 new countries as part of Q3 plans.

 Source Chunks:

 Chunk 1:
Q3 Plans:
    - Launch in 2 new countries
    - Expand into B2B logistics

 Chunk 2:
Q1 Highlights:
    - Revenue increased by 40%
    - Market share grew in Tier-1 cities
    - Launched referral campaigns

 Chunk 3:
Q2 Performance:
    - Reduced churn by 10%
    - Improved delivery TAT by 25%
    - Introduced voice-based ordering

 Chunk 4:
Customer Feedback:
    - 90% satisfaction score
    - High praise for delivery speed
    - Referral program well received

 Chunk 5:


--------------------------------------------------------------------------------

 Query 8: What kind of customer feedback was received?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Answer:
The customer feedback received was a 90% satisfaction score, high praise for delivery speed, and a well-received referral program.

 Source Chunks:

 Chunk 1:
Customer Feedback:
    - 90% satisfaction score
    - High praise for delivery speed
    - Referral program well received

 Chunk 2:
Q2 Performance:
    - Reduced churn by 10%
    - Improved delivery TAT by 25%
    - Introduced voice-based ordering

 Chunk 3:
Q1 Highlights:
    - Revenue increased by 40%
    - Market share grew in Tier-1 cities
    - Launched referral campaigns

 Chunk 4:


 Chunk 5:
Q3 Plans:
    - Launch in 2 new countries
    - Expand into B2B logistics

--------------------------------------------------------------------------------

 Query 9: How did customers respond to the referral program?


Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



 Answer:
Customers responded positively to the referral program as it received high praise in their feedback.

 Source Chunks:

 Chunk 1:
Customer Feedback:
    - 90% satisfaction score
    - High praise for delivery speed
    - Referral program well received

 Chunk 2:
Q1 Highlights:
    - Revenue increased by 40%
    - Market share grew in Tier-1 cities
    - Launched referral campaigns

 Chunk 3:
Q2 Performance:
    - Reduced churn by 10%
    - Improved delivery TAT by 25%
    - Introduced voice-based ordering

 Chunk 4:


 Chunk 5:
Q3 Plans:
    - Launch in 2 new countries
    - Expand into B2B logistics

--------------------------------------------------------------------------------

 Query 10: Summarize the key insights from Q2 performance and customer feedback.

 Answer:
The key insights from Q2 performance and customer feedback are:

1. Churn was reduced by 10%, indicating customer satisfaction with services.
2. Delivery TAT was improved by 25%, highlighting improved logistic