In [1]:
import fitz  # PyMuPDF
import pytesseract
from PIL import Image
import pdfplumber
import pandas as pd
import os
import re
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from pinecone.grpc import PineconeGRPC as Pinecone
from pinecone import ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain.docstore.document import Document
from rank_bm25 import BM25Okapi
from nltk.tokenize import word_tokenize
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import faiss
from sentence_transformers import CrossEncoder, SentenceTransformer
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from peft import LoraConfig, get_peft_model  # LoRA fine-tuning








    







In [23]:
# Step 1: Extract text, images, and tables from PDF
def extract_text_from_pdf(pdf_path):
    """Extract the plain text of every page of a PDF.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        The text of all pages, each page followed by a newline, with leading
        and trailing whitespace stripped from the combined string.
    """
    # The context manager closes the document even if a page fails to parse.
    with fitz.open(pdf_path) as doc:
        page_texts = [page.get_text("text") for page in doc]
    return "".join(f"{page_text}\n" for page_text in page_texts).strip()

def extract_images_from_pdf(pdf_path, output_folder):
    """Save every embedded image of a PDF into ``output_folder``.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.
        output_folder: Directory that receives the image files; created
            (including parents) when it does not exist yet.

    Returns:
        List of written file paths, named
        ``page_<page>_img_<n>.<ext>`` with 1-based page and image numbers.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(output_folder, exist_ok=True)

    image_paths = []
    # Close the document handle when done, even if an image cannot be extracted.
    with fitz.open(pdf_path) as doc:
        for page_num, page in enumerate(doc, start=1):
            for img_index, img in enumerate(page.get_images(full=True), start=1):
                xref = img[0]  # first entry of the image tuple is its cross-reference id
                base_image = doc.extract_image(xref)
                img_path = os.path.join(
                    output_folder,
                    f"page_{page_num}_img_{img_index}.{base_image['ext']}",
                )
                with open(img_path, "wb") as f:
                    f.write(base_image["image"])
                image_paths.append(img_path)
    return image_paths

def perform_ocr_on_images(image_paths):
    """Run Tesseract OCR over a list of image files.

    Args:
        image_paths: Iterable of image file paths.

    Returns:
        List of non-empty, stripped OCR strings, in input order. Images that
        fail to open or to OCR are reported on stdout and skipped.
    """
    ocr_texts = []
    for img_path in image_paths:
        try:
            # Close the image file as soon as OCR is done instead of leaking the handle.
            with Image.open(img_path) as image:
                text = pytesseract.image_to_string(image).strip()
        except Exception as e:
            # Best-effort: one unreadable image must not abort the whole batch.
            print(f"Error processing {img_path}: {e}")
            continue
        if text:
            ocr_texts.append(text)
    return ocr_texts

def extract_tables_from_pdf(pdf_path):
    """Render every table pdfplumber finds in a PDF as a labelled text block.

    Args:
        pdf_path: Path of the PDF file to open with pdfplumber.

    Returns:
        List of strings, one per table, each starting with
        ``Table <n> on Page <p>:`` (both 1-based) followed by the table
        rendered through ``DataFrame.to_string`` without index or header.
    """
    rendered_tables = []
    with pdfplumber.open(pdf_path) as pdf:
        for page_no, page in enumerate(pdf.pages, start=1):
            for table_no, rows in enumerate(page.extract_tables(), start=1):
                body = pd.DataFrame(rows).to_string(index=False, header=False)
                rendered_tables.append(f"Table {table_no} on Page {page_no}:\n{body}\n")
    return rendered_tables

In [24]:

# Example usage
# NOTE(review): these are machine-specific absolute paths; point them at your own files.
pdf_path = r"C:\Users\user\Downloads\health_care\Python_Durga.pdf"
output_folder = r"C:\Users\user\Downloads\health_care\output_image"
# Extract text, images, and tables
text = extract_text_from_pdf(pdf_path)
image_paths = extract_images_from_pdf(pdf_path, output_folder)
ocr_texts = perform_ocr_on_images(image_paths)
table_texts = extract_tables_from_pdf(pdf_path)

# Combine all text data. Every piece is separated by a newline; the previous
# `text + "\n".join(...) + "\n".join(...)` glued the last word of the body text
# to the first OCR string, and the last OCR string to the first table.
all_text = "\n".join([text, *ocr_texts, *table_texts])

In [25]:
# Step 2: Clean and chunk text
def clean_text(text):
    """Flatten extracted text into one whitespace-normalised line.

    Steps, in order:
      1. Re-join words that were split by a hyphen at a line end.
      2. Replace every run of newlines with a single space.
      3. Collapse runs of spaces into one space.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text with no newlines and no leading/trailing whitespace.
    """
    # Must run before newlines are removed, otherwise the "-\n" marker is gone.
    text = re.sub(r'(?<=\w)-\n(?=\w)', '', text)
    # One pass replaces the earlier "\n{3,}" / "\n\n" / "\n" replacements.
    text = re.sub(r'\n+', ' ', text)
    # Newline replacement can leave neighbouring spaces, so collapse afterwards.
    text = re.sub(r' {2,}', ' ', text)
    return text.strip()

def chunk_text(text, chunk_size=600, overlap=100):
    """Split text into overlapping chunks with a recursive character splitter.

    Args:
        text: Text to split.
        chunk_size: Value passed as the splitter's ``chunk_size``.
        overlap: Value passed as the splitter's ``chunk_overlap``.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for ``text``.
    """
    # Separators are tried in this order by the splitter.
    boundaries = ["\n\n", "?", ".", "!", "\n", " "]
    splitter = RecursiveCharacterTextSplitter(
        separators=boundaries,
        chunk_overlap=overlap,
        chunk_size=chunk_size,
    )
    chunks = splitter.split_text(text)
    return chunks

In [26]:


# Clean and chunk the text: flatten the combined PDF text into one line, then
# split it using chunk_text's default arguments.
# NOTE(review): a later cell defines another `chunk_text` with a different
# signature; re-running this cell after that one calls the later definition.
cleaned_text = clean_text(all_text)
text_chunks = chunk_text(cleaned_text)

In [None]:
# Preview the first few chunks instead of dumping the whole list into the output.
text_chunks[:5]

In [5]:
from langchain.embeddings.base import Embeddings
from typing import List

# Custom embeddings class for the fine-tuned model
class FineTunedEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter around any object exposing ``encode``.

    The wrapped model's ``encode`` result must provide ``tolist()``.
    """

    def __init__(self, model):
        # Kept as-is; every embedding call is delegated to this object.
        self.model = model

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Encode a batch of texts and return one vector (list of floats) per text."""
        vectors = self.model.encode(texts)
        return vectors.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Encode one query by sending it as a one-element batch and unwrapping the result."""
        batch = self.model.encode([text]).tolist()
        return batch[0]

# Step 3: Fine-tune the embedding model using LoRA
def fine_tune_embedding_model(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Load a SentenceTransformer, wrap it with LoRA adapters and adapt it for LangChain.

    No training step is run inside this function: it loads the model, passes it
    through ``get_peft_model`` with the LoRA settings below, and wraps the
    result in ``FineTunedEmbeddings``.

    Args:
        model_name: Checkpoint name handed to ``SentenceTransformer``.

    Returns:
        A ``FineTunedEmbeddings`` instance holding the PEFT-wrapped model.
    """
    adapter_settings = dict(
        r=8,                               # rank of the low-rank update
        lora_alpha=16,                     # scaling factor
        target_modules=["key", "value"],   # module names that receive adapters
        lora_dropout=0.1,
        bias="none",
    )
    base_model = SentenceTransformer(model_name)
    peft_model = get_peft_model(base_model, LoraConfig(**adapter_settings))
    return FineTunedEmbeddings(peft_model)

In [29]:
# Initialize the embedding model: builds the LoRA-wrapped embedder using the
# function's default checkpoint name.
embeddings_model = fine_tune_embedding_model()

In [None]:
# Display the wrapper object's repr to confirm it was created.
embeddings_model

In [33]:
# Step 2: Create Pinecone index and store embeddings
def create_pinecone_index(index_name, dimension=384):
    """Create a serverless Pinecone index (cosine metric, AWS ``us-east-1``).

    Args:
        index_name: Name of the index to create.
        dimension: Vector dimension of the index.

    Raises:
        RuntimeError: If the ``PINECONE_API_KEY`` environment variable is unset.
    """
    # SECURITY: the key used to be hard-coded here. Any key that was committed
    # with this notebook must be treated as leaked and rotated.
    api_key = os.getenv("PINECONE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "PINECONE_API_KEY is not set; export it before running this notebook."
        )

    pc = Pinecone(api_key=api_key)
    pc.create_index(
        name=index_name,
        dimension=dimension,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )
    print(f"✅ Pinecone index '{index_name}' created successfully!")

def check_index_exists(index_name):
    """Return True when a Pinecone index called ``index_name`` already exists.

    Args:
        index_name: Index name to look for.

    Returns:
        bool: Whether the name is among the project's index names.

    Raises:
        RuntimeError: If the ``PINECONE_API_KEY`` environment variable is unset.
    """
    # SECURITY: the key used to be hard-coded here. Any key that was committed
    # with this notebook must be treated as leaked and rotated.
    api_key = os.getenv("PINECONE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "PINECONE_API_KEY is not set; export it before running this notebook."
        )

    pc = Pinecone(api_key=api_key)
    # list_indexes() yields index descriptions rather than plain strings, so a
    # bare `index_name in pc.list_indexes()` never matches; compare names instead.
    return index_name in pc.list_indexes().names()

def store_embeddings_in_pinecone(text_chunks, embedding_model, index_name):
    """Wrap each chunk in a ``Document`` and upload them to a Pinecone index.

    Args:
        text_chunks: Iterable of chunk strings.
        embedding_model: Embeddings object handed to the vector store.
        index_name: Name of the target Pinecone index.

    Returns:
        The ``PineconeVectorStore`` returned by ``from_documents``.
    """
    docs = []
    for chunk in text_chunks:
        docs.append(Document(page_content=chunk))

    return PineconeVectorStore.from_documents(
        documents=docs,
        index_name=index_name,
        embedding=embedding_model,
    )

In [None]:
# Create Pinecone index and store embeddings: the index is created only when
# check_index_exists reports it missing, then every chunk is uploaded.
# NOTE(review): the later generation cell reads from an index named
# "durga-update", not this one — confirm which index is intended.
index_name = "durga-one"
if not check_index_exists(index_name):
    create_pinecone_index(index_name)
else:
    print(f"Index '{index_name}' already exists. Skipping creation.")
docsearch = store_embeddings_in_pinecone(text_chunks, embeddings_model, index_name)

In [None]:
import os
from sentence_transformers import SentenceTransformer
from rank_bm25 import BM25Okapi
from nltk.tokenize import word_tokenize
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import faiss
from langchain_pinecone import PineconeVectorStore
from langchain.docstore.document import Document

# Step 5: Multi-queue retrieval with BM25, dense embeddings, and HYDE
def multi_queue_retrieval(query, bm25, embedder, retrieved_docs, top_k=5):
    """Re-score retrieved documents with BM25 plus two dense signals and keep the best.

    Three per-document scores are min-max normalised and blended with weights
    0.4 (BM25), 0.4 (query embedding) and 0.2 ("HYDE" prompt embedding).

    Args:
        query: The user question.
        bm25: BM25 model whose ``get_scores`` returns one score per document.
            It must have been built over ``retrieved_docs`` in the same order,
            because scores are combined by position.
        embedder: Object whose ``encode`` returns embeddings for a string or a
            list of strings.
        retrieved_docs: Documents exposing ``page_content``.
        top_k: Maximum number of documents to return.

    Returns:
        Up to ``top_k`` documents ordered by descending hybrid score; an empty
        list when ``retrieved_docs`` is empty.
    """
    if not retrieved_docs:
        # Nothing to rank; also avoids building a FAISS index from an empty matrix.
        return []

    retrieved_texts = [doc.page_content for doc in retrieved_docs]
    n_docs = len(retrieved_texts)

    # BM25 scores (already in document order)
    bm25_scores = np.asarray(bm25.get_scores(word_tokenize(query.lower())), dtype="float64")

    # FAISS index over the document embeddings; FAISS works on float32 matrices
    doc_embeddings = np.asarray(embedder.encode(retrieved_texts), dtype="float32")
    index = faiss.IndexFlatL2(doc_embeddings.shape[1])
    index.add(doc_embeddings)

    def _dense_scores(text):
        """Return exp(-L2 distance) for every document, in document order."""
        vector = np.asarray(embedder.encode(text), dtype="float32").reshape(1, -1)
        # search() returns (distances, indices), both sorted by rank. The old code
        # threw the distances away and exponentiated the indices.
        distances, indices = index.search(vector, k=n_docs)
        scores = np.zeros(n_docs, dtype="float64")
        # Scatter rank-ordered scores back to document positions so they line up
        # with the BM25 scores.
        scores[indices[0]] = np.exp(-distances[0])
        return scores

    def _minmax(values):
        """Scale a 1-D score vector into [0, 1]."""
        return MinMaxScaler().fit_transform(np.asarray(values).reshape(-1, 1)).flatten()

    dense_scores = _dense_scores(query)

    # "HYDE" signal: this embeds the instruction prompt itself; no hypothetical
    # document is generated by an LLM here.
    hyde_prompt = f"Generate a hypothetical document that answers the question: {query}"
    hyde_scores = _dense_scores(hyde_prompt)

    # Combine normalised scores
    hybrid_scores = (
        0.4 * _minmax(bm25_scores)
        + 0.4 * _minmax(dense_scores)
        + 0.2 * _minmax(hyde_scores)
    )

    # sorted() is stable, so ties keep their original retrieval order.
    ranked = sorted(zip(retrieved_docs, hybrid_scores), key=lambda pair: pair[1], reverse=True)
    return [doc for doc, _ in ranked[:top_k]]

# Initialize the embedding model for multi-queue retrieval.
# If loading the primary checkpoint raises, the error is printed and a second
# checkpoint is loaded instead (a failure of the fallback is not caught).
try:
    # Primary checkpoint
    embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
except Exception as e:
    print(f"Error loading the embedding model: {e}")
    # Fallback to a different model if the primary one fails
    embedder = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")

# Perform the initial retrieval from the vector store created earlier (`docsearch`)
# using MMR search with the parameters below.
# NOTE(review): `retriever` is rebound to a PineconeVectorStore in a later cell;
# code that uses `retriever` depends on which cell ran last.
query = "write a program to add two numbers with list comprehension?"
retriever = docsearch.as_retriever(search_type="mmr", search_kwargs={"k": 15, "fetch_k": 50, "lambda_mult": 0.7})
retrieved_docs = retriever.invoke(query)

# Initialize BM25 over the lower-cased, tokenised retrieved documents, in the
# same order as `retrieved_docs`.
bm25 = BM25Okapi([word_tokenize(doc.page_content.lower()) for doc in retrieved_docs])

# Perform multi-queue retrieval (default top_k)
top_docs = multi_queue_retrieval(query, bm25, embedder, retrieved_docs)

# Print the top documents with a 1-based rank
for i, doc in enumerate(top_docs):
    print(f"Top {i+1} Document: {doc.page_content}\n")

In [49]:
# Step 6: Rerank documents using a cross-encoder
def rerank_documents(query, retrieved_docs, top_k=2, model_name="cross-encoder/ms-marco-MiniLM-L-6-v2"):
    """Rerank documents with a cross-encoder and keep the best ``top_k``.

    Args:
        query: The user question.
        retrieved_docs: Documents exposing ``page_content``.
        top_k: Number of documents to keep (default 2, the previous fixed value).
        model_name: Cross-encoder checkpoint to load (default is the previous
            fixed checkpoint).

    Returns:
        Up to ``top_k`` documents ordered by descending cross-encoder score;
        an empty list when ``retrieved_docs`` is empty.
    """
    if not retrieved_docs:
        # Skip loading the model when there is nothing to score.
        return []

    reranker = CrossEncoder(model_name)
    pairs = [(query, doc.page_content) for doc in retrieved_docs]
    scores = reranker.predict(pairs)
    # argsort is ascending; reverse for best-first, then truncate.
    ranked_indices = np.argsort(scores)[::-1][:top_k]
    return [retrieved_docs[i] for i in ranked_indices]

In [50]:
# Rerank the hybrid-scored documents with the cross-encoder.
reranked_docs = rerank_documents(query, top_docs)

In [None]:
# Inspect the reranked documents.
reranked_docs

In [None]:
from sentence_transformers import InputExample, losses, SentenceTransformer
from torch.utils.data import DataLoader

# 1. Load the reranker as a CrossEncoder.
# The checkpoint is a cross-encoder and a later cell reloads the saved model with
# CrossEncoder("./reranker-finetuned"), so it has to be trained and saved as a
# CrossEncoder too. Loading it through SentenceTransformer would train a
# bi-encoder instead, without the classification head.
reranker_name = "cross-encoder/ms-marco-MiniLM-L-6-v2"
reranker = CrossEncoder(reranker_name, num_labels=1)

# 2. Prepare the dataset: (query, passage) pairs labelled 1.0 (relevant) or 0.0 (not relevant).
train_examples = [
    InputExample(texts=["What is Python?", "Python is a high-level programming language."], label=1.0),
    InputExample(texts=["How do you create a list in Python?", "You can create a list using square brackets."], label=1.0),
    InputExample(texts=["What is Python?", "Java is an object-oriented programming language."], label=0.0),
    # Add more examples
]

train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)

# 3. Training schedule.
# Warm up over 10% of the optimisation steps; a fixed 100 steps was larger than
# the whole run for a dataset of this size.
num_epochs = 3
warmup_steps = int(0.1 * len(train_dataloader) * num_epochs)

# 4. Fine-tune the reranker. CrossEncoder.fit uses its own default loss for a
# single-label model, so no separate loss object is needed.
reranker.fit(
    train_dataloader=train_dataloader,
    epochs=num_epochs,
    warmup_steps=warmup_steps,
    output_path="./reranker-finetuned"
)

# Save explicitly so the directory exists even though no evaluator is configured.
reranker.save("./reranker-finetuned")

print("✅ Reranker Fine-Tuning Complete!")


In [None]:
# 🚀 Install required libraries
# pip install transformers datasets peft accelerate torch

import torch
from transformers import (
    AutoTokenizer, AutoModelForSeq2SeqLM,
    DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer
)
from datasets import Dataset
from peft import LoraConfig, get_peft_model

# 1. Load tokenizer and base LLM
model_name = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 2. Automatically detect CPU or CUDA
device = "cuda" if torch.cuda.is_available() else "cpu"

# 3. Load the model with the precision matching the device
# NOTE(review): on CUDA the weights are loaded in float16 and the trainer below
# also enables fp16 mixed precision — confirm this combination trains stably.
if device == "cuda":
    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_name, torch_dtype=torch.float16, device_map="auto"
    )
else:
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# 4. LoRA configuration
lora_config = LoraConfig(
    r=8,  # Low-rank adaptation size
    lora_alpha=16,  # Scaling factor
    target_modules=["q", "v"],  # Apply LoRA to attention layers
    lora_dropout=0.1,
    bias="none",
    # Declare the task so PEFT wraps the model with its seq2seq-specific class
    # rather than the generic wrapper.
    task_type="SEQ_2_SEQ_LM",
)

# Wrap the base model with LoRA adapters
model = get_peft_model(model, lora_config)

# 5. Prepare training and validation data
data = [
    {"input_text": "What is Python?", "output_text": "Python is a high-level programming language."},
    {"input_text": "How do you create a list in Python?", "output_text": "You can create a list using square brackets, like `my_list = [1, 2, 3]`."},
    {"input_text": "What is recursion in Python?", "output_text": "Recursion is when a function calls itself until it reaches a base case."},
]

# Split: the first two examples train, the last one evaluates
# (2/1 for this three-item list, not 80%/20%).
train_data = data[:2]
eval_data = data[2:]

train_dataset = Dataset.from_dict({
    "input_text": [d["input_text"] for d in train_data],
    "output_text": [d["output_text"] for d in train_data]
})

eval_dataset = Dataset.from_dict({
    "input_text": [d["input_text"] for d in eval_data],
    "output_text": [d["output_text"] for d in eval_data]
})

#  **6. Tokenize the Dataset**
def preprocess_function(examples):
    """Tokenize a batch of ``input_text`` / ``output_text`` pairs for seq2seq training.

    No padding is applied here. The ``DataCollatorForSeq2Seq`` passed to the
    trainer pads each batch dynamically and fills label padding with -100, so
    padded positions are ignored by the loss. Padding labels to 512 with the
    tokenizer's pad id (the previous behaviour) made the loss count padding.

    Args:
        examples: Batch dict with ``input_text`` and ``output_text`` lists.

    Returns:
        The tokenized inputs with an added ``labels`` entry holding the target
        token ids.
    """
    model_inputs = tokenizer(examples["input_text"], max_length=512, truncation=True)
    targets = tokenizer(examples["output_text"], max_length=512, truncation=True)
    model_inputs["labels"] = targets["input_ids"]
    return model_inputs

# Tokenize both splits in batched mode with preprocess_function.
tokenized_train_dataset = train_dataset.map(preprocess_function, batched=True)
tokenized_eval_dataset = eval_dataset.map(preprocess_function, batched=True)

# 7. Training configuration for the LoRA-wrapped model.
# NOTE(review): with the two training examples defined above, the 100-step
# save/eval intervals and the 50-step logging interval are presumably never
# reached — confirm once a real dataset is used.
training_args = Seq2SeqTrainingArguments(
    output_dir="./flan-t5-lora",
    per_device_train_batch_size=1,  # Reduced batch size to avoid OOM errors
    gradient_accumulation_steps=8,  # Simulate a larger batch size
    num_train_epochs=3,
    save_steps=100,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=50,
    evaluation_strategy="steps",  # Evaluate every `eval_steps` steps
    eval_steps=100,  # Run evaluation every 100 steps
    predict_with_generate=True,
    fp16=(device == "cuda"),  # Enable fp16 for GPU only
)

# The seq2seq collator receives the tokenizer and model and builds the batches.
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,  # Provided eval dataset
    tokenizer=tokenizer,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
)

# 8. Train the model
trainer.train()

# 9. Save the fine-tuned model; a later cell loads it from this directory.
trainer.save_model("./flan-t5-lora-finetuned")
print(" Model Fine-Tuned with LoRA and Saved Successfully!")

In [None]:
import numpy as np
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
from langchain.docstore.document import Document
from langchain_pinecone import PineconeVectorStore  # Import Pinecone Retriever
from sentence_transformers import CrossEncoder, SentenceTransformer  # Import SentenceTransformer for embeddings
from typing import List
import os

# Custom wrapper for SentenceTransformer to provide `embed_query` method
class SentenceTransformerEmbeddings:
    """Minimal embeddings wrapper around a ``SentenceTransformer`` model.

    Provides both ``embed_query`` and ``embed_documents`` so the object can be
    used for querying a vector store as well as for adding documents to it,
    matching the interface of ``FineTunedEmbeddings`` in this notebook.

    NOTE(review): this class has the same name as the
    ``SentenceTransformerEmbeddings`` imported from LangChain at the top of the
    notebook and replaces it once this cell has run.
    """

    def __init__(self, model_name: str):
        """Load the SentenceTransformer checkpoint called ``model_name``."""
        self.model = SentenceTransformer(model_name)

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string and return its vector as a list of floats."""
        return self.model.encode(text).tolist()

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a list of texts and return one vector per text.

        Returns an empty list for empty input without calling the model.
        """
        texts = list(texts)
        if not texts:
            return []
        return self.model.encode(texts).tolist()

# Load the fine-tuned LLM from the directory written by the LoRA training cell
fine_tuned_llm = AutoModelForSeq2SeqLM.from_pretrained("./flan-t5-lora-finetuned")
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
text_generator = pipeline("text2text-generation", model=fine_tuned_llm, tokenizer=tokenizer)
llm = HuggingFacePipeline(pipeline=text_generator)

# Load the fine-tuned reranker
fine_tuned_reranker = CrossEncoder("./reranker-finetuned")

# Load a separate embedding model for Pinecone
embedding_model = SentenceTransformerEmbeddings('all-MiniLM-L6-v2')  # Use the custom wrapper

# Ensure the Pinecone API key is set.
# SECURITY: the key used to be hard-coded here. Any key that was committed with
# this notebook must be treated as leaked and rotated.
if not os.getenv("PINECONE_API_KEY"):
    raise RuntimeError("PINECONE_API_KEY is not set; export it before running this cell.")

# Initialize the Pinecone vector store.
# NOTE: despite its name, `retriever` is a PineconeVectorStore; callers obtain
# an actual retriever via `retriever.as_retriever()`.
index_name = "durga-update"  # Replace with your index name
retriever = PineconeVectorStore.from_existing_index(index_name=index_name, embedding=embedding_model)

# Define the prompt template
prompt_template = PromptTemplate(
    input_variables=["context", "query"],
    template="""You are an AI assistant that answers Python programming questions using retrieved context.
    - Think step-by-step using logical reasoning (Chain-of-Thought).
    - Use relevant examples when needed.
    - Prioritize clarity and accuracy.

    Context: {context}
    Question: {query}
    Answer: Let's think step-by-step:
    """
)

# Generate response using the fine-tuned LLM
def generate_response(query: str, reranked_docs: List[Document]) -> str:
    """Answer ``query`` with the fine-tuned LLM using the reranked documents as context.

    Args:
        query: The user question.
        reranked_docs: Documents whose ``page_content`` is joined (blank line
            between documents) to form the prompt context.

    Returns:
        The text produced by running the module-level ``llm`` with
        ``prompt_template``.
    """
    context = "\n\n".join(doc.page_content for doc in reranked_docs)

    # The chain formats the prompt itself; the separately formatted prompt
    # string that used to be built here was never used.
    llm_chain = LLMChain(llm=llm, prompt=prompt_template)
    return llm_chain.run({"context": context, "query": query})

# Rerank documents using the fine-tuned reranker
def rerank_documents(query: str, retrieved_docs: List[Document], top_k: int = 2) -> List[Document]:
    """Rerank documents with the module-level fine-tuned cross-encoder.

    NOTE(review): this redefines the earlier ``rerank_documents`` in this
    notebook; after this cell runs, the name refers to this version.

    Args:
        query: The user question.
        retrieved_docs: Documents exposing ``page_content``.
        top_k: Number of documents to keep (default 2, the previous fixed value).

    Returns:
        Up to ``top_k`` documents ordered by descending reranker score; an
        empty list when ``retrieved_docs`` is empty.
    """
    if not retrieved_docs:
        # Nothing to score; avoid calling predict on an empty batch.
        return []

    pairs = [(query, doc.page_content) for doc in retrieved_docs]
    scores = fine_tuned_reranker.predict(pairs)
    # argsort is ascending; reverse for best-first, then truncate.
    ranked_indices = np.argsort(scores)[::-1][:top_k]
    return [retrieved_docs[i] for i in ranked_indices]

# Example usage: retrieve from the existing Pinecone index, rerank, then generate.
query = "write a program to add two numbers with list comprehension?"
# `retriever` is the PineconeVectorStore built above, hence the `.as_retriever()` call.
retrieved_docs = retriever.as_retriever().invoke(query)
reranked_docs = rerank_documents(query, retrieved_docs)
response = generate_response(query, reranked_docs)

print("🤖 AI Response:", response)

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from langchain import PromptTemplate, LLMChain, HuggingFacePipeline
from typing import List

def chunk_text(text, max_tokens=400):
    """Split text on whitespace into chunks that stay within a size budget.

    The budget is measured in characters (each word's length plus one for the
    separating space), not in model tokens, despite the parameter name.

    NOTE(review): this redefines the earlier ``chunk_text(text, chunk_size,
    overlap)`` in this notebook; after this cell runs, the name refers to this
    version.

    Args:
        text: Text to split.
        max_tokens: Size budget per chunk, in the character-based unit above.

    Returns:
        List of chunks, each a space-joined run of words. A single word longer
        than the budget becomes its own chunk. Empty input yields ``[]``.
    """
    chunks = []
    current_chunk = []
    current_length = 0

    for word in text.split():
        word_length = len(word) + 1  # Add 1 for the space
        # Flush only when there is something to flush; otherwise an oversized
        # first word would emit an empty chunk.
        if current_chunk and current_length + word_length > max_tokens:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_length = 0
        current_chunk.append(word)
        current_length += word_length

    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks

def generate_response(query: str, reranked_docs: List[Document]) -> str:
    """Generate a response using an LLM with few-shot examples and CoT reasoning.

    The joined document text is split with ``chunk_text``; the model answers
    the query once per context chunk and the answers are joined with spaces.

    NOTE(review): this redefines the earlier ``generate_response`` in this
    notebook and loads the base ``google/flan-t5-large`` checkpoint, not the
    fine-tuned model directory.

    Args:
        query: The user question.
        reranked_docs: Documents whose ``page_content`` forms the context.

    Returns:
        The combined answer text, or an empty string when the documents
        contain no text.
    """
    # Prepare the context first so the model is not loaded when there is nothing to answer.
    context = "\n\n".join([doc.page_content for doc in reranked_docs])
    context_chunks = chunk_text(context, max_tokens=400)
    if not context_chunks:
        return ""

    model_name = "google/flan-t5-large"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    text_generator = pipeline("text2text-generation", model=model, tokenizer=tokenizer)
    llm = HuggingFacePipeline(pipeline=text_generator)

    # Define few-shot examples with CoT reasoning
    few_shot_examples = """
    Example 1:
    Context: Python is a high-level programming language.
    Q: What is Python?
    A: Let's think step-by-step:
       1. Python is described as a "high-level programming language."
       2. High-level languages are known for their simplicity and readability.
       3. Python is widely used in web development, data analysis, and AI.
       Therefore, Python is a high-level programming language known for its simplicity and readability, widely used in web development, data analysis, and AI.

    Example 2:
    Context: Lists in Python are ordered, mutable collections of items.
    Q: How do you create a list in Python?
    A: Let's think step-by-step:
       1. Lists in Python are created using square brackets `[]`.
       2. Items are separated by commas.
       3. For example, `my_list = [1, 2, 3]` creates a list with three integers.
       Therefore, you can create a list in Python using square brackets, like `my_list = [1, 2, 3]`.
    """

    # Define the prompt template with few-shot examples and CoT reasoning.
    # Doubled braces survive the f-string as {context}/{query} placeholders.
    prompt_template = PromptTemplate(
        input_variables=["context", "query"],
        template=f"""You are an AI assistant that answers Python programming questions using retrieved context.
        - Think step-by-step using logical reasoning (Chain-of-Thought).
        - Use relevant examples when needed.
        - Prioritize clarity and accuracy.

        Here are some examples of how to answer Python programming questions:
        {few_shot_examples}

        Now, answer the following Python programming question using the provided context:
        Context: {{context}}
        Question: {{query}}
        Answer: Let's think step-by-step:
        """
    )

    # Build the chain once; it formats the prompt itself for every chunk.
    llm_chain = LLMChain(llm=llm, prompt=prompt_template)
    responses = [
        llm_chain.run({"context": chunk, "query": query})
        for chunk in context_chunks
    ]

    # Combine responses from all chunks
    combined_response = " ".join(responses)

    # Post-process the response to remove unwanted phrases
    cleaned_response = combined_response.replace("If unsure, say 'I don't know'.", "").strip()
    return cleaned_response

# Example usage: reuses `query` and `reranked_docs` left in the namespace by the
# previous cell.
response = generate_response(query, reranked_docs)
print(response)

In [16]:
import os

# The wrapper class is imported here because this cell runs before the cell
# that imports it; without the import a fresh kernel raises NameError.
from langchain.utilities import GoogleSerperAPIWrapper

# Require the Serper API key from the environment.
# SECURITY: the key used to be hard-coded here. Any key that was committed with
# this notebook must be treated as leaked and rotated.
if not os.getenv("SERPER_API_KEY"):
    raise RuntimeError("SERPER_API_KEY is not set; export it before running this cell.")

# Initialize the GoogleSerperAPIWrapper
google_search = GoogleSerperAPIWrapper()

In [None]:
from langchain.agents import initialize_agent, Tool, AgentType
from langchain.tools import BaseTool
from langchain.utilities import GoogleSerperAPIWrapper, WikipediaAPIWrapper
from langchain_experimental.tools import PythonREPLTool
from langchain.chains import LLMMathChain
from typing import List, Dict
from langchain.docstore.document import Document
import os

# Require the Serper API key for Google Search from the environment.
# SECURITY: the key used to be hard-coded here. Any key that was committed with
# this notebook must be treated as leaked and rotated.
if not os.getenv("SERPER_API_KEY"):
    raise RuntimeError("SERPER_API_KEY is not set; export it before running this cell.")

# ✅ **Custom tool for retrieval**
class RetrievalTool(BaseTool):
    """Tool that looks up documents for a query string through the notebook's vector store."""

    name: str = "retrieval_tool"
    description: str = "Retrieve relevant documents from Pinecone based on the query."

    def _run(self, tool_input: str) -> List[Document]:
        """Return the documents found for ``tool_input``.

        Uses the module-level ``retriever`` object and calls ``as_retriever()``
        on it for every lookup.
        """
        return retriever.as_retriever().invoke(tool_input)

    def _arun(self, tool_input: str):
        """Asynchronous execution is not implemented; always raises."""
        raise NotImplementedError("Async not supported")

# ✅ **Custom tool for reranking**
class RerankTool(BaseTool):
    """Tool that reranks retrieved documents through ``rerank_documents``."""

    name: str = "rerank_tool"
    description: str = "Rerank retrieved documents using a fine-tuned cross-encoder."

    def _run(self, tool_input: Dict = None, **kwargs) -> List[Document]:
        """Rerank documents using the fine-tuned reranker.

        Accepts the payload either as one dictionary (``_run({...})``) or as
        keyword arguments (``_run(query=..., retrieved_docs=...)``). The keyword
        form is needed because ``BaseTool.run`` expands a dictionary input into
        keyword arguments before calling ``_run``.

        Args:
            tool_input: Dictionary with ``query`` and ``retrieved_docs``.
            **kwargs: The same keys passed individually; they override
                ``tool_input`` entries of the same name.

        Returns:
            The reranked documents.

        Raises:
            TypeError: If ``tool_input`` is not a dictionary and no keyword
                arguments were given.
            ValueError: If ``query`` or ``retrieved_docs`` is missing or empty.
        """
        if tool_input is None and kwargs:
            tool_input = {}
        if not isinstance(tool_input, dict):
            raise TypeError(f"Expected dictionary input, got {type(tool_input)}")

        payload = {**tool_input, **kwargs}
        query = payload.get("query", "")
        retrieved_docs = payload.get("retrieved_docs", [])

        if not query or not retrieved_docs:
            raise ValueError("Both 'query' and 'retrieved_docs' must be provided.")

        return rerank_documents(query, retrieved_docs)

    def _arun(self, tool_input: Dict):
        """Asynchronous execution is not implemented; always raises."""
        raise NotImplementedError("Async not supported")

# ✅ **Custom tool for response generation**
class ResponseGenerationTool(BaseTool):
    """Tool that produces the final answer through ``generate_response``."""

    name: str = "response_generation_tool"
    description: str = "Generate a response using the fine-tuned LLM with Chain-of-Thought reasoning."

    def _run(self, tool_input: Dict = None, **kwargs) -> str:
        """Generate a response for a query and its reranked documents.

        Accepts the payload either as one dictionary (``_run({...})``) or as
        keyword arguments (``_run(query=..., reranked_docs=...)``). The keyword
        form is needed because ``BaseTool.run`` expands a dictionary input into
        keyword arguments before calling ``_run``.

        Args:
            tool_input: Dictionary with ``query`` and ``reranked_docs``.
            **kwargs: The same keys passed individually; they override
                ``tool_input`` entries of the same name.

        Returns:
            The generated answer text.

        Raises:
            TypeError: If ``tool_input`` is not a dictionary and no keyword
                arguments were given.
            ValueError: If ``query`` or ``reranked_docs`` is missing or empty.
        """
        if tool_input is None and kwargs:
            tool_input = {}
        if not isinstance(tool_input, dict):
            raise TypeError(f"Expected dictionary input, got {type(tool_input)}")

        payload = {**tool_input, **kwargs}
        query = payload.get("query", "")
        reranked_docs = payload.get("reranked_docs", [])

        if not query or not reranked_docs:
            raise ValueError("Both 'query' and 'reranked_docs' must be provided.")

        return generate_response(query, reranked_docs)

    def _arun(self, tool_input: Dict):
        """Asynchronous execution is not implemented; always raises."""
        raise NotImplementedError("Async not supported")

# Instantiate the three custom tools defined above.
retrieval_tool = RetrievalTool()
rerank_tool = RerankTool()
response_generation_tool = ResponseGenerationTool()

# Instantiate the built-in helpers. `llm` is the module-level LLM created in an
# earlier cell.
# NOTE(review): the Python REPL tool executes code it is handed — confirm that
# exposing it to the agent is intended.
google_search = GoogleSerperAPIWrapper()
wikipedia = WikipediaAPIWrapper()
python_repl = PythonREPLTool()
llm_math = LLMMathChain(llm=llm)

# Tool list handed to the agent: four function-backed tools followed by the
# three custom tool instances.
tools = [
    Tool(
        name="Google Search",
        func=google_search.run,
        description="Search Google for real-time information."
    ),
    Tool(
        name="Wikipedia",
        func=wikipedia.run,
        description="Fetch general knowledge from Wikipedia."
    ),
    Tool(
        name="Python REPL",
        func=python_repl.run,
        description="Execute Python code snippets."
    ),
    Tool(
        name="Calculator",
        func=llm_math.run,
        description="Perform complex mathematical calculations."
    ),
    retrieval_tool,
    rerank_tool,
    response_generation_tool
]

# Build a zero-shot ReAct agent over the tools. The example below calls the
# custom tools directly and does not invoke `agent`.
agent = initialize_agent(
    tools=tools,
    llm=llm,  # Module-level LLM from the earlier cell
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True
)

# Example query
query = "write a program to add two numbers with list comprehension?"

# Run the pipeline by hand: retrieve, rerank, generate. The last two tools are
# given a dictionary payload.
retrieved_docs = retrieval_tool.run(query)
reranked_docs = rerank_tool.run({"query": query, "retrieved_docs": retrieved_docs})  # dictionary input
response = response_generation_tool.run({"query": query, "reranked_docs": reranked_docs})  # dictionary input

print("🤖 AI Response:", response)