In [1]:
import os 
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
from crewai import Crew,Task,Agent

In [3]:
! pip install google-generativeai langchain_google_genai -q
! pip install -U duckduckgo-search -q


In [4]:
! pip install crewai pypdf langchain sentence-transformers faiss-cpu -q
# For GPU support: pip install faiss-gpu

In [None]:
from crewai import Crew
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import os

class PDFProcessor:
    def __init__(self, pdf_path):
        self.pdf_path = pdf_path
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
    
    def load_and_chunk(self):
        """Load PDF and split into chunks"""
        loader = PyPDFLoader(self.pdf_path)
        pages = loader.load()
        
        # Configure text splitter
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size = 1000,
            chunk_overlap = 200,
            length_function = len
        )
        
        return text_splitter.split_documents(pages)
    
    def create_vectorstore(self, chunks, db_name="pdf_faiss_db"):
        """Create and save FAISS vector store"""
        vectorstore = FAISS.from_documents(
            documents = chunks,
            embedding  = self.embeddings
        )
        vectorstore.save_local(db_name)
        return vectorstore

# Usage
processor = PDFProcessor("The_GALE_ENCYCLOPEDIA_of_MEDICINE_SECOND.pdf")
chunks = processor.load_and_chunk()
vectorstore = processor.create_vectorstore(chunks)

  self.embeddings = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


- Step 3: Integrate with CrewAI Agents

In [None]:
from crewai import Agent, Task, Crew
from langchain.vectorstores import FAISS

# 1. First create and save your FAISS vectorstore (as shown in previous examples)
# Assume vectorstore is already created and loaded

# 2. Create a retrieval tool/function
def retrieve_from_faiss(topic: str, k: int = 4):
    """Custom retrieval function for FAISS"""
    return vectorstore.similarity_search(topic, k=k)

# 3. Create the agent with custom tools
retriever_agent = Agent(
    role="Information Retrieval Specialist",
    goal="Find relevant information from the PDF database",
    backstory="Expert in semantic search and information retrieval",
    tools=[],  # We'll add our custom retrieval function to the task
    verbose=True
)

# 4. Create task with proper configuration
research_task = Task(
    description="Find relevant information about {topic} from our documents",
    agent=retriever_agent,
    expected_output="A comprehensive summary of the most relevant information",
    # Remove async_execution or set it to True/False only
    async_execution=False,  # This must be boolean
    # Use context parameter to pass our retrieval function
    context=[
        {
            "function": retrieve_from_faiss,
            "description": "Retrieves relevant document chunks about a topic",
            "arguments": ["topic"]
        }
    ]
)

# 5. Create and run the crew
crew = Crew(
    agents=[retriever_agent],
    tasks=[research_task],
    verbose=2
)

# Execute with a query
result = crew.kickoff(inputs={"topic": "machine learning"})
print(result)

ValidationError: 1 validation error for Task
async_execution
  Input should be a valid boolean [type=bool_type, input_value=<function <lambda> at 0x0000029046EB6700>, input_type=function]
    For further information visit https://errors.pydantic.dev/2.11/v/bool_type

In [None]:
from crewai import Agent, Task, Crew

# 1. Define retrieval function
def retrieve_docs(topic: str):
    results = vectorstore.similarity_search(topic, k=4)
    return results

# 2. Create agent
retriever_agent = Agent(
    role="Information Retrieval Specialist",
    goal="Find relevant information from the PDF database",
    backstory="Expert in semantic search and information retrieval",
    verbose=True
)

# 3. Create task with context
research_task = Task(
    description="Find relevant information about {topic} from our documents",
    agent=retriever_agent,
    expected_output="A comprehensive summary of the most relevant information",
    context=[{
        "source": retrieve_docs,
        "description": "FAISS document retriever",
        "arguments": ["topic"]
    }]
)

# 4. Create and run crew
crew = Crew(
    agents=[retriever_agent],
    tasks=[research_task],
    verbose=2
)

result = crew.kickoff(inputs={"topic": "machine learning"})
print(result)

In [None]:
from crewai import Agent, Task, Crew
from crewai_tools import tool
from langchain.vectorstores import FAISS

# 1. Create a tool for FAISS retrieval
@tool("FAISS Document Retriever")
def faiss_retriever(topic: str) -> str:
    """Retrieves relevant document chunks about a topic from FAISS database"""
    results = vectorstore.similarity_search(topic, k=4)
    return "\n\n".join([doc.page_content for doc in results])

# 2. Create agent with the tool
retriever_agent = Agent(
    role="Information Retrieval Specialist",
    goal="Find relevant information from the PDF database",
    backstory="Expert in semantic search and information retrieval",
    tools=[faiss_retriever],  # Add our FAISS tool
    verbose=True
)

# 3. Create task (no async_execution needed for this approach)
research_task = Task(
    description="Find relevant information about {topic} from our documents",
    agent=retriever_agent,
    expected_output="A comprehensive summary of the most relevant information",
    # async_execution=False,  # Optional, defaults to False
)

# 4. Create and run crew
crew = Crew(
    agents=[retriever_agent],
    tasks=[research_task],
    verbose=2
)

result = crew.kickoff(inputs={"topic": "machine learning"})
print(result)