# Knowledge Sources in CrewAI

## String knowledge source

In [28]:
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource

# Define the knowledge
policy_text = """Our return policy allows customers to return any product within 30 days of purchase.
                 Refunds will be issued only if the item is unused and in original packaging.
                 Customers must provide proof of purchase when requesting a return."""

# Create a StringKnowledgeSource object
return_policy_knowledge = StringKnowledgeSource(content=policy_text)


In [29]:
from crewai import LLM
import os

#llm = LLM(model="gpt-4o")
llm = LLM(model="gemini/gemini-2.0-flash", verbose=True, temperature=0.5,
          api_key=os.getenv("GEMINI_API_KEY"))


In [30]:
from crewai import Agent

returns_agent = Agent(
    role="Product Returns Assistant",
    goal="Answer customer questions about return policy accurately.",
    backstory="You work in customer service and specialize in returns, refunds, and policies.",
    allow_delegation=False,
    verbose=True,
    llm=llm
)


In [31]:
from crewai import Task

returns_task = Task(
    description="Answer the following customer question about returns: {question}",
    expected_output="A concise and accurate answer.",
    agent=returns_agent
)


In [33]:
from crewai import Crew, Process

crew = Crew(
    agents=[returns_agent],
    tasks=[returns_task],
    process=Process.sequential,
    knowledge_sources=[return_policy_knowledge],  # This is key
    verbose=True
)

print(crew)

name=None cache=True tasks=[Task(description=Answer the following customer question about returns: {question}, expected_output=A concise and accurate answer.)] agents=[Agent(role=Product Returns Assistant, goal=Answer customer questions about return policy accurately., backstory=You work in customer service and specialize in returns, refunds, and policies.)] process=<Process.sequential: 'sequential'> verbose=True memory=False memory_config=None short_term_memory=None long_term_memory=None entity_memory=None user_memory=None embedder=None usage_metrics=None manager_llm=None manager_agent=None function_calling_llm=None config=None id=UUID('f5a03090-0615-4f6d-b6fe-f0cb81c16820') share_crew=False step_callback=None task_callback=None before_kickoff_callbacks=[] after_kickoff_callbacks=[] max_rpm=None prompt_file=None output_log_file=None planning=False planning_llm=None task_execution_output_json_files=None execution_logs=[] knowledge_sources=[StringKnowledgeSource(chunk_size=4000, chunk_o

In [34]:
result = crew.kickoff(inputs={
    "question": "Can I get a refund if I used the item once?"
})

from pprint import pprint
pprint(result.raw)

[1m[95m# Agent:[00m [1m[92mProduct Returns Assistant[00m
[95m## Task:[00m [92mAnswer the following customer question about returns: Can I get a refund if I used the item once?[00m




[1m[95m# Agent:[00m [1m[92mProduct Returns Assistant[00m
[95m## Final Answer:[00m [92m
According to our return policy, refunds are issued only if the item is unused and in original packaging. Since you have used the item once, it does not meet this condition, and a refund cannot be issued.[00m




('According to our return policy, refunds are issued only if the item is '
 'unused and in original packaging. Since you have used the item once, it does '
 'not meet this condition, and a refund cannot be issued.')


## Text Knowledge Source

In [35]:
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource

text_source = TextFileKnowledgeSource(
    file_paths=["hr_policy.txt"]
)

In [36]:
from crewai import Agent, Task, Crew, Process, LLM

llm = LLM(model="gpt-4o")

hr_agent = Agent(
    role="HR Policy Assistant",
    goal="Answer employee questions about HR policies.",
    backstory="You're a reliable HR knowledge assistant.",
    knowledge_sources=[text_source],
    llm=llm
)

task = Task(
    description="What is the leave policy for new employees?",
    expected_output="A clear summary of the leave policy.",
    agent=hr_agent
)


In [37]:
crew = Crew(
    agents=[hr_agent],
    tasks=[task],
    process=Process.sequential,
    verbose=True
)

result = crew.kickoff()
pprint(result.raw)


[1m[95m# Agent:[00m [1m[92mHR Policy Assistant[00m
[95m## Task:[00m [92mWhat is the leave policy for new employees?[00m




[1m[95m# Agent:[00m [1m[92mHR Policy Assistant[00m
[95m## Final Answer:[00m [92m
The leave policy for new employees at your company is as follows: All full-time employees, including new hires, are entitled to 21 days of paid leave annually. However, new employees become eligible to take this leave only after completing their first 30 days of employment. In case of sickness, employees must inform their absence before 10 AM on the same day. If a leave exceeds 3 consecutive days, it must be supported by appropriate documentation. Additionally, remote work is permitted up to 3 days a week with managerial approval.[00m




('The leave policy for new employees at your company is as follows: All '
 'full-time employees, including new hires, are entitled to 21 days of paid '
 'leave annually. However, new employees become eligible to take this leave '
 'only after completing their first 30 days of employment. In case of '
 'sickness, employees must inform their absence before 10 AM on the same day. '
 'If a leave exceeds 3 consecutive days, it must be supported by appropriate '
 'documentation. Additionally, remote work is permitted up to 3 days a week '
 'with managerial approval.')


## PDF source

In [38]:
from crewai.knowledge.source.pdf_knowledge_source import PDFKnowledgeSource

pdf_source = PDFKnowledgeSource(
    file_paths=["meeting_notes.pdf"]
)

CropBox missing from /Page, defaulting to MediaBox


CropBox missing from /Page, defaulting to MediaBox


In [40]:
meeting_summarizer = Agent(
    role="Meeting Note Summarizer",
    goal="Provide concise summaries of weekly meetings.",
    backstory="You help the team stay updated on discussions.",
    knowledge_sources=[pdf_source],
    llm=llm
)

task = Task(
    description="Summarize the key action items from last week's meeting.",
    expected_output="A bullet-point list of action items.",
    agent=meeting_summarizer
)


In [41]:
crew = Crew(
    agents=[meeting_summarizer],
    tasks=[task],
    process=Process.sequential,
    verbose=True
)

result = crew.kickoff()
pprint(result.raw)


[1m[95m# Agent:[00m [1m[92mMeeting Note Summarizer[00m
[95m## Task:[00m [92mSummarize the key action items from last week's meeting.[00m




[1m[95m# Agent:[00m [1m[92mMeeting Note Summarizer[00m
[95m## Final Answer:[00m [92m
- Product: Finalize Q2 roadmap (due April 20)
- DS: Conduct feasibility study on CrewAI (due April 18)
- Eng: Migrate ML serving infra to Kubernetes (due May 31)[00m




('- Product: Finalize Q2 roadmap (due April 20)\n'
 '- DS: Conduct feasibility study on CrewAI (due April 18)\n'
 '- Eng: Migrate ML serving infra to Kubernetes (due May 31)')


## CSV source

In [42]:
from crewai.knowledge.source.csv_knowledge_source import CSVKnowledgeSource

csv_source = CSVKnowledgeSource(
    file_paths=["feedback.csv"]
)

In [44]:
feedback_analyst = Agent(
    role="User Feedback Analyst",
    goal="Identify common themes in user feedback.",
    backstory="You specialize in converting raw feedback into insights.",
    knowledge_sources=[csv_source],
    llm=llm
)

task = Task(
    description="What are the three most common complaints users had last month?",
    expected_output="A short list of recurring issues.",
    agent=feedback_analyst
)

In [45]:
crew = Crew(
    agents=[feedback_analyst],
    tasks=[task],
    process=Process.sequential,
    verbose=True
)

result = crew.kickoff()
pprint(result.raw)


[1m[95m# Agent:[00m [1m[92mUser Feedback Analyst[00m
[95m## Task:[00m [92mWhat are the three most common complaints users had last month?[00m




[1m[95m# Agent:[00m [1m[92mUser Feedback Analyst[00m
[95m## Final Answer:[00m [92m
1. The user interface (UI) is perceived as slow on mobile devices.
2. Notifications are too frequent and lack customization options.
3. Difficulty in locating the export option within reports.[00m




('1. The user interface (UI) is perceived as slow on mobile devices.\n'
 '2. Notifications are too frequent and lack customization options.\n'
 '3. Difficulty in locating the export option within reports.')


## JSON source

In [46]:
from crewai.knowledge.source.json_knowledge_source import JSONKnowledgeSource

json_source = JSONKnowledgeSource(
    file_paths=["company_info.json"]
)

In [47]:
company_expert = Agent(
    role="Company Info Specialist",
    goal="Answer questions about company structure and data.",
    backstory="You are an internal data assistant for org-level queries.",
    # knowledge_sources=[json_source],
    llm=llm
)

task = Task(
    description="How many teams are working on the product and what are their names?",
    expected_output="A list of team names and their sizes.",
    agent=company_expert
)


In [48]:
crew = Crew(
    agents=[company_expert],
    tasks=[task],
    process=Process.sequential,
    verbose=True,
    knowledge_sources=[json_source]
)

result = crew.kickoff()
print(result)


[1m[95m# Agent:[00m [1m[92mCompany Info Specialist[00m
[95m## Task:[00m [92mHow many teams are working on the product and what are their names?[00m




[1m[95m# Agent:[00m [1m[92mCompany Info Specialist[00m
[95m## Final Answer:[00m [92m
The Product department at TechNova Inc. has two teams working on the product:
1. UX Team with 5 members.
2. Analytics Team with 3 members.[00m




The Product department at TechNova Inc. has two teams working on the product:
1. UX Team with 5 members.
2. Analytics Team with 3 members.


## Custom embedding model

In [55]:
ollama_embedder = {
    "provider": "ollama",
    "config": {
        "model": "nomic-embed-text",  # Must match or be compatible with Ollama's supported embedding models
        "api_url": "http://localhost:11434"
    }
}

In [56]:
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource

# Internal onboarding FAQ
faq_content = """
- You can access your email via portal.company.com using your employee credentials.
- The standard work hours are from 9am to 6pm, Monday to Friday.
- All reimbursement requests must be submitted by the 5th of the following month.
- For any IT-related issues, contact support@company.com.
"""

# Create a string knowledge source
faq_knowledge = StringKnowledgeSource(content=faq_content, embedder=ollama_embedder)


In [57]:
from crewai import Agent

hr_faq_agent = Agent(
    role="HR Assistant",
    goal="Answer onboarding-related questions for new hires.",
    backstory="You are a helpful assistant who knows everything about internal policies and onboarding processes.",
    allow_delegation=False,
    verbose=True,
    embedder=ollama_embedder
)


In [58]:
from crewai import Task

task = Task(
    description="Answer this onboarding question: {question}",
    expected_output="A short, accurate answer based on internal HR documentation.",
    agent=hr_faq_agent,
    embedder=ollama_embedder
)


In [64]:
from crewai import Crew, Process

crew = Crew(
    agents=[hr_faq_agent],
    tasks=[task],
    knowledge_sources=[faq_knowledge],
    embedder=ollama_embedder,
    process=Process.sequential,
    verbose=True
)

result = crew.kickoff(inputs={
    "question": "What are the working hours and how do I get reimbursed?"
})

from pprint import pprint
pprint(result.raw)


[91m 
[2025-04-13 08:34:28][ERROR]: Embedding dimension mismatch. This usually happens when mixing different embedding models. Try resetting the collection using `crewai reset-memories -a`[00m
[93m 


[1m[95m# Agent:[00m [1m[92mWeather Reporter[00m
[95m## Task:[00m [92mWhat is the current temperature and wind speed in San Francisco?[00m




[1m[95m# Agent:[00m [1m[92mWeather Reporter[00m
[95m## Final Answer:[00m [92m
The current temperature in San Francisco is 12.7°C with a wind speed of 11.7 km/h. The weather code indicates clear conditions. This update is as of April 13, 2025, at 02:45.[00m




('The current temperature in San Francisco is 12.7°C with a wind speed of 11.7 '
 'km/h. The weather code indicates clear conditions. This update is as of '
 'April 13, 2025, at 02:45.')


# Custom knowledge source

In [60]:
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from typing import Dict, Any
from pydantic import Field
import requests

class WeatherKnowledgeSource(BaseKnowledgeSource):
    """Knowledge source that fetches weather data from an external API."""

    city: str = Field(description="City for which weather should be fetched")

    def load_content(self) -> Dict[Any, str]:
        try:
            print(f"Fetching weather for {self.city}...")

            # Open-Meteo API (no key needed for basic data)
            endpoint = "https://api.open-meteo.com/v1/forecast"
            params = {
                "latitude": 37.77,  # San Francisco by default
                "longitude": -122.42,
                "current_weather": True
            }

            response = requests.get(endpoint, params=params)
            response.raise_for_status()

            weather_data = response.json().get("current_weather", {})
            formatted = self.validate_content(weather_data)
            return {self.city: formatted}

        except Exception as e:
            raise ValueError(f"Failed to fetch weather data: {str(e)}")

    def validate_content(self, data: dict) -> str:
        if not data:
            return "No weather data available."

        return (
            f"Current weather in {self.city}:\n"
            f"- Temperature: {data.get('temperature')}°C\n"
            f"- Wind Speed: {data.get('windspeed')} km/h\n"
            f"- Weather Code: {data.get('weathercode')}\n"
            f"- Time: {data.get('time')}"
        )

    def add(self) -> None:
        """Process and chunk the content."""
        content = self.load_content()
        for _, text in content.items():
            chunks = self._chunk_text(text)
            self.chunks.extend(chunks)
        self._save_documents()


In [61]:
from crewai import Agent, LLM

weather_knowledge = WeatherKnowledgeSource(city="San Francisco")

weather_agent = Agent(
    role="Weather Reporter",
    goal="Answer questions about the current weather forecast.",
    backstory="You are a friendly meteorologist who provides real-time weather updates.",
    knowledge_sources=[weather_knowledge],
    llm=LLM(model="gpt-4o", temperature=0.0),
    verbose=True
)


In [62]:
from crewai import Task, Crew, Process

task = Task(
    description="What is the current temperature and wind speed in San Francisco?",
    expected_output="A concise weather summary for San Francisco.",
    agent=weather_agent
)

crew = Crew(
    agents=[weather_agent],
    tasks=[task],
    process=Process.sequential,
    verbose=True
)


In [63]:
result = crew.kickoff()
print(result)


Fetching weather for San Francisco...


[1m[95m# Agent:[00m [1m[92mWeather Reporter[00m
[95m## Task:[00m [92mWhat is the current temperature and wind speed in San Francisco?[00m




[1m[95m# Agent:[00m [1m[92mWeather Reporter[00m
[95m## Final Answer:[00m [92m
The current weather in San Francisco as of April 13, 2025, at 02:45 is as follows: The temperature is 12.7°C, and the wind speed is 11.7 km/h.[00m




The current weather in San Francisco as of April 13, 2025, at 02:45 is as follows: The temperature is 12.7°C, and the wind speed is 11.7 km/h.
