In [2]:
!pip install crewai langchain-community langchain-openai langchain-google-genai requests duckduckgo-search lancedb -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m180.8/180.8 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.2/50.2 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m50.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.5/51.5 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m44.0 MB/s[0m eta [36m0:

In [1]:
from crewai import Agent, Task, Crew, Process
from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.retrievers import BaseRetriever
from langchain_openai import OpenAIEmbeddings
from langchain_community.embeddings import LlamafileEmbeddings
from langchain.tools import tool
from langchain_community.document_loaders import WebBaseLoader
import requests, os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import LanceDB
from langchain_community.tools import DuckDuckGoSearchRun



In [2]:
# NewsAPI credential; the news tool reads it back with os.getenv("NEWSAPI_KEY").
# The value below is a placeholder and has to be replaced with a real key.
os.environ.update(NEWSAPI_KEY="ENTER YOUR KEY")

In [3]:

# @title LLM Config

# LLM selects the provider branch taken in the next cell; API_KEY is exported
# there as OPENAI_API_KEY or GOOGLE_API_KEY depending on that choice.
LLM = "OPENAI_GPT4"  # @param ["OPENAI_GPT4", "GEMINI_PRO"]
API_KEY = "sk-*************"  # @param {type:"string"} ## ENTER YOUR KEY

In [4]:
# Export the provider's API key and build the chat model plus its embedding
# backend. Later cells read the module-level names `llm` and
# `embedding_function`, so every branch must bind both of them.
if LLM == "OPENAI_GPT4":
    os.environ["OPENAI_API_KEY"] = API_KEY
    embedding_function = OpenAIEmbeddings()
    llm = ChatOpenAI(model="gpt-4-turbo-preview")
elif LLM == "GEMINI_PRO":
    os.environ["GOOGLE_API_KEY"] = API_KEY
    # Same variable name as the OpenAI branch: the tools look up
    # `embedding_function`, not a provider-specific name.
    embedding_function = LlamafileEmbeddings()
    llm = ChatGoogleGenerativeAI(model="gemini-pro")
else:
    # Fail here instead of with a NameError on `llm` several cells later.
    raise ValueError(
        f"Unsupported LLM {LLM!r}; expected 'OPENAI_GPT4' or 'GEMINI_PRO'."
    )

In [5]:
import lancedb
def create_lance_db_connection(db_path):
    """Open a LanceDB connection for the database at ``db_path``.

    Args:
        db_path: Location handed unchanged to ``lancedb.connect``.

    Returns:
        The connection object produced by ``lancedb.connect``.
    """
    connection = lancedb.connect(db_path)
    return connection

# Shared LanceDB connection; the news tools below index into and search through it.
LANCEDB_PATH = "/tmp/lancedb"
db_connection = create_lance_db_connection(LANCEDB_PATH)

def index_documents(documents, embedding_function, db_connection):
    """Index ``documents`` in LanceDB and return the resulting vector store.

    Args:
        documents: Documents forwarded to ``LanceDB.from_documents``.
        embedding_function: Embedding backend passed as ``embedding``.
        db_connection: LanceDB connection passed as ``connection``.

    Returns:
        The vector store built by ``LanceDB.from_documents``.
    """
    return LanceDB.from_documents(
        documents,
        embedding=embedding_function,
        connection=db_connection,
    )

# Embed a sample query and wrap the vector in a one-row dataset.
# The embedding backend configured in the LLM-selection cell is reused so this
# cell follows the chosen provider instead of always requiring an OpenAI key.
embedding = embedding_function
emb = embedding.embed_query("hello_world")
dataset = [{"vector": emb, "text": "dummy_text"}]

In [7]:
class SearchNewsDB:
    """Holder for the "News DB Tool".

    The tool requests TechCrunch headlines from NewsAPI, loads every article
    page, splits the text into chunks, indexes the chunks in LanceDB and
    returns the chunks most similar to the query.
    """

    @tool("News DB Tool")
    def news(query: str):
        """Fetch news articles and process their contents."""
        # The docstring above is left untouched on purpose: it is presumably
        # what `@tool` exposes to the agent as the tool description.
        #
        # Returns the similarity-search hits on success, or a short message
        # string when the headlines cannot be fetched or yield no content.

        # Named so it cannot be confused with the module-level LLM API_KEY.
        news_api_key = os.getenv("NEWSAPI_KEY")
        base_url = "https://newsapi.org/v2/top-headlines"
        params = {
            "sources": "techcrunch",
            "sortBy": "publishedAt",
            "apiKey": news_api_key,
            "language": "en",
            "pageSize": 15,
        }

        # A timeout keeps a stalled connection from blocking the agent forever;
        # network errors are reported the same way as a non-200 answer.
        try:
            response = requests.get(base_url, params=params, timeout=30)
        except requests.RequestException:
            return "Failed to retrieve news."
        if response.status_code != 200:
            return "Failed to retrieve news."

        try:
            articles = response.json().get("articles") or []
        except ValueError:
            # Body was not valid JSON.
            return "Failed to retrieve news."

        # The splitter is loop-invariant, so build it once.
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        all_splits = []
        for article in articles:
            url = article.get("url")
            if not url:
                continue
            try:
                docs = WebBaseLoader(url).load()
            except requests.RequestException:
                # One unreachable article should not discard the others.
                continue
            all_splits.extend(text_splitter.split_documents(docs))

        if not all_splits:
            return "No content available for processing."

        vector_store = index_documents(all_splits, embedding_function, db_connection)
        return vector_store.similarity_search(query)

In [8]:
# Look up news chunks that were already indexed in LanceDB
class GetNews:
    """Holder for the "Get News Tool", which queries the shared LanceDB store."""

    @tool("Get News Tool")
    def news(query: str) -> str:
        """Search LanceDB for relevant news information based on a query."""
        # The docstring above is left untouched on purpose: it is presumably
        # what `@tool` exposes to the agent as the tool description.
        vectorstore = LanceDB(embedding=embedding_function, connection=db_connection)
        matches = vectorstore.similarity_search(query)
        if not matches:
            return "No content available for processing."

        # Honour the declared `-> str` contract: hand the agent readable text
        # (source URL plus chunk body) rather than a list of Document objects.
        return "\n\n".join(
            f"Source: {doc.metadata.get('source', 'unknown')}\n{doc.page_content}"
            for doc in matches
        )

In [9]:
# DuckDuckGo web-search tool handed to the writer agent and task below.
# Requires the `duckduckgo-search` package, which the pip cell at the top installs.

search_tool = DuckDuckGoSearchRun()

In [10]:
# Search agent: works through the News DB tool and produces key points per article
news_search_tools = [SearchNewsDB().news]
news_search_agent = Agent(
    llm=llm,
    tools=news_search_tools,
    role="AI News Searcher",
    goal="Generate key points for each news article from the latest news",
    backstory="""You work at a leading tech think tank.
  Your expertise lies in identifying emerging trends in field of AI.
  You have a knack for dissecting complex data and presenting
  actionable insights.""",
    verbose=True,
    allow_delegation=True,
)

# Writer agent: checks topics with the Get News tool and explores them via web search
writer_tools = [GetNews().news, search_tool]
writer_agent = Agent(
    llm=llm,
    tools=writer_tools,
    role="Writer",
    goal="Identify all the topics received. Use the Get News Tool to verify the each topic to search. Use the Search tool for detailed exploration of each topic. Summarise the retrieved information in depth for every topic.",
    backstory="""You are a renowned Content Strategist, known for
  your insightful and engaging articles.
  You transform complex concepts into compelling narratives.""",
    verbose=True,
    allow_delegation=True,
)

In [11]:
# Task for the search agent: analyse the latest AI news and list key points
news_search_task_tools = [SearchNewsDB().news]
news_search_task = Task(
    agent=news_search_agent,
    tools=news_search_task_tools,
    description="""Conduct a comprehensive analysis of the latest advancements in AI in 2024.
  Identify key trends, breakthrough technologies, and potential industry impacts.
  Your final answer MUST be a full analysis report""",
    expected_output="Create key points list for each news",
)

# Task for the writer agent: receives the search task's output as context
# and condenses every headline into a short summary
writer_task_tools = [GetNews().news, search_tool]
writer_task = Task(
    agent=writer_agent,
    tools=writer_task_tools,
    context=[news_search_task],
    description="""Using the insights provided, summaries each post of them
  highlights the most significant AI advancements.
  Your post should be informative yet accessible, catering to a tech-savvy audience.
  Make it sound cool, avoid complex words so it doesn't sound like AI.
  Your final answer MUST not be the more than 50 words.""",
    expected_output="Write a short summary under 50 words for each news Headline seperately",
)

In [12]:
# Assemble the crew; with a sequential process the tasks run in the listed order.
# NOTE(review): manager_llm is presumably only consulted by hierarchical
# processes — confirm against the CrewAI docs before removing it.
crew_agents = [news_search_agent, writer_agent]
crew_tasks = [news_search_task, writer_task]
news_crew = Crew(
    manager_llm=llm,
    process=Process.sequential,
    tasks=crew_tasks,
    agents=crew_agents,
)

In [13]:
# Display the crew's repr to confirm the agents and tasks were registered
news_crew

Crew(id=c9809d2d-1437-4bfe-bba1-b0c688c2b6ac, process=sequential, number_of_agents=2, number_of_tasks=2)

In [14]:
# Execute the crew to see RAG in action; the crew's output is kept in `result`
result = news_crew.kickoff()

[1m[95m# Agent:[00m [1m[92mAI News Searcher[00m
[95m## Task:[00m [92mConduct a comprehensive analysis of the latest advancements in AI in 2024.
  Identify key trends, breakthrough technologies, and potential industry impacts.
  Your final answer MUST be a full analysis report[00m


[1m[95m# Agent:[00m [1m[92mAI News Searcher[00m
[95m## Using tool:[00m [92mNews DB Tool[00m
[95m## Tool Input:[00m [92m
"{\"query\": \"AI advancements 2024\"}"[00m
[95m## Tool Output:[00m [92m
[Document(metadata={'description': 'We’ve all seen them. The inspector with a clipboard, walking around a building, ticking off the last time the fire extinguishers were checked, or if all', 'language': 'en-US', 'source': 'https://techcrunch.com/2024/05/08/checkfirst-raises-1-5m-pre-seed-applying-ai-to-remote-inspections-and-audits/', 'title': 'Checkfirst raises $1.5M pre-seed to apply AI to remote inspections and audits | TechCrunch'}, page_content='AI\n\n\nMistral releases new AI models opt

In [15]:
# Display the output returned by news_crew.kickoff()
result

CrewOutput(raw="1. Checkfirst's AI transforms remote audits, making them quicker and smarter with $1.5M funding.\n2. Altera's AI players, backed by Eric Schmidt, bring $9M to redefine gaming with smarter challenges.\n3. Google DeepMind's AlphaFold unveils a game-changing update, speeding up drug discovery with its proteomics service.", pydantic=None, json_dict=None, tasks_output=[TaskOutput(description='Conduct a comprehensive analysis of the latest advancements in AI in 2024.\n  Identify key trends, breakthrough technologies, and potential industry impacts.\n  Your final answer MUST be a full analysis report', name=None, expected_output='Create key points list for each news', summary='Conduct a comprehensive analysis of the latest advancements in AI...', raw="After analyzing the recent articles on AI advancements in 2024, the following key points were identified:\n\n1. **Checkfirst's Application of AI in Remote Inspections and Audits**\n   - **Trend/Technology:** Checkfirst has raised