In [None]:
"""

"""

In [None]:
!pip install -qU crewai[tools,agentops]

In [None]:
!pip install -qU tavily-python scrapegraph-py

In [None]:
import json
import os
from typing import List, Optional

import agentops
from crewai import Agent, Task, Crew, Process, LLM
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
from crewai.tools import tool
from google.colab import userdata
from pydantic import BaseModel, Field
from scrapegraph_py import Client
from tavily import TavilyClient

In [None]:
# Export the provider credentials read from Colab's secret store.
# The Groq key must be published as upper-case `GROQ_API_KEY`: environment variable
# names are case-sensitive, so the previous `Groq_API_KEY` was never picked up.
os.environ["GROQ_API_KEY"] = userdata.get('groq-colab')
os.environ["AGENTOPS_API_KEY"] = userdata.get('agentops-colab')

# Start AgentOps tracking for this notebook run, tagged as a CrewAI session.
agentops.init(
    api_key=userdata.get('agentops-colab'),
    skip_auto_end_session=True,
    default_tags=['crewai']
)

In [None]:
# Debug aid: print the exporter endpoint the AgentOps client is configured with.
print(agentops.get_client().config.exporter_endpoint)

In [None]:
# Directory that receives the output file of every task; created if missing.
output_dir = "./Agents-output"
os.makedirs(output_dir, exist_ok=True)

# Shared LLM for all agents. The model is hosted on Groq, so its id carries the
# `groq/` provider prefix that LiteLLM uses to route the request; without a prefix
# the provider cannot be resolved. The key is passed explicitly so the LLM does not
# depend on how the environment variable happens to be spelled.
basic_llm = LLM(
    model="groq/meta-llama/llama-4-scout-17b-16e-instruct",
    temperature=0,
    api_key=userdata.get('groq-colab'),
)

# Clients used by the search and scraping tools.
search_client = TavilyClient(api_key=userdata.get('tvly-search'))
scrape_client = Client(api_key=userdata.get('scrapegraph'))

In [None]:
# Upper bound on how many search queries the first agent may suggest.
no_keywords = 5

# Plain-language statement of what the buyer is looking for.
about_personal_goal = (
    "I am searching for details about the RTX 5070 graphics card for my personal use."
)

# The goal statement wrapped as a knowledge source that is handed to the crew.
company_context = StringKnowledgeSource(content=about_personal_goal)

### Setup for building the agents

### **Agent 1**
* search_queries_recommendation_agent

In [None]:
class SuggestedSearchQueries(BaseModel):
    # Output schema of the query-recommendation task: a list holding between one and
    # `no_keywords` search queries.
    # `min_length` / `max_length` are the Pydantic v2 names of the list-size
    # constraints; the v1 spellings `min_items` / `max_items` are deprecated.
    queries: List[str] = Field(
        ...,
        title="Suggested search queries to be passed to the search engine",
        min_length=1,
        max_length=no_keywords,
    )

# ---------------------------------------------------------------
# Agent 1: proposes the search queries that the later steps will run.
search_queries_recommendation_agent = Agent(
    llm=basic_llm,
    verbose=True,
    role="Search Queries Recommendation Agent",
    # The goal is assembled from separate sentences joined by newlines.
    goal="\n".join((
        "To provide a list of suggested search queries to be passed to the search engine.",
        "The queries must be varied and looking for specific items.",
    )),
    backstory=(
        "The agent is designed to help in looking for products by providing a list of suggested search queries to be passed to the search engine based on the context provided."
    ),
)

# Task 1: generate the search queries. The `{...}` placeholders in the description
# are filled from the inputs passed to `kickoff`; the result is constrained to the
# SuggestedSearchQueries schema and written to the step-1 JSON file.
search_queries_recommendation_task = Task(
    agent=search_queries_recommendation_agent,
    description="\n".join((
        "I am looking to buy an RTX 5070 at the best prices (value for price strategy).",
        "I target any of these websites to buy from: {websites_list}.",
        "I want to find all available RTX 5070 products on the internet to compare later.",
        "The stores must sell the product in {country_name}.",
        "Generate at maximum {no_keywords} queries.",
        "The search keywords must be in {language} language.",
        "Search keywords must include specific brands, types, or technologies (e.g., NVIDIA, RTX 5070, GPU). Avoid general keywords.",
        "The search query must lead to an e-commerce webpage for the RTX 5070, not a blog or listing page.",
    )),
    expected_output="A JSON object containing a list of suggested search queries.",
    output_json=SuggestedSearchQueries,
    output_file=os.path.join(output_dir, "step_1_suggested_search_queries.json"),
)

### **Agent 2**
* search_engine_agent

In [None]:
# Pydantic schemas describing the JSON output of the search step.
class SingleSearchResult(BaseModel):
    # One search hit, together with the query that produced it.
    title: str
    url: str = Field(..., title="the page url")
    content: str
    score: float
    search_query: str

# Backward-compatible alias: the class was originally published under this
# misspelled name, which also leaked into the generated JSON schema.
SignleSearchResult = SingleSearchResult

class AllSearchResults(BaseModel):
    # Container for every hit collected across the search queries.
    results: List[SingleSearchResult]

@tool
def search_engine_tool(query: str):
    """Useful for search-based queries. Use this to find current information about any query related pages using a search engine"""
    # The docstring above is kept verbatim: it is runtime text attached to the tool.
    # Forward the query to the shared Tavily client and return its response as-is.
    search_response = search_client.search(query)
    return search_response

# Agent 2: runs the suggested queries through the search tool.
search_engine_agent = Agent(
    llm=basic_llm,
    tools=[search_engine_tool],
    verbose=True,
    role="Search Engine Agent",
    goal="To search for products based on the suggested search query",
    backstory=(
        "The agent is designed to help in looking for products by searching for products based on the suggested search queries."
    ),
)

#-------------------------------------------------------------
# Task 2: collect search hits for every suggested query. `{score_th}` is filled from
# the `kickoff` inputs; the result is constrained to the AllSearchResults schema and
# written to the step-2 JSON file.
# The prompt wording fixes the "susbicious" typo and the broken sentence that followed it.
search_engine_task = Task(
    description="\n".join([
        "The task is to search for products based on the suggested search queries.",
        "You have to collect results from multiple search queries.",
        "Ignore any suspicious links and any link that is not an e-commerce single-product page.",
        "Ignore any search results with confidence score less than ({score_th}).",
        "The search results will be used to compare prices of products from different websites.",
    ]),
    expected_output="A JSON object containing the search results.",
    output_json=AllSearchResults,
    output_file=os.path.join(output_dir, "step_2_search_results.json"),
    agent=search_engine_agent
)

### **Agent 3**
* scraping_agent

In [None]:
class ProductSpec(BaseModel):
    # A single specification entry: the name of the spec paired with its value,
    # both stored as text.
    specification_name: str
    specification_value: str

class SingleExtractedProduct(BaseModel):
    # Schema of one scraped product. It is used as the element type of the scraping
    # task's output and is also embedded in the scraping tool's prompt.
    page_url: str = Field(..., title="The original url of the product page")
    product_title: str = Field(..., title="The title of the product")
    product_image_url: str = Field(..., title="The url of the product image")
    product_url: str = Field(..., title="The url of the product")
    product_current_price: float = Field(..., title="The current price of the product")
    # Optional so that an explicit null is accepted: the title instructs the model to
    # send None when there is no discount, which a plain `float` would reject.
    product_original_price: Optional[float] = Field(title="The original price of the product before discount. Set to None if no discount", default=None)
    product_discount_percentage: float

    # `min_length` / `max_length` are the Pydantic v2 names of the list-size
    # constraints; the v1 spellings `min_items` / `max_items` are deprecated.
    product_specs: List[ProductSpec] = Field(..., title="The specifications of the product. Focus on the most important specs to compare.", min_length=1, max_length=5)

    agent_recommendation_rank: int = Field(..., title="The rank of the product to be considered in the final procurement report. (out of 5, Higher is Better) in the recommendation list ordering from the best to the worst")
    agent_recommendation_notes: List[str]  = Field(..., title="A set of notes why would you recommend or not recommend this product to the company, compared to other products.")


class AllExtractedProducts(BaseModel):
    # Container for every product extracted by the scraping step.
    products: List[SingleExtractedProduct]


@tool
def web_scraping_tool(page_url: str):
    """
    An AI Tool to help an agent to scrape a web page
    """
    # The docstring above is kept verbatim: it is runtime text attached to the tool.
    # `model_json_schema()` is the Pydantic v2 replacement for the deprecated
    # `schema_json()`; dumping it with `json.dumps` produces the schema as JSON text
    # so the scraper is told exactly which fields to extract.
    product_schema = json.dumps(SingleExtractedProduct.model_json_schema())

    details = scrape_client.smartscraper(
        website_url=page_url,
        user_prompt="Extract ```json\n" + product_schema + "```\n From the web page"
    )

    # Return the source URL alongside the scraper's response.
    return {
        "page_url": page_url,
        "details": details
    }

# Agent 3: extracts structured product details from pages via the scraping tool.
scraping_agent = Agent(
    llm=basic_llm,
    tools=[web_scraping_tool],
    verbose=True,
    role="Web scraping agent",
    goal="To extract details from any website",
    backstory=(
        "The agent is designed to help in looking for required values from any website url. These details will be used to decide which best product to buy."
    ),
)

# Task 3: scrape the product pages. `{top_recommendations_no}` is filled from the
# `kickoff` inputs; the result is constrained to the AllExtractedProducts schema.
# NOTE(review): the output file is named "search_results" although this step writes
# extracted products - consider renaming if nothing reads the file by this name.
scraping_task = Task(
    agent=scraping_agent,
    description="\n".join((
        "The task is to extract product details from any ecommerce store page url.",
        "The task has to collect results from multiple pages urls.",
        "Collect the best {top_recommendations_no} products from the search results.",
    )),
    expected_output="A JSON object containing products details",
    output_json=AllExtractedProducts,
    output_file=os.path.join(output_dir, "step_3_search_results.json"),
)

### **Agent 4**
* procurement_report_author_agent

In [None]:
# Agent 4: writes the final HTML report; it is given no tools.
procurement_report_author_agent = Agent(
    llm=basic_llm,
    verbose=True,
    role="Procurement Report Author Agent",
    goal="To generate a professional HTML page for the procurement report",
    backstory=(
        "The agent is designed to assist in generating a professional HTML page for the procurement report after looking into a list of products."
    ),
)

# Task 4: produce the HTML procurement report. The description lists the required
# sections; the output is written to the step-4 HTML file.
procurement_report_author_task = Task(
    agent=procurement_report_author_agent,
    description="\n".join((
        "The task is to generate a professional HTML page for a personal procurement report about the RTX 5070.",
        "You have to use Bootstrap CSS framework for a better UI.",
        "Use the provided context about my goal to find the best RTX 5070 to make a specialized report.",
        "The report will include the search results and prices of RTX 5070 from different websites.",
        "The report should be structured with the following sections:",
        "1. Executive Summary: A brief overview of my RTX 5070 search process and key findings.",
        "2. Introduction: An introduction to the purpose and scope of my RTX 5070 procurement report.",
        "3. Methodology: A description of the methods used to gather and compare RTX 5070 prices.",
        "4. Findings: Detailed comparison of RTX 5070 prices from different websites, including tables and charts.",
        "5. Analysis: An analysis of the findings, highlighting any significant trends or observations about RTX 5070 pricing.",
        "6. Recommendations: Suggestions for purchasing the RTX 5070 based on the analysis.",
        "7. Conclusion: A summary of the report and final thoughts on selecting an RTX 5070.",
        "8. Appendices: Any additional information, such as raw data or supplementary materials.",
    )),
    expected_output="A professional HTML page for my RTX 5070 procurement report.",
    output_file=os.path.join(output_dir, "step_4_rtx_5070_procurement_report.html"),
)

### **Run CrewAI**

In [None]:
# Assemble the crew: four agents whose tasks run one after another in the listed
# order, with the buyer's goal statement attached as a knowledge source.
# NOTE(review): no `embedder` is configured for the knowledge source - confirm the
# default embedding backend works with the credentials available in this notebook.
rankyx_crew = Crew(
    process=Process.sequential,
    knowledge_sources=[company_context],
    agents=[
        search_queries_recommendation_agent,
        search_engine_agent,
        scraping_agent,
        procurement_report_author_agent,
    ],
    tasks=[
        search_queries_recommendation_task,
        search_engine_task,
        scraping_task,
        procurement_report_author_task,
    ],
)

In [None]:
# Run the pipeline. Each key below fills the matching `{placeholder}` in the task
# descriptions.
crew_results = rankyx_crew.kickoff(
    inputs={
        # Aligned with the product every task description targets; the previous value
        # ("coffee machine for the office") contradicted the rest of the notebook.
        "product_name": "RTX 5070 graphics card",
        # Host typo fixed: "wwww.alfrensia.com" -> "www.alfrensia.com".
        "websites_list": ["www.amazon.eg", "www.noon.com/egypt-en", "www.thaher.tech", "www.alfrensia.com/en", "www.compumarts.com"],
        "country_name": "Egypt",
        # Reuse the module constant so the prompt and the schema's size limit agree.
        "no_keywords": no_keywords,
        "language": "English",
        "score_th": 0.50,
        "top_recommendations_no": 5
    }
)