#### Automated Procurement System

In [3]:
# Warning Control
# Silence warnings so the notebook output stays readable.
from warnings import filterwarnings
from pydantic import PydanticDeprecatedSince20

# Blanket filter: every warning category is ignored from this point on.
filterwarnings('ignore')  
# Category-specific filter for Pydantic's "deprecated since 2.0" warnings. It is already
# covered by the blanket filter above, so it only matters if that line is ever removed.
filterwarnings('ignore', category=PydanticDeprecatedSince20)  

In [4]:
from crewai import Agent,Task, Crew, LLM, Process
from crewai_tools import SerperDevTool
from crewai.tools import tool
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
import os
from dotenv import load_dotenv
import agentops
from pydantic import BaseModel, Field
from scrapegraph_py import Client
import json
from IPython.display import HTML, display

In [5]:
# --------- Environment setup ----------

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# One LLM configuration shared by every agent; temperature 0 keeps output as repeatable as possible.
basic_llm = LLM(
    model="gpt-4.1-mini",
    temperature=0,
)

# Initialise AgentOps with the key taken from the environment.
agentops.init(
    skip_auto_end_session=True,
    api_key=os.getenv("AGENTOPS_API_KEY"),
)

# Scraping client used by `web_scraping_tool`; its key also comes from the environment.
scrape_client = Client(api_key=os.getenv("SGAI_API_KEY"))

🖇 AgentOps: [34m[34mYou're on the agentops free plan 🤔[0m[0m


In [6]:
# Directory that receives each task's output file; created if it does not exist yet.
output_dir = "./ai-agent-output"
os.makedirs(output_dir, exist_ok=True)

# Company background handed to the crew as a knowledge source.
company_context = StringKnowledgeSource(
    content=(
        "Rankyx is a company that provides AI solutions "
        "to help websites refine their search and recommendation systems."
    )
)

#### CrewAI Tools

In [7]:
# Search tool instance; it is attached to the search engine agent via its `tools` list.
search_tool = SerperDevTool()   # To search the web 

#### Creating Pydantic Objects

In [8]:
# ---- Pydantic Models (structured outputs) for Task1 ----

# Output schema of the search-queries recommendation task (used as that task's `output_json`).
# Documented with `#` comments on purpose: a class docstring would become part of the JSON schema.
class SuggestedSearchQueries(BaseModel):
    # Required list holding between 1 and 10 query strings (min_length / max_length below).
    queries: list[str] = Field(..., 
                              title = "Suggested Queries",
                              description = "Suggested search queries to be passed to the search engine",
                              min_length=1,
                              max_length=10
                              )

In [9]:
# ---- Nested Pydantic Models (structured outputs) for Task2 ----

# A single search hit. All fields are required.
class SingleSearchResult(BaseModel):
    title: str
    url: str
    content: str
    # Presumably the confidence score the search task compares against `score_th` — confirm.
    score: float
    # Presumably the query that produced this hit — confirm against the agent's output.
    search_query: str


# Backward-compatible alias for the original (misspelled) class name, so any code that
# still refers to `SignleSearchResult` keeps working.
SignleSearchResult = SingleSearchResult


# Wrapper model used as the search task's `output_json` schema.
class AllSearchResults(BaseModel):
    results: list[SingleSearchResult]

In [13]:
# ---- Nested Pydantic Models for Task3 ----
# Documented with `#` comments on purpose: class docstrings would be added to the JSON schema
# that is embedded in the scraping prompt.

# One name/value pair describing a product characteristic.
class ProductSpec(BaseModel):
    specification_name: str
    specification_value: str


# Everything extracted for one product page, plus the agent's own assessment of it.
class SingleExtractedProduct(BaseModel):
    page_url: str = Field(..., title="The original url of the product page")
    product_title: str = Field(..., title="The title of the product")
    product_image_url: str = Field(..., title="The url of the product image")
    product_url: str = Field(..., title="The url of the product")
    # Prices must be non-negative (ge=0).
    product_current_price: float = Field(..., title="The current price of the product", ge=0)
    product_original_price: float = Field(..., title="The original price of the product before discount.", ge=0)
    # Optional: the title tells the model to use None when there is no discount, so the type
    # has to admit None. With a plain `float` annotation an explicit null failed validation.
    product_discount_percentage: float | None = Field(
        default=None,
        title="The discount percentage of the product. Set to None if no discount",
    )

    # Between 1 and 5 specs per product.
    product_specs: list[ProductSpec] = Field(..., title="The specifications of the product. Focus on the most important specs to compare.", min_length=1, max_length=5)

    agent_recommendation_rank: int = Field(..., title="The rank of the product to be considered in the final procurement report. (out of 5, Higher is Better) in the recommendation list ordering from the best to the worst")
    agent_recommendation_notes: list[str] = Field(..., title="A set of notes why would you recommend or not recommend this product to the company, compared to other products.")


# Wrapper model used as the scraping task's `output_json` schema.
class AllExtractedProducts(BaseModel):
    products: list[SingleExtractedProduct]

#### Creating Agents

In [14]:
#-------- Agent 1: suggests search queries ----------------

search_queries_recommendation_agent = Agent(
    role="Search Queries Recommendation Agent",
    goal=(
        "To provide a list of suggested search queries to be passed to the search engine. "
        "The queries must be varied and looking for specific items."
    ),
    backstory=(
        "The agent is designed to help in looking for products by providing a list "
        "of suggested search queries to be passed to the search engine based on "
        "the context provided."
    ),
    verbose=True,
    llm=basic_llm,
)

In [15]:
#-------- Agent 2: runs the web searches ----------------

search_engine_agent = Agent(
    role="Search Engine Agent",
    goal="To search for products based on the suggested search query",
    backstory=(
        "The agent is designed to help in looking for products "
        "by searching for products based on the suggested search queries."
    ),
    tools=[search_tool],  # the only agent that is given the search tool
    verbose=True,
    llm=basic_llm,
)

In [16]:
@tool
def web_scraping_tool(page_url: str):
    """
    An AI Tool to help an agent to scrape a web page.

    Example:
    web_scraping_tool(
        page_url="https://www.amazon.com/Uncanny-Brands-Office-Single-Mifflin/dp/B095QQ5JS4",
    )
    """
    # The docstring above is left verbatim: the tool framework surfaces it to the agent as
    # the "Tool Description", so it is effectively prompt text rather than documentation.

    # Serialise the target schema with the Pydantic v2 API. `schema_json()` is the deprecated
    # v1-style spelling of exactly this call and emits a deprecation warning on every use.
    product_schema = json.dumps(SingleExtractedProduct.model_json_schema())

    # Ask the scraping service to extract data shaped like `SingleExtractedProduct`.
    details = scrape_client.smartscraper(
        website_url=page_url,
        user_prompt="Extract ```json\n" + product_schema + "```\n From the web page",
    )

    # Return the scraped payload together with the URL it came from.
    return {
        "page_url": page_url,
        "details": details,
    }


In [18]:
#-------- Agent 3: scrapes product pages ----------------

scraping_agent = Agent(
    role="Web scraping agent",
    goal="To extract details from any website",
    # Adjacent string literals are concatenated as-is, so the first fragment must end with
    # a space; without it the prompt read "...any website url.These details...".
    backstory=(
        "The agent is designed to help in looking for required values from any website url. "
        "These details will be used to decide which best product to buy."
    ),
    llm=basic_llm,
    tools=[web_scraping_tool],
    verbose=True,
)

In [19]:
#-------- Agent 4: writes the HTML report ----------------

procurement_report_author_agent = Agent(
    role="Procurement Report Author Agent",
    goal="To generate a professional HTML page for the procurement report",
    backstory=(
        "The agent is designed to assist in generating a professional HTML "
        "page for the procurement report after looking into a list of products."
    ),
    verbose=True,
    llm=basic_llm,
)

#### Creating Tasks

In [20]:
#------- Task 1: generate search queries -----------------

search_queries_recommendation_task = Task(
    # The `{placeholders}` are filled from the inputs passed to `kickoff`.
    # Every fragment ends with a sentence terminator and a space: adjacent literals are
    # joined verbatim, and the original prompt ran sentences together
    # ("...queries.Search keywords...", "...technologies.The search query...").
    description=(
        "Rankyx is looking to buy {product_name} at the best prices (value for a price strategy). "
        "The company target any of these websites to buy from: {websites_list}. "
        "The company wants to reach all available products on the internet to be compared "
        "later in another stage. "
        "The stores must sell the product in {country_name}. "
        "Generate at maximum {no_keywords} queries. "
        "Search keywords could mention specific brands, types or technologies. "
        "The search query must reach an ecommerce webpage for product, and not a blog or listing page."
    ),
    expected_output="A JSON object containing a list of suggested search queries.",
    output_json=SuggestedSearchQueries,
    output_file=os.path.join(output_dir, "step_1_suggested_search_queries.json"),
    agent=search_queries_recommendation_agent,
)

In [21]:
#------- Task 2: run the searches and filter results -----------------

search_engine_task = Task(
    agent=search_engine_agent,
    # One instruction per line; `{score_th}` is filled from the kickoff inputs.
    description="\n".join((
        "The task is to search for products based on the suggested search queries.",
        "You have to collect results from multiple search queries.",
        "Ignore any suspicious links or not an ecommerce single product website link.",
        "Ignore any search results with confidence score less than ({score_th})",
        "The search results will be used to compare prices of products from different websites.",
    )),
    expected_output="A JSON object containing the search result.",
    output_json=AllSearchResults,
    output_file=os.path.join(output_dir, "step_2_search_results.json"),
)


In [22]:
#-------- Task 3: scrape the selected product pages ----------------

scraping_task = Task(
    # Adjacent literals are joined verbatim, so each sentence ends with a trailing space;
    # the original prompt glued all three sentences together without any separator.
    # `{top_recommendations_no}` is filled from the kickoff inputs.
    description=(
        "The task is to extract product details from any ecommerce store page url. "
        "The task has to collect results from multiple pages urls. "
        "Collect the best {top_recommendations_no} products from the search results."
    ),
    expected_output="A JSON object containing products details",
    output_json=AllExtractedProducts,
    output_file=os.path.join(output_dir, "step_3_extracted_products.json"),
    agent=scraping_agent,
)

In [23]:
#-------- Task 4: write the HTML procurement report ----------------

procurement_report_author_task = Task(
    agent=procurement_report_author_agent,
    # Instructions followed by the required report outline, one item per line.
    description="\n".join((
        "The task is to generate a professional HTML page for the procurement report.",
        "You have to use Bootstrap CSS framework for a better UI.",
        "Use the provided context about the company to make a specialized report.",
        "The report will include the search results and prices of products from different websites.",
        "The report should be structured with the following sections:",
        "1. Executive Summary: A brief overview of the procurement process and key findings.",
        "2. Introduction: An introduction to the purpose and scope of the report.",
        "3. Methodology: A description of the methods used to gather and compare prices.",
        "4. Findings: Detailed comparison of prices from different websites, including tables and charts.",
        "5. Analysis: An analysis of the findings, highlighting any significant trends or observations.",
        "6. Recommendations: Suggestions for procurement based on the analysis.",
        "7. Conclusion: A summary of the report and final thoughts.",
        "8. Appendices: Any additional information, such as raw data or supplementary materials.",
    )),
    expected_output="A professional HTML page for the procurement report.",
    # No `output_json` here: the result is written to disk as an .html file.
    output_file=os.path.join(output_dir, "step_4_procurement_report.html"),
)

#### Creating the Crew

In [24]:
# Assemble the crew: four agents, four tasks, executed with the sequential process
# in the order the tasks are listed.
rankyx_crew = Crew(
    process=Process.sequential,
    agents=[
        search_queries_recommendation_agent,
        search_engine_agent,
        scraping_agent,
        procurement_report_author_agent,
    ],
    tasks=[
        search_queries_recommendation_task,
        search_engine_task,
        scraping_task,
        procurement_report_author_task,
    ],
    # Company background made available to the crew.
    knowledge_sources=[company_context],
)

#### Running the Crew

In [25]:
# Values substituted into the `{placeholder}` fields of the task descriptions.
rankyx_inputs = {
    "product_name": "coffee machine for the office",
    "websites_list": [
        "www.webstaurantstore.com",
        "www.amazon.com",
        "www.uline.com",
    ],
    "country_name": "USA",
    "no_keywords": 10,
    "score_th": 0.10,
    "top_recommendations_no": 10,
}

# Run the crew with these inputs and keep the returned result.
crew_results = rankyx_crew.kickoff(inputs=rankyx_inputs)

[91m 

I encountered an error while trying to use the tool. This was the error: [402] Insufficient credits.
 Tool web_scraping_tool accepts these inputs: Tool Name: web_scraping_tool
Tool Arguments: {'page_url': {'description': None, 'type': 'str'}}
Tool Description: 
    An AI Tool to help an agent to scrape a web page.

    Example:
    web_scraping_tool(
        page_url="https://www.amazon.com/Uncanny-Brands-Office-Single-Mifflin/dp/B095QQ5JS4",
    )
    
[00m


In [28]:
# Render the generated report inline.
# - The path is built from `output_dir`, matching the report task's `output_file`,
#   instead of repeating the directory as a hard-coded string.
# - `filename=` states explicitly that the argument is a file to read, rather than relying
#   on HTML() guessing whether a positional string is markup or a path.
display(HTML(filename=os.path.join(output_dir, "step_4_procurement_report.html")))

Product Image,Product Title,Supplier,Price (USD),Original Price (USD),Discount (%),Key Specifications,Recommendation Rank
,Blended Reverse Osmosis System with White 10 Gallon Tank 200 GPD,WebstaurantStore,$419.99,$419.99,–,Type: Commercial Coffee Makers / Brewers  Capacity: 2.6 Gallon (10 Liter)  Category: Restaurant & Beverage Equipment  Hot Beverage / Hot Topping Dispenser,3
,Zulay Premium Commercial Coffee Urn,Amazon,$159.99,$169.99,5.88%,Brand: Zulay  Material: Stainless Steel  Size: 100 Cup Capacity  Weight: 20 pounds,5
