In [1]:
import os
import getpass

def _set_env(var: str):
    if not os.environ.get(var):
        os.environ[var] = getpass.getpass(f"var: ")

_set_env("OPENAI_API_KEY")

In [2]:
try:
    import nest_asyncio
    nest_asyncio.apply()
    print("Async environment configured for Jupyter.")
except ImportError:
    print("Please install nest_asyncio with `pip install nest_asyncio`")

Async environment configured for Jupyter.


In [3]:
grocery_list_deals_url = "https://www.ralphs.com/pr/shop-all-promotions"

In [8]:
import requests
from bs4 import BeautifulSoup
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def get_page_content(url: str) -> str:
    max_chars = 12000
    try:
        # Configure retry strategy
        session = requests.Session()
        retries = Retry(
            total=5,  # Total number of retries
            backoff_factor=1,  # Wait 1, 2, 4, 8, 16 seconds between retries
            status_forcelist=[500, 502, 503, 504]  # HTTP status codes to retry on
        )
        
        # Mount the adapter with retry strategy for both http and https
        adapter = HTTPAdapter(max_retries=retries)
        session.mount('http://', adapter)
        session.mount('https://', adapter)
        
        # Add headers to mimic a browser
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        
        # Make request with increased timeout and headers
        response = session.get(url, timeout=(30, 30), headers=headers)
        response.raise_for_status()  # Raise exception for bad status codes
        
        soup = BeautifulSoup(response.content, "html.parser")
        text = soup.get_text(separator=" ", strip=True)
        return text[:max_chars]
    except requests.exceptions.RequestException as e:
        print(f"Error fetching {url}: {str(e)}")
        return ""
    except Exception as e:
        print(f"Unexpected error processing {url}: {str(e)}")
        return ""

# page_content = get_page_content(grocery_list_deals_url)
# print(page_content)

In [9]:
def read_file(file_path: str) -> str:
    with open(file_path, "r") as file:
        return file.read()

page_content = read_file("grocery-raw-data.md")
print(page_content)

Skip to content
Ralphs logo
Shop
Save
Pickup & Delivery
Services
Pharmacy & Health


Sign In

 Pickup
 at
 Bermuda Dunes
Digital Coupons
Weekly Ad
Meal Planning
Recipes
Blog
Gift Cards
Credit Card
Find a Store

Digital Coupons

Shop Weekly Ad

Fuel Points

Cash Back
This Week’s Top Deals
Build your cart with this week’s best deals & top offers.

Shop Top Deals
Weekly Digital Deals
Buy 5, Save $1 Each
Locked In Low
Top Coupons Picked for You
View All


Save $5.00 on 3 Nestle Multi Brands
Save $500 on 3
Save $5.00 on 3 Nestle Multi Brands
Exp. Jan. 30
Shop All Items
Sign In To Clip
Expiring Soon

Save $1.50 on Private Selection Fresh Sliced Deli Meat or Cheese
Save $150
Save $1.50 on Private Selection Fresh Sliced Deli Meat or Cheese
Exp. Jan. 22
Shop All Items
Sign In To Clip

SAVE $1.00 on 2 Pillsbury™ Refrigerated and Frozen
Save $100 on 2
SAVE $1.00 on 2 Pillsbury™ Refrigerated and Frozen
Exp. Feb. 15
Shop All Items
Sign In To Clip

SAVE $1.00 on 2 Old El Paso™ Products
Save $100 on 

In [11]:
from autogen_ext.models.openai import OpenAIChatCompletionClient

In [12]:
model_client = OpenAIChatCompletionClient(
    model="gpt-4o",
    response_format={"type": "json_object"},
)

In [17]:
from autogen_core.tools import FunctionTool


def write_to_file(json_output: str):
    with open("grocery-deals.json", "w") as file:
        file.write(json_output)

writer_tool = FunctionTool(
    func=write_to_file,
    description="Write the json output to a file",
    
)


In [19]:
from autogen_agentchat.agents import AssistantAgent

reader_tool = FunctionTool(
    func=read_file,
    description="Read the raw data from the .md files",
)

agent = AssistantAgent(
    name="grocery_extractor",
    model_client=model_client,
    system_message="You are an expert at extracting grocery deals from raw data into a structured tablejson format with the\
        following columns: 'product', 'deal', 'price'. The product column should contain the name of the product being offered\
        (e.g., 'Nestle Multi Brands', 'Kroger Cheese'). The deal column should contain the type of deal available\
        (e.g., 'Save $5.00 on 3', 'Buy 5, Save $1 Each'). The price column should contain the sale price including any discounts\
        (e.g., '$2.49', '$6.49 discounted from $6.99').",
    tools=[reader_tool]
)

reporter_agent = AssistantAgent(
    name="grocery_reporter",
    model_client=OpenAIChatCompletionClient(
        model="gpt-4o",
    ),
    system_message="You take in the json output from the grocery-extractor agent and you write it to file grocery-deals.json",
    tools=[writer_tool]
)

In [24]:
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.conditions import MaxMessageTermination


team = RoundRobinGroupChat(
    [agent, reporter_agent],
    termination_condition=MaxMessageTermination(max_messages=3),
)

response = await team.run(task="Read the raw data from the ./grocery-raw-data.md file and extract the deals.")

In [27]:
response.messages[-1].content

"Skip to content\nRalphs logo\nShop\nSave\nPickup & Delivery\nServices\nPharmacy & Health\n\n\nSign In\n\n Pickup\n at\n Bermuda Dunes\nDigital Coupons\nWeekly Ad\nMeal Planning\nRecipes\nBlog\nGift Cards\nCredit Card\nFind a Store\n\nDigital Coupons\n\nShop Weekly Ad\n\nFuel Points\n\nCash Back\nThis Week’s Top Deals\nBuild your cart with this week’s best deals & top offers.\n\nShop Top Deals\nWeekly Digital Deals\nBuy 5, Save $1 Each\nLocked In Low\nTop Coupons Picked for You\nView All\n\n\nSave $5.00 on 3 Nestle Multi Brands\nSave $500 on 3\nSave $5.00 on 3 Nestle Multi Brands\nExp. Jan. 30\nShop All Items\nSign In To Clip\nExpiring Soon\n\nSave $1.50 on Private Selection Fresh Sliced Deli Meat or Cheese\nSave $150\nSave $1.50 on Private Selection Fresh Sliced Deli Meat or Cheese\nExp. Jan. 22\nShop All Items\nSign In To Clip\n\nSAVE $1.00 on 2 Pillsbury™ Refrigerated and Frozen\nSave $100 on 2\nSAVE $1.00 on 2 Pillsbury™ Refrigerated and Frozen\nExp. Feb. 15\nShop All Items\nSign I