In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

In [7]:
# Listing URL prefix; "%2C" is a URL-encoded comma. The pagination cell in this
# notebook builds its per-page URLs from this same prefix plus a page index.
BASE_URL = "https://recalls-rappels.canada.ca/en?page=%2C"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops us from silently parsing an HTTP error page.
resp = requests.get(BASE_URL, headers={"User-Agent": "Mozilla/5.0"}, timeout=30)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")

In [8]:
# Module-level accumulator: get_hyperlinks() appends one dict per recall entry.
recalls = []


def get_hyperlinks(url, timeout=30):
    """Fetch one listing page and append its recall entries to ``recalls``.

    The page at ``url`` is downloaded and parsed on every call, so each call
    reflects the page that was actually requested rather than a previously
    parsed document.

    Args:
        url: Absolute URL of the listing page. Also used as the base for
            resolving relative links found on the page.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        None. Each entry is appended to the module-level ``recalls`` list as a
        dict with keys ``title``, ``href``, ``recall_type`` and ``recall_date``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(
        url, headers={"User-Agent": "Mozilla/5.0"}, timeout=timeout
    )
    response.raise_for_status()
    page_soup = BeautifulSoup(response.text, "html.parser")

    for block in page_soup.select("div.homepage-recent-row"):
        a_tag = block.select_one("span.homepage-recent a")
        if not a_tag:
            # Row without a link: nothing to record.
            continue

        href = urljoin(url, a_tag.get("href"))
        title = a_tag.get_text(strip=True)

        ar_tag = block.select_one("span.ar-type")
        recall_info = ar_tag.get_text(strip=True) if ar_tag else ""

        # Split "Food recall warning | 2025-09-10" at the first "|" only;
        # when there is no separator the whole text is the type and the
        # date is unknown.
        recall_type, separator, recall_date = recall_info.partition("|")

        recalls.append({
            "title": title,
            "href": href,
            "recall_type": recall_type.strip(),
            "recall_date": recall_date.strip() if separator else None,
        })

In [9]:
# Collect the recall entries for BASE_URL; they are appended to the
# module-level `recalls` list (the call itself returns nothing).
get_hyperlinks(BASE_URL)

In [10]:
# Spot-check one scraped record (index 1, i.e. the second entry).
print(recalls[1])

{'title': 'Krazy Krunch brand Popcorn White Cheddar recalled due to improperly declared milk', 'href': 'https://recalls-rappels.canada.ca/en/alert-recall/krazy-krunch-brand-popcorn-white-cheddar-recalled-due-improperly-declared-milk', 'recall_type': 'Notification', 'recall_date': '2025-10-03'}


In [18]:
# Run to get all the titles, hyperlinks, recall tags, recall date
# Start from an empty list so re-running this cell does not pile new results
# on top of the ones collected by an earlier run.
recalls = []
for page in range(0, 20):
    # Must be an f-string: without the prefix the literal text "{page}" is
    # sent and every iteration asks for the same URL.
    url = f"https://recalls-rappels.canada.ca/en?page=%2C{page}"
    get_hyperlinks(url)



In [20]:
# Number of recall records currently held in `recalls`.
print(len(recalls))

200


In [25]:
# Preview the first two records.
print(recalls[0:2])



In [30]:
import csv
import os

In [29]:
# Dump every scraped record into a fresh "recalls.csv"; mode "w" replaces any
# previous file of that name. newline="" lets the csv module control line endings.
csv_columns = ["title", "href", "recall_type", "recall_date"]
with open("recalls.csv", "w", newline="", encoding="utf-8") as out_file:
    csv_writer = csv.DictWriter(out_file, fieldnames=csv_columns)
    csv_writer.writeheader()
    for record in recalls:
        csv_writer.writerow(record)
print("✅ CSV file created successfully!")

✅ CSV file created successfully!


In [31]:
# Path of the CSV file that the append step below opens and extends.
CSV_FILE = "recalls.csv"

In [36]:
# Append the scraped recalls to CSV_FILE, giving every row a sequential "id".
#
# The file on disk can be in one of three states, each handled explicitly:
#   * missing or empty          -> create it with the id-bearing header;
#   * already has this header   -> append the new rows after the existing ones;
#   * same columns minus "id"   -> rewrite it once, numbering the old rows, so
#     5-column rows never end up underneath a 4-column header.
# Any other header raises instead of silently producing a misaligned file.
# Note: each run appends the full current `recalls` list, so running the cell
# twice stores those rows twice.
fieldnames = ["id", "title", "href", "recall_type", "recall_date"]

# Read what is already stored. Parsing with the csv module (rather than
# counting raw lines) keeps the row count right even when a quoted field
# contains a line break.
existing_header = None
existing_rows = []
if os.path.exists(CSV_FILE) and os.path.getsize(CSV_FILE) > 0:
    with open(CSV_FILE, "r", newline="", encoding="utf-8") as existing:
        reader = csv.DictReader(existing)
        existing_header = reader.fieldnames
        existing_rows = list(reader)

if existing_header is None or existing_header == fieldnames:
    legacy_rows = []
elif "id" not in existing_header and set(existing_header) <= set(fieldnames):
    # Older layout without ids: these rows are re-written with ids 1..N.
    legacy_rows = existing_rows
else:
    raise ValueError(
        f"{CSV_FILE} has an unexpected header {existing_header!r}; "
        f"refusing to append rows with columns {fieldnames!r}"
    )

# Only a file that already carries the exact header can be appended to as-is.
mode = "a" if existing_header == fieldnames else "w"

with open(CSV_FILE, mode, newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)

    if mode == "w":
        writer.writeheader()
        for i, row in enumerate(legacy_rows, start=1):
            writer.writerow(
                {"id": i, **{name: row.get(name) for name in fieldnames[1:]}}
            )

    # Continue the ID sequence after the rows that were already stored.
    next_id = len(existing_rows) + 1

    for i, item in enumerate(recalls, start=next_id):
        writer.writerow({
            "id": i,
            "title": item.get("title"),
            "href": item.get("href"),
            "recall_type": item.get("recall_type"),
            "recall_date": item.get("recall_date"),
        })