In [24]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36"
}

def scrape_recipe(url):
    """Download one recipe page and extract its title, ingredients and steps.

    Args:
        url: Address of the recipe page to fetch.

    Returns:
        A dict with four string values:
        "title" (text of the first <h1>), "ingredients" (ingredient names
        joined with "; "), "instructions" (step texts joined with a single
        space) and "url" (the address that was requested).

    Raises:
        requests.RequestException: the request failed, timed out, or the
            server answered with an HTTP error status.
        ValueError: the page contains no <h1> element to use as a title.
    """
    # Without a timeout a single stalled connection would block the crawl forever.
    resp = requests.get(url, headers=headers, timeout=30)
    # Refuse error pages (404/5xx) instead of parsing them as if they were recipes.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    heading = soup.find("h1")
    if heading is None:
        # find() returns None when nothing matches; fail with a readable message
        # rather than an AttributeError on None.
        raise ValueError(f"No <h1> title found on {url}")
    title = heading.get_text(strip=True)

    ingredients = [i.get_text(strip=True) for i in soup.select("span.wprm-recipe-ingredient-name")]
    instructions = [step.get_text(strip=True) for step in soup.select("div.wprm-recipe-instruction-text")]

    return {
        "title": title,
        "ingredients": "; ".join(ingredients),  # join so CSV is cleaner
        "instructions": " ".join(instructions),
        "url": url
    }

# ---- Phase 1: Collect all recipe links ----
# Walk the paginated category index and gather recipe URLs, keeping each URL
# once and in first-seen order.
all_links = []
seen_links = set()  # guards against the same recipe being queued twice
for page in range(1, 169):  # 168 pages total
    print(f"📄 Collecting links from page {page}...")
    url = f"https://panlasangpinoy.com/categories/recipes/page/{page}/"
    try:
        # Timeout so one stalled connection cannot hang the whole crawl.
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        # Best effort: one bad index page should not discard the links
        # already collected from the other pages.
        print(f"❌ Error on {url}: {e}")
        continue
    soup = BeautifulSoup(response.text, "html.parser")

    for a in soup.select("h2.entry-title a"):
        # .get() tolerates anchors that carry no href attribute.
        href = a.get("href")
        if href and href.startswith("https://panlasangpinoy.com/") and href not in seen_links:
            seen_links.add(href)
            all_links.append(href)

    # Short pause between index pages to avoid hammering the site.
    time.sleep(1)

print(f"\n✅ Collected {len(all_links)} recipe links.")

# ---- Phase 2: Scrape details & save progressively ----
# Visit every collected link, keep the recipes that parse, and checkpoint the
# results to CSV so an interrupted run only loses the most recent batch.
output_csv = "recipes_data.csv"
results = []
failed_links = []  # links whose scrape raised, kept so they can be retried
for i, link in enumerate(all_links, 1):
    try:
        recipe = scrape_recipe(link)
        results.append(recipe)
        print(f"✅ {i}/{len(all_links)} {recipe['title']}")
    except Exception as e:
        # Deliberately broad: a single bad page must not stop the crawl.
        # The link is recorded instead of being lost in the log output.
        failed_links.append(link)
        print(f"❌ Error on {link}: {e}")

    # Checkpoint after every 20 links processed (successful or not)
    if i % 20 == 0:
        pd.DataFrame(results).to_csv(output_csv, index=False)
        print("💾 Progress saved...")

    # Short pause between recipe pages to avoid hammering the site.
    time.sleep(1)

# Final save
df = pd.DataFrame(results)
df.to_csv(output_csv, index=False)
print(f"\n🎉 Done! All recipes saved to {output_csv}")
if failed_links:
    print(f"⚠️ {len(failed_links)} links failed and were skipped (see failed_links).")


📄 Collecting links from page 1...
📄 Collecting links from page 2...
📄 Collecting links from page 3...
📄 Collecting links from page 4...
📄 Collecting links from page 5...
📄 Collecting links from page 6...
📄 Collecting links from page 7...
📄 Collecting links from page 8...
📄 Collecting links from page 9...
📄 Collecting links from page 10...
📄 Collecting links from page 11...
📄 Collecting links from page 12...
📄 Collecting links from page 13...
📄 Collecting links from page 14...
📄 Collecting links from page 15...
📄 Collecting links from page 16...
📄 Collecting links from page 17...
📄 Collecting links from page 18...
📄 Collecting links from page 19...
📄 Collecting links from page 20...
📄 Collecting links from page 21...
📄 Collecting links from page 22...
📄 Collecting links from page 23...
📄 Collecting links from page 24...
📄 Collecting links from page 25...
📄 Collecting links from page 26...
📄 Collecting links from page 27...
📄 Collecting links from page 28...
📄 Collecting links from page 

In [25]:
import os

# Print the directory the kernel is currently running in
cwd = os.getcwd()
print("📂 Current working directory:", cwd)

# Gather the CSV file names found directly in that directory, then print them
print("\n📄 CSV files found:")
csv_names = [name for name in os.listdir() if name.endswith(".csv")]
for name in csv_names:
    print(" -", name)


📂 Current working directory: c:\Users\Windows11\AppData\Local\Programs\Microsoft VS Code

📄 CSV files found:
 - filipino_recipes.csv
 - recipes_data.csv
