In [None]:
import asyncio
import aiohttp
import nest_asyncio
from bs4 import BeautifulSoup
import csv

# Patch asyncio (via nest_asyncio) so that an event loop can be entered while
# another one is already running.
nest_asyncio.apply()

# Range of pet IDs to scrape; main() iterates range(start_id, end_id + 1).
start_id = 2001
end_id = 36500  # inclusive upper bound

def _text_or_na(tag):
    """Return the stripped text of a BeautifulSoup tag, or "N/A" if it is None."""
    return tag.text.strip() if tag is not None else "N/A"


def _parse_pet_row(soup, pet_id):
    """Build the CSV row (a list of 18 values) for one parsed pet profile page."""
    # Pet name: guard both lookups so a missing inner cell yields "N/A"
    # instead of raising AttributeError and discarding the whole pet.
    title_div = soup.find('div', class_='pet_title')
    name_cell = title_div.find('td', align="center") if title_div is not None else None
    pet_name = _text_or_na(name_cell)

    # Details table: collect every "<b>Key:</b> | value" row, and pick up the
    # adoption fee in the same pass.
    pet_details = {}
    adoption_fee = "N/A"
    info_table = soup.find('table', class_='pet_box')
    if info_table is not None:
        for row in info_table.find_all('tr'):
            cols = row.find_all('td')
            if len(cols) < 2:
                continue
            key_tag = cols[0].find('b')
            if key_tag is None:
                continue
            pet_details[key_tag.text.strip().replace(":", "")] = cols[1].text.strip()
            if 'Adoption Fee' in key_tag.text:
                # Prefer the bold part of the value cell when there is one.
                fee_tag = cols[1].find('b')
                adoption_fee = _text_or_na(fee_tag if fee_tag is not None else cols[1])

    # Type and species: the first key collected from the table is used as the
    # pet type and its value as the species.
    pet_type = next(iter(pet_details), "N/A")
    pet_species = pet_details.get(pet_type, "N/A")

    # Uploader type and name
    uploader_type = "N/A"
    uploader_name = "N/A"
    uploader_td = soup.find('td', align="left", width="130", valign="middle")
    if uploader_td is not None:
        uploader_type = _text_or_na(uploader_td.find('font'))
        uploader_name = _text_or_na(uploader_td.find('a', class_='darkgrey'))

    # Status label
    pet_status = _text_or_na(soup.find('div', class_='pet_label'))

    detail_columns = (
        'Profile', 'Amount', 'Vaccinated', 'Dewormed', 'Spayed',
        'Condition', 'Body', 'Color', 'Location', 'Posted',
    )
    return [
        pet_id,
        pet_name,
        pet_type,
        pet_species,
        *(pet_details.get(column, 'N/A') for column in detail_columns),
        adoption_fee,
        uploader_type,
        uploader_name,
        pet_status,
    ]


async def scrape_pet(session, pet_id, *, output_path='pets_2001_36500.csv', delay=30):
    """Fetch one pet profile page and append its data as a row to a CSV file.

    Args:
        session: An object with an aiohttp-style ``get(url)`` method usable as
            an async context manager (e.g. ``aiohttp.ClientSession``).
        pet_id: Numeric ID of the pet; used to build the profile URL.
        output_path: CSV file the row is appended to.
        delay: Seconds to wait before issuing the request. The default of 30
            is the delay the original code attributes to the site's robots.txt.

    Returns:
        None. Any failure (network error, HTTP error status, parse or write
        error) is reported with ``print`` and no row is written, so that one
        bad page does not abort a long-running scrape.
    """
    url = f"https://www.petfinder.my/pets/{pet_id}/"
    try:
        await asyncio.sleep(delay)  # Respect robots.txt: delay between requests
        async with session.get(url) as response:
            # Treat 4xx/5xx as a failure rather than parsing the error page
            # and recording it as a pet whose fields are all "N/A".
            response.raise_for_status()
            html = await response.text()

        row = _parse_pet_row(BeautifulSoup(html, "html.parser"), pet_id)

        # Append progressively; leaving the ``with`` block closes the file,
        # which flushes the row to the OS.
        with open(output_path, 'a', newline='', encoding='utf-8') as csvfile:
            csv.writer(csvfile).writerow(row)
    except Exception as e:
        # Deliberate per-pet boundary: report and carry on with the next ID.
        print(f"Failed to scrape pet {pet_id}: {e}")

# Entry-point coroutine for the whole scrape
async def main():
    """Create the CSV with its header row, then scrape every pet ID in order.

    The output file is opened in write mode, so any existing file with the
    same name is truncated. Pets from ``start_id`` through ``end_id``
    (inclusive) are then scraped one at a time over a shared HTTP session.
    """
    header = [
        "Pet ID", "Name", "Type", "Species", "Profile", "Amount",
        "Vaccinated", "Dewormed", "Spayed", "Condition", "Body", "Color",
        "Location", "Posted", "Price", "Uploader Type", "Uploader Name",
        "Status",
    ]

    # Start a fresh file containing only the header.
    with open('pets_2001_36500.csv', 'w', newline='', encoding='utf-8') as out_file:
        csv.writer(out_file).writerow(header)

    # One session for all requests; each pet is awaited before the next starts.
    async with aiohttp.ClientSession() as http:
        for current_id in range(start_id, end_id + 1):
            await scrape_pet(http, current_id)

# Run the scraper. Top-level `await` is only valid in an IPython/Jupyter cell;
# a plain Python script would need asyncio.run(main()) instead.
await main()
