In [18]:
import requests
from bs4 import BeautifulSoup
import logging
from fake_useragent import UserAgent
from tqdm import tqdm
import time

# CSS selector matching one listing card on the results page.
# NOTE(review): this and the field selectors below look like build-generated
# class names; presumably they change when the site is redeployed — confirm
# against the live markup if the scraper finds 0 listings.
_CARD_SELECTOR = ".StyledCard-c11n-8-85-1__sc-rmiu6p-0"

# Output key -> CSS selector (relative to a card) for each scraped field.
_FIELD_SELECTORS = {
    "pricing": ".bqsBln",
    "size": ".gxlfal",
    "address": "address",
    "listing_by": ".StyledPropertyCardDataArea-c11n-8-85-1__sc-yipmu-0.cWiizR",
}

# Seconds to wait for the server before giving up; requests has no default
# timeout, so without this a stalled connection would block forever.
_REQUEST_TIMEOUT = 30


def _extract_text(card, selector):
    """Return the stripped text of the first match for *selector*, or None."""
    node = card.select_one(selector)
    if node is None:
        return None
    return node.text.strip()


def scrape_zillow_listings(url, delay=5, headers=None):
    """Scrape the listing cards found on a single Zillow results page.

    Exactly one HTTP GET is issued for ``url``; the returned HTML is then
    parsed and one dictionary is built per listing card.

    Args:
        url (str): The URL of the Zillow listings page.
        delay (int, optional): Seconds to sleep after each parsed listing.
            Note that this pauses between listings of the already-downloaded
            page, not between HTTP requests. Values that are ``None``, zero
            or negative disable the pause. Defaults to 5.
        headers (dict, optional): Custom headers to use for the request.
            When None, a browser-like header set with a random User-Agent
            is generated. Defaults to None.

    Returns:
        list: One dictionary per listing card with the keys ``pricing``,
        ``size``, ``address`` and ``listing_by``. A field whose element is
        not present in the card is set to ``None``. An empty list is
        returned when the request fails or no card matches.
    """
    if headers is None:
        # Use fake user-agent library
        user_agent = UserAgent().random
        headers = {
            'User-Agent': user_agent,
            'Accept-Language': 'en-US,en;q=0.8',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            # 'br' is deliberately not advertised: a brotli-compressed body
            # can only be decoded when an optional brotli package is
            # installed, otherwise the parser would receive compressed bytes.
            'Accept-Encoding': 'gzip, deflate',
            'Referer': 'https://www.google.com/',
            'Connection': 'keep-alive',
            'Cache-Control': 'max-age=0',
            'TE': 'Trailers',
            'DNT': '1',  # Do Not Track
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-User': '?1',
            'Sec-Fetch-Dest': 'document',
        }

    with requests.Session() as session:
        # Only the network calls can raise RequestException, so keep the
        # try body limited to them.
        try:
            resp = session.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
            resp.raise_for_status()  # Raise an exception for HTTP errors
        except requests.exceptions.RequestException as e:
            logging.error("Error fetching URL: %s", e)
            return []

        soup = BeautifulSoup(resp.content, 'html.parser')

    # Select the cards once and reuse the result for both the count and
    # the iteration.
    cards = soup.select(_CARD_SELECTOR)
    print(f"Found {len(cards)} listings. Starting scraping...")

    listings = []
    for card in tqdm(cards, desc="Scraping listings"):
        listing = {}
        # Extract each field independently so one missing element does not
        # discard the fields that are present.
        for key, selector in _FIELD_SELECTORS.items():
            value = _extract_text(card, selector)
            if value is None:
                logging.warning(
                    "Listing is missing field %r (selector %r)", key, selector
                )
            listing[key] = value
        listings.append(listing)

        # Optional delay after each listing; time.sleep rejects negative
        # values, so only sleep for a positive delay.
        if delay and delay > 0:
            time.sleep(delay)

    return listings

# Demo run: fetch the results page at the URL below with the default
# delay and headers, then dump the scraped listings to stdout.
url = "https://www.zillow.com/new-york-ny/buy/"
listings = scrape_zillow_listings(url=url)

print(listings)


Found 0 listings. Starting scraping...


Scraping listings: 0it [00:00, ?it/s]

[]



