import libraries

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
import time
from selenium import webdriver

In [None]:
# Set up the Selenium WebDriver (using Chrome in this example).
# This statement runs as soon as the cell/module is executed.
# NOTE(review): `driver` is not referenced by any of the code visible in this
# file (page fetching below goes through `requests`), and nothing visible
# calls driver.quit() — confirm whether this WebDriver session is still needed.
driver = webdriver.Chrome()  # Make sure ChromeDriver is in your PATH

In [None]:
# Base URL for the website, including the page number placeholder.
# The trailing "{}" is filled in by fetch_page() through str.format(page_number).
BASE_URL = "https://www.immoweb.be/en/search/house/for-sale?countries=BE&page={}"


In [None]:
# Download the raw HTML of one search-results page
def fetch_page(page_number):
    """Fetch search-results page ``page_number`` and return its body.

    The page URL is built by substituting ``page_number`` into ``BASE_URL``.
    The request is sent with a desktop-browser User-Agent header and a
    10-second timeout.

    Returns:
        The response body (``response.content``) when the server answers with
        HTTP 200; ``None`` for any other status code or when ``requests``
        raises a ``RequestException``. Every outcome is reported via ``print``.
    """
    url = BASE_URL.format(page_number)
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36'
    }
    try:
        reply = requests.get(url, headers=request_headers, timeout=10)
        # Anything other than 200 is treated as a failed fetch.
        if reply.status_code != 200:
            print(f"Failed to fetch page {page_number} - Status Code: {reply.status_code}")
            return None
        print(f"Successfully fetched page {page_number}")
        return reply.content
    except requests.exceptions.RequestException as error:
        print(f"Request failed for page {page_number} - Exception: {error}")
        return None

# Turn one search-result card into a flat record
def parse_property(listing):
    """Extract the fields of a single listing element into a dict.

    ``listing`` must offer ``select_one(css_selector)`` and ``prettify()``;
    the elements returned by ``select_one`` must offer ``get_text(strip=True)``.

    Returns:
        A dict with the keys "Locality", "Type of Property", "Price",
        "Number of Rooms", "Living Area", the 0/1 feature flags, and several
        detail fields that are always ``None`` because nothing here fills
        them in. Text fields whose element is missing are ``None``; a missing
        price is the string "N/A". Returns ``None`` if any exception is
        raised while extracting (the error is printed).
    """
    def text_of(selector, unit=""):
        # Stripped text of the first match, with ``unit`` removed when given.
        node = listing.select_one(selector)
        if not node:
            return None
        text = node.get_text(strip=True)
        return text.replace(unit, "").strip() if unit else text

    def flag(selector):
        # 1 when the selector matches something inside the listing, else 0.
        return 1 if listing.select_one(selector) else 0

    try:
        locality = text_of(".card__information.card--results__information--locality")
        property_type = text_of(".card__title-link")

        # Prefer the aria-hidden span; fall back to the whole price container.
        price_node = (
            listing.select_one(".card--result__price span[aria-hidden='true']")
            or listing.select_one(".card--result__price")
        )
        price = price_node.get_text(strip=True) if price_node else "N/A"

        # Debugging aid: dump the listing markup when no price could be read.
        if price == "N/A":
            print("Price not found. Here’s the listing HTML for inspection:")
            print(listing.prettify())

        print("Found price:", price)  # Debug print to confirm extracted price

        rooms = text_of(".card__information .abbreviation", "bdr.")
        living_area = text_of(".card__information .abbreviation + span", "m²")

        # Key order is kept stable because it determines the CSV column order.
        return {
            "Locality": locality,
            "Type of Property": property_type,
            "Price": price,
            "Number of Rooms": rooms,
            "Living Area": living_area,
            "Fully Equipped Kitchen": flag(".kitchen-selector"),
            "Furnished": flag(".furnished-selector"),
            "Open Fire": flag(".fire-selector"),
            "Terrace": flag(".terrace-selector"),
            "Terrace Area": None,
            "Garden": flag(".garden-selector"),
            "Garden Area": None,
            "Surface of the Land": None,
            "Surface Area of the Plot": None,
            "Number of Facades": None,
            "Swimming Pool": flag(".pool-selector"),
            "State of the Building": None,
        }
    except Exception as error:
        print(f"Error parsing listing: {error}")
        return None

# Fetch one results page and parse every listing card on it
def scrape_page(page_number):
    """Return the parsed listings found on search-results page ``page_number``.

    The page is downloaded with ``fetch_page``. When that yields nothing, an
    empty list is returned. Otherwise the HTML is parsed with the
    "html.parser" backend, every ``article.card--result`` element is passed
    to ``parse_property``, and the results are returned in document order.
    Entries may be ``None`` where ``parse_property`` failed.
    """
    page_html = fetch_page(page_number)
    if not page_html:
        return []

    document = BeautifulSoup(page_html, "html.parser")
    cards = document.select("article.card--result")
    print(f"Found {len(cards)} listings on page {page_number}")
    return [parse_property(card) for card in cards if card]

# Scrape a range of result pages using a thread pool
def scrape_all_pages(start_page=1, end_page=10):
    """Scrape pages ``start_page``..``end_page`` (inclusive) concurrently.

    Each page is handed to ``scrape_page`` on a ``ThreadPoolExecutor`` with
    at most 10 workers. Results are gathered in page order and flattened
    into a single list; pages that produced nothing contribute no entries.
    An empty range (``end_page < start_page``) yields an empty list.
    """
    page_numbers = range(start_page, end_page + 1)
    with ThreadPoolExecutor(max_workers=10) as pool:
        # Queue every page first so the downloads overlap ...
        pending = [pool.submit(scrape_page, number) for number in page_numbers]
        # ... then wait for them in submission order to keep pages ordered.
        per_page = [task.result() for task in pending]

    collected = []
    for page_records in per_page:
        if page_records:
            collected.extend(page_records)
    return collected

# Main function to initiate scraping and save data to CSV
def main():
    """Scrape result pages 1-10 and save the listings to a CSV file.

    Listings are collected with ``scrape_all_pages``; entries that are
    ``None`` or otherwise falsy (listings that failed to parse) are dropped.
    The remaining records are written to "real_estate_data.csv" without the
    DataFrame index.

    When no listing was collected, nothing is written. An unsuccessful run
    therefore no longer replaces the CSV from an earlier run with an empty
    file, and no longer reports that data was saved.
    """
    print("Starting data collection...")
    scraped = scrape_all_pages(start_page=1, end_page=10)
    # Named `record` rather than `property` so the builtin is not shadowed.
    listings = [record for record in scraped if record]

    if not listings:
        print("No listings collected.")
        return

    print(f"Collected {len(listings)} listings.")

    # Save data to CSV
    df = pd.DataFrame(listings)
    df.to_csv("real_estate_data.csv", index=False)
    print("Data saved to 'real_estate_data.csv'.")

# Start the scrape only when this file is run as the main program.
if __name__ == "__main__":
    main()
