In [2]:
import time
import csv
import json
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

In [4]:
def _build_search_url(location):
    """Return the realtor.com newest-listings search URL for ``location``.

    ``location`` is a "City, ST" style string such as ``'Maryville, TN'``.
    The comma-separated parts are joined with ``_`` to form the path segment
    (``'Maryville, TN'`` -> ``Maryville_TN``).

    Raises:
        ValueError: if ``location`` contains no usable text.
    """
    from urllib.parse import quote

    parts = [part.strip() for part in location.split(',') if part.strip()]
    if not parts:
        raise ValueError("location must be a non-empty string such as 'Maryville, TN'")

    # NOTE(review): spaces inside a part are turned into hyphens on the
    # assumption that multi-word cities use "New-York_NY" style slugs --
    # confirm against the live site.
    slug = '_'.join('-'.join(part.split()) for part in parts)

    # Percent-encode anything unexpected so the result is always a valid URL.
    return (
        'https://www.realtor.com/realestateandhomes-search/'
        f"{quote(slug, safe='')}/show-newest-listings/sby-6"
    )


def _extract_listing(property_element):
    """Return ``(title, price, url)`` for one listing element, or ``None``.

    ``None`` is returned when the element lacks an ``<h2>``, a
    ``<span class="price">``, or an ``<a>`` with a non-empty ``href``, so a
    single incomplete card does not abort the whole scrape.
    """
    title_tag = property_element.find('h2')
    price_tag = property_element.find('span', class_='price')
    link_tag = property_element.find('a')
    if title_tag is None or price_tag is None or link_tag is None:
        return None

    url = link_tag.get('href')
    if not url:
        return None

    return title_tag.text.strip(), price_tag.text.strip(), url


def scrape_real_estate_data(location, wait_seconds=5):
    """Scrape listing titles, prices and URLs for ``location`` and save them.

    The search page is loaded in headless Chrome, parsed with BeautifulSoup,
    and every ``<div class="property">`` element is reduced to a
    ``(title, price, url)`` row. The rows are written to
    ``real_estate_data.csv`` (with a header row) and ``real_estate_data.json``
    in the current working directory.

    Args:
        location: "City, ST" string, e.g. ``'Maryville, TN'``.
        wait_seconds: Seconds to sleep after loading the page so that
            dynamically rendered content has time to appear.

    Returns:
        None. Progress and errors are reported with ``print``.

    Raises:
        ValueError: if ``location`` is blank (raised before the browser starts).
    """
    # Build the URL first so an invalid location fails before Chrome launches.
    search_url = _build_search_url(location)

    # Configure Selenium options
    chrome_options = Options()
    chrome_options.add_argument('--headless')  # Run Chrome in headless mode (no GUI)

    # Set up the Selenium WebDriver
    driver = webdriver.Chrome(options=chrome_options)

    try:
        # Load the webpage
        driver.get(search_url)

        # Wait for dynamic content to load
        time.sleep(wait_seconds)

        # Parse the rendered page source
        soup = BeautifulSoup(driver.page_source, 'html.parser')

        # NOTE(review): 'div.property', 'h2' and 'span.price' look like
        # placeholder selectors -- confirm they match the site's real markup.
        property_elements = soup.find_all('div', class_='property')

        # Collect one (title, price, url) row per complete listing.
        data = []
        skipped = 0
        for property_element in property_elements:
            listing = _extract_listing(property_element)
            if listing is None:
                skipped += 1
                continue
            data.append(listing)

        # Save to CSV
        with open('real_estate_data.csv', 'w', newline='', encoding='utf-8') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(['Property Title', 'Property Price', 'Property URL'])
            csv_writer.writerows(data)

        # Save to JSON (each row is serialized as a 3-element array)
        with open('real_estate_data.json', 'w', encoding='utf-8') as json_file:
            json.dump(data, json_file, ensure_ascii=False, indent=2)

        if skipped:
            print(f"Skipped {skipped} listing(s) with missing title, price or URL.")
        if not data:
            # Make an empty result visible instead of silently reporting success.
            print("Warning: no listings were extracted; the output files contain no data rows.")
        print("Scraping completed successfully.")
    except Exception as e:
        # Best-effort notebook helper: report the failure instead of raising.
        print(f"Error: {e}")
    finally:
        # Close the WebDriver
        driver.quit()

# Run the scraper for the Maryville, TN market.
scrape_real_estate_data(location='Maryville, TN')


Scraping completed successfully.
