In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By

# Initialize the WebDriver (ensure ChromeDriver is in PATH)
driver = webdriver.Chrome()  # Make sure ChromeDriver is accessible

# Open the target website
driver.get('https://www.zolo.ca/index.php?sarea=&s=')

# Find all article elements
articles = driver.find_elements(By.CSS_SELECTOR, 'article.card-listing')

# List to hold the scraped data
properties = []

# SVG path to check for
svg_path_to_skip = 'd="M18 8h-1V6A5 5 0 007 6v2H6a2 2 0 00-2 2v10c0 1.1.9 2 2 2h12a2 2 0 002-2V10a2 2 0 00-2-2zM9 6a3 3 0 116 0v2H9V6zm9 14H6V10h12v10zm-6-3c1.1 0 2-.9 2-2s-.9-2-2-2-2 .9-2 2 .9 2 2 2z"'

# Loop through each article and extract the required information
for article in articles:
    # Check if the article contains the specified SVG path
    if svg_path_to_skip in article.get_attribute('innerHTML'):
        continue  # Skip this article if the path is found

    # Extract data
    street_address = article.find_element(By.CSS_SELECTOR, 'span[itemprop="streetAddress"]').text.strip()
    city = article.find_element(By.CSS_SELECTOR, 'span[itemprop="addressLocality"]').text.strip()
    province = article.find_element(By.CSS_SELECTOR, 'span[itemprop="addressRegion"]').text.strip()
    image = article.find_element(By.CSS_SELECTOR, 'img.card-listing--img').get_attribute('src')
    price = article.find_element(By.CSS_SELECTOR, 'span[itemprop="price"]').text.strip()
    
    # Safely extract bedroom, bathroom, sqft, and post_duration
    try:
        bedroom = article.find_elements(By.CSS_SELECTOR, 'li')[1].text.strip()  # 0 for bedroom
    except IndexError:
        bedroom = 'None'
    
    try:
        bathroom = article.find_elements(By.CSS_SELECTOR, 'li')[2].text.strip()  # 1 for bathroom
    except IndexError:
        bathroom = 'None'
    
    index_for_post_duration = 4
    try:
        sqft = article.find_elements(By.CSS_SELECTOR, 'li')[3].text.strip()  # 2 for sqft
        # Check if sqft contains 'sqft'
        if 'sqft' not in sqft:
            index_for_post_duration = 3
            sqft = 'None'
    except IndexError:
        sqft = 'None'
    
    try:
        post_duration = article.find_elements(By.CSS_SELECTOR, 'ul.card-listing--values li')[index_for_post_duration].text.strip()  # last element for post_duration
    except IndexError:
        post_duration = 'None'

    # Extract details link
    details_link = article.find_element(By.CSS_SELECTOR, 'a.card-listing--image-link').get_attribute('href')

    # Append the data to the list
    properties.append({
        'street_address': street_address,
        'city': city,
        'province': province,
        'image': image,
        'price': price,
        'bedroom': bedroom,
        'bathroom': bathroom,
        'sqft': sqft,
        'post_duration': post_duration,
        'details_link': details_link
    })

# Print the scraped data
for prop in properties:
    print(prop)

# Quit the driver
driver.quit()


In [None]:
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException

# Initialize the WebDriver (ensure ChromeDriver is in PATH)
driver = webdriver.Chrome()  # Make sure ChromeDriver is accessible

# Total number of pages to scrape
total_pages = 605

# List to hold the scraped data
properties = []

# SVG path to check for
svg_path_to_skip = 'd="M18 8h-1V6A5 5 0 007 6v2H6a2 2 0 00-2 2v10c0 1.1.9 2 2 2h12a2 2 0 002-2V10a2 2 0 00-2-2zM9 6a3 3 0 116 0v2H9V6zm9 14H6V10h12v10zm-6-3c1.1 0 2-.9 2-2s-.9-2-2-2-2 .9-2 2 .9 2 2 2z"'

# Loop through each page
for page_number in range(11, total_pages + 1):
    # Construct the URL for the current page
    url = f'https://www.zolo.ca/index.php?sarea=&s={page_number}'
    driver.get(url)

    # Find all article elements
    articles = driver.find_elements(By.CSS_SELECTOR, 'article.card-listing')

    # Loop through each article and extract the required information
    for article in articles:
        # Check if the article contains the specified SVG path
        if svg_path_to_skip in article.get_attribute('innerHTML'):
            continue  # Skip this article if the path is found

        # Extract data
        
        try:
            street_address = article.find_element(By.CSS_SELECTOR, 'span[itemprop="streetAddress"]').text.strip()
        except NoSuchElementException:
            print('No addresssssssssssssssssssssssssssssssssssssssssss')
        
        city = article.find_element(By.CSS_SELECTOR, 'span[itemprop="addressLocality"]').text.strip()
        province = article.find_element(By.CSS_SELECTOR, 'span[itemprop="addressRegion"]').text.strip()
        image = article.find_element(By.CSS_SELECTOR, 'img.card-listing--img').get_attribute('src')
        price = article.find_element(By.CSS_SELECTOR, 'span[itemprop="price"]').text.strip()
        
        # Safely extract bedroom, bathroom, sqft, and post_duration
        try:
            bedroom = article.find_elements(By.CSS_SELECTOR, 'li')[1].text.strip()  # 0 for bedroom
        except IndexError:
            bedroom = 'None'
        
        try:
            bathroom = article.find_elements(By.CSS_SELECTOR, 'li')[2].text.strip()  # 1 for bathroom
        except IndexError:
            bathroom = 'None'
        
        index_for_post_duration = 4
        try:
            sqft = article.find_elements(By.CSS_SELECTOR, 'li')[3].text.strip()  # 2 for sqft
            # Check if sqft contains 'sqft'
            if 'sqft' not in sqft:
                index_for_post_duration = 3
                sqft = 'None'
        except IndexError:
            sqft = 'None'
        
        try:
            post_duration = article.find_elements(By.CSS_SELECTOR, 'ul.card-listing--values li')[index_for_post_duration].text.strip()  # last element for post_duration
        except IndexError:
            post_duration = 'None'

        # Extract details link
        details_link = article.find_element(By.CSS_SELECTOR, 'a.card-listing--image-link').get_attribute('href')

        # Append the data to the list
        properties.append({
            'street_address': street_address,
            'city': city,
            'province': province,
            'image': image,
            'price': price,
            'bedroom': bedroom,
            'bathroom': bathroom,
            'sqft': sqft,
            'post_duration': post_duration,
            'details_link': details_link
        })
        print(properties)
# Print the scraped data
csv_file_name = 'properties_data.csv'

# Write the scraped data to a CSV file
with open(csv_file_name, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=properties[0].keys())
    writer.writeheader()
    writer.writerows(properties)

print(f'Successfully saved data to {csv_file_name}')
# Quit the driver
driver.quit()


In [None]:
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException

# Initialize the WebDriver (ensure ChromeDriver is in PATH)
driver = webdriver.Chrome()  # Ensure ChromeDriver is accessible

# Total number of pages to scrape
total_pages = 605

# List to hold the scraped data
properties = []

# SVG path to check for
svg_path_to_skip = 'd="M18 8h-1V6A5 5 0 007 6v2H6a2 2 0 00-2 2v10c0 1.1.9 2 2 2h12a2 2 0 002-2V10a2 2 0 00-2-2zM9 6a3 3 0 116 0v2H9V6zm9 14H6V10h12v10zm-6-3c1.1 0 2-.9 2-2s-.9-2-2-2-2 .9-2 2 .9 2 2 2z"'

# Loop through each page
for page_number in range(1, total_pages + 1):
    # Construct the URL for the current page
    url = f'https://www.zolo.ca/index.php?sarea=&s={page_number}'
    driver.get(url)

    # Find all article elements
    articles = driver.find_elements(By.CSS_SELECTOR, 'article.card-listing')

    # Loop through each article and extract the required information
    for article in articles:
        # Check if the article contains the specified SVG path
        if svg_path_to_skip in article.get_attribute('innerHTML'):
            continue  # Skip this article if the path is found

        # Initialize an empty dictionary for the property data
        property_data = {}

        try:
            property_data['street_address'] = article.find_element(By.CSS_SELECTOR, 'span[itemprop="streetAddress"]').text.strip()
            property_data['city'] = article.find_element(By.CSS_SELECTOR, 'span[itemprop="addressLocality"]').text.strip()
            property_data['province'] = article.find_element(By.CSS_SELECTOR, 'span[itemprop="addressRegion"]').text.strip()
            property_data['image'] = article.find_element(By.CSS_SELECTOR, 'img.card-listing--img').get_attribute('src')
            property_data['price'] = article.find_element(By.CSS_SELECTOR, 'span[itemprop="price"]').text.strip()

            # Safely extract bedroom, bathroom, sqft, and post_duration
            try:
                property_data['bedroom'] = article.find_elements(By.CSS_SELECTOR, 'li')[1].text.strip()  # Bedroom
            except IndexError:
                property_data['bedroom'] = 'None'
            
            try:
                property_data['bathroom'] = article.find_elements(By.CSS_SELECTOR, 'li')[2].text.strip()  # Bathroom
            except IndexError:
                property_data['bathroom'] = 'None'
            
            try:
                sqft = article.find_elements(By.CSS_SELECTOR, 'li')[3].text.strip()  # Sqft
                if 'sqft' not in sqft:
                    property_data['sqft'] = 'None'
                    property_data['built_on'] = article.find_elements(By.CSS_SELECTOR, 'ul.card-listing--values li')[3].text.strip()
                else:
                    property_data['sqft'] = sqft
                    property_data['built_on'] = article.find_elements(By.CSS_SELECTOR, 'ul.card-listing--values li')[4].text.strip()
            except IndexError:
                property_data['sqft'] = 'None'
                property_data['built_on'] = 'None'

            # Extract details link
            property_data['details_link'] = article.find_element(By.CSS_SELECTOR, 'a.card-listing--image-link').get_attribute('href')

            # Append the valid property data to the list
            properties.append(property_data)

        except NoSuchElementException:
            continue  # Skip this record if any required element is not found

# Define the CSV file name
csv_file_name = 'properties_data.csv'

# Write the scraped data to a CSV file
with open(csv_file_name, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=properties[0].keys())
    writer.writeheader()
    writer.writerows(properties)

print(f'Successfully saved data to {csv_file_name}')

# Quit the driver
driver.quit()


In [5]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Load the CSV file
properties_df = pd.read_csv('properties_data.csv')

# Set up Selenium WebDriver
driver = webdriver.Chrome()  # Ensure you have the appropriate driver installed

# List to store scraped data
scraped_data = []

# Loop through each row in the DataFrame
for index, row in properties_df.iterrows():
    details_link = row['details_link']
    driver.get(details_link)

    # Use explicit wait for address element to load
    try:
        # Wait for the address element to be present
        address = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'h1.address'))
        ).text
        
        # Scrape the price
        price = driver.find_element(By.CSS_SELECTOR, 'section.listing-price .xs-text-2').text.strip()

        # Scrape the number of bedrooms
        bedrooms = driver.find_element(By.CSS_SELECTOR, 'li.tile-data:nth-child(1) .priv').text.strip()

        # Scrape the number of bathrooms
        bathrooms = driver.find_element(By.CSS_SELECTOR, 'li.tile-data:nth-child(2) .priv').text.strip()

        # Scrape the square footage
        sqft = driver.find_element(By.CSS_SELECTOR, 'li.tile-data:nth-child(3) .priv').text.strip()

        # Scrape the posting date
        posting_date = driver.find_element(By.CSS_SELECTOR, 'div.xs-mt3.sm-mt0 dl.column:nth-of-type(1) .column-value .priv').text.strip()
        walk_score = 'None'
        
        # Initialize property details
        property_details = {
            "Type": 'None',
            "Style": 'None',
            "Lot Size": 'None',
            "Age": 'None',
            "Year Built": 'None',
            "Pets": 'None',
            "Taxes": 'None',
            "Maintenance Fees": 'None'
        }

        detail_elements = driver.find_elements(By.CSS_SELECTOR, 'div dl.column')
        for detail in detail_elements:
            label = detail.find_element(By.CSS_SELECTOR, '.column-label').text.strip()
            
            if "Walk Score" in label:
                walk_score = detail.find_element(By.CSS_SELECTOR, '.column-value').text.strip()
            else:
                try:
                    value = detail.find_element(By.CSS_SELECTOR, '.column-value span').text.strip()
                except Exception:
                    value = 'N/A'  # Default value if span is not found
                
                # Assign values based on the label
                if label in property_details:
                    property_details[label] = value

        # Append the data to the list
        scraped_data.append({
            'details_link': details_link,
            'posted': posting_date,
            'Walk Score': walk_score,
            **property_details
        })

    except Exception as e:
        print(f"Error scraping {details_link}: {e}")

# Close the browser
driver.quit()

# Convert the scraped data to a DataFrame
scraped_df = pd.DataFrame(scraped_data)

# Merge with the original properties DataFrame based on 'details_link'
merged_df = properties_df.merge(scraped_df, on='details_link', how='left')

# Save the merged DataFrame to CSV
merged_df.to_csv('merged_properties_data.csv', index=False)


Error scraping https://www.zolo.ca/pelham-real-estate/17-port-robinson-road: Message: no such element: Unable to locate element: {"method":"css selector","selector":"li.tile-data:nth-child(1) .priv"}
  (Session info: chrome=130.0.6723.70); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
	GetHandleVerifier [0x00007FF60A4F3AB5+28005]
	(No symbol) [0x00007FF60A4583B0]
	(No symbol) [0x00007FF60A2F580A]
	(No symbol) [0x00007FF60A345A3E]
	(No symbol) [0x00007FF60A345D2C]
	(No symbol) [0x00007FF60A38EA97]
	(No symbol) [0x00007FF60A36BA7F]
	(No symbol) [0x00007FF60A38B8B3]
	(No symbol) [0x00007FF60A36B7E3]
	(No symbol) [0x00007FF60A3375C8]
	(No symbol) [0x00007FF60A338731]
	GetHandleVerifier [0x00007FF60A7E643D+3118829]
	GetHandleVerifier [0x00007FF60A836C90+3448640]
	GetHandleVerifier [0x00007FF60A82CF0D+3408317]
	GetHandleVerifier [0x00007FF60A5BA40B+841403]
	(No symbol) [0x00007FF60A