In [1]:
import os
import re
import time
from random import randint

import pandas as pd
from bs4 import BeautifulSoup
# Scraping through chrome driver 
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

In [2]:
# Starting URLs
# NOTE: the name `centris` is rebound to a Centris scraper instance further
# down the notebook, so it only holds this URL string until that cell runs.
centris = "https://www.centris.ca/en/properties~for-sale?view=Thumbnail"
duproprio = "https://duproprio.com/en/search/list?search=true&is_for_sale=1&with_builders=1&parent=1&pageNumber=1&sort=-published_at"
# Path to chromedriver. Set the CHROMEDRIVER_PATH environment variable to
# override the default Windows location when running on another machine.
DRIVER_PATH = os.environ.get("CHROMEDRIVER_PATH", 'C:/webdriver/chromedriver.exe')

In [56]:
class Centris:
    """
    Class representing a data object that is being crawled from
    centris.ca through selenium and the Chrome webdriver.

    Attr:
    self.url - starting url for the crawl on centris.ca
    self.data - DataFrame of extracted listings with the columns
                'title', 'price(CAD)' and 'address'
    self.driver - Chrome webdriver, None until start_driver() is called
    self.container_descriptions - elements with class "description" found
                                  on the page currently open in the driver
    """

    def __init__(self, url="https://www.centris.ca/en/properties~for-sale?view=Thumbnail"):
        # Starting url
        self.url = url
        # Extracted data
        self.data = pd.DataFrame(columns=['title', 'price(CAD)', 'address'])
        # Google Chrome driver
        self.driver = None
        # List of real estate descriptions found on current page
        self.container_descriptions = []

    def add_data(self, title, price, address):
        """
        Appends one or more listings to 'self.data'.

        Each argument may be a single value (one listing) or a sequence of
        values (several listings). The stored frame is re-indexed 0..n-1.
        """
        columns = {'title': title,
                   'price(CAD)': price,
                   'address': address}
        if all(pd.api.types.is_scalar(value) for value in columns.values()):
            # A dict of bare scalars cannot be turned into a DataFrame
            # directly (pandas asks for an index), so wrap it as a single row.
            new_data = pd.DataFrame([columns])
        else:
            new_data = pd.DataFrame(columns)

        if new_data.empty:
            return
        if self.data.empty:
            # Skip concatenating with the empty placeholder frame so its
            # object dtypes do not leak into the result.
            self.data = new_data.reset_index(drop=True)
        else:
            # DataFrame.append was removed in pandas 2.0; concat replaces it.
            self.data = pd.concat([self.data, new_data], ignore_index=True)

    def get_data(self):
        """Returns the DataFrame of listings collected so far."""
        return self.data

    def start_driver(self):
        """
        Starts the Chrome webdriver in headless mode, opens the
        'self.url' page and stores the driver in 'self.driver'.

        A driver started earlier by this object is quit first.
        Returns the started driver.
        """
        # Do not leak the browser process of a previous start
        self.stop_driver()

        # Activate headless mode for fastest response
        options = Options()
        options.add_argument("--headless")

        # Start driver; the options have to be passed in, otherwise the
        # headless flag configured above is never applied
        self.driver = webdriver.Chrome(executable_path=DRIVER_PATH, options=options)
        self.driver.get(self.url)
        return self.driver

    def stop_driver(self):
        """Quits the Chrome webdriver if one is running and resets 'self.driver' to None."""
        if self.driver is not None:
            self.driver.quit()
            self.driver = None

    def set_container_descriptions(self):
        """
        Stores every element with class "description" of the current page
        in 'self.container_descriptions'.

        Prints a hint instead of raising when the lookup fails, e.g.
        because the driver has not been started yet.
        """
        try:
            self.container_descriptions = self.driver.find_elements_by_class_name("description")
        except Exception:
            # 'Exception' rather than a bare except, so that Ctrl-C
            # (KeyboardInterrupt) still interrupts the cell
            print("Chrome driver is not running!")
            print("Use: \"start_driver()\"")

    def get_next_page(self):
        """
        Clicks the "next" list item (//li[@class='next']) of the current page.

        Prints a message instead of raising when the button cannot be
        found or clicked.
        """
        try:
            next_page = self.driver.find_element_by_xpath("//li[@class='next']")
            next_page.click()
        except Exception:
            # Best effort on purpose; KeyboardInterrupt is no longer swallowed
            print("Next page button not available")

# Instantiate class object with the default start URL of the constructor.
# NOTE: this rebinds `centris`, which an earlier cell set to the start-URL
# string; from here on `centris` is a Centris instance.
centris = Centris()

In [57]:
# Smoke test: start the browser on the start URL, collect the elements with
# class "description", then show the collected list and the driver handle.
centris.start_driver()
centris.set_container_descriptions()

print(centris.container_descriptions, centris.driver, sep="\n")

[<selenium.webdriver.remote.webelement.WebElement (session="d33c42384691d49203d2c360e1e3c1f5", element="74850f79-881e-4fd0-80ec-5b8abf8aef84")>, <selenium.webdriver.remote.webelement.WebElement (session="d33c42384691d49203d2c360e1e3c1f5", element="daa70169-bf3a-443f-bd8c-604822f32bf2")>, <selenium.webdriver.remote.webelement.WebElement (session="d33c42384691d49203d2c360e1e3c1f5", element="5e688e50-a72e-49be-8c88-fd26816b4607")>, <selenium.webdriver.remote.webelement.WebElement (session="d33c42384691d49203d2c360e1e3c1f5", element="7e44af7d-cb29-47e1-9288-c0b77566fef1")>, <selenium.webdriver.remote.webelement.WebElement (session="d33c42384691d49203d2c360e1e3c1f5", element="8b112692-3dd7-4c89-b1c6-34d2265c8130")>, <selenium.webdriver.remote.webelement.WebElement (session="d33c42384691d49203d2c360e1e3c1f5", element="9b172787-f72c-4922-bd08-6d389298392e")>, <selenium.webdriver.remote.webelement.WebElement (session="d33c42384691d49203d2c360e1e3c1f5", element="576bff0b-a93f-4de5-bb2c-82217bc3

In [4]:
def get_container_descriptions(url:str):
    """
    Opens 'url' in a new Chrome webdriver and returns the elements with
    class "description" found on that page.

    The work is delegated to the Centris class: no free-standing
    start_driver() function is defined in the visible notebook
    (start_driver() only exists as a Centris method), so the former call
    `start_driver(url)` failed with NameError on a fresh kernel.

    url - address of the page to open

    Returns the list held in Centris.container_descriptions; it is empty
    when the lookup failed.
    """
    scraper = Centris(url)
    scraper.start_driver()
    scraper.set_container_descriptions()
    # The browser is left running, as before.
    # NOTE(review): the returned elements presumably stay usable only while
    # their browser session is alive - confirm before adding a quit() here.
    return scraper.container_descriptions

In [10]:
def get_next_page(driver):
    """
    Clicks the "next" list item (//li[@class='next']) on the page that is
    currently open in 'driver'.

    driver - webdriver-like object offering find_element_by_xpath()

    Prints a message instead of raising when the button cannot be found
    or clicked. Returns None.
    """
    try:
        next_page = driver.find_element_by_xpath("//li[@class='next']")
        next_page.click()
    except Exception:
        # 'Exception' instead of a bare except: a failed lookup or click
        # stays non-fatal, but KeyboardInterrupt / SystemExit now propagate
        # so a running crawl can still be interrupted.
        print("Next page button not available")

In [11]:
# Click Next button
# `centris` is the Centris instance created above, and start_driver() only
# exists as a method of that class, so the browser is taken from the
# instance rather than from a free-standing start_driver() call.
if centris.driver is None:
    # Only launch a browser when none is running yet, to avoid a second one
    centris.start_driver()
driver = centris.driver
get_next_page(driver)