In [1129]:
import pandas as pd
import numpy as np
from random import randint
import time
import re
# Scraping through Chrome webdriver
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

In [1130]:
# Starting URLs
centris = "https://www.centris.ca/en/properties~for-sale?view=Thumbnail"
duproprio = "https://duproprio.com/en/search/list?search=true&is_for_sale=1&with_builders=1&parent=1&pageNumber=1&sort=-published_at"

In [1152]:
class Centris:
    """
    Accessing 
    
    Attr:
    self.url - starting url for scraping process
    self.data - pandas.DataFrame object containing scraped data
    self.driver - Chrome webdriver
    self.containers - List of web-elements containing information on listings
        - eg: link to listing, price, picture, address,...
    self.links_to_listings - List of web-elements, each containing the link to a listing
    """
     
    def __init__(self, url="https://www.centris.ca/en/houses~for-sale~lac-simon/11851081?view=Summary&uc=3"): 
        self.url = url
        self.data = pd.DataFrame()
        # Path to Chromedriver
        self.DRIVER_PATH = 'C:/webdriver/chromedriver.exe'
        self.driver = None
        # Verification for new DOM
        self.old_DOM = {\
                        'title' : [],\
                        'address' : [],\
                        'price' : [],\
                        'lat' : [],\
                        'long' : [],\
                        'descriptions' : [],\
                        'neighbourhood_top' : [],\
                        'neighbourhood_mid' : [],\
                        'neighbourhood_buttom' : [],\
                        'demographics_buttons' : [],\
                    }
        
    def reset_old_DOM(self):
        self.old_DOM = {\
                        'title' : [],\
                        'address' : [],\
                        'price' : [],\
                        'lat' : [],\
                        'long' : [],\
                        'descriptions' : [],\
                        'neighbourhood_top' : [],\
                        'neighbourhood_mid' : [],\
                        'neighbourhood_buttom' : [],\
                        'demographics_buttons' : [],\
                    }

    def append_data(self, title, address, price,\
            lat, long, descriptions, neighbourhood_indicators,\
            population, demographics):
        """Appends new data to existing data frame.
        
        Args:
        title - string
        address - string 
        price - 
        lat - 
        long - 
        descriptions - 
        neighbourhood_indicators -
        population - 
        demographics - 
        """
        new_data = pd.DataFrame({\
                        'title': title,\
                        'address': address,\
                        'price': price,\
                        'lat': lat,\
                        'long': long\
                    }, index=[0])
        
        # DESCRIPTIONS
        description_table = pd.DataFrame()
#         headers_of_interest = [\
#                 "rooms", "bedrooms", "powder room", "Number of units", "Building style",\
#                 "Condominium type", "Year built", "Building area", "Lot area", "walk_score",\
#                 "Net area", "Parking (total)", "Main unit", "Potential gross revenue", "Pool"\
#                               ]
#         # Ensures consistency accross listings
#         for header in headers_of_interest:
#             if header in descriptions.keys():
#                 value = descriptions[header]
#             else:
#                 value = np.nan
#             description_table[header] = pd.Series(value)

        for key in descriptions.keys():
            header = key
            value = descriptions[header]
            description_table[header] = pd.Series(value)
        
        # POPULATION AND DEMOGRAPHICS
        new_data = pd.concat([new_data, neighbourhood_indicators, description_table,\
                             population, demographics], axis=1)
        # LOGGING --------------------------     
        #print(new_data)
        
        self.data = self.data.append(new_data, sort=False,\
                                     ignore_index=True)
        
    
    def get_data(self):
        return self.data
        
    def start_driver(self):
        """
        Starts and returns Crome webdriver. 
        The page link in the url attribute 
        is opened in headless mode.
        """
        
        # Activate headless mode for fastest response
        options = Options()
        options.add_argument("--headless")
        options.add_argument("--disable-infobars"); # disabling infobars
        options.add_argument("--disable-extensions"); # disabling extensions
        options.add_argument("--disable-gpu"); # applicable to windows os only
        options.add_argument("--disable-dev-shm-usage"); # overcome limited resource problems
        options.add_argument("--no-sandbox"); # Bypass OS security model
        options.add_argument('--start-maximized') # open Browser in maximized mode
        options.add_argument('--incognito')

        # Start driver with url
        self.driver = webdriver.Chrome(executable_path=self.DRIVER_PATH)
        self.driver.get(self.url)

    def sort_listings(self):
        """Sorts listings in webdriver from newest to oldest."""
        
        # Click drop down menu
        drop_down = self.driver.find_element_by_xpath(\
                                    "//button[@id='dropdownSort']")
        drop_down.click()
        
        # Sort by most recent listings
        sort_by = self.driver.find_element_by_xpath("//a[@data-option-value='3']")
        sort_by.click()
    
    def goto_first_page(self):
        try:
            next_page = self.driver.find_element_by_xpath(\
                                        "//li[@class='goFirst']")
            next_page.click()
        except:
            print("goFirst button not available")
    
    def next_page(self):
        try:
            next_page = self.driver.find_element_by_xpath(\
                                        "//li[@class='next']")
            next_page.click()
            pass
        except:
            time.sleep(0.5)
            # Try again after waiting 0.5 sec.
            try:
                next_page = self.driver.find_element_by_xpath(\
                                            "//li[@class='next']")
                next_page.click()
                pass
            except:
                print("Next-page button not found!")
                
    def get_page_position(self):
        '''Returns the first and last page of the current search.
        
        Returns
        tuple - (current_page, last_page), '''
        
        pages = self.driver.find_element_by_xpath(\
                                    "//li[@class='pager-current']").text.\
                                    split(" / ")
        
        current_page, last_page = (int(page.replace(",","")) for page in pages)
        
        return (current_page, last_page)
    
    def refresh_page(self):
        "Refreshes current webdriver page."
        self.driver.refresh()
        print("Page is being refreshed.")
        # Wait until page fully loaded
        time.sleep(2)
        
    def distance(origin, destination):
        """Calculates distances from latitudinal/longitudinal data using
        the haversine formula"""
        lat1, lon1 = origin
        lat2, lon2 = destination
        radius = 6371 # km
        
        #Convert from degrees to radians
        dlat = math.radians(lat2-lat1)
        dlon = math.radians(lon2-lon1)
        
        # Haversine formula
        a = math.sin(dlat/2) * math.sin(dlat/2) + math.cos(math.radians(lat1)) \
            * math.cos(math.radians(lat2)) * math.sin(dlon/2) * math.sin(dlon/2)
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
        d = radius * c

        return d
                
                                                 
# Instantiate class object
centris = Centris()

The following functions need to be outside of the Class. wait_for_xpath() determined the approptiate time to call get_data(). Initially, both fuctions were part of the class object. It seems that after the get_data() call, the driver does not get updated within the class. This leads in some cases to old DOM's being accessed after the browser has already switched to the next page. To circumvent this issue, elements are called outside the class and tried until accessible. This allows the entire new DOM to be loaded before get_data() is called.

In [None]:
 def wait_for_xpath(xpath: str, old_element):
        """
        Wait until elements in new DOM are accessible.
        
        Arg.
        xpath - xpath to new element 
        old_element - element at xpath from previous DOM (found in centris.old_DOM)
        
        Returns:
        current_element - the element found in the new DOM at xpath
        """
        
        centris_driver = centris.driver
        element_at_xpath = []
        
        # Ensure that the NEW rather than the previous or no DOM is active
        # Maximum wait time 10 sec.
        time_passed = 0
        while (\
            (element_at_xpath == old_element or  element_at_xpath == [])\
            and (time_passed <= 10)\
              ):
            # Wait for DOM to load
            time.sleep(0.2)
            time_passed += 0.2
            
            # Print every 2 seconds
            if time_passed%2 == 0:
                print("Waiting for new DOM...")
            
            # Attempt to load new DOM
            try: 
                element_at_xpath = centris_driver.find_elements_by_xpath(xpath)
            except: pass
        
        # After 10 seconds unlikely to load at all -> restart entire process
        if time_passed > 10:
            print("RuntimeError: element not found.")
            centris.refresh_page()
            get_data_from_centris()
            wait_for_xpath(xpath, old_element)
            
        return element_at_xpath

In [1133]:
def scrape_description(old_DOM):
    """ Requires instantiated centris object. Scrapes and returns
    description data: Year build, price, Net area, etc."""
    
    descriptions = wait_for_xpath("//div[@class='col-lg-12 description']",\
                                 old_DOM)
    #First three elements not relevant
    descriptions_list = descriptions[0].text.split("\n")[3:]
    
    #LOGGING------------------------
    #print("DESCRIPTION:", descriptions_list)
    
    # Update old_DOM dictionary with new element for next verification
    centris.old_DOM['descriptions'] = descriptions
    
    return extract_descriptions(descriptions_list)

In [1134]:
def extract_descriptions(descriptions_list):
    """Takes in data from scrape_description() and returns it 
    as a dictionary"""
    
    # The data_dict found on this part of the page is inconsistent across listings
    # The first row may contain the number of rooms, bedrooms and bathrooms without headers or may be missing
    # Following rows have heathers with associated values after a line break
    # The very last element may be a walking score without header
    # Listings without first row may supply first row information in subsequent rows with headers
    # Because of these inconsistencies, two seperate extractions need to be implemented: one for
    # first row lements (if they exist) and another for subsequent rows
    
    # Transformed data
    data_dict = {}
    # Distinguish between elements from first and subsequent rows if first row exists
    first_row = True
    # Starting point for second part of transformation
    second_row_index = 0
    
    # First Part
    while first_row == True:
        for description in descriptions_list:
            numeric = re.findall("\(*[0-9]+\)*", description) # numbers
            text = re.findall("[A-Za-z]+[A-Za-z\s\-]*", description) # text after/inbetween numbers 

            # Initial elements with numeric values correspond to first row
            if (numeric != []):
                # For each value there must be one text description
                if (len(numeric) == len(text)):
                    for description,value in zip(text, numeric):
                        # Save as column in data_dict
                        description_clean = description.replace("and", "").strip()
                        data_dict[description_clean] = value
                    second_row_index += 1 
                else:
                    print("Unequal number of first row keys and values!")
                    print("Numbers:", numeric)
                    print("Text:", text)
                    break
            else:
                first_row = False # No numeric information implies header
                break
    
    # Index range of second extraction
    # Headers are found at every second index (0,2,4,...)
    # Values are one index apart from their corresponding header (1,3,5,...)
    list_length = len(descriptions_list)
    if (list_length - second_row_index)%2 == 1: # Implies presence of element without header -> Walk Score
        walk_score_listed = True
        end_point = list_length -1
    else:
        walk_score_listed = False
        end_point = list_length
    # Indices corresponding to headers
    extraction_range = range(second_row_index, end_point, 2)
    
    #LOGGING----------------------
#     print("Second row index:", second_row_index)
#     print("Extraction range:", extraction_range)
#     print("List:", descriptions_list)
    
    # Second Part
    for header_index in extraction_range:
        # Headers as column names
        header = descriptions_list[header_index]
        # Values corresponding to headers are found at subsequent indices
        information = descriptions_list[header_index + 1] 
        data_dict[header] = information
    
    if walk_score_listed:
        data_dict["walk_score"] = descriptions_list[-1]
        #LOGGING----------------------
        #print("Walk Score:", descriptions_list[-1])
        
    #LOGGING--------------------------
#     print("Descriptions:", data_dict)
        
    return data_dict

In [1135]:
def scrape_neighbourhood(old_DOM_top, old_DOM_mid, old_DOM_buttom):
    """ Scrapes and returns a list of ratings 
    between 0-10 for a set of neighborhood indicators
    such as groceries, parks, noise, etc.)
    """
    driver = centris.driver
    
    # Extract elements from top section of scrollable list
    neighbourhood_top = wait_for_xpath(\
                            "//div[@class='ll-list ps ps--active-y']",\
                            old_DOM_top)
    # Split into indicators and ranking values
    top = [x.text for x in neighbourhood_top][0].split("\n")
    
    # LOGGING----------------------
#     print("Top neighbourhood:", top)
    
    # Extract middle section - only one element
    # Scroll and activate scrollable bar container
    scrollable_bar = driver.find_element_by_xpath(\
                                            "//div[@class='ps__thumb-y']")
    ActionChains(driver).\
        move_to_element(scrollable_bar).\
        send_keys(Keys.PAGE_DOWN).\
        click(scrollable_bar).perform()

    # Elements from buttom of scrollable list
    neighbourhood_mid = wait_for_xpath(\
                            "//div[@class='ll-list ps ps--active-y']",\
                            old_DOM_mid)
    # Split into indicators and ranking values
    
    # LOGGING----------------------
    #print("Neighbourhoud mid section:", neighbourhood_mid)
    
    middle = [x.text for x in neighbourhood_mid][0].split("\n")
    
    # Extract buttom section
    # Scroll and load remaining elements
#     scrollable_bar = driver.find_element_by_xpath(\
#                                             "//div[@class='ps__thumb-y']")
    ActionChains(driver).\
        move_to_element(scrollable_bar).\
        send_keys(Keys.PAGE_DOWN).\
        click(scrollable_bar).perform()
    
    # Elements from buttom of scrollable list
    neighbourhood_buttom = wait_for_xpath(\
                            "//div[@class='ll-list ps ps--active-y']",\
                            old_DOM_buttom)
    # Split into indicators and ranking values
    buttom = [x.text for x in neighbourhood_buttom][0].split("\n")
    
    # LOGGING----------------------
#     print("Buttom neighbourhood:", buttom)
    
    # Unite all three sections by storing tuples of indicator names and corresponding values
    united_list = []
    list_length = len(top)
    for i in range(0, list_length, 2):
        united_list.append((top[i], top[i+1]))
        united_list.append((middle[i], middle[i+1]))
        united_list.append((buttom[i], buttom[i+1]))
    
    # Create set of unique tuples
    neighbourhood_indicators = set(united_list)
    
    # LOGGING----------------------
#     print("Number of neighborhood indicators: ", len(neighbourhood_indicators))
#     print("UNITED:", united_list)
#     print("SET:", neighbourhood_indicators)
    
    # Verify size and extract information as list
    # If size unexpected, refresh page and restart process
    if len(neighbourhood_indicators) < 8:
            centris.refresh_page()
            scrape_neighbourhood(old_DOM_top, old_DOM_buttom)

    # Update old_DOM dictionary with new elements for next verification
    centris.old_DOM['neighbourhood_top'] = neighbourhood_top
    centris.old_DOM['neighbourhood_mid'] = neighbourhood_mid
    centris.old_DOM['neighbourhood_buttom'] = neighbourhood_buttom

    return extract_neighbourhood_indicators(neighbourhood_indicators)

In [1136]:
def extract_neighbourhood_indicators(indicators):
    """Takes in neighbourhood data from scrape_neighbourhood() and returns it 
    in tabular form as a DataFrame object"""
    data = pd.DataFrame()
    for indicator in indicators:
        header = indicator[0]
        value = indicator[1]
        data[header] = pd.Series(value)
    
    return data

In [1137]:
def scrape_population():
    """Scrapes and returns population summary data (density, variation etc.)"""
    population_summaries =  centris.driver.find_element_by_id('info')
    population_summaries_list = population_summaries\
                        .text.split("\n")
    
    # LOGGING-----------------------
    #print("Population:", population_summaries_list)
    
    return extract_population(population_summaries_list)

In [1138]:
def extract_population(population):
    """Takes in population data from scrape_population() and returns it 
    in tabular form as a DataFrame object"""
    
    data = pd.DataFrame()
    for info in population:
        units_removed =  info.replace("hab/km2", "").strip()
        # Numeric data
        numeric = re.findall("[0-9]+[0-9,]*", units_removed)
        numeric_clean = numeric[-1].replace(",","")

        # Text data for column names
        header = re.findall("[a-zA-Z\s]+", units_removed)
        header_clean = header[0]
        # Add numeric data to header excluding the value at index -1
        for numeric_head_data in numeric[:-1]:
            header_clean = header_clean + str(numeric_head_data) + " "

        data[header_clean] = pd.Series(numeric_clean).astype("int")
    return data

In [1171]:
def scrape_demographics(old_DOM):
    """Scrapes and return demographic data found in a clickable list"""
    
    driver = centris.driver
    # Clickable list containing demographic data
    menu = driver.find_element_by_id("menu")
    # Load menu by moving browser to it
    ActionChains(driver).\
    move_to_element(menu).perform()
    
    #Buttons to access demographics data (education, incomes, etc.)
    demographics_buttons = wait_for_xpath(\
                        "//div[@class='centrisSocioDemobutton']",\
                                                 old_DOM)

    # LOGGING------------------------
    # print("DEMO. BUTTONS:", demographics_buttons)

    # First entry on clickable demographics list (pre-selected)
    demographics = []

    # Click buttons to access next demogrpahics elements
    for button in demographics_buttons:
        try: 
            button.click()
        except: 
            print("Demographics button missing!")
            # Reattempt loading buttons
            centris.refresh_page()
            ActionChains(driver).\
            move_to_element(menu).\
            perform()
            time.sleep(2) # extra time to load
            demographics_buttons = wait_for_xpath(\
                        "//div[@class='centrisSocioDemobutton']",\
                                                 old_DOM)
            
        # Get and append data after button click
        demographic_data = driver.find_element_by_class_name(\
                         "socioDemoLabel")
        demographics.append(demographic_data.text)
    
    # Split each demographic component into separate list
    # Example: splits "Occupation" data into -> ["Owners", "35%", "Renters", "65%"]
    demographics = [demo.split("\n") for demo in demographics]
    
    #LOGGING------------------------
#     print("DEMO. DATA:", demographics)
#     print("-"*50)
    
    # Update old_DOM dictionary with new elements for next verification
    centris.old_DOM['demographics_buttons'] = demographics_buttons
    
    return extract_demographics(demographics)

In [1140]:
def extract_demographics(demographics):
    """Takes in demographic data from extract_demographics() and returns it 
    in tabular form as a DataFrame object"""
    
    data = pd.DataFrame()
    
    for demographic in demographics:
        # Remove empty stings from splitting double line breaks \n\n
        removed_empty_strings = [x for x in demographic if x != ""]
        # Format of demographic: [header, value, header, value, ...]
        header_index = range(0, len(demographic), 2)
        for i in header_index:
            header = demographic[i] + " (%)" # add units to column names
            value = demographic[i+1].replace("%", "") # remove units from values 
            data[header] = pd.Series(value).astype("int")

    return data

In [1141]:
def get_data_from_centris():
        """
        Requires instantiate Centris object. Scrapes information from the
        webdriver and appends it to the Centris object.
        """
        driver = centris.driver
        old_DOM = centris.old_DOM
        
        # Data from headers
        print("Start scraping new page...")
        title = wait_for_xpath("//span[@data-id='PageTitle']", old_DOM['title'])
        address = wait_for_xpath("//h2[@itemprop='address']", old_DOM['address'])
        price = wait_for_xpath("//span[@itemprop='price']", old_DOM['price'])
        lat = wait_for_xpath("//meta[@itemprop='latitude']", old_DOM['lat'])
        long = wait_for_xpath("//meta[@itemprop='longitude']", old_DOM['long'])
        
        # Save elements as old DOM
        centris.old_DOM['title'] = title
        centris.old_DOM['address'] = address
        centris.old_DOM['lat'] = lat
        centris.old_DOM['long'] = long
        
        # Scrape remaining elements and store in dataframe
        descriptions = scrape_description(old_DOM['descriptions'])
        neighbourhood_indicators = scrape_neighbourhood(old_DOM['neighbourhood_top'],\
                                                            old_DOM['neighbourhood_mid'],\
                                                            old_DOM['neighbourhood_buttom'])
        population = scrape_population()
        demographics = scrape_demographics(old_DOM['demographics_buttons'])
                
        # Unify data in single dataframe and append to results table
        centris.append_data(
            title[0].text,\
            address[0].text,\
            price[0].text,\
            lat[0].get_attribute("content"),\
            long[0].get_attribute("content"),\
            descriptions,\
            neighbourhood_indicators,\
            population,\
            demographics\
        )
        
        # LOGGING--------------------------
        #print("GET DATA: DESCRIPTIONS:", descriptions)
        
        # Return to top of page, to access next-page button
        body = driver.find_element_by_tag_name("body")
        body.send_keys(Keys.HOME)
#         for i in range(7):
#             body.send_keys(Keys.PAGE_UP)

## Testing

In [1155]:
# Test
centris = Centris()
start = time.time()
centris.start_driver()
centris.sort_listings()
print("Execution time:", time.time() - start)

Execution time: 11.677691459655762


Before running the next cell, search for the region(s) you want to scrape in the webdriver window.
This is not required but will substential limit run time and narrow results.

In [None]:
start = time.time() 
current_page, last_page = centris.get_page_position() 
pages_to_scrape = last_page - current_page + 1 # in case scraping is interupted
one_to_100 = range(1,100) # to print message each 1% completion

print("Scraping initiated.")
print("Total number of pages to scrape:", pages_to_scrape)
print("Estimated runtime:", round(total_pages*((9.6)/(60*60)), 2), "hours")
print("="*50)

for i in range(pages_to_scrape):
    
    print("="*50)
    print("Page:", i+1)
    time_passed = 0 # to exit while loop after 10 seconds
    
    #Refresh every 20 pages to clear memory build-up
    if (i+1)%20 == 0:
        print("Clearing memory")
        print("-"*50)
        
        # Each refresh frees some memory. Four seem to work best.
        for i in range(4):
            centris.refresh_page()
            # Extra time for last refresh
            # Ensures that DOM is fully loaded
            if i == 3:
                time.sleep(2)
            
    #Retrieve data    
    get_data_from_centris()
    
    # Short delay for chrome to respond to PAGE_UP command
    time.sleep(0.5)
    centris.next_page()
    
    # Percent completed of scraping 
    percent_complete = round(100*((i)/total_pages),2)      
    # Print after every 1% mark
    if percent_complete in one_to_100:
        execution_time = (time.time() - start)/(i+1) # seconds per page
        print(percent_complete, "%", "completed")
        print("Average execution time per page:", round(execution_time, 2), "sec.")
        print("Estimated remaining runtime:", round(\
                                (total_pages - (i+1))\
                                *(execution_time\
                                /(60*60)), 1\
                                                   ), "hours <", "-"*50)
        print("="*50)


print("Total runtime:", execution_time/(60*60), "hours")
centris.data

Scraping initiated.
Total number of pages to scrape: 4531
Estimated runtime: 13.3 hours
Page: 1
Start scraping new page...
Page: 2
Start scraping new page...
Page: 3
Start scraping new page...
Page: 4
Start scraping new page...
Page: 5
Start scraping new page...
Page: 6
Start scraping new page...
Page: 7
Start scraping new page...
Page: 8
Start scraping new page...
Page: 9
Start scraping new page...
Page: 10
Start scraping new page...
Page: 11
Start scraping new page...
Page: 12
Start scraping new page...
Page: 13
Start scraping new page...
Page: 14
Start scraping new page...
Page: 15
Start scraping new page...
Page: 16
Start scraping new page...
Page: 17
Start scraping new page...
Page: 18
Start scraping new page...
Page: 19
Start scraping new page...
Page: 20
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 21
Start scraping new page...
Page: 22
Start scraping new page...
Page: 23
Start scraping new page...
Page: 24
Start scraping ne

Page: 91
Start scraping new page...
Page: 92
Start scraping new page...
Page: 93
Start scraping new page...
Page: 94
Start scraping new page...
Page: 95
Start scraping new page...
Page: 96
Start scraping new page...
Page: 97
Start scraping new page...
Page: 98
Start scraping new page...
Page: 99
Start scraping new page...
Page: 100
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 101
Start scraping new page...
Page: 102
Start scraping new page...
Page: 103
Start scraping new page...
Page: 104
Start scraping new page...
Page: 105
Start scraping new page...
Page: 106
Start scraping new page...
Page: 107
Start scraping new page...
Page: 108
Start scraping new page...
Page: 109
Start scraping new page...
Page: 110
Start scraping new page...
2.0 % completed
Average execution time per page: 8.73 sec.
Estimated remaining runtime: 12.9 hours < --------------------------------------------------
Page: 111
Start scraping new page...
Page: 112
Sta

Page: 177
Start scraping new page...
Page: 178
Start scraping new page...
Page: 179
Start scraping new page...
Page: 180
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 181
Start scraping new page...
Page: 182
Start scraping new page...
Page: 183
Start scraping new page...
Page: 184
Start scraping new page...
Page: 185
Start scraping new page...
Page: 186
Start scraping new page...
Page: 187
Start scraping new page...
Page: 188
Start scraping new page...
Page: 189
Start scraping new page...
Page: 190
Start scraping new page...
Page: 191
Start scraping new page...
Page: 192
Start scraping new page...
Page: 193
Start scraping new page...
Page: 194
Start scraping new page...
Page: 195
Start scraping new page...
Page: 196
Start scraping new page...
Page: 197
Start scraping new page...
Page: 198
Start scraping new page...
Page: 199
Start scraping new page...
Page: 200
Refreshing page
--------------------------------------------------
Start

Page: 267
Start scraping new page...
Page: 268
Start scraping new page...
Page: 269
Start scraping new page...
Page: 270
Start scraping new page...
Page: 271
Start scraping new page...
Page: 272
Start scraping new page...
Page: 273
Start scraping new page...
5.0 % completed
Average execution time per page: 8.89 sec.
Estimated remaining runtime: 12.8 hours < --------------------------------------------------
Page: 274
Start scraping new page...
Page: 275
Start scraping new page...
Page: 276
Start scraping new page...
Page: 277
Start scraping new page...
Page: 278
Start scraping new page...
Page: 279
Start scraping new page...
Page: 280
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 281
Start scraping new page...
Page: 282
Start scraping new page...
Page: 283
Start scraping new page...
Page: 284
Start scraping new page...
Page: 285
Start scraping new page...
Page: 286
Start scraping new page...
Page: 287
Start scraping new page...
Page

Page: 355
Start scraping new page...
Page: 356
Start scraping new page...
Page: 357
Start scraping new page...
Page: 358
Start scraping new page...
Page: 359
Start scraping new page...
Page: 360
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 361
Start scraping new page...
Page: 362
Start scraping new page...
Page: 363
Start scraping new page...
Page: 364
Start scraping new page...
Page: 365
Start scraping new page...
Page: 366
Start scraping new page...
Page: 367
Start scraping new page...
Page: 368
Start scraping new page...
Page: 369
Start scraping new page...
Page: 370
Start scraping new page...
Page: 371
Start scraping new page...
Page: 372
Start scraping new page...
Page: 373
Start scraping new page...
Page: 374
Start scraping new page...
Page: 375
Start scraping new page...
Page: 376
Start scraping new page...
Page: 377
Start scraping new page...
Page: 378
Start scraping new page...
Page: 379
Start scraping new page...
Page: 38

Start scraping new page...
Page: 441
Start scraping new page...
Page: 442
Start scraping new page...
Page: 443
Start scraping new page...
Page: 444
Start scraping new page...
Page: 445
Start scraping new page...
Page: 446
Start scraping new page...
Page: 447
Start scraping new page...
Page: 448
Start scraping new page...
Page: 449
Start scraping new page...
Page: 450
Start scraping new page...
Page: 451
Start scraping new page...
Page: 452
Start scraping new page...
Page: 453
Start scraping new page...
Page: 454
Start scraping new page...
Page: 455
Start scraping new page...
Page: 456
Start scraping new page...
Page: 457
Start scraping new page...
Page: 458
Start scraping new page...
Page: 459
Start scraping new page...
Page: 460
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 461
Start scraping new page...
Page: 462
Start scraping new page...
Page: 463
Start scraping new page...
Page: 464
Start scraping new page...
Page: 465
Start sc

Page: 531
Start scraping new page...
Page: 532
Start scraping new page...
Page: 533
Start scraping new page...
Page: 534
Start scraping new page...
Page: 535
Start scraping new page...
Page: 536
Start scraping new page...
Page: 537
Start scraping new page...
Page: 538
Start scraping new page...
Page: 539
Start scraping new page...
Page: 540
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 541
Start scraping new page...
Page: 542
Start scraping new page...
Page: 543
Start scraping new page...
Page: 544
Start scraping new page...
Page: 545
Start scraping new page...
10.0 % completed
Average execution time per page: 9.04 sec.
Estimated remaining runtime: 12.3 hours < --------------------------------------------------
Page: 546
Start scraping new page...
Page: 547
Start scraping new page...
Page: 548
Start scraping new page...
Page: 549
Start scraping new page...
Page: 550
Start scraping new page...
Page: 551
Start scraping new page...
Pag

Page: 619
Start scraping new page...
Page: 620
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 621
Start scraping new page...
Page: 622
Start scraping new page...
Page: 623
Start scraping new page...
Page: 624
Start scraping new page...
Page: 625
Start scraping new page...
Page: 626
Start scraping new page...
Page: 627
Start scraping new page...
Page: 628
Start scraping new page...
Page: 629
Start scraping new page...
Page: 630
Start scraping new page...
Page: 631
Start scraping new page...
Page: 632
Start scraping new page...
Page: 633
Start scraping new page...
Page: 634
Start scraping new page...
Page: 635
Start scraping new page...
Page: 636
Start scraping new page...
Page: 637
Start scraping new page...
Page: 638
Start scraping new page...
Page: 639
Start scraping new page...
Page: 640
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 641
Start scraping new page...
Page: 642
Start

13.0 % completed
Average execution time per page: 9.59 sec.
Estimated remaining runtime: 12.6 hours < --------------------------------------------------
Page: 709
Start scraping new page...
Page: 710
Start scraping new page...
Page: 711
Start scraping new page...
Page: 712
Start scraping new page...
Page: 713
Start scraping new page...
Page: 714
Start scraping new page...
Page: 715
Start scraping new page...
Page: 716
Start scraping new page...
Page: 717
Start scraping new page...
Page: 718
Start scraping new page...
Page: 719
Start scraping new page...
Page: 720
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 721
Start scraping new page...
Page: 722
Start scraping new page...
Page: 723
Start scraping new page...
Page: 724
Start scraping new page...
Page: 725
Start scraping new page...
Page: 726
Start scraping new page...
Page: 727
Start scraping new page...
Page: 728
Start scraping new page...
Page: 729
Start scraping new page...
Pag

Page: 797
Start scraping new page...
Page: 798
Start scraping new page...
Page: 799
Start scraping new page...
Page: 800
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 801
Start scraping new page...
Page: 802
Start scraping new page...
Page: 803
Start scraping new page...
Page: 804
Start scraping new page...
Page: 805
Start scraping new page...
Page: 806
Start scraping new page...
Page: 807
Start scraping new page...
Page: 808
Start scraping new page...
Page: 809
Start scraping new page...
Page: 810
Start scraping new page...
Page: 811
Start scraping new page...
Page: 812
Start scraping new page...
Page: 813
Start scraping new page...
Page: 814
Start scraping new page...
Page: 815
Start scraping new page...
Page: 816
Start scraping new page...
Page: 817
Start scraping new page...
15.0 % completed
Average execution time per page: 9.89 sec.
Estimated remaining runtime: 12.7 hours < --------------------------------------------------
Pag

Page: 882
Start scraping new page...
Page: 883
Start scraping new page...
Page: 884
Start scraping new page...
Page: 885
Start scraping new page...
Page: 886
Start scraping new page...
Page: 887
Start scraping new page...
Page: 888
Start scraping new page...
Page: 889
Start scraping new page...
Page: 890
Start scraping new page...
Page: 891
Start scraping new page...
Page: 892
Start scraping new page...
Page: 893
Start scraping new page...
Page: 894
Start scraping new page...
Page: 895
Start scraping new page...
Page: 896
Start scraping new page...
Page: 897
Start scraping new page...
Page: 898
Start scraping new page...
Page: 899
Start scraping new page...
Page: 900
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 901
Start scraping new page...
Page: 902
Start scraping new page...
Page: 903
Start scraping new page...
Page: 904
Start scraping new page...
Page: 905
Start scraping new page...
Page: 906
Start scraping new page...
Page: 90

Page: 973
Start scraping new page...
Page: 974
Start scraping new page...
Page: 975
Start scraping new page...
Page: 976
Start scraping new page...
Page: 977
Start scraping new page...
Page: 978
Start scraping new page...
Page: 979
Start scraping new page...
Page: 980
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 981
Start scraping new page...
Page: 982
Start scraping new page...
Page: 983
Start scraping new page...
Page: 984
Start scraping new page...
Page: 985
Start scraping new page...
Page: 986
Start scraping new page...
Page: 987
Start scraping new page...
Page: 988
Start scraping new page...
Page: 989
Start scraping new page...
Page: 990
Start scraping new page...
Page: 991
Start scraping new page...
Page: 992
Start scraping new page...
Page: 993
Start scraping new page...
Page: 994
Start scraping new page...
Page: 995
Start scraping new page...
Page: 996
Start scraping new page...
Page: 997
Start scraping new page...
Page: 99

Page: 1062
Start scraping new page...
Page: 1063
Start scraping new page...
Page: 1064
Start scraping new page...
Page: 1065
Start scraping new page...
Page: 1066
Start scraping new page...
Page: 1067
Start scraping new page...
Page: 1068
Start scraping new page...
Page: 1069
Start scraping new page...
Page: 1070
Start scraping new page...
Page: 1071
Start scraping new page...
Page: 1072
Start scraping new page...
Page: 1073
Start scraping new page...
Page: 1074
Start scraping new page...
Page: 1075
Start scraping new page...
Page: 1076
Start scraping new page...
Page: 1077
Start scraping new page...
Page: 1078
Start scraping new page...
Page: 1079
Start scraping new page...
Page: 1080
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 1081
Start scraping new page...
Page: 1082
Start scraping new page...
Page: 1083
Start scraping new page...
Page: 1084
Start scraping new page...
Page: 1085
Start scraping new page...
Page: 1086
Start scra

Page: 1147
Start scraping new page...
Page: 1148
Start scraping new page...
Page: 1149
Start scraping new page...
Page: 1150
Start scraping new page...
Page: 1151
Start scraping new page...
Page: 1152
Start scraping new page...
Page: 1153
Start scraping new page...
Page: 1154
Start scraping new page...
Page: 1155
Start scraping new page...
Page: 1156
Start scraping new page...
Page: 1157
Start scraping new page...
Page: 1158
Start scraping new page...
Page: 1159
Start scraping new page...
Page: 1160
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 1161
Start scraping new page...
Page: 1162
Start scraping new page...
Page: 1163
Start scraping new page...
Page: 1164
Start scraping new page...
Page: 1165
Start scraping new page...
Page: 1166
Start scraping new page...
Page: 1167
Start scraping new page...
Page: 1168
Start scraping new page...
Page: 1169
Start scraping new page...
Page: 1170
Start scraping new page...
Page: 1171
Start scra

Page: 1237
Start scraping new page...
Page: 1238
Start scraping new page...
Page: 1239
Start scraping new page...
Page: 1240
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 1241
Start scraping new page...
Page: 1242
Start scraping new page...
Page: 1243
Start scraping new page...
Page: 1244
Start scraping new page...
Page: 1245
Start scraping new page...
Page: 1246
Start scraping new page...
Page: 1247
Start scraping new page...
Page: 1248
Start scraping new page...
Page: 1249
Start scraping new page...
Page: 1250
Start scraping new page...
Page: 1251
Start scraping new page...
Page: 1252
Start scraping new page...
23.0 % completed
Average execution time per page: 9.71 sec.
Estimated remaining runtime: 11.3 hours < --------------------------------------------------
Page: 1253
Start scraping new page...
Page: 1254
Start scraping new page...
Page: 1255
Start scraping new page...
Page: 1256
Start scraping new page...
Page: 1257
Start scr

Page: 1323
Start scraping new page...
Page: 1324
Start scraping new page...
Page: 1325
Start scraping new page...
Page: 1326
Start scraping new page...
Page: 1327
Start scraping new page...
Page: 1328
Start scraping new page...
Page: 1329
Start scraping new page...
Page: 1330
Start scraping new page...
Page: 1331
Start scraping new page...
Page: 1332
Start scraping new page...
Page: 1333
Start scraping new page...
Page: 1334
Start scraping new page...
Page: 1335
Start scraping new page...
Page: 1336
Start scraping new page...
Page: 1337
Start scraping new page...
Page: 1338
Start scraping new page...
Page: 1339
Start scraping new page...
Page: 1340
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 1341
Start scraping new page...
Page: 1342
Start scraping new page...
Page: 1343
Start scraping new page...
Page: 1344
Start scraping new page...
Page: 1345
Start scraping new page...
Page: 1346
Start scraping new page...
Page: 1347
Start scra

Page: 1410
Start scraping new page...
Page: 1411
Start scraping new page...
Page: 1412
Start scraping new page...
Page: 1413
Start scraping new page...
Page: 1414
Start scraping new page...
Page: 1415
Start scraping new page...
26.0 % completed
Average execution time per page: 9.67 sec.
Estimated remaining runtime: 10.8 hours < --------------------------------------------------
Page: 1416
Start scraping new page...
Page: 1417
Start scraping new page...
Page: 1418
Start scraping new page...
Page: 1419
Start scraping new page...
Page: 1420
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 1421
Start scraping new page...
Page: 1422
Start scraping new page...
Page: 1423
Start scraping new page...
Page: 1424
Start scraping new page...
Page: 1425
Start scraping new page...
Page: 1426
Start scraping new page...
Page: 1427
Start scraping new page...
Page: 1428
Start scraping new page...
Page: 1429
Start scraping new page...
Page: 1430
Start scr

Page: 1497
Start scraping new page...
Page: 1498
Start scraping new page...
Page: 1499
Start scraping new page...
Page: 1500
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 1501
Start scraping new page...
Page: 1502
Start scraping new page...
Page: 1503
Start scraping new page...
Page: 1504
Start scraping new page...
Page: 1505
Start scraping new page...
Page: 1506
Start scraping new page...
Page: 1507
Start scraping new page...
Page: 1508
Start scraping new page...
Page: 1509
Start scraping new page...
Page: 1510
Start scraping new page...
Page: 1511
Start scraping new page...
Page: 1512
Start scraping new page...
Page: 1513
Start scraping new page...
Page: 1514
Start scraping new page...
Page: 1515
Start scraping new page...
Page: 1516
Start scraping new page...
Page: 1517
Start scraping new page...
Page: 1518
Start scraping new page...
Page: 1519
Start scraping new page...
Page: 1520
Refreshing page
--------------------------------

Page: 1583
Start scraping new page...
Page: 1584
Start scraping new page...
Page: 1585
Start scraping new page...
Page: 1586
Start scraping new page...
Page: 1587
Start scraping new page...
Page: 1588
Start scraping new page...
Page: 1589
Start scraping new page...
Page: 1590
Start scraping new page...
Page: 1591
Start scraping new page...
Page: 1592
Start scraping new page...
Page: 1593
Start scraping new page...
Page: 1594
Start scraping new page...
Page: 1595
Start scraping new page...
Page: 1596
Start scraping new page...
Page: 1597
Start scraping new page...
Page: 1598
Start scraping new page...
Page: 1599
Start scraping new page...
Page: 1600
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 1601
Start scraping new page...
Page: 1602
Start scraping new page...
Page: 1603
Start scraping new page...
Page: 1604
Start scraping new page...
Page: 1605
Start scraping new page...
Page: 1606
Start scraping new page...
Page: 1607
Start scra

Page: 1673
Start scraping new page...
Page: 1674
Start scraping new page...
Page: 1675
Start scraping new page...
Page: 1676
Start scraping new page...
Page: 1677
Start scraping new page...
Page: 1678
Start scraping new page...
Page: 1679
Start scraping new page...
Page: 1680
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 1681
Start scraping new page...
Page: 1682
Start scraping new page...
Page: 1683
Start scraping new page...
Page: 1684
Start scraping new page...
Page: 1685
Start scraping new page...
Page: 1686
Start scraping new page...
Page: 1687
Start scraping new page...
31.0 % completed
Average execution time per page: 9.71 sec.
Estimated remaining runtime: 10.1 hours < --------------------------------------------------
Page: 1688
Start scraping new page...
Page: 1689
Start scraping new page...
Page: 1690
Start scraping new page...
Page: 1691
Start scraping new page...
Page: 1692
Start scraping new page...
Page: 1693
Start scr

Page: 1760
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 1761
Start scraping new page...
Page: 1762
Start scraping new page...
Page: 1763
Start scraping new page...
Page: 1764
Start scraping new page...
Page: 1765
Start scraping new page...
Page: 1766
Start scraping new page...
Page: 1767
Start scraping new page...
Page: 1768
Start scraping new page...
Page: 1769
Start scraping new page...
Page: 1770
Start scraping new page...
Page: 1771
Start scraping new page...
Page: 1772
Start scraping new page...
Page: 1773
Start scraping new page...
Page: 1774
Start scraping new page...
Page: 1775
Start scraping new page...
Page: 1776
Start scraping new page...
Page: 1777
Start scraping new page...
Page: 1778
Start scraping new page...
Page: 1779
Start scraping new page...
Page: 1780
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 1781
Start scraping new page...
Page: 1782
Start scraping new 

Page: 1847
Start scraping new page...
Page: 1848
Start scraping new page...
Page: 1849
Start scraping new page...
Page: 1850
Start scraping new page...
34.0 % completed
Average execution time per page: 9.71 sec.
Estimated remaining runtime: 9.7 hours < --------------------------------------------------
Page: 1851
Start scraping new page...
Page: 1852
Start scraping new page...
Page: 1853
Start scraping new page...
Page: 1854
Start scraping new page...
Page: 1855
Start scraping new page...
Page: 1856
Start scraping new page...
Page: 1857
Start scraping new page...
Page: 1858
Start scraping new page...
Page: 1859
Start scraping new page...
Page: 1860
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 1861
Start scraping new page...
Page: 1862
Start scraping new page...
Page: 1863
Start scraping new page...
Page: 1864
Start scraping new page...
Page: 1865
Start scraping new page...
Page: 1866
Start scraping new page...
Page: 1867
Start scra

Page: 1934
Start scraping new page...
Page: 1935
Start scraping new page...
Page: 1936
Start scraping new page...
Page: 1937
Start scraping new page...
Page: 1938
Start scraping new page...
Page: 1939
Start scraping new page...
Page: 1940
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 1941
Start scraping new page...
Page: 1942
Start scraping new page...
Page: 1943
Start scraping new page...
Page: 1944
Start scraping new page...
Page: 1945
Start scraping new page...
Page: 1946
Start scraping new page...
Page: 1947
Start scraping new page...
Page: 1948
Start scraping new page...
Page: 1949
Start scraping new page...
Page: 1950
Start scraping new page...
Page: 1951
Start scraping new page...
Page: 1952
Start scraping new page...
Page: 1953
Start scraping new page...
Page: 1954
Start scraping new page...
Page: 1955
Start scraping new page...
Page: 1956
Start scraping new page...
Page: 1957
Start scraping new page...
Page: 1958
Start scra

Start scraping new page...
Page: 2021
Start scraping new page...
Page: 2022
Start scraping new page...
Page: 2023
Start scraping new page...
Page: 2024
Start scraping new page...
Page: 2025
Start scraping new page...
Page: 2026
Start scraping new page...
Page: 2027
Start scraping new page...
Page: 2028
Start scraping new page...
Page: 2029
Start scraping new page...
Page: 2030
Start scraping new page...
Page: 2031
Start scraping new page...
Page: 2032
Start scraping new page...
Page: 2033
Start scraping new page...
Page: 2034
Start scraping new page...
Page: 2035
Start scraping new page...
Page: 2036
Start scraping new page...
Page: 2037
Start scraping new page...
Page: 2038
Start scraping new page...
Page: 2039
Start scraping new page...
Page: 2040
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 2041
Start scraping new page...
Page: 2042
Start scraping new page...
Page: 2043
Start scraping new page...
Page: 2044
Start scraping new pa

Page: 2108
Start scraping new page...
Page: 2109
Start scraping new page...
Page: 2110
Start scraping new page...
Page: 2111
Start scraping new page...
Page: 2112
Start scraping new page...
Page: 2113
Start scraping new page...
Page: 2114
Start scraping new page...
Page: 2115
Start scraping new page...
Page: 2116
Start scraping new page...
Page: 2117
Start scraping new page...
Page: 2118
Start scraping new page...
Page: 2119
Start scraping new page...
Page: 2120
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 2121
Start scraping new page...
Page: 2122
Start scraping new page...
39.0 % completed
Average execution time per page: 9.72 sec.
Estimated remaining runtime: 9.0 hours < --------------------------------------------------
Page: 2123
Start scraping new page...
Page: 2124
Start scraping new page...
Page: 2125
Start scraping new page...
Page: 2126
Start scraping new page...
Page: 2127
Start scraping new page...
Page: 2128
Start scra

Page: 2195
Start scraping new page...
Page: 2196
Start scraping new page...
Page: 2197
Start scraping new page...
Page: 2198
Start scraping new page...
Page: 2199
Start scraping new page...
Page: 2200
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 2201
Start scraping new page...
Page: 2202
Start scraping new page...
Page: 2203
Start scraping new page...
Page: 2204
Start scraping new page...
Page: 2205
Start scraping new page...
Page: 2206
Start scraping new page...
Page: 2207
Start scraping new page...
Page: 2208
Start scraping new page...
Page: 2209
Start scraping new page...
Page: 2210
Start scraping new page...
Page: 2211
Start scraping new page...
Page: 2212
Start scraping new page...
Page: 2213
Start scraping new page...
Page: 2214
Start scraping new page...
Page: 2215
Start scraping new page...
Page: 2216
Start scraping new page...
Page: 2217
Start scraping new page...
Page: 2218
Start scraping new page...
Page: 2219
Start scra

Page: 2282
Start scraping new page...
Page: 2283
Start scraping new page...
Page: 2284
Start scraping new page...
Page: 2285
Start scraping new page...
Page: 2286
Start scraping new page...
Page: 2287
Start scraping new page...
Page: 2288
Start scraping new page...
Page: 2289
Start scraping new page...
Page: 2290
Start scraping new page...
Page: 2291
Start scraping new page...
Page: 2292
Start scraping new page...
Page: 2293
Start scraping new page...
Page: 2294
Start scraping new page...
Page: 2295
Start scraping new page...
Page: 2296
Start scraping new page...
Page: 2297
Start scraping new page...
Page: 2298
Start scraping new page...
Page: 2299
Start scraping new page...
Page: 2300
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 2301
Start scraping new page...
Page: 2302
Start scraping new page...
Page: 2303
Start scraping new page...
Page: 2304
Start scraping new page...
Page: 2305
Start scraping new page...
Page: 2306
Start scra

Page: 2372
Start scraping new page...
Page: 2373
Start scraping new page...
Page: 2374
Start scraping new page...
Page: 2375
Start scraping new page...
Page: 2376
Start scraping new page...
Page: 2377
Start scraping new page...
Page: 2378
Start scraping new page...
Page: 2379
Start scraping new page...
Page: 2380
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 2381
Start scraping new page...
Page: 2382
Start scraping new page...
Page: 2383
Start scraping new page...
Page: 2384
Start scraping new page...
Page: 2385
Start scraping new page...
Page: 2386
Start scraping new page...
Page: 2387
Start scraping new page...
Page: 2388
Start scraping new page...
Page: 2389
Start scraping new page...
Page: 2390
Start scraping new page...
Page: 2391
Start scraping new page...
Page: 2392
Start scraping new page...
Page: 2393
Start scraping new page...
Page: 2394
Start scraping new page...
44.0 % completed
Average execution time per page: 9.67 sec.

Page: 2459
Start scraping new page...
Page: 2460
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 2461
Start scraping new page...
Page: 2462
Start scraping new page...
Page: 2463
Start scraping new page...
Page: 2464
Start scraping new page...
Page: 2465
Start scraping new page...
Page: 2466
Start scraping new page...
Page: 2467
Start scraping new page...
Page: 2468
Start scraping new page...
Page: 2469
Start scraping new page...
Page: 2470
Start scraping new page...
Page: 2471
Start scraping new page...
Page: 2472
Start scraping new page...
Page: 2473
Start scraping new page...
Page: 2474
Start scraping new page...
Page: 2475
Start scraping new page...
Page: 2476
Start scraping new page...
Page: 2477
Start scraping new page...
Page: 2478
Start scraping new page...
Page: 2479
Start scraping new page...
Page: 2480
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 2481
Start scraping new 

Page: 2546
Start scraping new page...
Page: 2547
Start scraping new page...
Page: 2548
Start scraping new page...
Page: 2549
Start scraping new page...
Page: 2550
Start scraping new page...
Page: 2551
Start scraping new page...
Page: 2552
Start scraping new page...
Page: 2553
Start scraping new page...
Page: 2554
Start scraping new page...
Page: 2555
Start scraping new page...
Page: 2556
Start scraping new page...
Page: 2557
Start scraping new page...
Page: 2558
Start scraping new page...
Page: 2559
Start scraping new page...
Page: 2560
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 2561
Start scraping new page...
Page: 2562
Start scraping new page...
Page: 2563
Start scraping new page...
Page: 2564
Start scraping new page...
Page: 2565
Start scraping new page...
Page: 2566
Start scraping new page...
Page: 2567
Start scraping new page...
Page: 2568
Start scraping new page...
Page: 2569
Start scraping new page...
Page: 2570
Start scra

Page: 2636
Start scraping new page...
Page: 2637
Start scraping new page...
Page: 2638
Start scraping new page...
Page: 2639
Start scraping new page...
Page: 2640
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 2641
Start scraping new page...
Page: 2642
Start scraping new page...
Page: 2643
Start scraping new page...
Page: 2644
Start scraping new page...
Page: 2645
Start scraping new page...
Page: 2646
Start scraping new page...
Page: 2647
Start scraping new page...
Page: 2648
Start scraping new page...
Page: 2649
Start scraping new page...
Page: 2650
Start scraping new page...
Page: 2651
Start scraping new page...
Page: 2652
Start scraping new page...
Page: 2653
Start scraping new page...
Page: 2654
Start scraping new page...
Page: 2655
Start scraping new page...
Page: 2656
Start scraping new page...
Page: 2657
Start scraping new page...
Page: 2658
Start scraping new page...
Page: 2659
Start scraping new page...
Page: 2660
Refreshing

Page: 2723
Start scraping new page...
Page: 2724
Start scraping new page...
Page: 2725
Start scraping new page...
Page: 2726
Start scraping new page...
Page: 2727
Start scraping new page...
Page: 2728
Start scraping new page...
Page: 2729
Start scraping new page...
Page: 2730
Start scraping new page...
Page: 2731
Start scraping new page...
Page: 2732
Start scraping new page...
Page: 2733
Start scraping new page...
Page: 2734
Start scraping new page...
Page: 2735
Start scraping new page...
Page: 2736
Start scraping new page...
Page: 2737
Start scraping new page...
Page: 2738
Start scraping new page...
Page: 2739
Start scraping new page...
Page: 2740
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 2741
Start scraping new page...
Page: 2742
Start scraping new page...
Page: 2743
Start scraping new page...
Page: 2744
Start scraping new page...
Page: 2745
Start scraping new page...
Page: 2746
Start scraping new page...
Page: 2747
Start scra

Page: 2810
Start scraping new page...
Page: 2811
Start scraping new page...
Page: 2812
Start scraping new page...
Page: 2813
Start scraping new page...
Page: 2814
Start scraping new page...
Page: 2815
Start scraping new page...
Page: 2816
Start scraping new page...
Page: 2817
Start scraping new page...
Page: 2818
Start scraping new page...
Page: 2819
Start scraping new page...
Page: 2820
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 2821
Start scraping new page...
Page: 2822
Start scraping new page...
Page: 2823
Start scraping new page...
Page: 2824
Start scraping new page...
Page: 2825
Start scraping new page...
Page: 2826
Start scraping new page...
Page: 2827
Start scraping new page...
Page: 2828
Start scraping new page...
Page: 2829
Start scraping new page...
Page: 2830
Start scraping new page...
Page: 2831
Start scraping new page...
Page: 2832
Start scraping new page...
Page: 2833
Start scraping new page...
Page: 2834
Start scra

Page: 2900
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 2901
Start scraping new page...
Page: 2902
Start scraping new page...
Page: 2903
Start scraping new page...
Page: 2904
Start scraping new page...
Page: 2905
Start scraping new page...
Page: 2906
Start scraping new page...
Page: 2907
Start scraping new page...
Page: 2908
Start scraping new page...
Page: 2909
Start scraping new page...
Page: 2910
Start scraping new page...
Page: 2911
Start scraping new page...
Page: 2912
Start scraping new page...
Page: 2913
Start scraping new page...
Page: 2914
Start scraping new page...
Page: 2915
Start scraping new page...
Page: 2916
Start scraping new page...
Page: 2917
Start scraping new page...
Page: 2918
Start scraping new page...
Page: 2919
Start scraping new page...
Page: 2920
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 2921
Start scraping new page...
Page: 2922
Start scraping new 

Page: 2987
Start scraping new page...
Page: 2988
Start scraping new page...
Page: 2989
Start scraping new page...
Page: 2990
Start scraping new page...
Page: 2991
Start scraping new page...
Page: 2992
Start scraping new page...
Page: 2993
Start scraping new page...
Page: 2994
Start scraping new page...
Page: 2995
Start scraping new page...
Page: 2996
Start scraping new page...
Page: 2997
Start scraping new page...
Page: 2998
Start scraping new page...
Page: 2999
Start scraping new page...
Page: 3000
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 3001
Start scraping new page...
Page: 3002
Start scraping new page...
Page: 3003
Start scraping new page...
Page: 3004
Start scraping new page...
Page: 3005
Start scraping new page...
Page: 3006
Start scraping new page...
Page: 3007
Start scraping new page...
Page: 3008
Start scraping new page...
Page: 3009
Start scraping new page...
Page: 3010
Start scraping new page...
Page: 3011
Start scra

Page: 3074
Start scraping new page...
Page: 3075
Start scraping new page...
Page: 3076
Start scraping new page...
Page: 3077
Start scraping new page...
Page: 3078
Start scraping new page...
Page: 3079
Start scraping new page...
Page: 3080
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 3081
Start scraping new page...
Page: 3082
Start scraping new page...
Page: 3083
Start scraping new page...
Page: 3084
Start scraping new page...
Page: 3085
Start scraping new page...
Page: 3086
Start scraping new page...
Page: 3087
Start scraping new page...
Page: 3088
Start scraping new page...
Page: 3089
Start scraping new page...
Page: 3090
Start scraping new page...
Page: 3091
Start scraping new page...
Page: 3092
Start scraping new page...
Page: 3093
Start scraping new page...
Page: 3094
Start scraping new page...
Page: 3095
Start scraping new page...
Page: 3096
Start scraping new page...
Page: 3097
Start scraping new page...
Page: 3098
Start scra

Start scraping new page...
Page: 3161
Start scraping new page...
Page: 3162
Start scraping new page...
Page: 3163
Start scraping new page...
Page: 3164
Start scraping new page...
Page: 3165
Start scraping new page...
Page: 3166
Start scraping new page...
Page: 3167
Start scraping new page...
Page: 3168
Start scraping new page...
Page: 3169
Start scraping new page...
Page: 3170
Start scraping new page...
Page: 3171
Start scraping new page...
Page: 3172
Start scraping new page...
Page: 3173
Start scraping new page...
Page: 3174
Start scraping new page...
Page: 3175
Start scraping new page...
Page: 3176
Start scraping new page...
Page: 3177
Start scraping new page...
Page: 3178
Start scraping new page...
Page: 3179
Start scraping new page...
Page: 3180
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 3181
Start scraping new page...
Page: 3182
Start scraping new page...
Page: 3183
Start scraping new page...
Page: 3184
Start scraping new pa

Page: 3248
Start scraping new page...
Page: 3249
Start scraping new page...
Page: 3250
Start scraping new page...
Page: 3251
Start scraping new page...
Page: 3252
Start scraping new page...
Page: 3253
Start scraping new page...
Page: 3254
Start scraping new page...
Page: 3255
Start scraping new page...
Page: 3256
Start scraping new page...
Page: 3257
Start scraping new page...
Page: 3258
Start scraping new page...
Page: 3259
Start scraping new page...
Page: 3260
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 3261
Start scraping new page...
Page: 3262
Start scraping new page...
Page: 3263
Start scraping new page...
Page: 3264
Start scraping new page...
Page: 3265
Start scraping new page...
Page: 3266
Start scraping new page...
Page: 3267
Start scraping new page...
Page: 3268
Start scraping new page...
Page: 3269
Start scraping new page...
Page: 3270
Start scraping new page...
Page: 3271
Start scraping new page...
Page: 3272
Start scra

Page: 3335
Start scraping new page...
Page: 3336
Start scraping new page...
Page: 3337
Start scraping new page...
Page: 3338
Start scraping new page...
Page: 3339
Start scraping new page...
Page: 3340
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 3341
Start scraping new page...
Page: 3342
Start scraping new page...
Page: 3343
Start scraping new page...
Page: 3344
Start scraping new page...
Page: 3345
Start scraping new page...
Page: 3346
Start scraping new page...
Page: 3347
Start scraping new page...
Page: 3348
Start scraping new page...
Page: 3349
Start scraping new page...
Page: 3350
Start scraping new page...
Page: 3351
Start scraping new page...
Page: 3352
Start scraping new page...
Page: 3353
Start scraping new page...
Page: 3354
Start scraping new page...
Page: 3355
Start scraping new page...
Page: 3356
Start scraping new page...
Page: 3357
Start scraping new page...
Page: 3358
Start scraping new page...
Page: 3359
Start scra

Page: 3422
Start scraping new page...
Page: 3423
Start scraping new page...
Page: 3424
Start scraping new page...
Page: 3425
Start scraping new page...
Page: 3426
Start scraping new page...
Page: 3427
Start scraping new page...
Page: 3428
Start scraping new page...
Page: 3429
Start scraping new page...
Page: 3430
Start scraping new page...
Page: 3431
Start scraping new page...
Page: 3432
Start scraping new page...
Page: 3433
Start scraping new page...
Page: 3434
Start scraping new page...
Page: 3435
Start scraping new page...
Page: 3436
Start scraping new page...
Page: 3437
Start scraping new page...
Page: 3438
Start scraping new page...
Page: 3439
Start scraping new page...
Page: 3440
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 3441
Start scraping new page...
Page: 3442
Start scraping new page...
Page: 3443
Start scraping new page...
Page: 3444
Start scraping new page...
Page: 3445
Start scraping new page...
Page: 3446
Start scra

Page: 3509
Start scraping new page...
Page: 3510
Start scraping new page...
Page: 3511
Start scraping new page...
Page: 3512
Start scraping new page...
Page: 3513
Start scraping new page...
Page: 3514
Start scraping new page...
Page: 3515
Start scraping new page...
Page: 3516
Start scraping new page...
Page: 3517
Start scraping new page...
Page: 3518
Start scraping new page...
Page: 3519
Start scraping new page...
Page: 3520
Refreshing page
--------------------------------------------------
Start scraping new page...
Page: 3521
Start scraping new page...
Page: 3522
Start scraping new page...
Page: 3523
Start scraping new page...


# Expedited troubleshooting

In [1039]:
centris.data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85 entries, 0 to 84
Data columns (total 91 columns):
title                                      85 non-null object
address                                    85 non-null object
price                                      85 non-null object
lat                                        85 non-null object
long                                       85 non-null object
Car friendly                               85 non-null object
Greenery                                   85 non-null object
Cafes                                      85 non-null object
Pedestrian friendly                        85 non-null object
Shopping                                   85 non-null object
Restaurants                                85 non-null object
Quiet                                      85 non-null object
Vibrant                                    85 non-null object
Groceries                                  85 non-null object
High Schools                 

In [1040]:
execution_time

6.54182364697359

In [1026]:
centris.data

Unnamed: 0,title,address,price,lat,long,Historic,Quiet,Nightlife,High Schools,Parks,...,University (%),College (%),Secondary (high) school (%),Apprentice or trade school diploma (%),No diploma (%),Non-immigrant population (%),Immigrant population (%),French (%),English (%),Others languages (%)
0,Condo for sale,"412, Rue Saint-Claude, apt. 307, Montréal (Vil...","$995,000",45.50860925,-73.55221964,9,5,10,2,10,...,52,15,18,6,9,63,37,54,26,20


In [969]:
cols = centris.data.loc[:,["Population" in columns for columns in centris.data.columns]].columns
cols = [x for x in cols]
cols.append("title")
cols.append("address")
centris.data.loc[:,cols]

Unnamed: 0,Population 2016,Population variation between 2011 2016,"Population density 2,616",Population density 207,Population density 588,Population density 305,Population density 67,"Population density 8,741","Population density 7,181","Population density 6,414",...,"Population density 5,347","Population density 2,299","Population density 2,498","Population density 7,718","Population density 2,515","Population density 5,545",Population density 778,"Population density 2,547",title,address
0,110408,8,2.0,,,,,,,,...,,,,,,,,,House for sale,"4325, 6e Rue, Laval (Chomedey), Neighbourhood ..."
1,44390,8,,2.0,,,,,,,...,,,,,,,,,House for sale,"158, Rue du Cardinal, Sherbrooke (Brompton/Roc..."
2,85338,7,,,2.0,,,,,,...,,,,,,,,,House for sale,"6541, Rue de Vénus, Québec (La Haute-Saint-Cha..."
3,75423,5,,,,2.0,,,,,...,,,,,,,,,Cottage for sale,"2485, Chemin Hemming, Drummondville, Neighbour..."
4,49349,241,,,,,2.0,,,,...,,,,,,,,,Lot for sale,"boulevard des Hêtres, Shawinigan, Neighbourhoo..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,84234,0,,,,,,,,,...,,,,,,,,,House for sale,"11140, Avenue Lausanne, Montréal (Montréal-Nord)"
100,139590,4,,,,,,2.0,,,...,,,,,,,,,Condo for sale,"6026, Avenue De Lorimier, Montréal (Rosemont/L..."
101,78305,3,,,,,,,,,...,,,,,,,,,Condo for sale,"7980, Rue Malouin, apt. 108, Montréal (Saint-L..."
102,44489,7,,,,,,,,,...,,,2.0,,,,,,House for sale,"525, 5e Avenue, Montréal (Lachine), Neighbourh..."


In [442]:
centris.data.describe(include="all")

Unnamed: 0,title,address,price,lat,long,descriptions,neighbourhood_indicators,demographics
count,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
unique,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
top,,,,,,,,
freq,,,,,,,,


In [443]:
# # Elements from top of scrollable list
#     neighbourhood_top = wait_for_xpath(\
#                             "//div[@class='ll-list ps ps--active-y']",\
#                             old_DOM)
    
#     # DEBUGGING-----------------------------------
#     print("FIRST:", [x.text for x in centris.driver.find_elements_by_class_name("ll-header")])
#     for button in centris.driver.find_elements_by_class_name("ll-header"):
#         time.sleep(1)
#         button.click()
#     # DEBUGGING-----------------------------------
    
#     # Scroll to buttom of list to load remaining elements
#     scrollable_bar = centris.driver.find_element_by_xpath(\
#                                             "//div[@class='ps__thumb-y']")
#     scrollable_bar.send_keys(Keys.PAGE_DOWN)
    
#     # Elements from buttom of scrollable list
#     neighbourhood_buttom = wait_for_xpath(\
#                             "//div[@class='ll-list ps ps--active-y']",\
#                             old_DOM_buttom)
    
#     # LOGGING----------------------
#     print("Top neighbourhood:", [x.text for x in neighbourhood_top])
#     print("Buttom neighbourhood:", [x.text for x in neighbourhood_buttom])

In [524]:
# neighbourhood_buttom = centris.driver.find_elements_by_xpath(\
#                              "//div[@class='ll-list ps ps--active-y']")
neighbourhood_buttom = centris.driver.find_elements_by_xpath("//div[@class='ll-list']")
#last_button = neighbourhood_buttom[-1]
print("ONE:", [x.text for x in neighbourhood_buttom])
# Scroll to buttom of list to load remaining elements
scrollable_bar = centris.driver.find_element_by_xpath(\
                                        "//div[@class='ps__thumb-y']")
scrollable_bar.send_keys(Keys.PAGE_DOWN)
scrollable_bar.click()
neighbourhood_buttom = centris.driver.find_elements_by_class_name("ll-list")
#last_button = neighbourhood_buttom[-1]
neighbourhood_buttom[0].text

ONE: []


ElementClickInterceptedException: Message: element click intercepted: Element <div class="ps__thumb-y focus-visible" tabindex="0" style="top: 174px; height: 244px;" data-focus-visible-added=""></div> is not clickable at point (303, 40). Other element would receive the click: <nav class="navbar navbar-expand fixed-top">...</nav>
  (Session info: chrome=84.0.4147.105)


In [515]:
# Scroll to buttom of list to load remaining elements
scrollable_bar = centris.driver.find_element_by_xpath(\
                                        "//div[@class='ps__thumb-y']")
scrollable_bar.send_keys(Keys.PAGE_DOWN)
#scrollable_bar.click()

In [477]:
last_button.click()

In [504]:
scrollable_bar.click()

In [657]:
description = "1 bathroom and 1 powder room"
numeric = re.findall("\(*[0-9]+\)*", description)
text = re.findall("[A-Za-z]+\-*\s*[A-Za-z]*", description)
numeric

['1', '1']

In [733]:
headers_of_interest = ["rooms", "bedrooms", "powder room", "Building style",\
                               "Condominium type", "Year built", "Building area", "Lot area",\
                     "Net area", "Parking", "Main unit", "Potential gross revenue", "Pool"]

descriptions = {'rooms': '16', 'bedrooms ': '4', 'in basement': '(1', 'bathrooms': '2', 'powder room': '1',\
                'Building style': 'Two or more storey, Detached', 'Year built': '1999', 'Lot area': '13,424 sqft',\
                'Parking (total)': 'Driveway (6), Garage (3)', 'Fireplace/Stove': 'Wood stove',\
                'Additional features': 'Basement 6 feet or +'}
description_table = pd.DataFrame()
dat = pd.DataFrame()
# Ensures consistency accross listings
for header in headers_of_interest:
    if header in descriptions.keys():
        value = descriptions[header]
#         print(header, "found")
#         print(descriptions[header])
    else:
#         print(header, "NOT FOUND!!!")
        value = np.nan
    description_table[header] = pd.Series(value)
    print(header, ":", value)
    print(description_table)
    
# Concat description information
new_data = pd.concat([new_data, description_table], axis=1)

# Append new data to existing data    
dat = dat.append(new_data, ignore_index=True)

rooms : 16
  rooms
0    16
bedrooms : nan
  rooms  bedrooms
0    16       NaN
powder room : 1
  rooms  bedrooms powder room
0    16       NaN           1
Building style : Two or more storey, Detached
  rooms  bedrooms powder room                Building style
0    16       NaN           1  Two or more storey, Detached
Condominium type : nan
  rooms  bedrooms powder room                Building style  Condominium type
0    16       NaN           1  Two or more storey, Detached               NaN
Year built : 1999
  rooms  bedrooms powder room                Building style  Condominium type  \
0    16       NaN           1  Two or more storey, Detached               NaN   

  Year built  
0       1999  
Building area : nan
  rooms  bedrooms powder room                Building style  Condominium type  \
0    16       NaN           1  Two or more storey, Detached               NaN   

  Year built  Building area  
0       1999            NaN  
Lot area : 13,424 sqft
  rooms  bedrooms powder

In [840]:
indicators = {('Transit friendly', '9'), ('Restaurants', '7'), ('Nightlife', '4'), ('Vibrant', '5'), ('Car friendly', '6'), ('Elementary Schools', '9'), ('Quiet', '8'), ('High Schools', '7'), ('Shopping', '9'), ('Pedestrian friendly', '10'), ('Cycling friendly', '8'), ('Daycares', '10'), ('Groceries', '10'), ('Cafes', '6'), ('Greenery', '7'), ('Parks', '8')}
data = pd.DataFrame()
for indicator in indicators:
    header = indicator[0]
    value = indicator[1]
    data[header] = pd.Series(value)

data

Unnamed: 0,Transit friendly,Restaurants,Vibrant,Car friendly,Elementary Schools,Quiet,High Schools,Shopping,Pedestrian friendly,Cycling friendly,Daycares,Groceries,Cafes,Greenery,Nightlife,Parks
0,9,7,5,6,9,8,7,9,10,8,10,10,6,7,4,8


In [852]:
population = ['Population (2016) 136,024', 'Population variation between 2011 and 2016 3%',\
              'Population density 5,347 hab/km2', 'Unemployment rate (2016) 9%']
data = pd.DataFrame()
for info in population:
    # Numeric data
    numeric = re.findall("[0-9]+[0-9,]*", info)
    numeric_clean = numeric[-1].replace(",","")
    
    # Text data for column names
    header = re.findall("[a-zA-Z\s]+", info)
    header_clean = header[0]
    # Add numeric data to header excluding the value at index -1
    for numeric_head_data in numeric[:-1]:
        header_clean = header_clean + str(numeric_head_data) + " "
    
    data[header_clean] = pd.Series(numeric_clean).astype("int")
    
data

Unnamed: 0,Population 2016,Population variation between 2011 2016,"Population density 5,347",Unemployment rate 2016
0,136024,3,2,9


In [888]:
demographics = [['Less than $50,000', '46%', '', 'Between $50,000 and $80,000', '23%',\
                 '', 'Between $80,000 and $100,000', '10%', '', 'Between $100,000 and $150,000',\
                 '13%', '', 'More than $150,000', '8%'],\
                ['1-person households', '38%', '', '2-person households', '38%', '',\
                 '3-person households', '12%', '', '4-person households', '9%', '', '5-person or more households', '3%'],\
                ['Couples without children at home', '54%', '', 'Couples with children at home', '31%',\
                 '', 'Single-parent families', '15%'],\
                ['Owners', '69%', '', 'Renters', '31%'],\
                ['Before 1960', '22%', '', 'Between 1961 and 1980', '24%', '', 'Between 1981 and 1990', '16%', '',\
                 'Between 1991 and 2000', '11%', '', 'Between 2001 and 2010', '20%', '', 'Between 2011 and 2016', '6%'],\
                ['Single-family homes', '60%', '', 'Semi-detached or row houses', '9%', '', 'Buildings with less than 5 floors',\
                 '31%', '', 'Buildings with 5 or more floors', '0%', '', 'Mobile homes', '0%'],\
                ['University', '25%', '', 'College', '19%', '', 'Secondary (high) school', '22%', '',\
                 'Apprentice or trade school diploma', '17%', '', 'No diploma', '16%'],\
                ['Non-immigrant population', '94%', '', 'Immigrant population', '6%'],\
                ['French', '94%', '', 'English', '5%', '', 'Others languages', '1%']]

# Remove empty stings from splitting double line breaks \n\n
demographics_clean = []
data = pd.DataFrame()
for demographic in demographics:
    demographics_clean.append([x for x in demographic if x != ""])

for demographic in demographics_clean:
    header_index = range(0, len(demographic), 2)
    for i in header_index:
        header = demographic[i] + " (%)"
        value = demographic[i+1].replace("%", "")
        data[header] = pd.Series(value).astype("int")

data

Unnamed: 0,"Less than $50,000 (%)","Between $50,000 and $80,000 (%)","Between $80,000 and $100,000 (%)","Between $100,000 and $150,000 (%)","More than $150,000 (%)",1-person households (%),2-person households (%),3-person households (%),4-person households (%),5-person or more households (%),...,University (%),College (%),Secondary (high) school (%),Apprentice or trade school diploma (%),No diploma (%),Non-immigrant population (%),Immigrant population (%),French (%),English (%),Others languages (%)
0,46,23,10,13,8,38,38,12,9,3,...,25,19,22,17,16,94,6,94,5,1
