In [1]:
# Code by Kanwar Adnan
# I couldn't find a public API or a URL-based search endpoint, so I fell back to the least
# elegant option: driving a real browser through Selenium WebDriver. Pardon me.

from bs4 import BeautifulSoup as bs4
from csv import writer
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

In [2]:
# Location of the ChromeDriver executable. A raw string keeps the Windows
# backslashes literal instead of relying on unrecognised escape sequences
# (`\C`, `\c`), which newer Python versions warn about.
DRIVER_PATH = r"C:\Chromedriver\chromedriver.exe"

In [3]:
# Landing page (English version) that hosts the search box the scraper types into.
url = 'https://www.pararius.com/english'

In [4]:
class Browser:
    """Small wrapper around a Selenium Chrome driver used to search the site and page through results."""

    def __init__(self, driver_path , url , delay = 60):
        """
        Initializes a new instance of the `Browser` class, starts Chrome and opens `url`.

        Parameters:
        - driver_path (str): The path to the ChromeDriver executable.
        - url (str): The URL of the website to be scraped.
        - delay (int): Maximum number of seconds to wait for page elements before timing out.

        Attributes:
        - driver_path (str): The path to the ChromeDriver executable.
        - url (str): The URL of the website to be scraped.
        - options (Options): The Chrome options for the webdriver.
        - driver (webdriver): The Chrome webdriver.
        - delay (int): Maximum number of seconds to wait for page elements before timing out.
        - first (bool): `True` until the cookie banner has been accepted once.

        Raises:
        - Whatever the webdriver raises while starting Chrome or loading `url`; in the latter
          case the driver is shut down before the exception propagates.
        """
        self.driver_path = driver_path
        self.url = url
        # Chrome opens a visible window unless a headless argument is added, so the
        # default options already give the headed browser this scraper relies on.
        self.options = Options()

        self.driver = webdriver.Chrome(
            service = Service(self.driver_path),
            options = self.options)

        self.delay = delay
        self.first = True

        try:
            self.setup_browser()
        except BaseException:
            # Do not leave an orphaned Chrome/chromedriver behind when start-up fails.
            self.driver.quit()
            raise

    def close(self):
        """
        Ends the webdriver session.

        `quit()` is used instead of `close()` so that the chromedriver process is
        terminated as well, not only the current browser window.
        """
        self.driver.quit()

    def wait_for_cookies(self):
        """
        Accepts the cookie banner if it shows up within `delay` seconds.

        Returns:
        - bool: `True` if the accept button was clicked, `False` if it never became clickable.
        """
        try:
            accept_button = WebDriverWait(self.driver, self.delay).until(
                EC.element_to_be_clickable(
                    (By.ID,'onetrust-accept-btn-handler')
                )
            )
        except Exception:
            # Best effort: the banner is optional, so a missing button is not an error.
            # (`Exception` rather than a bare `except` keeps Ctrl+C working.)
            return False

        accept_button.click()
        self.first = False
        return True

    def setup_browser(self):
        """
        Navigates to the website specified in the `url` attribute and waits for the search box.

        Returns:
        - bool: `False` if the search box did not become clickable (timeout or any other
          error while waiting). Otherwise the result of `wait_for_cookies()` on the first
          visit, or `True` once the cookie banner has already been accepted.
        """
        self.driver.get(self.url)
        # The page can take several seconds to become interactive, so wait for the search box.
        try:
            WebDriverWait(self.driver, self.delay).until(
                EC.element_to_be_clickable(
                    (By.CLASS_NAME, 'autocomplete__input')
                )
            )
        except TimeoutException:
            print("Loading took too much time!")
            return False
        except Exception:
            return False

        if self.first:
            return self.wait_for_cookies()
        return True

    def get_next_link(self):
        """
        Gets the link to the next page of search results, if available.

        Returns:
        - str: The link to the next page of search results, or `None` if there is no next page.
        """
        try:
            next_page = self.driver.find_element(By.CLASS_NAME , 'pagination__link--next')
            return next_page.get_property('href')
        except Exception:
            # No "next" link on this page (or it could not be read): treat as the last page.
            return None

    def get_html(self, search):
        """
        Searches the website for the specified query and returns the HTML of the search results page.

        Parameters:
        - search (str): The query to search for.

        Returns:
        - str: The HTML of the search results page.
        - str: The link to the next page of search results, or `None` if there is no next page.
        """
        search_box = self.driver.find_element(By.CLASS_NAME , 'autocomplete__input')
        search_box.clear()
        search_box.send_keys(search)
        self.driver.find_element(By.NAME , 'search').click()

        html = self.driver.page_source
        next_link = self.get_next_link()
        return html , next_link

    def get_url(self , url):
        """
        Navigates to the specified URL and returns the HTML of the page.

        Parameters:
        - url (str): The URL to navigate to.

        Returns:
        - str: The HTML of the page at the specified URL.
        - str: The link to the next page of search results, if available, or `None` if there is no next page.
        """
        self.driver.get(url)
        next_link = self.get_next_link()
        return self.driver.page_source , next_link

In [5]:
class DataProcessor:
    """Turns search-result HTML into listing dictionaries and appends them to `Results.csv`."""

    def __init__(self):
        """
        Initializes a new instance of the `DataProcessor` class.
        """
        pass

    def get_data(self, section):
        """
        Extracts the details of one rental listing from its `<section>` element.

        The fields are located by position: the 4th, 5th and 6th `<div>` inside the
        section are read as location, price and feature list respectively.

        Parameters:
        - section: The parsed `<section>` element of a single listing (an object
          offering `find` / `find_all`, such as a BeautifulSoup tag).

        Returns:
        - dict: Information about the listing with the following keys:
            - 'title': The title of the listing.
            - 'location': The location of the listing.
            - 'price': The price of the listing.
            - 'surface': The surface area of the listing.
            - 'rooms': The number of rooms in the listing.
            - 'interior': The interior of the listing ('' when not listed).

        Raises:
        - IndexError / AttributeError: If the section lacks the expected `<div>`s or `<h2>`.
        """
        divs = section.find_all('div')
        features = divs[5].text.strip().split('\n')
        # Some listings omit trailing features (e.g. the interior); pad so those
        # fields come back empty instead of raising IndexError.
        features += [''] * (3 - len(features))
        return {
            'title' : section.find('h2').text.strip(),
            'location' : divs[3].text.strip(),
            'price' : divs[4].text.strip().split(' ')[0],
            'surface' : features[0].strip(),
            # Splitting on 'room' handles both "1 room" and "2 rooms".
            'rooms' : features[1].split('room')[0].strip(),
            'interior' : features[2].strip(),
        }

    def process_data(self, html):
        """
        Processes the HTML of a search results page to extract information about rental listings.

        Parameters:
        - html (str): The HTML of the search results page.

        Returns:
        - list: A list of dictionaries as produced by `get_data`, one per listing.
          The list is empty when the page contains no results list.
        """
        soup = bs4(html,'html.parser')
        content = soup.find('ul' , {"data-controller":"search-list"})
        if content is None:
            # Not a results page (or no results at all): nothing to extract.
            return []

        return [self.get_data(section) for section in content.find_all('section')]

    def write_data(self , information: list):
        """
        Appends the information about rental listings to `Results.csv`.

        The header row is written only when the file is new or empty, so calling
        this once per page yields a single header followed by all rows. Nothing is
        written (and no file is created) when `information` is empty.

        Parameters:
        - information (list): A list of dictionaries, each containing information about a rental listing.
        """
        if not information:
            return

        # UTF-8 is explicit because the data contains characters such as '€' and '²'
        # that the platform default encoding is not guaranteed to support.
        with open('Results.csv' , 'a', newline='', encoding='utf-8') as file:
            thewriter = writer(file)
            # In append mode the stream starts at the end of the file, so position 0
            # means there is no content (and therefore no header) yet.
            if file.tell() == 0:
                thewriter.writerow(list(information[0].keys()))

            for info in information:
                thewriter.writerow(list(info.values()))

In [6]:
class Fetcher:
    """Ties a `Browser` and a `DataProcessor` together to fetch search results page by page."""

    def __init__(self , driver_path , url , search):
        """
        Initializes a new instance of the `Fetcher` class and opens the browser.

        Parameters:
        - driver_path (str): The path to the ChromeDriver executable.
        - url (str): The URL of the website to be scraped.
        - search (str): The query to search for.

        Attributes:
        - driver_path (str): The path to the ChromeDriver executable.
        - url (str): The URL of the website to be scraped.
        - search (str): The query to search for.
        - html (str): The HTML of the current page (`None` until a page has been fetched).
        - next_url (str): The URL of the next page of search results, if available.
        - browser (Browser): An instance of the `Browser` class.
        - data_processor (DataProcessor): An instance of the `DataProcessor` class.
        """
        self.driver_path = driver_path
        self.url = url
        self.search = search
        self.html = None
        self.next_url = None

        self.browser = Browser(self.driver_path , self.url)
        self.data_processor = DataProcessor()

    @property
    def has_next(self):
        """
        bool: `True` when the most recently fetched page links to a following page.

        Only meaningful after the first call to `get()`; before that it is `False`.
        """
        return bool(self.next_url)

    def get_html(self , search):
        """
        Searches the website for the specified query and stores the resulting page.

        Parameters:
        - search (str): The query to search for.

        Returns:
        - str: The HTML of the search results page, or `None` if no `url` is configured.
        """
        if self.url:
            self.html , self.next_url = self.browser.get_html(search)
            return self.html

    def get(self):
        """
        Fetches one page of search results and returns its listings.

        If a next-page link is known it is followed; otherwise the search for
        `self.search` is submitted. Consequently a call made after the last page
        runs the search again; check `has_next` to stop at the last page.

        Returns:
        - list: A list of dictionaries, each containing information about a rental listing.
        """
        if self.next_url:
            self.html , self.next_url = self.browser.get_url(self.next_url)
        else:
            self.get_html(self.search)
        return self.get_data()

    def get_data(self):
        """
        Processes the HTML of the current page to extract information about rental listings.

        Returns:
        - list: A list of dictionaries, each containing information about a rental listing
          (see `DataProcessor.process_data` for the keys). Empty when no page has been
          fetched yet.
        """
        if not self.html:
            return []
        return self.data_processor.process_data(self.html)

    def write_data(self):
        """
        Writes the listings of the current page to a CSV file.

        Does nothing when no page has been fetched yet or the page has no listings.
        """
        data = self.get_data()
        if data:
            return self.data_processor.write_data(data)

In [7]:
# Query typed into the site's search box.
search = 'Nederland'

In [8]:
# Build the scraper; constructing it starts Chrome and opens the landing page.
fetcher = Fetcher(driver_path=DRIVER_PATH, url=url, search=search)

In [9]:
# Fetch one page of results; the returned list of listing dicts is shown as the cell output.
fetcher.get()

[{'title': 'Apartment Goirkestraat',
  'location': '5046 GL Tilburg (Goirke Noord)',
  'price': '€1,260',
  'surface': '70 m²',
  'rooms': '2',
  'interior': 'Upholstered'},
 {'title': 'House St Martinusstraat',
  'location': '5615 PK Eindhoven (Oude Spoorbaan)',
  'price': '€1,650',
  'surface': '140 m²',
  'rooms': '5',
  'interior': 'Upholstered'},
 {'title': 'Apartment Gevers Deynootplein 165',
  'location': '2586 CS Den Haag (Scheveningen Badplaats)',
  'price': '€2,500',
  'surface': '173 m²',
  'rooms': '4',
  'interior': 'Upholstered or furnished'},
 {'title': 'Apartment Lage Zand 306',
  'location': '2511 GS Den Haag (Uilebomen)',
  'price': '€3,000',
  'surface': '127 m²',
  'rooms': '3',
  'interior': 'Upholstered'},
 {'title': 'Apartment Von Zesenstraat 190',
  'location': '1093 BJ Amsterdam (Dapperbuurt)',
  'price': '€1,800',
  'surface': '68 m²',
  'rooms': '2',
  'interior': 'Furnished'},
 {'title': 'Apartment Wijnhaven',
  'location': '3011 WJ Rotterdam (Stadsdriehoek)

In [10]:
# Call fetcher.get() repeatedly to fetch the following pages of results, one page per call.
# When the current page has no "next" link, the next call submits the search again.
# developer Kanwar Adnan , kanwaradnanrajput@gmail.com

In [11]:
# Shut the browser down once scraping is finished.
fetcher.browser.close()