In [89]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.common.by import By
import time

import pandas as pd
import os

In [90]:
class FileAppender:
    """
    Appends new data to a CSV file if it already exists, otherwise creates it.
    """

    @staticmethod
    def append_data_as_csv(data, name: str):
        """
        Converts data to a DataFrame and appends it to ``<name>.csv``.

        The header row is written only when the target file does not exist yet
        or exists but is empty, so repeated calls yield a single header
        followed by data rows. Data that produces a frame without any columns
        (e.g. an empty list) is skipped, because there would be nothing to write.

        :param data: Data to be saved; passed to ``pd.DataFrame`` as-is.
        :param name: Name of file without extension.
        """
        path = f'{name}.csv'
        data_df = pd.DataFrame(data)

        # Nothing to write; skipping avoids creating a header-less file that
        # would suppress the header on the next real append.
        if len(data_df.columns) == 0:
            return

        # A zero-byte file has no header yet, so treat it like a missing file.
        needs_header = not os.path.isfile(path) or os.path.getsize(path) == 0

        data_df.to_csv(path, mode='a', header=needs_header, index=False)

In [91]:
class OtoDomScrapper:
    """
    Handles all operations for scraping the 'otodom.pl' website.

    Drives a Chrome browser through the paginated listing of apartments for
    sale in Warsaw and appends one CSV row per advertisement through
    ``FileAppender``.
    """
    # Active browser session; stays None until __initialize_driver succeeds.
    __webdriver: "WebDriver | None" = None
    # Base name (without extension) of the CSV file the rows are appended to.
    __file_name = "test_data"

    def execute(self, first_page: int = 1, last_page: int = 99, delay_seconds: float = 3):
        """
        Runs the whole scrape: starts the browser, visits every requested
        result page, saves the advertisements found on each of them and
        always shuts the browser down afterwards.

        :param first_page: Number of the first result page to visit.
        :param last_page: Number of the last result page to visit (inclusive).
        :param delay_seconds: Pause after each navigation before reading the page.
        """
        try:
            self.__initialize_driver()

            for page in range(first_page, last_page + 1):
                self.__go_to_website(page)
                time.sleep(delay_seconds)
                # The consent dialog is dismissed once, on the first page visited.
                if page == first_page:
                    self.__accept_cookies()

                # Collect inside the loop so every visited page is saved,
                # not only the last one.
                self.__get_data_from_site()

        finally:
            # The driver is still None when start-up itself failed; calling
            # quit() then would hide the original error behind a new one.
            if self.__webdriver is not None:
                self.__webdriver.quit()
                self.__webdriver = None

    def __initialize_driver(self):
        """
        Initializes the webdriver.
        """
        download_service = Service()
        self.__webdriver = webdriver.Chrome(service=download_service)

    def __go_to_website(self, page: int):
        """
        Goes to the website.
        :param page: Number of page to go.
        """
        self.__webdriver.get(f"https://www.otodom.pl/pl/wyniki/sprzedaz/mieszkanie/mazowieckie/warszawa/warszawa/warszawa?viewType=listing&page={page}")

    def __accept_cookies(self):
        """
        Clicks the cookie-consent accept button when it is present.
        Does nothing if the button cannot be found on the current page.
        """
        # find_elements returns an empty list instead of raising, so a missing
        # banner does not abort the whole scrape.
        accept_buttons = self.__webdriver.find_elements(By.ID, "onetrust-accept-btn-handler")
        if accept_buttons:
            accept_buttons[0].click()

    def __get_data_from_site(self):
        """
        Gets data about apartments from the currently loaded page and appends
        them to the CSV file in one write.

        An advertisement that cannot be parsed is reported and skipped; the
        remaining advertisements of the page are still saved.
        """
        advertisements = self.__webdriver.find_elements(By.CSS_SELECTOR, 'article[data-cy="listing-item"]')

        apartments = []
        for advertisement in advertisements:
            try:
                # NOTE(review): the '.css-…' selectors look like generated class
                # names and may change when the site is redeployed — confirm.
                price = advertisement.find_element(By.CSS_SELECTOR, '.css-2bt9f1').text
                localization = advertisement.find_element(By.CSS_SELECTOR, '.css-42r2ms').text
                link = advertisement.find_element(By.CSS_SELECTOR, 'a[data-cy="listing-item-link"]').get_attribute('href')

                # Gets details about rooms, area, floor as parallel label/value lists.
                labels = advertisement.find_elements(By.CSS_SELECTOR, '.css-12dsp7a dt')
                values = advertisement.find_elements(By.CSS_SELECTOR, '.css-12dsp7a dd')

                details_dict = {label.text: value.text for label, value in zip(labels, values)}

                apartment = {
                    "link": link,
                    "localization": localization,
                    "price": price,
                    "rooms_number": details_dict.get("Liczba pokoi", "Brak informacji"),
                    "area": details_dict.get("Powierzchnia", "Brak informacji"),
                    "floor": details_dict.get("Piętro", "Brak informacji"),
                }
                print(apartment)

                apartments.append(apartment)

            except Exception as e:
                print(f"Błąd podczas przetwarzania ogłoszenia: {e}")

        # One write per page instead of reopening the file for every listing.
        if apartments:
            FileAppender.append_data_as_csv(apartments, self.__file_name)


In [92]:
# Create the scraper and run the full crawl.
otodom_scrapper: OtoDomScrapper = OtoDomScrapper()
otodom_scrapper.execute()


{'link': 'https://www.otodom.pl/pl/oferta/klimatyczne-mieszkanie-warszawa-bielany-ID4ttQe', 'localization': 'Huta, Bielany, Warszawa, mazowieckie', 'price': '899 000 zł', 'rooms_number': '3 pokoje', 'area': '51 m²', 'floor': '6 piętro'}
{'link': 'https://www.otodom.pl/pl/oferta/2-pokoje-eco-remont-blisko-metra-ID4tVFF', 'localization': 'ul. Blacharska, Służew, Mokotów, Warszawa, mazowieckie', 'price': '675 000 zł', 'rooms_number': '2 pokoje', 'area': '32 m²', 'floor': '4 piętro'}
{'link': 'https://www.otodom.pl/pl/oferta/muranow-2-pokoje-kamienica-ID4u1xZ', 'localization': 'ul. Józefa Lewartowskiego 4, Muranów, Śródmieście, Warszawa, mazowieckie', 'price': '899 000 zł', 'rooms_number': '2 pokoje', 'area': '38 m²', 'floor': '3 piętro'}
{'link': 'https://www.otodom.pl/pl/oferta/chmielna-duo-mieszkanie-1-pok-a1-52-ID4nYB6', 'localization': 'ul. Chmielna, Mirów, Wola, Warszawa, mazowieckie', 'price': '1 262 716 zł', 'rooms_number': '1 pokój', 'area': '39.28 m²', 'floor': '5 piętro'}
{'link