In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException, ElementNotInteractableException, TimeoutException
from selenium.webdriver.common.keys import Keys
import time

def _click_with_js_fallback(driver, locator, timeout=10):
    """Wait until *locator* is clickable and click it.

    A native click is tried first; if it is intercepted or the element is
    not interactable, the click is re-issued through JavaScript.

    Raises:
        TimeoutException: if the element does not become clickable within
            *timeout* seconds. The caller decides whether that is fatal.
    """
    element = WebDriverWait(driver, timeout).until(
        EC.element_to_be_clickable(locator)
    )
    try:
        element.click()
        print("Button clicked successfully!")
    except (ElementClickInterceptedException, ElementNotInteractableException):
        driver.execute_script("arguments[0].click();", element)
        print("Button clicked using JavaScript!")


def get_bus_distance(homeAddress, officeAddress):
    """Return the Google Maps text shown for travel mode "3" between two addresses.

    Opens a fresh Chrome session, searches for ``officeAddress``, clicks the
    button with id ``hArJGc``, types ``homeAddress`` into the first
    ``tactile-searchbox-input`` field and submits it, then selects the
    ``data-travel_mode="3"`` entry (presumably public transit, given the
    function name) and reads its visible text.

    Args:
        homeAddress: Address typed into the second search box.
        officeAddress: Address typed into the initial search box.

    Returns:
        The visible text of the ``data-travel_mode="3"`` element, or ``None``
        when any step times out or fails (the error is printed, not raised).
    """
    # Initialize the WebDriver
    driver = webdriver.Chrome()  # or webdriver.Firefox(), etc.

    try:
        # Open Google Maps
        driver.get("https://www.google.ro/maps/preview/")

        # The cookie banner is optional: WebDriverWait signals its absence
        # with TimeoutException, which must not abort the whole lookup.
        try:
            _click_with_js_fallback(
                driver, (By.XPATH, "//button[.//span[text()='Accept all']]")
            )
        except (TimeoutException, NoSuchElementException):
            print("Accept button not found.")

        # Input the address in the search box
        search_input = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located((By.ID, "searchboxinput"))
        )
        search_input.clear()
        search_input.send_keys(officeAddress)
        print(f"Inserted '{officeAddress}' into the search box.")

        # Click the search button (required: a timeout here ends the lookup)
        _click_with_js_fallback(driver, (By.ID, "hArJGc"))
        print("The search button is clicked.")

        # Input a different address
        input_field = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "tactile-searchbox-input"))
        )
        input_field.clear()
        input_field.send_keys(homeAddress)
        print(f"Inserted '{homeAddress}' into the search box.")
        input_field.send_keys(Keys.RETURN)
        print("The final search button has been clicked.")

        # Wait for the travel mode div to be visible, then select it
        travel_mode_locator = (By.XPATH, '//div[@data-travel_mode="3"]')
        travel_mode_div = WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located(travel_mode_locator)
        )
        try:
            travel_mode_div.find_element(By.TAG_NAME, 'button').click()
            print("Button inside the travel mode div clicked successfully!")
        except NoSuchElementException:
            print("Button not found inside the travel mode div.")
        except Exception as e:
            print(f"Error while clicking the button: {e}")

        # Give the page time to refresh after the click, then re-locate the
        # div instead of reusing the earlier element reference.
        time.sleep(3)
        travel_mode_text = WebDriverWait(driver, 20).until(
            EC.visibility_of_element_located(travel_mode_locator)
        )
        print(travel_mode_text.get_attribute("outerHTML"))
        return travel_mode_text.text
    except TimeoutException as te:
        print(f"Timeout Exception: {te}")
    except Exception as e:
        # Best-effort boundary: callers run this once per listing, so a
        # failure is reported and turned into a None result.
        print(f"Error: {e}")
    finally:
        # Runs on the success path too, so the browser is never leaked.
        driver.quit()
    return None


# Example usage
# home_address = "Bulevardul Tineretului, Tineretului, Sectorul 4, Bucuresti"
# rentData=pd.read_csv("chirii.csv")
# office_address = "Calea Floreasca 246c, București 077190"
# for index, record in rentData.iterrows():    
#     rentData.at[index,"distance"]=get_bus_distance(record["adresa"], office_address)
#     print(f"At index: {index} the distance between {office_address} and {record['adresa']} is {rentData.at[index, 'distance']}")
# rentData.to_csv("cleanRentData.csv")



In [2]:
def get_rent_links(nrOfPage):
    """Collect the listing URLs found on one storia.ro results page.

    Opens a fresh Chrome session on the Bucharest apartment-rental results
    page number ``nrOfPage`` and, for every ``<li>`` element, takes the
    ``href`` of its first ``a[data-cy="listing-item-link"]`` descendant.

    Args:
        nrOfPage: Page number substituted into the ``page`` query parameter.

    Returns:
        A list of href strings, in document order. ``<li>`` elements without
        a listing link, and links without an ``href``, are skipped.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(f'https://www.storia.ro/ro/rezultate/inchiriere/apartament/bucuresti?ownerTypeSingleSelect=ALL&viewType=listing&page={nrOfPage}')
        hrefs = []
        for li in driver.find_elements(By.TAG_NAME, 'li'):
            try:
                # Find <a> tag with the specific data-cy attribute
                a_element = li.find_element(By.CSS_SELECTOR, 'a[data-cy="listing-item-link"]')
                href = a_element.get_attribute('href')
            except Exception:
                # Deliberately best-effort: most <li> elements are not
                # listings, and any per-item failure just skips that item.
                continue
            if href:  # get_attribute yields None when the attribute is absent
                hrefs.append(href)
        return hrefs
    finally:
        # Close the browser even if loading or scanning the page fails.
        driver.quit()
    

def get_rent_data(rentLink, officeAddress="Calea Floreasca 246c, București 077190"):
    """Scrape one rental listing page into a pandas Series.

    Args:
        rentLink: URL of the listing page to open.
        officeAddress: Destination address passed to ``get_bus_distance``;
            defaults to the address that used to be hard-coded here.

    Returns:
        A ``pd.Series`` with the keys ``pret``, ``adresa``, ``suprafata``,
        ``numar_camere``, ``descriere``, ``link`` and ``autobuz`` (the result
        of ``get_bus_distance`` for the listing's address).

    Raises:
        NoSuchElementException: if the price, address or description element
            is missing from the page.
        IndexError: if fewer than two ``css-1ftqasz`` elements are present.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(rentLink)

        rentData = pd.Series(dtype=object)
        rentData["pret"] = driver.find_element(By.CSS_SELECTOR, 'strong[data-cy="adPageHeaderPrice"]').text
        rentData["adresa"] = driver.find_element(By.CSS_SELECTOR, 'a.css-1jjm9oe').text

        # Area and room count are read from the same element list; query once.
        detail_elements = driver.find_elements(By.CLASS_NAME, 'css-1ftqasz')
        rentData["suprafata"] = detail_elements[0].text
        rentData["numar_camere"] = detail_elements[1].text

        rentData["descriere"] = driver.find_element(By.CSS_SELECTOR, 'div[data-cy="adPageAdDescription"]').text
        rentData["link"] = rentLink
    finally:
        # Always release this browser, also when an element is missing.
        driver.quit()

    # get_bus_distance opens its own browser, so it runs only after this
    # one is closed to avoid keeping two Chrome sessions alive at once.
    rentData["autobuz"] = get_bus_distance(homeAddress=rentData["adresa"], officeAddress=officeAddress)
    return rentData

    # elements=driver.find_elements(By.CLASS_NAME, 'e1qhas4i2.css-nlohq6')
    # extra_info = [element.text for element in elements[12:]]
    # rentData["extra_info"]=extra_info


    # rentData["an_constructie"]=None
    # rentData["lift"]=None
    # rentData["tip_cladire"]=None
    # rentData["material_constructie"]=None
    # rentData["tip_geamuri"]=None
    # rentData["facilitati"]=None
    # rentData["siguranta"]=None
    # rentData["media"]=None
    

    





In [3]:
import time
# Scrape the first `nrOfPages` result pages and store every listing in
# chirii.csv, one row per listing.
nrOfPages = 5
rentRows = []
# Pages are numbered from 1, so the upper bound must be inclusive to
# actually visit nrOfPages pages.
for page in range(1, nrOfPages + 1):
    for link in get_rent_links(page):
        try:
            rentRows.append(get_rent_data(link))
        except Exception as e:
            # One broken listing must not discard everything scraped so far.
            print(f"Skipping {link}: {e}")
        time.sleep(1)  # small pause between listings

# Build the frame once instead of re-concatenating on every iteration.
rentData = pd.DataFrame(rentRows).reset_index(drop=True)
rentData.to_csv("chirii.csv", index=False)

https://www.storia.ro/ro/oferta/apartament-2-camere-tineretului-IDBHLR
https://www.storia.ro/ro/oferta/one-2-camere-1-loc-parcare-prima-inchiriere-luminos-incalzire-pardose-IDBC6p
https://www.storia.ro/ro/oferta/ocazie-apart-2-camere-regie-residence-pet-friendly-IDAWXG
https://www.storia.ro/ro/oferta/3-camere-titan-cu-centrala-complet-renovat-propietar-IDBlLv
https://www.storia.ro/ro/oferta/inchiriez-apartament-5-camere-IDBKo1
https://www.storia.ro/ro/oferta/studio-13-septembrie-vulcan-residence-prima-inchiriere-parcare-IDBKo0
https://www.storia.ro/ro/oferta/2-camere-baba-novac-brancusi-metrou-titan-parc-ior-nicolae-grigorescu-IDBooi
https://www.storia.ro/ro/oferta/unirii-cantemir-stradal-parcul-tineretului-decomandat-mobilat-utilat-c-IDBJtx
https://www.storia.ro/ro/oferta/ap-2-camere-evia-pallady-bucatrie-inchisa-incalzire-in-pardoseala-IDBKnY
https://www.storia.ro/ro/oferta/apartament-3-camere-mihalache-pet-friendly-IDBKnU
https://www.storia.ro/ro/oferta/ap-2-camere-palladium-residen

In [4]:
if False:
    # Disabled experiment: straight-line distance between two addresses,
    # geocoded through OpenCage and measured with geopy.
    import os

    from opencage.geocoder import OpenCageGeocode
    from geopy.distance import geodesic
    # NOTE(review): load_dotenv is not imported anywhere in the visible
    # notebook; presumably `from dotenv import load_dotenv` is intended.
    # Confirm and add that import before enabling this cell.
    load_dotenv()

    # Function to get coordinates of an address
    def get_coords(address, api_key):
        """Return ``(lat, lng)`` of the first geocoding hit, or ``None``."""
        geocoder = OpenCageGeocode(api_key)
        result = geocoder.geocode(address)
        if result:
            geometry = result[0]['geometry']
            return (geometry['lat'], geometry['lng'])
        return None

    # Function to calculate distance between two addresses
    def calculate_distance(address1, address2, api_key):
        """Return the geodesic distance in km, or ``None`` if geocoding fails."""
        coords_1 = get_coords(address1, api_key)
        coords_2 = get_coords(address2, api_key)

        if coords_1 and coords_2:
            # Calculate distance in kilometers using geodesic
            return geodesic(coords_1, coords_2).kilometers
        return None

    # Example usage

    address1 = "1600 Amphitheatre Parkway, Mountain View, CA"
    address2 = "1 Infinite Loop, Cupertino, CA"

    distance = calculate_distance(address1, address2, os.getenv("api_key"))
    # Compare against None: a distance of exactly 0.0 km is a valid result.
    if distance is not None:
        print(f"Distance: {distance} km")
    else:
        print("Failed to retrieve data")


Distance: 11.103841630088793 km


In [None]:
if False:
    # Disabled experiment: add a straight-line "distance" column (km from the
    # office) to the scraped listings. Relies on get_coords from the
    # geocoding cell.
    import os

    from geopy.distance import geodesic

    # First, load the CSV data
    rentData = pd.read_csv("chirii.csv")
    api_key = os.getenv("api_key")

    # Geocode the office once; it is the same for every row.
    coordonateBirou = get_coords(address="calea floreasca 246c bucuresti", api_key=api_key)

    # Loop through the dataset and calculate distances
    for index, record in rentData.iterrows():
        print(index)
        address = record["adresa"]
        distance = None  # stays None when either address cannot be geocoded
        # Missing CSV cells arrive as NaN, which is truthy; require a
        # non-blank string before calling the geocoder.
        if coordonateBirou and isinstance(address, str) and address.strip():
            coords_address = get_coords(address, api_key)
            if coords_address:
                distance = geodesic(coordonateBirou, coords_address).kilometers
        rentData.at[index, "distance"] = distance

    # Save the updated data with distances to a new CSV file
    rentData.to_csv("chirii_with_distances.csv", index=False)


In [None]:
if False:
    # Disabled experiment: read one result element straight from a Google
    # Maps directions URL.
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.common.exceptions import NoSuchElementException

    def get_div_text(driver, url):
        """Load *url* and return the text of the ``div[jstcache="522"]`` element."""
        driver.get(url)
        div_element = driver.find_element(By.CSS_SELECTOR, 'div[jstcache="522"]')
        return div_element.text

    # Example usage:
    driver = webdriver.Chrome()  # or the browser of your choice
    url = "https://www.google.com/maps/dir/Bulevardul+Tineretului,+Bucure%C8%99ti/Calea+Floreasca+246c,+Bucure%C8%99ti+077190/@44.4439973,26.0671406,13z/data=!3m1!4b1!4m14!4m13!1m5!1m1!1s0x40b1fefe144ede77:0x5c99dd4ae925a749!2m2!1d26.1107837!2d44.410399!1m5!1m1!1s0x40b20388e10cc5ab:0x252c88de294519b6!2m2!1d26.1054352!2d44.4774927!3e2?entry=ttu&g_ep=EgoyMDI0MTAxNi4wIKXMDSoASAFQAw%3D%3D"
    try:
        # A page has to be loaded before the consent button can exist;
        # looking for it on the blank start page can never succeed.
        driver.get(url)
        try:
            accept_button = driver.find_element(By.XPATH, "//button[.//span[text()='Accept all']]")
            accept_button.click()
            print("Button clicked successfully!")
        except NoSuchElementException:
            # If the button doesn't exist
            print("Button not found.")
        print(get_div_text(driver, url))
    finally:
        # Close the browser even when the lookup above raises.
        driver.quit()


In [17]:
if False:
    # Disabled experiment: open a directions URL, dismiss the consent dialog
    # and print the text of the data-travel_mode="3" element.
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException, ElementNotInteractableException, TimeoutException
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    # Assuming the webdriver is already set up
    driver = webdriver.Chrome()

    try:
        # Open your target page
        driver.get("https://www.google.com/maps/dir/Bulevardul+Tineretului,+Bucure%C8%99ti/Calea+Floreasca+246c,+Bucure%C8%99ti+077190/@44.4439973,26.0671406,13z/data=!3m1!4b1!4m14!4m13!1m5!1m1!1s0x40b1fefe144ede77:0x5c99dd4ae925a749!2m2!1d26.1107837!2d44.410399!1m5!1m1!1s0x40b20388e10cc5ab:0x252c88de294519b6!2m2!1d26.1054352!2d44.4774927!3e2?entry=ttu&g_ep=EgoyMDI0MTAxNi4wIKXMDSoASAFQAw%3D%3D")

        try:
            # Wait for the button to be visible and clickable
            accept_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "//button[.//span[text()='Accept all']]"))
            )
        except (TimeoutException, NoSuchElementException):
            # WebDriverWait reports a missing button as TimeoutException.
            print("Button not found.")
        else:
            try:
                accept_button.click()
                print("Button clicked successfully!")
            except (ElementClickInterceptedException, ElementNotInteractableException):
                # Fallback to JavaScript click in case of issues
                driver.execute_script("arguments[0].click();", accept_button)
                print("Button clicked using JavaScript!")

        try:
            travel_mode_div = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.XPATH, '//div[@data-travel_mode="3"]'))
            )
            travel_mode_text = travel_mode_div.text

            print("Text inside the div with data-travel_mode=3:")
            print(travel_mode_text)

        except Exception as e:
            print(f"Error: {e}")
    finally:
        # Covers every path above, including failures before the lookup.
        driver.quit()


Button clicked successfully!
Text inside the div with data-travel_mode=3:
33 min


In [None]:
if False:
    # Disabled prototype of get_bus_distance: the same Google Maps flow run
    # inline for one hard-coded pair of addresses.
    import pandas as pd
    from selenium import webdriver
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException, ElementNotInteractableException, TimeoutException
    from selenium.webdriver.common.keys import Keys

    adresaBirou = "Calea Floreasca 246c, București 077190"

    # Initialize the WebDriver
    driver = webdriver.Chrome()  # or webdriver.Firefox(), etc.

    # Function to click an element with fallback to JavaScript
    def click_element(locator):
        """Wait for *locator* to be clickable and click it.

        Lets TimeoutException from the wait propagate so the caller can
        decide whether the element is optional.
        """
        element = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable(locator)
        )
        try:
            element.click()
            print("Button clicked successfully!")
        except (ElementClickInterceptedException, ElementNotInteractableException):
            driver.execute_script("arguments[0].click();", element)
            print("Button clicked using JavaScript!")

    try:
        # Open Google Maps
        driver.get("https://www.google.ro/maps/preview/")

        # Accept the cookies button; its absence shows up as a timeout of
        # the wait and must not stop the rest of the flow.
        try:
            accept_button_locator = (By.XPATH, "//button[.//span[text()='Accept all']]")
            click_element(accept_button_locator)
        except (TimeoutException, NoSuchElementException):
            print("Accept button not found.")

        # Input the address in the search box
        try:
            search_input = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.ID, "searchboxinput"))
            )
            search_input.clear()
            search_input.send_keys(adresaBirou)
            print(f"Inserted '{adresaBirou}' into the search box.")

            # Click the search button
            search_button_locator = (By.ID, "hArJGc")
            click_element(search_button_locator)
            print("The search button is clicked with 1 argument.")

            # Input a different address
            input_field = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CLASS_NAME, "tactile-searchbox-input"))
            )
            input_field.clear()
            homeAddress = "Bulevardul Tineretului, Tineretului, Sectorul 4, Bucuresti"
            input_field.send_keys(homeAddress)
            print(f"Inserted '{homeAddress}' into the search box.")
            input_field.send_keys(Keys.RETURN)
            print("The final search button has been clicked.")

            # Wait for the travel mode div to be visible and extract text
            travel_mode_div = WebDriverWait(driver, 10).until(
                EC.visibility_of_element_located((By.XPATH, '//div[@data-travel_mode="3"]'))
            )
            travel_mode_text = travel_mode_div.text
            print(travel_mode_div.get_attribute("outerHTML"))
            print("Text inside the div with data-travel_mode=3:")
            print(travel_mode_text)

        except TimeoutException as te:
            print(f"Timeout Exception: {te}")
        except Exception as e:
            print(f"Error: {e}")
    finally:
        # Close the driver when done, on every path.
        driver.quit()
