In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import csv
import time
import numpy as np
import pandas as pd

In [None]:
# Configure and launch the Chrome session used by the scraping cells below.
options = webdriver.ChromeOptions()
# "--headless" keeps the browser window hidden (runs in the background).
for chrome_flag in ("--headless", "--disable-gpu"):
    options.add_argument(chrome_flag)

# webdriver_manager's install() result is handed to Service as the driver to run.
chromedriver_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chromedriver_service, options=options)

In [None]:
# URL and number of pages
myhome_url = "https://www.myhome.ge/s/iyideba-bina-Tbilisshi/?deal_types=1&cities=1&currency_id=1&CardView=1&owner_type=physical&real_estate_types=1&page="
pages = 10  # number of pages to scrape

PAGE_RENDER_WAIT_S = 5  # fixed pause after each page load so the JS-rendered cards are present
SQM_UNITS = ("მ²", "m²")  # unit labels that mark a facility value as square meters


def _text_or_nan(card, selector):
    """Return the text of the first element under `card` matching the CSS
    `selector`, or NaN when the lookup fails.

    `except Exception` (rather than a bare `except`) keeps the best-effort
    behaviour for missing elements while still letting Ctrl+C / kernel
    interrupts stop the scrape.
    """
    try:
        return card.find_element(By.CSS_SELECTOR, selector).text
    except Exception:
        return np.nan


def _price_per_sqm_or_nan(card):
    """Return the card's price per square meter as a float, or NaN on any failure.

    The source text has the form "<number> / <unit>"; only the part before the
    slash is kept and thousands separators (commas) are removed before parsing.
    """
    try:
        raw_text = card.find_element(
            By.CSS_SELECTOR, "div.text-sm.truncate.text-secondary-70 span"
        ).text
        return float(raw_text.split("/")[0].replace(",", "").strip())
    except Exception:
        return np.nan


def _span_float_or_nan(span):
    """Parse a facility span's text as a float (comma treated as the decimal
    mark), returning NaN when the text is not numeric or cannot be read."""
    try:
        return float(span.text.replace(",", "."))
    except Exception:
        return np.nan


def _parse_facilities(card):
    """Return `(floors, rooms, bedrooms, sqm)` read from the card's facility spans.

    Spans are interpreted positionally: 1st = floors, 2nd = rooms, 3rd = bedrooms
    unless it is directly followed by a square-meter unit label, in which case the
    3rd span is the area and bedrooms is NaN. Any field that cannot be read stays
    NaN; values read before a failure are kept.
    """
    floors = rooms = bedrooms = sqm = np.nan
    try:
        facility_spans = card.find_elements(
            By.CSS_SELECTOR,
            "div[class*='facilities--'] div.inline-flex.items-center.gap-1 span"
        )
        span_count = len(facility_spans)

        # Floors (empty text is treated as missing)
        if span_count > 0:
            floors = facility_spans[0].text or np.nan

        # Rooms (empty text is treated as missing)
        if span_count > 1:
            rooms = facility_spans[1].text or np.nan

        if span_count > 2:
            # Bedrooms: if the 3rd span shares its parent div with a unit label,
            # it is actually the area, so there is no bedroom count on this card.
            parent_div = facility_spans[2].find_element(By.XPATH, "..")
            spans_in_div = parent_div.find_elements(By.TAG_NAME, "span")
            third_is_area = (
                len(spans_in_div) == 2
                and spans_in_div[1].text.strip() in SQM_UNITS
            )
            bedrooms = np.nan if third_is_area else facility_spans[2].text.strip()

            # Square meter: taken from the 3rd span when the 4th span is the unit
            # label, otherwise from the 4th span if it exists.
            fourth_text = facility_spans[3].text.strip() if span_count > 3 else ""
            if fourth_text in SQM_UNITS:
                sqm = _span_float_or_nan(facility_spans[2])
            elif span_count > 3:
                sqm = _span_float_or_nan(facility_spans[3])
    except Exception:
        # Best effort: keep whatever was parsed before the failure.
        pass
    return floors, rooms, bedrooms, sqm


data = []  # store all listings

for page_number in range(1, pages + 1):
    driver.get(myhome_url + str(page_number))
    time.sleep(PAGE_RENDER_WAIT_S)  # wait for JS to render

    cards = driver.find_elements(By.CSS_SELECTOR, "a.group.relative.block")
    # remove cards without title (avoids pagination or extra buttons)
    cards = [card for card in cards if card.find_elements(By.CSS_SELECTOR, "h2")]

    for card in cards:
        # URL first (fetched once); skip cards without a link and auction listings
        url = card.get_attribute("href")
        if not url or "auction" in url:
            continue

        # Extract property ID from URL (5th '/'-separated segment); NaN instead of
        # an IndexError when the href is shorter than expected.
        url_parts = url.split("/")
        property_id = url_parts[4] if len(url_parts) > 4 else np.nan

        title = _text_or_nan(card, "h2")
        price_number = _text_or_nan(card, "span.truncate")
        price_per_sqm = _price_per_sqm_or_nan(card)
        currency = _text_or_nan(card, "span.text-secondary-70")
        location = _text_or_nan(card, "h3.text-sm")
        district = _text_or_nan(card, "span.font-tbcx-regular")

        # Floors, Rooms, Bedroom(s), Square meter
        floors, rooms, bedrooms, sqm = _parse_facilities(card)

        post_date = _text_or_nan(
            card, "div.flex.items-center.h-full.gap-1.text-secondary-70.text-xs span"
        )

        # Append all data including property ID, currency, district
        data.append([property_id, title, price_number, price_per_sqm, currency, location, district, floors, rooms, bedrooms, sqm, post_date, url])

In [1]:
# Save to CSV. The browser is shut down in `finally` so a failed write
# (e.g. permission or disk error) does not leave a headless Chrome process running.
try:
    with open("myhome_listings_en.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        # Header order must match the row layout appended to `data` by the scraping cell.
        writer.writerow(["ID", "Title", "Price", "Price per m²", "Currency", "Location", "District", "Floors", "Rooms", "Bedroom(s)", "Square meter", "Post Date", "URL"])
        writer.writerows(data)
finally:
    driver.quit()

print(f"Scraped {len(data)} listings from {pages} pages!")

Scraped 200 listings from 10 pages!
