In [13]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options

# Set up the Selenium webdriver (Firefox)
options = Options()
# options.headless = True  # Uncomment to run headlessly
driver = webdriver.Firefox(options=options)

# Define a list of bounding boxes that roughly cover Belo Horizonte.
# Each box is defined by its southwest (sw) and northeast (ne) coordinates.
# (You may need to adjust these coordinates for full coverage and overlap.)
bounding_boxes = [
    {"sw_lat": -20.00, "sw_lng": -44.05, "ne_lat": -19.95, "ne_lng": -44.00},
    {"sw_lat": -19.95, "sw_lng": -44.05, "ne_lat": -19.90, "ne_lng": -44.00},
    {"sw_lat": -20.00, "sw_lng": -44.00, "ne_lat": -19.95, "ne_lng": -43.95},
    {"sw_lat": -19.95, "sw_lng": -44.00, "ne_lat": -19.90, "ne_lng": -43.95},
    # Add additional boxes as needed to cover the whole city
]

all_data = []

def scrape_box(url):
    driver.get(url)
    time.sleep(5)  # wait for the page to load

    # Scroll down to load additional listings
    scroll_pause_time = 3
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause_time)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    # Find listings on the page. (Adjust XPath/CSS selectors as needed.)
    listings = driver.find_elements(By.XPATH, "//div[@itemprop='itemListElement']")
    box_data = []
    for listing in listings:
        try:
            title_elem = listing.find_element(By.XPATH, ".//meta[@itemprop='name']")
            title = title_elem.get_attribute("content")
        except Exception:
            title = None

        try:
            price_elem = listing.find_element(By.XPATH, ".//span[@aria-hidden='true']")
            price = price_elem.text
        except Exception:
            price = None

        try:
            link_elem = listing.find_element(By.XPATH, ".//a")
            listing_url = link_elem.get_attribute("href")
        except Exception:
            listing_url = None

        box_data.append({
            "title": title,
            "price": price,
            "url": listing_url
        })
    return box_data

# Loop over each bounding box and scrape the listings
for box in bounding_boxes:
    url = (
        f"https://www.airbnb.com/s/Belo-Horizonte--Brazil/homes?"
        f"sw_lat={box['sw_lat']}&sw_lng={box['sw_lng']}&"
        f"ne_lat={box['ne_lat']}&ne_lng={box['ne_lng']}"
    )
    print(f"Scraping URL: {url}")
    data = scrape_box(url)
    print(f"Found {len(data)} listings in this box.")
    all_data.extend(data)

# Remove potential duplicates (if listings appear in overlapping boxes)
df = pd.DataFrame(all_data).drop_duplicates(subset=["url"])

print(f"Total unique listings found: {len(df)}")
print(df.head())

driver.quit()

Scraping URL: https://www.airbnb.com/s/Belo-Horizonte--Brazil/homes?sw_lat=-20.0&sw_lng=-44.05&ne_lat=-19.95&ne_lng=-44.0
Found 18 listings in this box.
Scraping URL: https://www.airbnb.com/s/Belo-Horizonte--Brazil/homes?sw_lat=-19.95&sw_lng=-44.05&ne_lat=-19.9&ne_lng=-44.0
Found 18 listings in this box.
Scraping URL: https://www.airbnb.com/s/Belo-Horizonte--Brazil/homes?sw_lat=-20.0&sw_lng=-44.0&ne_lat=-19.95&ne_lng=-43.95
Found 18 listings in this box.
Scraping URL: https://www.airbnb.com/s/Belo-Horizonte--Brazil/homes?sw_lat=-19.95&sw_lng=-44.0&ne_lat=-19.9&ne_lng=-43.95
Found 18 listings in this box.
Total unique listings found: 72
                                               title               price  \
0  Apartamento Barreiro West hospital close to ev...   Free cancellation   
1                                Comfortable Bedroom  Stay with Vanderli   
2           Full Apartment in Diamante Neighborhood.              2 beds   
3           Entire home, safe, comfortable and cosy!

In [17]:
print(df.url[0])

https://www.airbnb.com/rooms/849361143802325736?search_mode=regular_search&adults=1&category_tag=Tag%3A8678&check_in=2025-03-01&check_out=2025-03-06&children=0&infants=0&pets=0&photo_id=1609020263&source_impression_id=p3_1740168124_P3zE__HWboE1wcsY&previous_page_section_name=1000&federated_search_id=7a6877c3-b6ca-4761-80b4-9bf7abc5d421


In [None]:
import pandas as pd
from airbnb import Api  # make sure to install via: pip install airbnb

# Replace these with your actual credentials
CLIENT_ID = "YOUR_CLIENT_ID"
ACCESS_TOKEN = "YOUR_ACCESS_TOKEN"

# Initialize the Airbnb API client
api = Api(client_id=CLIENT_ID, access_token=ACCESS_TOKEN)

# Define search parameters for Belo Horizonte
search_params = {
    "location": "Belo Horizonte, Brazil",
    "check_in": "2025-03-01",   # adjust dates as needed
    "check_out": "2025-03-07",
    "guests": 1,
    # You can include additional parameters here if supported
}

# Perform the search query
response = api.search_listings(**search_params)

# Process the returned JSON to extract listings data.
# Note: The exact keys may vary depending on the API wrapper version.
data = []
for result in response.get("search_results", []):
    listing = result.get("listing", {})
    data.append({
        "id": listing.get("id"),
        "name": listing.get("name"),
        "price": listing.get("price_formatted"),  # sometimes returned as price_formatted
        "url": listing.get("public_url"),
    })

# Convert the data to a pandas DataFrame
df = pd.DataFrame(data)
print(df.head())


In [11]:
import pyairbnb
import json

check_in = "2025-04-10"
check_out = "2025-04-12"
currency = "EUR"
user_input_text = "Belo Horizonte"
locale = "es"
proxy_url = ""  # Proxy URL (if needed)

api_key = pyairbnb.get_api_key("")
listings = pyairbnb.search_experiences(user_input_text, currency, locale, check_in, check_out, api_key, proxy_url)

with open('listings.json', 'w', encoding='utf-8') as f:
    json.dump(listings, f, ensure_ascii=False, indent=2)

AttributeError: module 'pyairbnb' has no attribute 'search_experiences'

In [18]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options

# Set up the Selenium webdriver (Firefox)
options = Options()
# options.headless = True  # Uncomment to run headlessly
driver = webdriver.Firefox(options=options)

# Define a list of bounding boxes that roughly cover Belo Horizonte.
# Each box is defined by its southwest (sw) and northeast (ne) coordinates.
# (You may need to adjust these coordinates for full coverage and overlap.)
bounding_boxes = [
    {"sw_lat": -20.00, "sw_lng": -44.05, "ne_lat": -19.95, "ne_lng": -44.00},
    {"sw_lat": -19.95, "sw_lng": -44.05, "ne_lat": -19.90, "ne_lng": -44.00},
    {"sw_lat": -20.00, "sw_lng": -44.00, "ne_lat": -19.95, "ne_lng": -43.95},
    {"sw_lat": -19.95, "sw_lng": -44.00, "ne_lat": -19.90, "ne_lng": -43.95},
    # Add additional boxes as needed to cover the whole city
]

all_data = []

def scrape_box(url):
    driver.get(url)
    time.sleep(5)  # wait for the page to load

    # Scroll down to load additional listings
    scroll_pause_time = 3
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause_time)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    # Find listings on the page. (Adjust XPath/CSS selectors as needed.)
    listings = driver.find_elements(By.XPATH, "//div[@itemprop='itemListElement']")
    box_data = []
    for listing in listings:
        try:
            title_elem = listing.find_element(By.XPATH, ".//meta[@itemprop='name']")
            title = title_elem.get_attribute("content")
        except Exception:
            title = None

        try:
            price_elem = listing.find_element(By.XPATH, ".//span[@aria-hidden='true']")
            price = price_elem.text
        except Exception:
            price = None

        try:
            link_elem = listing.find_element(By.XPATH, ".//a")
            listing_url = link_elem.get_attribute("href")
        except Exception:
            listing_url = None

        box_data.append({
            "title": title,
            "price": price,
            "url": listing_url
        })
    return box_data

# Loop over each bounding box and scrape the listings
for box in bounding_boxes:
    url = (
        f"https://www.airbnb.com/s/Belo-Horizonte--Brazil/homes?"
        f"sw_lat={box['sw_lat']}&sw_lng={box['sw_lng']}&"
        f"ne_lat={box['ne_lat']}&ne_lng={box['ne_lng']}"
    )
    print(f"Scraping URL: {url}")
    data = scrape_box(url)
    print(f"Found {len(data)} listings in this box.")
    all_data.extend(data)

# Remove potential duplicates (if listings appear in overlapping boxes)
df = pd.DataFrame(all_data).drop_duplicates(subset=["url"])

print(f"Total unique listings found: {len(df)}")
print(df.head())

driver.quit()
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options

# Set up the Selenium webdriver (Firefox)
options = Options()
# options.headless = True  # Uncomment to run headlessly
driver = webdriver.Firefox(options=options)

# Define a list of bounding boxes that roughly cover Belo Horizonte.
# Each box is defined by its southwest (sw) and northeast (ne) coordinates.
# (You may need to adjust these coordinates for full coverage and overlap.)
bounding_boxes = [
    {"sw_lat": -20.00, "sw_lng": -44.05, "ne_lat": -19.95, "ne_lng": -44.00},
    {"sw_lat": -19.95, "sw_lng": -44.05, "ne_lat": -19.90, "ne_lng": -44.00},
    {"sw_lat": -20.00, "sw_lng": -44.00, "ne_lat": -19.95, "ne_lng": -43.95},
    {"sw_lat": -19.95, "sw_lng": -44.00, "ne_lat": -19.90, "ne_lng": -43.95},
    # Add additional boxes as needed to cover the whole city
]

all_data = []

def scrape_box(url):
    driver.get(url)
    time.sleep(5)  # wait for the page to load

    # Scroll down to load additional listings
    scroll_pause_time = 3
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(scroll_pause_time)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    # Find listings on the page. (Adjust XPath/CSS selectors as needed.)
    listings = driver.find_elements(By.XPATH, "//div[@itemprop='itemListElement']")
    box_data = []
    for listing in listings:
        try:
            title_elem = listing.find_element(By.XPATH, ".//meta[@itemprop='name']")
            title = title_elem.get_attribute("content")
        except Exception:
            title = None

        try:
            price_elem = listing.find_element(By.XPATH, ".//span[@aria-hidden='true']")
            price = price_elem.text
        except Exception:
            price = None

        try:
            link_elem = listing.find_element(By.XPATH, ".//a")
            listing_url = link_elem.get_attribute("href")
        except Exception:
            listing_url = None

        box_data.append({
            "title": title,
            "price": price,
            "url": listing_url
        })
    return box_data

# Loop over each bounding box and scrape the listings
for box in bounding_boxes:
    url = (
        f"https://www.airbnb.com/s/Belo-Horizonte--Brazil/homes?"
        f"sw_lat={box['sw_lat']}&sw_lng={box['sw_lng']}&"
        f"ne_lat={box['ne_lat']}&ne_lng={box['ne_lng']}"
    )
    print(f"Scraping URL: {url}")
    data = scrape_box(url)
    print(f"Found {len(data)} gg in this box.")
    all_data.extend(data)

# Remove potential duplicates (if listings appear in overlapping boxes)
df = pd.DataFrame(all_data).drop_duplicates(subset=["url"])

print(f"Total unique listings found: {len(df)}")
print(df.head())

driver.quit()


Scraping URL: https://www.airbnb.com/s/Belo-Horizonte--Brazil/homes?sw_lat=-20.0&sw_lng=-44.05&ne_lat=-19.95&ne_lng=-44.0
Found 18 listings in this box.
Scraping URL: https://www.airbnb.com/s/Belo-Horizonte--Brazil/homes?sw_lat=-19.95&sw_lng=-44.05&ne_lat=-19.9&ne_lng=-44.0


WebDriverException: Message: Failed to decode response from marionette
