In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException
from time import sleep
import pandas as pd
import re

# Scrape rental listings for Beijing -> 通州 -> 马驹桥 from zu.fang.com and
# collect them into ``df`` (one row per listing that could be parsed).

# Start the Edge browser session. It is closed in the ``finally`` clause below
# so the browser/driver process is not leaked, whether the run succeeds or a
# lookup fails midway.
browser = webdriver.Edge()

# Collected listings, one dict per successfully parsed listing.
data_list = []

try:
    # Open the target site.
    browser.get('https://zu.fang.com/')
    sleep(2)  # fixed wait for the page to load

    # Hover the mouse over the city selection area.
    target = browser.find_element(By.CSS_SELECTOR, "div.s4Box")
    ActionChains(browser).move_to_element(target).perform()
    sleep(1)

    # Choose the city "北京".
    browser.find_element(By.XPATH, "//div[@id='cityi010']/a[text()='北京']").click()
    sleep(2)

    # Drill down in order: "通州", then "马驹桥".
    for area in ["通州", "马驹桥"]:
        browser.find_element(By.LINK_TEXT, area).click()
        sleep(2)

    # Walk the result pages until there is no "下一页" (next page) link.
    while True:
        try:
            container = browser.find_element(By.CSS_SELECTOR, "div.houseList")
            listings = container.find_elements(By.TAG_NAME, "dl")
        except NoSuchElementException:
            print("未找到房源信息")
            break

        for listing in listings:
            # Best effort per listing: an entry missing any of the three
            # fields is reported and skipped instead of aborting the run.
            try:
                title = listing.find_element(By.XPATH, ".//a").text.strip()
                details = listing.find_element(By.XPATH, ".//p[@class='font15 mt12 bold']").text.strip()
                price = listing.find_element(By.XPATH, ".//span[@class='price']").text.strip()
                data_list.append({"标题": title, "户型_面积等": details, "价格": price})
            except Exception as err:
                print(f"跳过异常房源: {err}")

        try:
            next_btn = browser.find_element(By.LINK_TEXT, "下一页")
            next_btn.click()
            sleep(3)
        except NoSuchElementException:
            print("爬取完成")
            break
finally:
    browser.quit()

# Build the table. The columns are given explicitly so the frame still has the
# expected columns when nothing was scraped; otherwise the column lookups that
# follow would raise KeyError on an empty result.
df = pd.DataFrame(data_list, columns=["标题", "户型_面积等", "价格"])


# An area field looks like "| 80㎡ |". The number must be well formed (digits
# with at most one decimal point), and the closing delimiter may be either "|"
# or the end of the string, so an area in the last field is still recognised.
_AREA_RE = re.compile(r"\|\s*(\d+(?:\.\d*)?|\.\d+)㎡\s*(?:\||$)")


def extract_area(info):
    """Return the floor area in ㎡ found in a "|"-separated details string.

    Args:
        info: Listing details text, e.g. ``"整租 | 2室1厅 | 80㎡ | 朝南"``.

    Returns:
        The area as a ``float``, or ``None`` when ``info`` is not a string or
        contains no well-formed area field.
    """
    # Missing cells reach ``Series.apply`` as None/NaN; treat them as "no area"
    # instead of letting ``re`` raise TypeError.
    if not isinstance(info, str):
        return None
    match = _AREA_RE.search(info)
    return float(match.group(1)) if match else None


# Derive the two numeric columns from the raw scraped text columns.
area_column = "平方数（㎡）"
price_column = "价格（元/㎡）"

# Area: parsed out of the details text by extract_area (None when absent).
df[area_column] = df["户型_面积等"].apply(extract_area)

# Price: strip the literal unit suffix, then convert the remainder to float.
price_text = df["价格"].str.replace("元/㎡", "", regex=False)
df[price_column] = price_text.astype(float)

# Keep only the derived numeric columns, in this order.
df = df[[area_column, price_column]]

# Write the result to CSV without the index column.
df.to_csv("majuqiao_rental.csv", index=False)
print("数据保存完毕")


爬取完成
数据保存完毕
