In [3]:
import requests
from bs4 import BeautifulSoup

# 基础URL
base_url = 'https://www.oldlistings.com.au/real-estate/VIC'

# 伪装请求头，模拟浏览器访问
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# 维多利亚州城市和邮编的列表，包含多单词城市名
victoria_cities_postcodes = 【postcode_data】

# 遍历每个城市和邮编，生成URL并尝试爬取
for city, postcode in victoria_cities_postcodes:
    # 将城市名称中的空格替换为加号
    city_url = city.replace(' ', '+')
    
    # 遍历一系列房产ID，假设从800到900
    for listing_id in range(1, 1001):
        url = f'{base_url}/{city_url}/{postcode}/rent/{listing_id}'
        print(f'正在爬取: {url}')
        
        # 发送请求获取网页内容
        response = requests.get(url, headers=headers)
        
        # 检查响应状态码
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            
            # 假设页面有房产信息，提取地址、价格等信息
            property_div = soup.find('div', class_='property')
            if property_div:
                # 提取经纬度信息
                latitude = property_div['data-lat']
                longitude = property_div['data-lng']
                
                # 提取地址
                address = property_div.find('h2', class_='address').text.strip() if property_div.find('h2', class_='address') else 'No address'

                # 提取卧室数量
                bedrooms = property_div.find('p', class_='property-meta bed').text.strip() if property_div.find('p', class_='property-meta bed') else 'No bedroom info'

                # 提取卫生间数量
                bathrooms = property_div.find('p', class_='property-meta bath').text.strip() if property_div.find('p', class_='property-meta bath') else 'No bathroom info'

                # 提取房产类型
                property_type = property_div.find('p', class_='property-meta type').text.strip() if property_div.find('p', class_='property-meta type') else 'No type info'

                # 提取最后广告价格
                last_price = property_div.find('section', class_='price').find('h3').text.strip() if property_div.find('section', class_='price') else 'No price info'

                # 提取历史价格信息
                historical_prices_section = property_div.find('section', class_='historical-price')
                historical_prices = historical_prices_section.find_all('li') if historical_prices_section else []

                # 遍历历史价格
                price_history = []
                for price in historical_prices:
                    date = price.find('span').text.strip()
                    amount = price.contents[1].strip()  # 价格通常是span之后的文本
                    price_history.append((date, amount))

                # 打印房产信息
                print(f'地址: {address}')
                print(f'经度: {latitude}, 纬度: {longitude}')
                print(f'卧室数量: {bedrooms}')
                print(f'卫生间数量: {bathrooms}')
                print(f'房产类型: {property_type}')
                print(f'最后广告价格: {last_price}')
                print("历史价格:")
                for date, amount in price_history:
                    print(f"日期: {date}, 价格: {amount}")
                print('-' * 40)

            else:
                print('无效房产，跳过...')
        else:
            print(f'页面未找到: {url}')


正在爬取: https://www.oldlistings.com.au/real-estate/VIC/Port+Melbourne/3207/rent/1
地址: 303D/52 NOTT STREET, PORT MELBOURNE
经度: -37.84102001, 纬度: 144.93856001
卧室数量: Bed : 2
卫生间数量: Bath : 1
房产类型: No type info
最后广告价格: $750 per week
历史价格:
日期: August 2024, 价格: $750 per week
日期: November 2023, 价格: $750 per week
日期: November 2023, 价格: $750
----------------------------------------
正在爬取: https://www.oldlistings.com.au/real-estate/VIC/Port+Melbourne/3207/rent/2
地址: 21 BRIDGE STREET, PORT MELBOURNE
经度: -37.8386656, 纬度: 144.9464706
卧室数量: Bed : 2
卫生间数量: Bath : 1
房产类型: No type info
最后广告价格: $700 per week
历史价格:
日期: July 2024, 价格: $700 per week
日期: March 2024, 价格: $545 per week
日期: March 2023, 价格: $545 per week
日期: March 2023, 价格: $545
日期: September 2022, 价格: $535 per week
日期: December 2019, 价格: $535 per week
日期: November 2019, 价格: $550 per week
日期: July 2018, 价格: $525 per week
日期: February 2018, 价格: $525 per week
日期: February 2018, 价格: $525
日期: February 2017, 价格: $510 per week
日期: November 2014, 价格: $490

KeyboardInterrupt: 

In [10]:
import requests
from bs4 import BeautifulSoup

# 目标网页URL
url = 'https://postcodes-australia.com/state-postcodes/vic'

# 发送请求并获取网页内容
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

# 查找所有包含邮政编码和城市信息的部分
postcode_data = []

# 遍历所有包含邮政编码的 <a> 标签
for link in soup.find_all('a', href=True, title=True):
    # 提取邮政编码
    postcode = link.text.strip()

    # 查找相邻的 <ul> 标签中的城市名称
    ul_tag = link.find_next('ul')
    if ul_tag:
        cities = ul_tag.find_all('li')
        for city in cities:
            city_name = city.text.strip()
            postcode_data.append((city_name, postcode))

# 输出结果
for city, postcode in postcode_data:
    print(f"城市: {city}, 邮政编码: {postcode}")

城市: Australian Capital Territory, 邮政编码: Home
城市: New South Wales, 邮政编码: Home
城市: Northern Territory, 邮政编码: Home
城市: Queensland, 邮政编码: Home
城市: South Australia, 邮政编码: Home
城市: Tasmania, 邮政编码: Home
城市: Victoria, 邮政编码: Home
城市: Western Australia, 邮政编码: Home
城市: 3167, VIC, 邮政编码: Australian Capital Territory
城市: 5086, SA, 邮政编码: Australian Capital Territory
城市: 4014, QLD, 邮政编码: Australian Capital Territory
城市: 4573, QLD, 邮政编码: Australian Capital Territory
城市: 5321, SA, 邮政编码: Australian Capital Territory
城市: 2739, NSW, 邮政编码: Australian Capital Territory
城市: 4217, QLD, 邮政编码: Australian Capital Territory
城市: 5245, SA, 邮政编码: Australian Capital Territory
城市: 3167, VIC, 邮政编码: New South Wales
城市: 5086, SA, 邮政编码: New South Wales
城市: 4014, QLD, 邮政编码: New South Wales
城市: 4573, QLD, 邮政编码: New South Wales
城市: 5321, SA, 邮政编码: New South Wales
城市: 2739, NSW, 邮政编码: New South Wales
城市: 4217, QLD, 邮政编码: New South Wales
城市: 5245, SA, 邮政编码: New South Wales
城市: 3167, VIC, 邮政编码: Northern Territory
城市: 5086, SA, 邮

In [12]:
import requests
from bs4 import BeautifulSoup

# 目标网页URL
url = 'https://postcodes-australia.com/state-postcodes/vic'

# 发送请求并获取网页内容
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

# 创建存储城市和邮政编码的列表
postcode_data = []

# 遍历所有包含邮政编码的 <a> 标签
for link in soup.find_all('a', href=True, title=True):
    # 提取邮政编码
    postcode = link.text.strip()

    # 查找相邻的 <ul> 标签中的城市名称
    ul_tag = link.find_next('ul')
    if ul_tag:
        cities = ul_tag.find_all('li')
        for city in cities:
            city_name = city.text.strip()
            postcode_data.append([city_name, postcode])  # 整合成嵌套列表

# 输出完整的嵌套列表
print(postcode_data)


[['Australian Capital Territory', 'Home'], ['New South Wales', 'Home'], ['Northern Territory', 'Home'], ['Queensland', 'Home'], ['South Australia', 'Home'], ['Tasmania', 'Home'], ['Victoria', 'Home'], ['Western Australia', 'Home'], ['6210, WA', 'Australian Capital Territory'], ['3749, VIC', 'Australian Capital Territory'], ['2431, NSW', 'Australian Capital Territory'], ['2870, NSW', 'Australian Capital Territory'], ['2642, NSW', 'Australian Capital Territory'], ['5018, SA', 'Australian Capital Territory'], ['2631, NSW', 'Australian Capital Territory'], ['3113, VIC', 'Australian Capital Territory'], ['6210, WA', 'New South Wales'], ['3749, VIC', 'New South Wales'], ['2431, NSW', 'New South Wales'], ['2870, NSW', 'New South Wales'], ['2642, NSW', 'New South Wales'], ['5018, SA', 'New South Wales'], ['2631, NSW', 'New South Wales'], ['3113, VIC', 'New South Wales'], ['6210, WA', 'Northern Territory'], ['3749, VIC', 'Northern Territory'], ['2431, NSW', 'Northern Territory'], ['2870, NSW', 

In [15]:
import requests
from bs4 import BeautifulSoup

# 基础URL
base_url = 'https://www.oldlistings.com.au/real-estate/VIC'

# 伪装请求头，模拟浏览器访问
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}

# 维多利亚州城市和邮编的列表
victoria_cities_postcodes = postcode_data  # 假设postcode_data是之前已经构建的嵌套列表

# 遍历每个城市和邮编，生成URL并尝试爬取
for city, postcode in victoria_cities_postcodes:
    # 将城市名称中的空格替换为加号
    city_url = city.replace(' ', '+')

    # 首先爬取第一页，看看是否有有效数据
    url = f'{base_url}/{city_url}/{postcode}/rent/1'
    print(f'正在检查第一页数据: {url}')
    
    # 发送请求获取第一页的网页内容
    response = requests.get(url, headers=headers)
    
    # 检查响应状态码
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')

        # 假设页面有房产信息，提取地址、价格等信息
        property_div = soup.find('div', class_='property')

        # 如果第一页没有数据，跳过该 postcode
        if not property_div:
            print(f'页面无有效房产信息，跳过postcode: {postcode}')
            continue  # 跳过当前的 postcode，继续下一个

    else:
        print(f'页面未找到: {url}')
        continue  # 如果第一页无法访问，跳过该 postcode

    # 如果第一页有数据，继续爬取该 postcode 的其他页面
    for listing_id in range(1, 1001):
        url = f'{base_url}/{city_url}/{postcode}/rent/{listing_id}'
        print(f'正在爬取: {url}')
        
        # 发送请求获取网页内容
        response = requests.get(url, headers=headers)
        
        # 检查响应状态码
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            
            # 假设页面有房产信息，提取地址、价格等信息
            property_div = soup.find('div', class_='property')
            
            if not property_div:
                print(f'页面无有效房产信息，跳过: {url}')
                continue  # 如果当前页面没有数据，跳过该页面

            # 提取经纬度信息
            latitude = property_div.get('data-lat', 'No latitude info')
            longitude = property_div.get('data-lng', 'No longitude info')
            
            # 提取地址
            address = property_div.find('h2', class_='address').text.strip() if property_div.find('h2', class_='address') else 'No address'

            # 提取卧室数量
            bedrooms = property_div.find('p', class_='property-meta bed').text.strip() if property_div.find('p', class_='property-meta bed') else 'No bedroom info'

            # 提取卫生间数量
            bathrooms = property_div.find('p', class_='property-meta bath').text.strip() if property_div.find('p', class_='property-meta bath') else 'No bathroom info'

            # 提取房产类型
            property_type = property_div.find('p', class_='property-meta type').text.strip() if property_div.find('p', class_='property-meta type') else 'No type info'

            # 提取最后广告价格
            last_price = property_div.find('section', class_='price').find('h3').text.strip() if property_div.find('section', class_='price') else 'No price info'

            # 提取历史价格信息
            historical_prices_section = property_div.find('section', class_='historical-price')
            historical_prices = historical_prices_section.find_all('li') if historical_prices_section else []

            # 遍历历史价格
            price_history = []
            for price in historical_prices:
                date = price.find('span').text.strip()
                amount = price.contents[1].strip()  # 价格通常是span之后的文本
                price_history.append((date, amount))

            # 打印房产信息
            print(f'地址: {address}')
            print(f'经度: {latitude}, 纬度: {longitude}')
            print(f'卧室数量: {bedrooms}')
            print(f'卫生间数量: {bathrooms}')
            print(f'房产类型: {property_type}')
            print(f'最后广告价格: {last_price}')
            print("历史价格:")
            for date, amount in price_history:
                print(f"日期: {date}, 价格: {amount}")
            print('-' * 40)

        else:
            print(f'页面未找到: {url}')



正在检查第一页数据: https://www.oldlistings.com.au/real-estate/VIC/Australian+Capital+Territory/Home/rent/1
页面未找到: https://www.oldlistings.com.au/real-estate/VIC/Australian+Capital+Territory/Home/rent/1
正在检查第一页数据: https://www.oldlistings.com.au/real-estate/VIC/New+South+Wales/Home/rent/1
页面未找到: https://www.oldlistings.com.au/real-estate/VIC/New+South+Wales/Home/rent/1
正在检查第一页数据: https://www.oldlistings.com.au/real-estate/VIC/Northern+Territory/Home/rent/1
页面未找到: https://www.oldlistings.com.au/real-estate/VIC/Northern+Territory/Home/rent/1
正在检查第一页数据: https://www.oldlistings.com.au/real-estate/VIC/Queensland/Home/rent/1
页面未找到: https://www.oldlistings.com.au/real-estate/VIC/Queensland/Home/rent/1
正在检查第一页数据: https://www.oldlistings.com.au/real-estate/VIC/South+Australia/Home/rent/1
页面未找到: https://www.oldlistings.com.au/real-estate/VIC/South+Australia/Home/rent/1
正在检查第一页数据: https://www.oldlistings.com.au/real-estate/VIC/Tasmania/Home/rent/1
页面未找到: https://www.oldlistings.com.au/real-estate/VIC/Tasma

KeyboardInterrupt: 