# School and Hospital external dataset

API key: redacted. Credentials must not be committed to the repository; revoke the previously published key and supply a fresh one at run time.

In [2]:
import requests
import pandas as pd
import time
import os

# Google Maps API key. Read from the environment so the credential is never
# stored in the source; export GOOGLE_MAPS_API_KEY before running. An empty
# string is used when the variable is unset (the API will then reject requests).
API_KEY = os.environ.get('GOOGLE_MAPS_API_KEY', '')

# Base URL of the Places "Nearby Search" endpoint.
places_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

# Self-imposed API budget. The counter lives in memory only, so it starts
# from zero again every time this module is executed.
MAX_DAILY_CALLS = 6000  # maximum number of calls allowed per day
CALLS_MADE = 0  # number of calls issued so far

# Latitude/longitude bounding box used for Victoria.
VIC_LAT_MIN = -39.159  # southern edge
VIC_LAT_MAX = -33.981  # northern edge
VIC_LNG_MIN = 140.961  # western edge
VIC_LNG_MAX = 149.976  # eastern edge

# Spacing of the search grid laid over the bounding box.
GRID_STEP = 0.05  # step in degrees of latitude/longitude

# Query the Places API for hospitals or schools around one point
def get_places_data(keyword, location, radius=5000):
    """Run one Nearby Search request and return its results.

    Args:
        keyword: Place type sent as the ``type`` query parameter,
            e.g. ``'hospital'`` or ``'school'``.
        location: Search centre as a ``"lat,lng"`` string.
        radius: Search radius in metres.

    Returns:
        The list stored under the response's ``results`` key (possibly
        empty), or ``None`` when the call budget is exhausted, the request
        fails, or the API reports an error status.

    NOTE(review): only the first page of results is returned; any
    ``next_page_token`` in the response is not followed.
    """
    global CALLS_MADE
    if CALLS_MADE >= MAX_DAILY_CALLS:
        print("已达到每日API调用限制，暂停操作。")
        return None

    params = {
        'location': location,  # "lat,lng" of the search centre
        'radius': radius,  # search radius in metres
        'type': keyword,  # place type, e.g. 'hospital' or 'school'
        'key': API_KEY
    }

    # Count the attempt before sending it so that failed requests still use
    # up budget and a broken connection cannot loop without ever hitting the cap.
    CALLS_MADE += 1
    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(places_url, params=params, timeout=30)
    except requests.RequestException as exc:
        print(f"Error: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: {response.status_code}")
        return None

    try:
        payload = response.json()
    except ValueError:
        print("Error: response body is not valid JSON")
        return None

    # The API reports quota and authorisation failures inside the JSON body
    # while still answering HTTP 200, so the status field must be checked too.
    status = payload.get('status')
    if status not in (None, 'OK', 'ZERO_RESULTS'):
        print(f"Error: {status} {payload.get('error_message', '')}")
        return None

    return payload.get('results', [])

# Convert raw Places results into a DataFrame
def places_to_dataframe(places_data):
    """Flatten Places API result dicts into a tabular DataFrame.

    Args:
        places_data: Iterable of result dicts, or ``None``/empty.

    Returns:
        A DataFrame with the columns ``name``, ``address``, ``lat``, ``lng``
        and ``place_id`` (in that order), one row per input dict. The columns
        are present even when there is no input, so a CSV written from the
        result always has a header. Fields missing from a record are left
        empty instead of raising, so one malformed record cannot discard the
        rest of the data.
    """
    columns = ['name', 'address', 'lat', 'lng', 'place_id']

    rows = []
    for place in places_data or []:
        # geometry -> location may be absent; fall back to an empty mapping.
        coords = (place.get('geometry') or {}).get('location') or {}
        rows.append({
            'name': place.get('name'),
            'address': place.get('vicinity'),
            'lat': coords.get('lat'),
            'lng': coords.get('lng'),
            'place_id': place.get('place_id'),
        })

    return pd.DataFrame(rows, columns=columns)

# Build the lat/lng grid covering the Victoria bounding box
def generate_grid():
    """Return every grid point of the bounding box as a ``"lat,lng"`` string.

    Points start at the (VIC_LAT_MIN, VIC_LNG_MIN) corner and advance in
    GRID_STEP-degree increments. Latitude is the outer (slowest-changing)
    axis, longitude the inner one.
    """
    lat_count = int((VIC_LAT_MAX - VIC_LAT_MIN) / GRID_STEP) + 1
    lng_count = int((VIC_LNG_MAX - VIC_LNG_MIN) / GRID_STEP) + 1

    return [
        f"{VIC_LAT_MIN + row * GRID_STEP},{VIC_LNG_MIN + col * GRID_STEP}"
        for row in range(lat_count)
        for col in range(lng_count)
    ]

# Fetch data for every grid location while respecting the daily call budget
def fetch_places_for_victoria(grid_locations):
    """Collect hospitals and schools around each grid location.

    Args:
        grid_locations: Iterable of ``"lat,lng"`` strings to search around.

    Returns:
        The DataFrame produced by ``places_to_dataframe`` from the collected
        results. Neighbouring searches can return the same place, so a result
        whose ``place_id`` has already been collected is skipped.

    NOTE(review): iteration always starts at the first grid location and
    stops once the call budget is used up; nothing here resumes from where
    a previous run stopped.
    """
    all_places = []
    seen_ids = set()

    for location in grid_locations:
        if CALLS_MADE >= MAX_DAILY_CALLS:
            print(f"已达到每日最大调用次数 {MAX_DAILY_CALLS}，明天继续爬取数据。")
            break

        # One request per place type; hospitals first, then schools.
        for place_type in ('hospital', 'school'):
            for place in get_places_data(place_type, location) or []:
                place_id = place.get('place_id')
                # Records without an id cannot be compared, so they are kept.
                if place_id is not None and place_id in seen_ids:
                    continue
                seen_ids.add(place_id)
                all_places.append(place)

        # Pause for 2 seconds per location to stay clear of rate limiting.
        time.sleep(2)

    return places_to_dataframe(all_places)

# Where the crawl output is stored (the folder itself is created further down).
output_folder = '../data/landing/External_data/school_hospital_data'
output_path = os.path.join(output_folder, 'hospitals_and_schools_victoria.csv')

# Build the lat/lng grid for Victoria and query it until the call budget is spent.
grid_locations = generate_grid()
df_all_places = fetch_places_for_victoria(grid_locations)

# Make sure the target folder exists, then write the collected places as CSV.
os.makedirs(output_folder, exist_ok=True)
df_all_places.to_csv(output_path, index=False)

print(f"今天已爬取 {CALLS_MADE} 次API调用，数据已保存到 {output_path}。")


已达到每日最大调用次数 6000，明天继续爬取数据。
今天已爬取 6000 次API调用，数据已保存到 ../data/landing/External_data/school_hospital_data/hospitals_and_schools_victoria.csv。
