In [1]:
import json
import csv

# 读取本地 JSON 文件
def read_json_file(file_path):
    """Load and return the parsed contents of a UTF-8 encoded JSON file.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The Python object obtained by decoding the file's JSON document.
    """
    with open(file_path, 'r', encoding='utf-8') as source:
        parsed = json.load(source)
    return parsed

# 筛选数据函数
def filter_data(data, keyword='山海关'):
    """Keep only the records whose name or address mentions ``keyword``.

    Args:
        data: Iterable of dict records carrying "Name" and "Address" entries.
        keyword: Substring to look for. Defaults to '山海关', the value that
            was previously hard-coded, so existing calls behave the same.

    Returns:
        A new list holding the matching records in their original order.
    """
    def mentions(value):
        # A JSON null decodes to None (and a key may be absent); applying
        # `in` to None raises TypeError, so only real strings are searched.
        return isinstance(value, str) and keyword in value

    return [
        item for item in data
        if mentions(item.get("Name")) or mentions(item.get("Address"))
    ]

# 将数据保存为 CSV 文件
def save_to_csv(data, file_path):
    """Write the records in ``data`` to a UTF-8 CSV file with a fixed header.

    Args:
        data: Iterable of dicts keyed by the column names listed below.
        file_path: Destination path; the file is created or overwritten.
    """
    # Column order of the output file; also written as the header row.
    columns = ["Name", "Website", "Address", "Phone", "Latitude", "Longitude", "City"]

    # newline='' leaves line-ending handling to the csv module.
    with open(file_path, 'w', newline='', encoding='utf-8') as out_file:
        csv_writer = csv.DictWriter(out_file, fieldnames=columns)
        csv_writer.writeheader()
        csv_writer.writerows(data)

# 主函数
def main(json_file_path='output.json',
         cleaned_json_path='cleaned_data.json',
         cleaned_csv_path='cleaned_data.csv'):
    """Filter the scraped records and save the result as JSON and CSV.

    The defaults are the file names that were previously hard-coded, so
    calling ``main()`` with no arguments behaves exactly as before.

    Args:
        json_file_path: JSON file the raw records are read from.
        cleaned_json_path: Where the filtered records are written as JSON.
        cleaned_csv_path: Where the filtered records are written as CSV.
    """
    # Load the raw records.
    data = read_json_file(json_file_path)

    # Keep only the records selected by filter_data.
    cleaned_data = filter_data(data)

    # Echo the filtered records (optional; handy for a quick visual check).
    print(json.dumps(cleaned_data, ensure_ascii=False, indent=2))

    # Persist the filtered records as JSON ...
    with open(cleaned_json_path, 'w', encoding='utf-8') as f:
        json.dump(cleaned_data, f, ensure_ascii=False, indent=2)

    # ... and as CSV.
    save_to_csv(cleaned_data, cleaned_csv_path)

    print(f"清洗后的数据已保存到 '{cleaned_json_path}' 和 '{cleaned_csv_path}' 文件中。")

# Run the pipeline only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


[
  {
    "Name": "大悲院",
    "Website": "https://you.ctrip.com/sight/qinhuangdao132/123777.html?scene=online",
    "Address": "河北省秦皇岛市山海关区古城东北片区",
    "Phone": null,
    "Latitude": null,
    "Longitude": null,
    "City": "QingHuangDao"
  },
  {
    "Name": "燕塞湖景区-凉亭",
    "Website": "https://you.ctrip.com/sight/qinhuangdao132/67629241.html?scene=online",
    "Address": "秦皇岛市山海关区城西北7华里燕塞湖景区内",
    "Phone": null,
    "Latitude": null,
    "Longitude": null,
    "City": "QingHuangDao"
  },
  {
    "Name": "燕塞湖鸟语林",
    "Website": "https://you.ctrip.com/sight/qinhuangdao132/5039.html?scene=online",
    "Address": "秦皇岛山海关城西北3.5公里处",
    "Phone": "0335-5158828",
    "Latitude": null,
    "Longitude": null,
    "City": "QingHuangDao"
  },
  {
    "Name": "五佛山国际生态旅游度假区",
    "Website": "https://you.ctrip.com/sight/qinhuangdao132/123747.html?scene=online",
    "Address": "河北省秦皇岛市山海关区附近长寿山景区内",
    "Phone": null,
    "Latitude": null,
    "Longitude": null,
    "City": "QingHuangDao"
  },
  {


In [5]:
import json
import os
import time

import requests

# 百度地图 API 地址和密钥
# Geocoding endpoint. HTTPS keeps the `ak` query parameter out of plaintext traffic.
BAIDU_MAP_API_URL = "https://api.map.baidu.com/geocoding/v3/"
# The access key is read from the BAIDU_API_KEY environment variable so it is
# not stored in the notebook; it is an empty string when the variable is unset.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and rotated.
BAIDU_API_KEY = os.environ.get("BAIDU_API_KEY", "")

# 读取 JSON 文件
def read_json_file(file_path):
    """Read a UTF-8 text file and decode its contents as JSON.

    Args:
        file_path: Location of the JSON document on disk.

    Returns:
        The decoded Python object (whatever the document's top level holds).
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)

# 写入 JSON 文件
def write_json_file(data, file_path):
    """Serialize ``data`` as indented JSON into a UTF-8 file.

    Non-ASCII characters are written as-is rather than as \\uXXXX escapes.

    Args:
        data: JSON-serializable object to store.
        file_path: Destination path; the file is created or overwritten.
    """
    dump_options = {"ensure_ascii": False, "indent": 2}
    with open(file_path, mode='w', encoding='utf-8') as sink:
        json.dump(data, sink, **dump_options)

# 获取地址的经纬度
def get_latitude_longitude(address, timeout=10):
    """Look up the coordinates of ``address`` through the geocoding API.

    Args:
        address: Free-text address to geocode.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(lat, lng)`` tuple taken from ``result.location`` in the response,
        or ``(None, None)`` when the address is empty, the API reports a
        non-zero status, or the request/response handling fails. Failures are
        printed rather than raised.
    """
    if not address:
        # Nothing to geocode; skip the network round-trip.
        return None, None

    params = {
        'address': address,
        'ak': BAIDU_API_KEY,
        'output': 'json'
    }
    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.get(BAIDU_MAP_API_URL, params=params, timeout=timeout)
        response.raise_for_status()
        result = response.json()

        if result.get('status') == 0:  # request succeeded
            location = result['result']['location']
            return location['lat'], location['lng']

        # An error payload may not carry 'msg'; fall back to 'message' and
        # finally to the numeric status so the real cause is still reported
        # instead of a KeyError on the missing field.
        reason = (
            result.get('msg')
            or result.get('message')
            or f"status={result.get('status')}"
        )
        print(f"获取地址 {address} 的经纬度失败，错误信息: {reason}")
        return None, None
    except Exception as e:
        # Deliberately broad: one failed lookup is reported and skipped so a
        # caller looping over many addresses can keep going.
        print(f"请求百度地图 API 时出错: {e}")
        return None, None

# 更新数据中的经纬度
def update_latitude_longitude(data, delay=2):
    """Fill in "Latitude" and "Longitude" for every record that has an address.

    Records are modified in place. Records with an empty or missing
    "Address" are left untouched. A failed lookup stores ``None`` in both
    fields.

    Args:
        data: Iterable of dict records with "Name" and "Address" entries.
        delay: Seconds to pause after each lookup to stay under the API's
            request-rate limit. Defaults to 2, the previously hard-coded
            value; pass 0 to disable the pause.

    Returns:
        The same ``data`` object, after its records have been updated.
    """
    for item in data:
        address = item.get("Address")
        if not address:
            continue

        name = item.get("Name")
        # The name is prepended to the address to form the query. A null name
        # would make `name + address` raise TypeError, so treat it as empty.
        query = (name or "") + address

        lat, lng = get_latitude_longitude(query)
        item["Latitude"] = lat
        item["Longitude"] = lng
        print("Name: ", name, "  Address: ", address, "  Latitude: ", lat, "  Longitude: ", lng)

        if delay > 0:
            time.sleep(delay)
    return data

# 主函数
def main(json_file_path='cleaned_data.json', output_path='updated_data.json'):
    """Geocode the cleaned records and save them to a new JSON file.

    Args:
        json_file_path: JSON file holding the records to geocode. Defaults to
            'cleaned_data.json' relative to the working directory, replacing
            the absolute, machine-specific path that was hard-coded before.
        output_path: Where the updated records are written; defaults to the
            previously hard-coded 'updated_data.json'.
    """
    # Load the records to geocode.
    data = read_json_file(json_file_path)

    # Look up and store the coordinates of each record.
    updated_data = update_latitude_longitude(data)

    # Save the updated records.
    write_json_file(updated_data, output_path)

    print(f"经纬度已更新并保存至 '{output_path}' 文件。")

# Run the geocoding step only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()


Name:  大悲院   Address:  河北省秦皇岛市山海关区古城东北片区   Latitude:  40.01036289252789   Longitude:  119.7679244359144
Name:  燕塞湖景区-凉亭   Address:  秦皇岛市山海关区城西北7华里燕塞湖景区内   Latitude:  40.04365283669972   Longitude:  119.7129434675151
Name:  燕塞湖鸟语林   Address:  秦皇岛山海关城西北3.5公里处   Latitude:  40.04950910514002   Longitude:  119.71105909204682
Name:  五佛山国际生态旅游度假区   Address:  河北省秦皇岛市山海关区附近长寿山景区内   Latitude:  40.07349606313338   Longitude:  119.74691100393333
Name:  山海关长城文化奇观园   Address:  河北省秦皇岛市山海关区第一关镇老龙头景区   Latitude:  39.97531919572714   Longitude:  119.8049934619364
Name:  山海关总兵府   Address:  秦皇岛市山海关区内古城西大街16号   Latitude:  40.01498873566932   Longitude:  119.76155724645969
Name:  山海关长城文化奇观园-山海行宫   Address:  秦皇岛市山海关区龙海大道1号   Latitude:  39.97329044592302   Longitude:  119.8034495442564
Name:  秦宫   Address:  秦皇岛市山海关区城东约6公里的望夫石村后山岗上孟姜女庙景区内   Latitude:  40.04253409379664   Longitude:  119.82935802262243
Name:  角山景区   Address:  秦皇岛市山海关区角山路山海关城北约3公里   Latitude:  40.05254605676165   Longitude:  119.7410960028756
Na

In [9]:
# Load the records from 'updated_data.json'.
# NOTE(review): read_json_file is not defined in this cell — presumably it comes
# from an earlier cell that must have been run first; confirm.
json_file_path = 'updated_data.json'
update_data = read_json_file(json_file_path)
# Print the whole loaded structure for inspection.
print(update_data)

# 将数据保存为 CSV 文件，确保 Excel 正确读取 UTF-8 编码
def save_to_csvfile(data, file_path):
    """Write ``data`` to a CSV file encoded as UTF-8 with a leading BOM.

    The 'utf-8-sig' encoding prepends a byte-order mark, which lets Excel
    recognise the file as UTF-8.

    Args:
        data: Iterable of dicts keyed by the column names listed below.
        file_path: Destination path; the file is created or overwritten.
    """
    # Header row / column order of the output file.
    header = ["Name", "Website", "Address", "Phone", "Latitude", "Longitude", "City"]

    with open(file_path, 'w', newline='', encoding='utf-8-sig') as csv_file:
        dict_writer = csv.DictWriter(csv_file, fieldnames=header)
        dict_writer.writeheader()
        dict_writer.writerows(data)

# Export the loaded records to 'updated_data.csv'.
save_to_csvfile(update_data, 'updated_data.csv')


[{'Name': '大悲院', 'Website': 'https://you.ctrip.com/sight/qinhuangdao132/123777.html?scene=online', 'Address': '河北省秦皇岛市山海关区古城东北片区', 'Phone': None, 'Latitude': 40.01036289252789, 'Longitude': 119.7679244359144, 'City': 'QingHuangDao'}, {'Name': '燕塞湖景区-凉亭', 'Website': 'https://you.ctrip.com/sight/qinhuangdao132/67629241.html?scene=online', 'Address': '秦皇岛市山海关区城西北7华里燕塞湖景区内', 'Phone': None, 'Latitude': 40.04365283669972, 'Longitude': 119.7129434675151, 'City': 'QingHuangDao'}, {'Name': '燕塞湖鸟语林', 'Website': 'https://you.ctrip.com/sight/qinhuangdao132/5039.html?scene=online', 'Address': '秦皇岛山海关城西北3.5公里处', 'Phone': '0335-5158828', 'Latitude': 40.04950910514002, 'Longitude': 119.71105909204682, 'City': 'QingHuangDao'}, {'Name': '五佛山国际生态旅游度假区', 'Website': 'https://you.ctrip.com/sight/qinhuangdao132/123747.html?scene=online', 'Address': '河北省秦皇岛市山海关区附近长寿山景区内', 'Phone': None, 'Latitude': 40.07349606313338, 'Longitude': 119.74691100393333, 'City': 'QingHuangDao'}, {'Name': '山海关长城文化奇观园', 'Website': '