In [39]:
import os
import pandas as pd 
import numpy as np  # 新增，用于计算平均值
import json
from datetime import datetime
def safe_filename(name: str) -> str:
    """将名称中的特殊字符替换为下划线"""
    return name.replace(" ", "_").replace(",", "_").replace("(", "_").replace(")", "_")

src_path = "F:\DACHUANG_NETWORK\data\geo_info"
station_geo_info_path = "F:\DACHUANG_NETWORK\data\stations2uid.csv"
list_files = os.listdir(src_path)

# 读取站点地理信息
station2geo = pd.read_csv(station_geo_info_path, encoding='utf-8')
station2geo["Station Name"] = station2geo["Station Name"].apply(safe_filename)

# 初始化数据结构
#nested_data (dict): 嵌套数据，格式为 {时间戳: [{站点名称: [臭氧浓度, 经度, 纬度]}]}
nested_data = {}
station2geo_dict = {}
station2ozone_dict = {}
timestamps = set()

# 第一遍遍历：收集所有时间戳和各站点数据
for file in list_files:
    station_name = file.split(".")[0]
    station_data = pd.read_csv(os.path.join(src_path, file), encoding='utf-8')
    station_data["Time"] = station_data["Time"].apply(
         lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S").strftime("%Y-%m-%d-%H")
    )
    # 收集时间戳
    timestamps.update(station_data["Time"])
    
    # 存储站点臭氧数据（时间->值映射）
    station2ozone_dict[station_name] = dict(zip(station_data["Time"], station_data["Ozone (O₃)"]))
    
    # 存储站点地理信息
    station_info = station2geo[station2geo["Station Name"] == station_name].iloc[0]
    station2geo_dict[station_name] = [station_info["Longitude"], station_info["Latitude"]]

# 排序时间戳
timestamps = sorted(timestamps)

# 第二遍遍历：为每个站点补充缺失的时间戳，并用平均值填充
for station_name in station2ozone_dict:
    # 获取该站点原始数据
    original_data = station2ozone_dict[station_name]
    
    # 计算该站点的臭氧平均值（忽略None/NaN）
    ozone_values = list(original_data.values())
    valid_values = [v for v in ozone_values if v is not None and not np.isnan(v)]  # 过滤无效值
    mean_ozone = np.mean(valid_values) if valid_values else 0  # 如果没有有效数据，默认用0
    
    # 创建完整的时间序列数据（缺失值用平均值填充）
    complete_data = {ts: original_data.get(ts, mean_ozone) for ts in timestamps}
    new_data = list(complete_data.values())
    # 更新字典
    station2ozone_dict[station_name] = new_data
    print(f"站点 {station_name} 的臭氧数据已补全，长度为 {len(complete_data)}")
    print(f"站点 {station_name}的臭氧数据示例: {list(complete_data.items())[:20]}")

print("所有时间戳:", timestamps)
print("站点地理信息:", station2geo_dict)
print("站点臭氧数据示例:", {k: v[:3] for k,v in station2ozone_dict.items()})

站点 Henggang__Shenzhen__龙岗区_横岗街道_ 的臭氧数据已补全，长度为 846
站点 Henggang__Shenzhen__龙岗区_横岗街道_的臭氧数据示例: [('2025-03-06-00', 34.29297789336801), ('2025-03-06-01', 7.4), ('2025-03-06-02', 8.6), ('2025-03-06-09', 34.29297789336801), ('2025-03-06-12', 34.29297789336801), ('2025-03-06-13', 15.5), ('2025-03-07-04', 14.3), ('2025-03-07-05', 34.29297789336801), ('2025-03-07-06', 12.6), ('2025-03-07-08', 34.29297789336801), ('2025-03-07-09', 34.29297789336801), ('2025-03-07-10', 7.8), ('2025-03-07-12', 34.29297789336801), ('2025-03-07-13', 9.8), ('2025-03-07-14', 34.29297789336801), ('2025-03-07-15', 14.3), ('2025-03-15-20', 34.29297789336801), ('2025-03-15-21', 34.29297789336801), ('2025-03-15-22', 15.5), ('2025-03-15-23', 14.3)]
站点 Honghu__Shenzhen__罗湖区_笋岗街道_ 的臭氧数据已补全，长度为 846
站点 Honghu__Shenzhen__罗湖区_笋岗街道_的臭氧数据示例: [('2025-03-06-00', 1.7), ('2025-03-06-01', 1.7), ('2025-03-06-02', 5.7), ('2025-03-06-09', 30.129232804232807), ('2025-03-06-12', 30.129232804232807), ('2025-03-06-13', 7.8), ('2025-03-07-04', 11

In [40]:
# 生成格式为{时间戳: {站点名称: [臭氧浓度, 经度, 纬度]}}的原始数据
for index, timestamp in enumerate(timestamps):
    if timestamp not in nested_data:
        nested_data[timestamp] = {}  # 初始化空字典
    for station_name, ozone in station2ozone_dict.items():
        print(station_name, ozone[index], index)
        nested_data[timestamp][station_name] = [
            ozone[index], 
            station2geo_dict[station_name][0], 
            station2geo_dict[station_name][1]
        ]
print(nested_data)
# 将嵌套数据转换为JSON格式
json_data = json.dumps(nested_data, ensure_ascii=False, indent=4)
# 将JSON数据写入文件
output_path = "F:\DACHUANG_NETWORK\data\JSON_data_for_interpolating.json"
with open(output_path, "w", encoding="utf-8") as json_file:
    json_file.write(json_data)

Henggang__Shenzhen__龙岗区_横岗街道_ 34.29297789336801 0
Honghu__Shenzhen__罗湖区_笋岗街道_ 1.7 0
OCT__Shenzhen__南山区_沙河街道_ 29.500259403372244 0
Shenzhen__深圳_ 41.84674329501916 0
Yantian__Shenzhen__盐田区_海山街道_ 3.3 0
民治__Shenzhen__龙华区_民治街道_ 28.669250645994833 0
泥岗____深圳市泥岗_ 2.1 0
深南____深圳市深南_ 0.5 0
滨海____深圳市滨海_ 20.4 0
莲花__Shenzhen__福田区_莲花街道_ 1.3 0
通心岭子站____深圳市通心岭子站_ 1.3 0
Henggang__Shenzhen__龙岗区_横岗街道_ 7.4 1
Honghu__Shenzhen__罗湖区_笋岗街道_ 1.7 1
OCT__Shenzhen__南山区_沙河街道_ 0.5 1
Shenzhen__深圳_ 9.0 1
Yantian__Shenzhen__盐田区_海山街道_ 9.4 1
民治__Shenzhen__龙华区_民治街道_ 3.3 1
泥岗____深圳市泥岗_ 5.7 1
深南____深圳市深南_ 26.61900584795322 1
滨海____深圳市滨海_ 35.3073381294964 1
莲花__Shenzhen__福田区_莲花街道_ 31.972778561354016 1
通心岭子站____深圳市通心岭子站_ 31.972778561354016 1
Henggang__Shenzhen__龙岗区_横岗街道_ 8.6 2
Honghu__Shenzhen__罗湖区_笋岗街道_ 5.7 2
OCT__Shenzhen__南山区_沙河街道_ 1.3 2
Shenzhen__深圳_ 10.6 2
Yantian__Shenzhen__盐田区_海山街道_ 26.015364583333334 2
民治__Shenzhen__龙华区_民治街道_ 7.0 2
泥岗____深圳市泥岗_ 28.979039301310042 2
深南____深圳市深南_ 4.1 2
滨海____深圳市滨海_ 28.1 2
莲花__Shenzhen_