In [None]:
import numpy as np
import xarray as xr
import rasterio
from scipy.interpolate import griddata
from datetime import datetime
import os

# 读取极端降水指数数据
extreme_pr_path = 'G:\\lizhi_4090\\D\\lizhi\\guangdong_extreme_pre\\extreme_precipitation_indices_seasonal.nc'
extreme_pr_ds = xr.open_dataset(extreme_pr_path)

lat = extreme_pr_ds['lat'].values
lon = extreme_pr_ds['lon'].values
years = np.arange(2000, 2023)  # 注意这里范围是2000-2022（需要保证冬季1999年数据存在）
season_indices = [0, 1, 2, 3]  # 对应MAM, JJA, SON, DJF

# 定义广东略大一些的经纬度范围
lat_min, lat_max = 19, 30
lon_min, lon_max = 105, 120

# 读取并裁剪人口数据（保持原函数不变）
def read_and_clip_population_data(year):
    pop_year = int(year)
    pop_path = f'G:\\lizhi_4090\\D\\CMIP6extreme\\00 - CN051-2022\\extreme_pr\\人口暴露\\landscan-global-{pop_year}-colorized.tif'
    
    if not os.path.exists(pop_path):
        raise FileNotFoundError(f"Population data for year {pop_year} not found at {pop_path}")

    with rasterio.open(pop_path) as src:
        pop_data = src.read(1, masked=True)
        transform = src.transform
        pop_lon, pop_lat = np.meshgrid(
            np.arange(pop_data.shape[1]) * transform[0] + transform[2],
            np.arange(pop_data.shape[0]) * transform[4] + transform[5]
        )
        mask = (pop_lat >= lat_min) & (pop_lat <= lat_max) & (pop_lon >= lon_min) & (pop_lon <= lon_max)
        pop_lat = pop_lat[mask]
        pop_lon = pop_lon[mask]
        pop_data = pop_data[mask]
    return pop_lat, pop_lon, pop_data

# 插值人口数据到极端降水数据的网格上（保持原函数不变）
def interpolate_population_to_precip_grid(pop_lat, pop_lon, pop_data, precip_lat, precip_lon):
    points = np.array([pop_lon.flatten(), pop_lat.flatten()]).T
    grid_x, grid_y = np.meshgrid(precip_lon, precip_lat)
    grid_data = griddata(points, pop_data.flatten(), (grid_x, grid_y), method='linear')
    return grid_data

# 初始化暴露矩阵
exposure_indices = ['R20', 'PRCPTOT', 'R10']
pop_exposure = {index: np.zeros((len(years), 4, len(lat), len(lon)), dtype=np.float32) for index in exposure_indices}

for i, year in enumerate(years):
    print(f'Processing year: {year}')
    pop_lat, pop_lon, pop_data = read_and_clip_population_data(year)
    interpolated_pop = interpolate_population_to_precip_grid(pop_lat, pop_lon, pop_data, lat, lon)

    for index in exposure_indices:
        for season_idx in season_indices:
            # 季节索引3对应DJF，需要取前一年数据
            if season_idx == 3:
                precip_year = year - 1
            else:
                precip_year = year

            # 确保年份在可用范围内
            if precip_year not in extreme_pr_ds['year']:
                print(f"Warning: {precip_year} not available for {index} season_idx {season_idx}")
                continue

            # 使用isel选择季节索引
            precip_data = extreme_pr_ds[index].isel(season=season_idx).sel(year=precip_year).values.astype(np.float32)
            pop_exposure[index][i, season_idx, :, :] = precip_data * interpolated_pop

# 创建Xarray Dataset并保存为NetCDF文件
output_filename = 'population_exposure_2000_2022_adjusted.nc'
ds = xr.Dataset(
    {index: (('year', 'season', 'lat', 'lon'), pop_exposure[index]) for index in exposure_indices},
    coords={
        'year': years,
        'season': np.arange(4),  # season维度使用0-3索引
        'lat': lat,
        'lon': lon
    }
)

# 添加属性
ds['season'].attrs['season_names'] = '0:MAM, 1:JJA, 2:SON, 3:DJF'  # 添加季节解释
ds.year.attrs['units'] = 'year'
ds.lat.attrs['units'] = 'degrees_north'
ds.lon.attrs['units'] = 'degrees_east'
for index in exposure_indices:
    ds[index].attrs['units'] = 'people_exposed'

ds.attrs['title'] = 'Population Exposure to Extreme Precipitation (Seasonal, 2000-2022, Adjusted Winter)'
ds.attrs['history'] = f'Created {datetime.utcnow().isoformat()}'

ds.to_netcdf(output_filename)
print(f'Population exposure data for 2000-2022 has been saved to {output_filename}')


In [5]:
import numpy as np
import xarray as xr
import rasterio
from scipy.interpolate import griddata
from datetime import datetime
import os
import geopandas as gpd
from shapely.geometry import Point
import pandas as pd



# ================== 区域平均处理部分 ==================
print("\n开始计算广东省区域平均...")
years = np.arange(2000, 2023)  
# 1. 定义需要处理的指数列表（修复NameError）
exposure_indices = ['R20', 'PRCPTOT', 'R10']  # 新增定义

# 2. 读取生成的NetCDF文件
ds = xr.open_dataset('population_exposure_2000_2022_adjusted.nc')

# 3. 读取广东省边界
gd_shape = gpd.read_file(r'G:\lizhi_4090\D\lizhi\guangdong_extreme_pre\广东省\广东省市.shp')


if hasattr(gd_shape, 'union_all'):  # 兼容新旧版本
    union_geom = gd_shape.union_all()
else:
    union_geom = gd_shape.unary_union  # 回退旧方法

# 5. 创建空间掩膜
lon = ds.lon.values
lat = ds.lat.values

# 生成网格点坐标
lon_grid, lat_grid = np.meshgrid(lon, lat)
points = np.vstack([lon_grid.ravel(), lat_grid.ravel()]).T

# 转换为地理点
gdf_points = gpd.GeoDataFrame(
    geometry=[Point(x, y) for x, y in points],
    crs="EPSG:4326"
)

# 空间查询
mask = gdf_points.within(union_geom).values.reshape(len(lat), len(lon))

# 4. 计算区域平均
season_names = ['MAM', 'JJA', 'SON', 'DJF']
results = []

for index in exposure_indices:
    print(f"处理指数 {index}...")
    
    # 获取四维数据 (year, season, lat, lon)
    data = ds[index].values
    
    # 遍历每个年份和季节
    for y_idx, year in enumerate(years):
        for s_idx in range(4):
            # 提取二维空间数据
            grid_data = data[y_idx, s_idx, :, :]
            
            # 应用掩膜并计算平均
            masked_data = grid_data[mask]
            valid_values = masked_data[~np.isnan(masked_data)]
            
            if len(valid_values) > 0:
                avg_value = np.mean(valid_values)
            else:
                avg_value = np.nan
                
            results.append({
                'Year': year,
                'Season': season_names[s_idx],
                'Index': index,
                'Exposure': avg_value
            })

# 5. 转换为数据框并保存
df = pd.DataFrame(results)
pivot_df = df.pivot_table(index=['Year', 'Season'], columns='Index', values='Exposure')
pivot_df.reset_index(inplace=True)

# 按季节顺序排序
season_order = {'MAM': 0, 'JJA': 1, 'SON': 2, 'DJF': 3}
pivot_df['Season_Order'] = pivot_df['Season'].map(season_order)
pivot_df.sort_values(['Year', 'Season_Order'], inplace=True)
pivot_df.drop('Season_Order', axis=1, inplace=True)

# 保存到Excel
output_excel = 'Guangdong_Seasonal_Exposure.xlsx'
pivot_df.to_excel(output_excel, index=False)
print(f"\n结果已保存至 {output_excel}")

# 打印前5行示例
print("\n结果示例：")
print(pivot_df.head())



开始计算广东省区域平均...
处理指数 R20...
处理指数 PRCPTOT...
处理指数 R10...

结果已保存至 Guangdong_Seasonal_Exposure.xlsx

结果示例：
Index  Year Season        PRCPTOT          R10          R20
2      2000    MAM  154712.000000  4476.210938  2385.129883
1      2000    JJA  167574.890625  5078.025879  2362.354004
3      2000    SON   75309.914062  2160.932861   924.707397
0      2000    DJF   29364.595703   719.122253   209.663712
6      2001    MAM  149177.250000  4699.975098  2577.628906


In [6]:
import numpy as np
import xarray as xr
import geopandas as gpd
from scipy import stats
from shapely.geometry import Point
import warnings
warnings.filterwarnings("ignore")

# 1. 读取数据并创建掩膜
def create_mask(ds, shapefile_path):
    """创建广东省空间掩膜"""
    # 读取省界
    gd_shape = gpd.read_file(shapefile_path)
    
    # 生成网格点
    lon = ds.lon.values
    lat = ds.lat.values
    lon_grid, lat_grid = np.meshgrid(lon, lat)
    points = np.vstack([lon_grid.ravel(), lat_grid.ravel()]).T
    
    # 空间查询
    gdf_points = gpd.GeoDataFrame(
        geometry=[Point(x, y) for x, y in points],
        crs="EPSG:4326"
    )
    mask = gdf_points.within(gd_shape.unary_union).values.reshape(len(lat), len(lon))
    
    return mask.astype(bool)

# 2. 趋势计算函数
def calculate_trend(data, years):
    """计算时间序列的线性趋势和p值"""
    valid_mask = ~np.isnan(data)
    y = data[valid_mask]
    
    if len(y) < 2:  # 至少需要2个有效数据点
        return np.nan, np.nan
    
    x = years[valid_mask]
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    return slope * 10, p_value  # 返回10年趋势量级

# 3. 主处理流程
def process_trends(input_nc, output_nc, shapefile_path):
    # 读取数据
    ds = xr.open_dataset(input_nc)
    mask = create_mask(ds, shapefile_path)
    
    # 初始化存储数组
    trend = {}
    pvalue = {}
    for var in ['R20', 'PRCPTOT', 'R10']:
        trend[var] = np.full((4, *ds[var].shape[-2:]), np.nan, dtype=np.float32)
        pvalue[var] = np.full((4, *ds[var].shape[-2:]), np.nan, dtype=np.float32)
    
    # 获取时间序列
    years = ds.year.values.astype(int)
    
    # 遍历每个格点
    for lat_idx in range(len(ds.lat)):
        for lon_idx in range(len(ds.lon)):
            if not mask[lat_idx, lon_idx]:  # 仅处理广东范围内的点
                continue
                
            print(f"Processing grid: lat={ds.lat[lat_idx].item():.2f}, lon={ds.lon[lon_idx].item():.2f}")
            
            for var in ['R20', 'PRCPTOT', 'R10']:
                # 提取四维数据 (year, season, lat, lon)
                data = ds[var][:, :, lat_idx, lon_idx].values
                
                for season in range(4):
                    ts = data[:, season]
                    slope, p = calculate_trend(ts, years)
                    trend[var][season, lat_idx, lon_idx] = slope
                    pvalue[var][season, lat_idx, lon_idx] = p
    
    # 创建输出数据集
    coords = {
        'season': np.arange(4),
        'lat': ds.lat,
        'lon': ds.lon
    }
    
    ds_out = xr.Dataset(
        data_vars={
            **{f"{var}_trend": (['season', 'lat', 'lon'], trend[var]) for var in ['R20', 'PRCPTOT', 'R10']},
            **{f"{var}_pvalue": (['season', 'lat', 'lon'], pvalue[var]) for var in ['R20', 'PRCPTOT', 'R10']}
        },
        coords=coords
    )
    
    # 添加元数据
    ds_out['season'].attrs['description'] = '0:MAM, 1:JJA, 2:SON, 3:DJF'
    for var in ['R20', 'PRCPTOT', 'R10']:
        ds_out[f"{var}_trend"].attrs['units'] = 'people/year'
        ds_out[f"{var}_trend"].attrs['long_name'] = f'{var} decadal trend'
        ds_out[f"{var}_pvalue"].attrs['long_name'] = f'{var} trend significance p-value'
    
    ds_out.to_netcdf(output_nc)
    print(f"结果已保存至 {output_nc}")

# 4. 执行处理
if __name__ == "__main__":
    input_file = "population_exposure_2000_2022_adjusted.nc"
    output_file = "guangdong_trends_significance.nc"
    shapefile = r"G:\lizhi_4090\D\lizhi\guangdong_extreme_pre\广东省\广东省市.shp"
    
    process_trends(input_file, output_file, shapefile)


Processing grid: lat=20.25, lon=109.95
Processing grid: lat=20.25, lon=110.15
Processing grid: lat=20.35, lon=109.95
Processing grid: lat=20.35, lon=110.05
Processing grid: lat=20.35, lon=110.15
Processing grid: lat=20.35, lon=110.25
Processing grid: lat=20.35, lon=110.35
Processing grid: lat=20.35, lon=110.45
Processing grid: lat=20.45, lon=109.95
Processing grid: lat=20.45, lon=110.05
Processing grid: lat=20.45, lon=110.15
Processing grid: lat=20.45, lon=110.25
Processing grid: lat=20.45, lon=110.35
Processing grid: lat=20.45, lon=110.45
Processing grid: lat=20.55, lon=109.85
Processing grid: lat=20.55, lon=109.95
Processing grid: lat=20.55, lon=110.05
Processing grid: lat=20.55, lon=110.15
Processing grid: lat=20.55, lon=110.25
Processing grid: lat=20.55, lon=110.35
Processing grid: lat=20.55, lon=110.45
Processing grid: lat=20.65, lon=109.75
Processing grid: lat=20.65, lon=109.85
Processing grid: lat=20.65, lon=109.95
Processing grid: lat=20.65, lon=110.05
Processing grid: lat=20.6