In [4]:
import glob
import xarray as xr
import numpy as np
from datetime import datetime
import re
from collections import defaultdict
import os
# ---------------------------
# 1. Collect every target file with glob and sort the resulting paths
# ---------------------------
patterns = ['/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/*',]

# Flatten the matches of all patterns into one list in lexicographic order
# (the recorded listing shows YYYYMMDD.nc names, so this is presumably
# chronological as well).
all_files = sorted(
    matched_path
    for pattern in patterns
    for matched_path in glob.glob(pattern)
)

In [6]:
# Spot-check: display entries 80-95 of the sorted file list to confirm the
# paths have the expected .../regrid_cons/YYYYMMDD.nc form.
all_files[80:96]

['/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800820.nc',
 '/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800821.nc',
 '/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800822.nc',
 '/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800823.nc',
 '/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800824.nc',
 '/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800825.nc',
 '/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800826.nc',
 '/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800827.nc',
 '/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800828.nc',
 '/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800829.nc',
 '/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800830.nc',
 '/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800831.nc',
 '/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800901.nc',
 '/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800902.nc',
 '/N/project/Zli_lab/gongg/CONUS404_data/regrid_cons/19800903.

In [11]:
# 2. Bucket the daily files by (year, month), both read from the file name.
month_groups = defaultdict(list)
# Captures the 8-digit date stamp (e.g. 20020601) of a path ending in /<8 digits>.nc
pattern = re.compile(r".*/(\d{8})\.nc$")

for fpath in all_files:
    match = pattern.match(fpath)
    if match is None:
        # Path does not end in /<8 digits>.nc: leave it out of every group.
        continue
    date_str = match.group(1)                    # e.g. '20020601'
    year, month = date_str[:4], date_str[4:6]    # e.g. '2002', '06'
    month_groups[year, month].append(fpath)

# 3. Open the reference dataset ds_ref, which is used to build the spatial mask
ds_ref = xr.open_dataset("/N/project/Zli_lab/gongg/regrid/ref_.1deg.nc")

# Longitude bands and the UTC offset (in hours) paired with each one.
# Consecutive edges form the (lon_min, lon_max) tuples; the loop that consumes
# them tests lon >= lon_min and lon < lon_max, i.e. half-open intervals.
lon_edges = [-np.inf, -112.5, -97.5, -82.5, np.inf]
lon_ranges = list(zip(lon_edges[:-1], lon_edges[1:]))
utc_offsets = [-8, -7, -6, -5]

# Output directory for the time-shifted files; created if it does not exist
output_dir = "/N/project/Zli_lab/gongg/CONUS404_data/LST0.1"
os.makedirs(output_dir, exist_ok=True)

In [12]:
# Number of files written so far; drives the periodic progress timestamps.
global_counter = 0

# True where the first time step of the reference "tp" field is missing.
# The mask does not depend on the month, so it is built once, not per month.
mask_nan = ds_ref["tp"].isel(time=0).isnull()

for (year, month), file_list in sorted(month_groups.items()):
    # The context manager closes this month's source files once all of its
    # longitude bands have been written, so file handles do not accumulate
    # over the whole run.
    with xr.open_mfdataset(file_list, combine='by_coords') as ds_month:
        # Blank out every cell that is missing in the reference grid.
        ds_CONUS = ds_month.where(~mask_nan)

        for (lon_min, lon_max), offset in zip(lon_ranges, utc_offsets):
            # Keep only the columns inside the half-open band [lon_min, lon_max).
            mask = (ds_CONUS.lon >= lon_min) & (ds_CONUS.lon < lon_max)
            ds_lon_subset = ds_CONUS.where(mask, drop=True)
            if ds_lon_subset.lon.size == 0 or ds_lon_subset.lat.size == 0:
                # The grid has no points in this band: nothing to write.
                continue

            # Shift the time axis by the band's UTC offset (hours).
            # NOTE: the file name keeps the (year, month) of the source files,
            # so with negative offsets the first shifted time steps fall in
            # the previous calendar month.
            original_times = ds_lon_subset.time
            adjusted_times = original_times + np.timedelta64(offset, 'h')
            ds_lon_subset = ds_lon_subset.assign_coords(time=adjusted_times)

            output_filename = f"{year}{month}_U{offset:+d}.nc"
            output_path = os.path.join(output_dir, output_filename)
            ds_lon_subset.to_netcdf(output_path)

            global_counter += 1
            # Report progress only right after a write, so one timestamp is
            # printed per 100 files (never at 0, never repeated for a band
            # that was skipped).
            if global_counter % 100 == 0:
                print(datetime.now())

2025-03-29 00:20:56.276884
2025-03-29 00:23:07.133153
2025-03-29 00:25:19.283529
2025-03-29 00:27:44.928937
2025-03-29 00:29:49.642304
2025-03-29 00:31:56.922817


In [20]:
# Input and output directories
input_dir = "/N/project/Zli_lab/gongg/CONUS404_data/LST0.1"
output_dir = "/N/project/Zli_lab/gongg/CONUS404_data/JJA_LST0.1"
# Create the output directory up front so the writes below cannot fail on a
# missing folder.
os.makedirs(output_dir, exist_ok=True)

# Collect every .nc file in the input directory
all_files = glob.glob(os.path.join(input_dir, "*.nc"))

# Group the files by (year, utc_offset), both parsed from the file name
groups = {}
for file in all_files:
    basename = os.path.basename(file)      # e.g. "200506_U-8.nc"
    name = os.path.splitext(basename)[0]   # e.g. "200506_U-8"
    # The year is the first four characters
    year = name[:4]
    # The UTC part is the second underscore-separated field, e.g. "U-8"
    parts = name.split('_')
    if len(parts) < 2:
        # No underscore in the name: skip the file
        continue
    utc_str = parts[1]  # e.g., "U-8"
    # Drop the 'U' and convert to int (here -8 stands for UTC-8)
    offset = int(utc_str.replace('U', ''))
    key = (year, offset)
    groups.setdefault(key, []).append(file)


# For each group: combine its files, keep only June, July and August, and
# save the result to the output directory. Sorting the keys makes the
# processing order reproducible (glob returns files in arbitrary order).
counter = 0
for (year, offset), files in sorted(groups.items()):
    # The context manager closes the group's input files after the write.
    with xr.open_mfdataset(files, combine='by_coords') as ds:
        ds_jja = ds.where(ds.time.dt.month.isin([6, 7, 8]), drop=True)

        ds_jja = ds_jja.rename({"PREC_ACC_NC": "tp"})

        # The output name carries the absolute value of the offset, so the
        # sign is not preserved (e.g. offset -8 -> "<year>_U_8.nc").
        output_filename = f"{year}_U_{abs(offset)}.nc"
        output_path = os.path.join(output_dir, output_filename)
        ds_jja.to_netcdf(output_path)

    counter += 1
    # Print a timestamp after every 10th group as a progress indicator
    if counter % 10 == 0:
        print(datetime.now())

2025-03-29 00:43:00.397297
2025-03-29 00:43:11.562587
2025-03-29 00:43:21.935539
2025-03-29 00:43:32.668595
2025-03-29 00:43:45.059444
2025-03-29 00:43:57.333292
2025-03-29 00:44:08.333224
2025-03-29 00:44:19.799225
2025-03-29 00:44:30.529520
2025-03-29 00:44:42.087037
2025-03-29 00:44:53.127890
2025-03-29 00:45:05.131532
2025-03-29 00:45:16.699286
2025-03-29 00:45:27.616329
2025-03-29 00:45:38.311645
2025-03-29 00:45:49.950216
2025-03-29 00:46:00.369426


In [21]:
# Open every file in JJA_LST0.1 whose name starts with "2022" as one dataset.
dss = xr.open_mfdataset('/N/project/Zli_lab/gongg/CONUS404_data/JJA_LST0.1/2022*')