In [None]:
#!/usr/bin/env python3
import os
import glob
import pandas as pd
import datetime
import numpy as np
from timezonefinder import TimezoneFinder
import pytz

In [1]:
import os
import pandas as pd
import numpy as np
import xarray as xr
from datetime import datetime
from timezonefinder import TimezoneFinder
import pytz
import glob

# --- Configuration ---
# Station metadata text file (comma-separated; lines starting with '#' are comments).
meta_path = "/p/largedata2/detectdata/CentralDB/projects/d05/working_directory/NeuralFAS/GRDC_Meta_AIFAS.txt"
# Directory holding the per-issue-date NetCDF forecast files that get converted.
input_dir = "/p/largedata2/detectdata/CentralDB/projects/d05/working_directory/NeuralFAS/inference_aifas/s_92"  # change to your actual input dir
# Directory receiving the converted (local-time) NetCDF files; created if missing.
output_dir = "/p/largedata2/detectdata/CentralDB/projects/d05/working_directory/NeuralFAS/s_92_local"
os.makedirs(output_dir, exist_ok=True)

# --- Read metadata ---
# Load the metadata as a header-less table, ignoring '#' comments.
# NOTE(review): `meta` is not referenced by any later cell visible here; the
# file is parsed again by hand when `selected_data` is built — confirm whether
# this read is still needed.
meta = pd.read_csv(meta_path, comment="#", header=None)




In [2]:
meta_path = "/p/largedata2/detectdata/CentralDB/projects/d05/working_directory/NeuralFAS/GRDC_Meta_AIFAS.txt"

# Station records as (index_AIFAS, GRDC_ID, lat, lon) tuples, kept in file order.
# The position of a record in this list is later used as the station index when
# time-zone offsets are computed and applied, so the order must not change.
selected_data = []

with open(meta_path, "r", encoding="utf-8") as f:
    for raw_line in f:
        line = raw_line.strip()

        # Blank lines and '#' comment lines carry no station record.
        if line == "" or line.startswith("#"):
            continue

        fields = line.split(",")

        # A usable record needs at least nine comma-separated columns:
        # column 0 = AIFAS index, 1 = GRDC id, 7 = latitude, 8 = longitude.
        if len(fields) < 9:
            print("⚠️ Skipping malformed line:", line)
            continue

        selected_data.append(
            (fields[0], fields[1], float(fields[7]), float(fields[8]))
        )

print(f"✅ Loaded {len(selected_data)} valid stations.")

✅ Loaded 3366 valid stations.


In [3]:
# Display the parsed station tuples (index_AIFAS, GRDC_ID, lat, lon).
# NOTE(review): this renders the entire list; the trailing number is presumably
# the GRDC id being looked for (the next cell searches for 'GRDC_6836310') — confirm.
selected_data # 6836310

[('3212', 'GRDC_4210450', 72.525, -123.475),
 ('3011', 'GRDC_2999850', 72.025, 102.525),
 ('514', 'GRDC_2999150', 72.025, 114.025),
 ('515', 'GRDC_2999910', 71.875, 123.575),
 ('512', 'GRDC_2999110', 70.775, 136.125),
 ('3006', 'GRDC_2903420', 70.725, 127.425),
 ('841', 'GRDC_4101700', 70.475, -157.425),
 ('835', 'GRDC_4101400', 70.325, -149.025),
 ('839', 'GRDC_4101550', 70.225, -151.875),
 ('2706', 'GRDC_6730501', 70.075, 28.075),
 ('2705', 'GRDC_6730400', 69.875, 25.025),
 ('2720', 'GRDC_6731920', 69.825, 23.475),
 ('2743', 'GRDC_6830510', 69.825, 27.025),
 ('840', 'GRDC_4101600', 69.775, -154.675),
 ('2703', 'GRDC_6730100', 69.725, 29.425),
 ('834', 'GRDC_4101300', 69.675, -144.175),
 ('2704', 'GRDC_6730330', 69.675, 29.375),
 ('437', 'GRDC_2903981', 69.625, 147.425),
 ('513', 'GRDC_2999140', 69.575, 132.275),
 ('2742', 'GRDC_6830200', 69.525, 28.075),
 ('2719', 'GRDC_6731910', 69.475, 23.675),
 ('838', 'GRDC_4101501', 69.375, -152.075),
 ('3150', 'GRDC_4201010', 69.375, -139.525),

In [4]:
# GRDC identifier of the station to locate in `selected_data`.
target_id = 'GRDC_6836310'

# Linear scan over the (index_AIFAS, GRDC_ID, lat, lon) tuples; stop at the
# first match and report its list position, AIFAS index and coordinates.
for i, (index_aifas, grdc_id, lat, lon) in enumerate(selected_data):
    if grdc_id == target_id:
        print(f"Found at position {i}: AIFAS index = {index_aifas}, lat = {lat}, lon = {lon}")
        break
else:
    # for/else: reached only when the loop finished without hitting `break`.
    # Report the id actually searched for instead of a hard-coded literal, so
    # the message stays correct when `target_id` is changed.
    print(f"{target_id} not found in selected_data.")

Found at position 1070: AIFAS index = 2750, lat = 49.725, lon = 6.475


In [5]:
from timezonefinder import TimezoneFinder
from zoneinfo import ZoneInfo
from datetime import datetime

tf = TimezoneFinder()
# Fixed reference date used to evaluate each zone's UTC offset.
ref_date = datetime(2020, 1, 1)
# Maps list position in `selected_data` -> UTC offset in whole hours.
tz_offsets = {}

for position, (_, station_id, lat, lon) in enumerate(selected_data):
    zone_name = tf.timezone_at(lat=lat, lng=lon)

    # Default of 0 hours covers both "no zone found for these coordinates"
    # and "zone name could not be resolved" below.
    hours = 0
    if zone_name is not None:
        try:
            delta = ZoneInfo(zone_name).utcoffset(ref_date)
            # Offsets are rounded to whole hours, so fractional-hour zones
            # lose their sub-hour part.
            hours = int(round(delta.total_seconds() / 3600))
        except Exception as err:
            # Best effort: report the station and keep going with offset 0.
            print(f"Skipping {station_id} at position {position}: {err}")
            hours = 0

    tz_offsets[position] = hours

Skipping GRDC_3186500 at position 3364: 'No time zone found with key America/Coyhaique'


In [22]:
# Sanity check: number of list positions that received a UTC offset entry.
len(tz_offsets)

3366

In [44]:
# Open one file from the s_ungauged directory and keep it as `test`.
# NOTE(review): presumably used as a reference for comparing structure with the
# converted output — confirm. The dataset is not closed anywhere in the visible cells.
test = xr.open_dataset("/p/largedata2/detectdata/CentralDB/projects/d05/working_directory/NeuralFAS/inference_aifas/s_ungauged/20241015.nc")

In [28]:
# All NetCDF files in the input directory, sorted by name (names are YYYYMMDD.nc).
nc_files = sorted(f for f in os.listdir(input_dir) if f.endswith(".nc"))
if not nc_files:
    # Fail early with a readable message instead of an opaque concat error.
    raise ValueError(f"No .nc files found in {input_dir}")

# --- Extract issue timestamps from the file names ---
timestamps = [datetime.strptime(f.split(".")[0], "%Y%m%d") for f in nc_files]

# --- Read every file and collect its "dis24" variable ---
all_data = []

for f in nc_files:
    # open_dataset reads lazily, so the values are loaded into memory *before*
    # the file is closed; the `with` block also closes the file on errors.
    with xr.open_dataset(os.path.join(input_dir, f)) as ds:
        all_data.append(ds["dis24"].load())  # expected dims per file: (time, x)

# --- Concatenate into (timestamp, time, x) ---
data_stack = xr.concat(all_data, dim="timestamp")
data_stack = data_stack.assign_coords(timestamp=("timestamp", timestamps))

# data_stack now has one entry along "timestamp" per input file.
print(f"✅ 拼接完成，维度结构为: {data_stack.dims}")

✅ 拼接完成，维度结构为: ('timestamp', 'time', 'x')


In [6]:
import os
import xarray as xr
import numpy as np
from datetime import datetime
from timezonefinder import TimezoneFinder
from zoneinfo import ZoneInfo

# === Configuration ===
# Directory with the per-issue-date NetCDF files to convert.
input_dir = "/p/largedata2/detectdata/CentralDB/projects/d05/working_directory/NeuralFAS/inference_aifas/s_92"  # change to your actual input dir
# Directory receiving the converted files; created if missing.
output_dir = "/p/largedata2/detectdata/CentralDB/projects/d05/working_directory/NeuralFAS/s_92_local"
os.makedirs(output_dir, exist_ok=True)

# === Read all NetCDF files and stack them into (timestamp, time, x) ===
nc_files = sorted(f for f in os.listdir(input_dir) if f.endswith(".nc"))
if not nc_files:
    # Fail early with a readable message instead of an opaque concat error.
    raise ValueError(f"No .nc files found in {input_dir}")

# Issue timestamps parsed from the YYYYMMDD file names, as datetime64[ns].
timestamps = [
    np.datetime64(datetime.strptime(f.split(".")[0], "%Y%m%d"), "ns")
    for f in nc_files
]

all_data = []
for f in nc_files:
    # open_dataset reads lazily, so the values are loaded into memory *before*
    # the file is closed; the `with` block also closes the file on errors.
    with xr.open_dataset(os.path.join(input_dir, f)) as ds:
        all_data.append(ds["dis24"].load())  # expected dims per file: (time, x)

data_stack = xr.concat(all_data, dim="timestamp")  # (timestamp, time, x)
data_stack = data_stack.assign_coords(timestamp=("timestamp", timestamps))


# === Transform: blend neighbouring issue dates along "timestamp" per station ===
# For a station with UTC offset `h` hours (from tz_offsets, keyed by x index):
#   h > 0: local[d] = ((24 - h) * utc[d] + h * utc[d + 1]) / 24, last date -> NaN
#   h < 0: local[d] = (|h| * utc[d - 1] + (24 - |h|) * utc[d]) / 24, first date -> NaN
#   h == 0: values are left unchanged.
# NOTE(review): positive offsets are blended with the *next* issue date; confirm
# this direction matches the intended local-day convention.
# Axis positions (0 = timestamp, 1 = time, 2 = x) are assumed from the stacking step.
data_local = data_stack.copy(deep=True)

# The shifted stacks do not depend on the station or lead time, so build them
# once here instead of re-shifting the full 3-D array inside the loops.
stack_next = data_stack.shift(timestamp=-1)
stack_prev = data_stack.shift(timestamp=1)

for x in range(data_stack.shape[2]):  # for each station
    # Stations without an entry are treated as offset 0 (no change).
    offset = tz_offsets.get(x, 0)
    if offset == 0:
        continue

    # All lead times of this station at once: dims (timestamp, time).
    q = data_stack[:, :, x]
    if offset > 0:
        q_next = stack_next[:, :, x]
        data_local[:-1, :, x] = ((24 - offset) * q[:-1] + offset * q_next[:-1]) / 24
        # No following issue date exists for the last entry.
        data_local[-1, :, x] = np.nan
    else:
        abs_offset = abs(offset)
        q_prev = stack_prev[:, :, x]
        data_local[1:, :, x] = (abs_offset * q_prev[1:] + (24 - abs_offset) * q[1:]) / 24
        # No preceding issue date exists for the first entry.
        data_local[0, :, x] = np.nan




In [7]:
# === Write one NetCDF file per issue date ===
issue_dates = data_local["timestamp"].values

for day_idx, issue_date in enumerate(issue_dates):
    # Select a single issue date and drop the now-scalar "timestamp" coordinate.
    one_day = data_local.isel(timestamp=day_idx).drop_vars("timestamp")

    # Output file is named after the issue date: YYYYMMDD.nc
    file_name = pd.Timestamp(issue_date).strftime("%Y%m%d") + ".nc"
    out_path = os.path.join(output_dir, file_name)

    # Wrap the array in a Dataset under the variable name "dis24" and write it.
    ds_out = xr.Dataset({"dis24": one_day})
    ds_out.to_netcdf(out_path)

print("✅ 所有文件已按天保存完成 (shape: time=7, x=3366)，完成 local time 转换。")

✅ 所有文件已按天保存完成 (shape: time=7, x=3366)，完成 local time 转换。


In [57]:
# Display the dataset held in `test` (assigned where the s_ungauged sample file is opened).
test

In [58]:
# Display `ds_out`, which after the save loop holds the Dataset written for the last issue date.
ds_out

In [59]:
# ✅ 显式设置坐标标签
one_day = one_day.assign_coords(
    time=np.arange(one_day.shape[0]),
    x=np.arange(one_day.shape[1])
)

In [62]:
# Display every entry along the first axis at index 0 of the second axis
# (presumably all lead times for the first station — confirm).
one_day[:,0]