In [6]:
# filter_grids_sc_v2.py
# -*- coding: utf-8 -*-
from __future__ import annotations

import os
from pathlib import Path
from typing import Optional, List, Dict

import numpy as np
import pandas as pd
import xarray as xr
import pygmt

# ========= Configuration (edit as needed) =========
# Input grid to be filtered and the directory that receives every output file.
IN_PATH = r"E:\wjy\Gravity\SCS_Gravity\out\Outdata\Sediment_Correction_expand_crop\CBA_sed_expand5.0deg_5.0deg_rho2300.nc"
OUT_DIR = Path(r"E:\wjy\Gravity\SCS_Gravity\out\Outdata\filter")

# Low-pass kernel "diameter" (km); this is not a hard cut-off wavelength.
# 100-300 is suggested as the main trial band for Moho work.
WIDTHS_KM = [100, 150, 200, 300, 400, 500, 600]

REGION_TARGET = [111, 117, 14.5, 18.5]  # final output region / region used by IGMAS+
REGION_WORK: Optional[List[float]] = None  # None = filter over the whole input grid (the expanded area)

# Threads: avoid over-subscription from OMP stacking with `cores` (tune per machine).
# setdefault leaves an already-exported OMP_NUM_THREADS untouched.
os.environ.setdefault("OMP_NUM_THREADS", "1")
# Side effect at import/run time: the output directory is created if missing.
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Progress bar is optional: if tqdm cannot be imported, fall back to a
# pass-through that returns the iterable unchanged and ignores keyword options.
try:
    from tqdm.auto import tqdm
except Exception:
    def tqdm(x, **k):  # type: ignore
        return x


# ========= I/O & 对齐 =========
def _pick_data_var(ds: xr.Dataset) -> xr.DataArray:
    """Choose the data variable of *ds* that most likely holds the grid.

    Preference order:
      1. the first 2-D variable whose dims are a recognised axis pair
         (lat/lon, y/x or latitude/longitude);
      2. otherwise the first 2-D variable;
      3. otherwise the first data variable of any rank.
    """
    known_axis_pairs = ({"lat", "lon"}, {"y", "x"}, {"latitude", "longitude"})

    # Collect 2-D candidates once, in the dataset's own variable order.
    planar = []
    for name in ds.data_vars:
        candidate = ds[name]
        if candidate.ndim == 2:
            planar.append(candidate)

    for candidate in planar:
        dim_names = set(candidate.dims)
        if any(pair <= dim_names for pair in known_axis_pairs):
            return candidate

    if planar:
        return planar[0]
    return next(iter(ds.data_vars.values()))


def open_any_grid(path: str, varname: str = "z") -> xr.DataArray:
    """Load a 2-D grid from a NetCDF/GRD file or a whitespace-separated text table.

    Parameters
    ----------
    path:
        File to read. Suffixes ``.nc``, ``.grd`` and ``.nc4`` are opened with
        xarray; anything else is parsed as a three-column ``lon lat val``
        text table (``#`` starts a comment).
    varname:
        Preferred data-variable name for dataset files. When it is absent the
        variable is chosen by ``_pick_data_var``.

    Returns
    -------
    xr.DataArray
        Array named ``"z"`` with coordinates ``lat`` and ``lon`` sorted
        ascending.
    """
    p = Path(path)
    if p.suffix.lower() in (".nc", ".grd", ".nc4"):
        # Load eagerly inside the context manager so the file handle is
        # released instead of staying open for the life of the process.
        with xr.open_dataset(p) as ds:
            da = ds[varname] if varname in ds.data_vars else _pick_data_var(ds)
            da = da.squeeze().load()

        # Normalise axis names: dimension aliases first, then coordinate aliases.
        for old, new in (("x", "lon"), ("y", "lat")):
            if old in da.dims:
                da = da.rename({old: new})
        for old, new in (("longitude", "lon"), ("latitude", "lat")):
            if old in da.coords:
                da = da.rename({old: new})

        return da.sortby(["lat", "lon"]).rename("z")

    # sep=r"\s+" is the documented replacement for the deprecated
    # delim_whitespace=True.
    df = pd.read_csv(p, sep=r"\s+", header=None,
                     names=["lon", "lat", "val"], comment="#")

    # Round the coordinates *before* pivoting so the pivot labels and the
    # reindex labels are the same values; rounding only the unique() result
    # could leave them mismatched and fill whole rows/columns with NaN.
    df["lon"] = df["lon"].round(10)
    df["lat"] = df["lat"].round(10)
    lons = np.sort(df["lon"].unique())
    lats = np.sort(df["lat"].unique())

    Z = df.pivot(index="lat", columns="lon", values="val").reindex(index=lats, columns=lons).values
    return xr.DataArray(Z, coords={"lat": lats, "lon": lons}, dims=("lat", "lon"), name="z").sortby(["lat", "lon"])


def force_latlon_dims(da: xr.DataArray) -> xr.DataArray:
    """Return *da* with axes named ``lat``/``lon``, ordered (lat, lon), named ``"z"``.

    Coordinates called ``latitude``/``longitude`` and dimensions called
    ``y``/``x`` are renamed to ``lat``/``lon`` before the transpose.
    """
    for old, new in (("latitude", "lat"), ("longitude", "lon")):
        if old in da.coords:
            da = da.rename({old: new})

    # Rename whichever of the y/x dimensions are present in a single call.
    dim_map = {old: new for old, new in (("y", "lat"), ("x", "lon")) if old in da.dims}
    if dim_map:
        da = da.rename(dim_map)

    ordered = da.transpose("lat", "lon")
    return ordered.rename("z")


def align_to_1min_gridline(da: xr.DataArray, region: Optional[List[float]] = None) -> xr.DataArray:
    """Resample *da* with ``pygmt.grdsample`` (spacing ``"1m"``, gridline registration).

    When *region* is None the grid's own lon/lat extent is used, with the
    minima floored and the maxima ceiled to whole numbers. The result is
    passed through ``force_latlon_dims`` before being returned.
    """
    if region is None:
        west, east = float(da.lon.min()), float(da.lon.max())
        south, north = float(da.lat.min()), float(da.lat.max())
        # Snap outward to whole numbers.
        region = [np.floor(west), np.ceil(east), np.floor(south), np.ceil(north)]

    resampled = pygmt.grdsample(da, region=region, spacing="1m", registration="gridline")
    return force_latlon_dims(resampled)


def save_grid_as_txt(da: xr.DataArray, out_txt: Path) -> None:
    """Dump *da* to *out_txt* as a space-separated x y z table.

    The table comes from ``pygmt.grd2xyz`` (x = lon, y = lat) and is written
    without header or index, six decimals per value.
    """
    xyz_table = pygmt.grd2xyz(da, output_type="pandas")
    xyz_table.to_csv(
        out_txt,
        sep=" ",
        header=False,
        index=False,
        float_format="%.6f",
    )


def safe_inner_region(region: List[float], half_width_km: float, lat_ref: float) -> Optional[List[float]]:
    """Shrink *region* inward by *half_width_km* on every side.

    The margin is converted to degrees with 110.574 km per degree of latitude
    and 111.320 * cos(lat_ref) km per degree of longitude.

    Returns the shrunken ``[lon_min, lon_max, lat_min, lat_max]``, or None
    when the longitude scale is not positive or the margins consume the
    whole region.
    """
    west, east, south, north = region

    km_per_deg_lon = 111.320 * np.cos(np.deg2rad(lat_ref))
    if km_per_deg_lon <= 0:
        return None

    margin_lat = half_width_km / 110.574
    margin_lon = half_width_km / km_per_deg_lon

    new_west = west + margin_lon
    new_east = east - margin_lon
    new_south = south + margin_lat
    new_north = north - margin_lat

    # Margins met or crossed: nothing is left to return.
    if new_west >= new_east or new_south >= new_north:
        return None
    return [new_west, new_east, new_south, new_north]


# ========= 1) Load & align (key point: filtering runs over the WORK region = the full expanded grid) =========
grid = open_any_grid(IN_PATH, "z")
grid = grid.where(np.isfinite(grid))  # non-finite cells become NaN

grid = align_to_1min_gridline(grid, REGION_WORK)   # key change: do not crop to the target region here
grid = force_latlon_dims(grid)

# Deep copies of the aligned coordinates; they are re-attached to every
# filtered grid in the loop below.
lon_t = grid.lon.copy(deep=True)
lat_t = grid.lat.copy(deep=True)
lat_ref = float(lat_t.mean())  # mean latitude of the work grid (not read again in the visible code)

# Actual extent of the aligned work grid, rounded to 12 decimals; only printed at the end.
REGION_WORK_EXACT = [
    float(np.round(float(grid.lon.min()), 12)),
    float(np.round(float(grid.lon.max()), 12)),
    float(np.round(float(grid.lat.min()), 12)),
    float(np.round(float(grid.lat.max()), 12)),
]

# Input file name without suffix; prefix of every output file name.
stem = Path(IN_PATH).stem

# ========= 2) Multi-width filtering (work region) -> cut to target region and write =========
# Target-region results keyed by filter width in km; lp is reused by the band-pass step.
lp: Dict[int, xr.DataArray] = {}
hp: Dict[int, xr.DataArray] = {}

for w in tqdm(WIDTHS_KM, desc="Filtering (PyGMT grdfilter)"):
    # Low-pass over the work region.
    # NOTE(review): "g<w>" with distance="4" is presumably a Gaussian filter whose
    # width is given in km on a geographic grid, and fg=True presumably reaches
    # GMT as "-fg" -- confirm both against the grdfilter documentation.
    g_lp = pygmt.grdfilter(grid, filter=f"g{w}", distance="4", cores=0, fg=True)
    g_lp = force_latlon_dims(g_lp).assign_coords(lon=lon_t, lat=lat_t).rename("z")

    # High-pass over the work region (same call with "+h" appended to the filter spec)
    g_hp = pygmt.grdfilter(grid, filter=f"g{w}+h", distance="4", cores=0, fg=True)
    g_hp = force_latlon_dims(g_hp).assign_coords(lon=lon_t, lat=lat_t).rename("z")

    # Cut to the target region (the area actually handed to IGMAS+)
    lp_t = pygmt.grdcut(g_lp, region=REGION_TARGET)
    hp_t = pygmt.grdcut(g_hp, region=REGION_TARGET)
    lp_t = force_latlon_dims(lp_t).rename("z")
    hp_t = force_latlon_dims(hp_t).rename("z")

    lp[w] = lp_t
    hp[w] = hp_t

    # Output: target region, as text table and NetCDF
    save_grid_as_txt(lp_t, OUT_DIR / f"{stem}_lp_{w}km_target.txt")
    save_grid_as_txt(hp_t, OUT_DIR / f"{stem}_hp_{w}km_target.txt")
    lp_t.to_netcdf(OUT_DIR / f"{stem}_lp_{w}km_target.nc")
    hp_t.to_netcdf(OUT_DIR / f"{stem}_hp_{w}km_target.nc")

    # Output: trusted inner region (target region trimmed by half the filter
    # width, using the target's mid latitude); skipped when nothing remains.
    inner = safe_inner_region(REGION_TARGET, half_width_km=w / 2.0, lat_ref=float((REGION_TARGET[2] + REGION_TARGET[3]) / 2.0))
    if inner is not None:
        lp_in = force_latlon_dims(pygmt.grdcut(lp_t, region=inner)).rename("z")
        hp_in = force_latlon_dims(pygmt.grdcut(hp_t, region=inner)).rename("z")
        save_grid_as_txt(lp_in, OUT_DIR / f"{stem}_lp_{w}km_inner.txt")
        save_grid_as_txt(hp_in, OUT_DIR / f"{stem}_hp_{w}km_inner.txt")
        lp_in.to_netcdf(OUT_DIR / f"{stem}_lp_{w}km_inner.nc")
        hp_in.to_netcdf(OUT_DIR / f"{stem}_hp_{w}km_inner.nc")

# ========= 3) Optional: band-pass recommended for Moho (drop short and very long wavelengths) =========
# Rule of thumb: band = LP(100~200) - LP(500~600) as a candidate "Moho-first" field.
# Pairs whose widths were not both filtered above are skipped.
for w_lo, w_hi in [(100, 500), (150, 600), (200, 600)]:
    if w_lo in lp and w_hi in lp:
        band = (lp[w_lo] - lp[w_hi]).rename("z")
        save_grid_as_txt(band, OUT_DIR / f"{stem}_band_lp{w_lo}_minus_lp{w_hi}_target.txt")
        band.to_netcdf(OUT_DIR / f"{stem}_band_lp{w_lo}_minus_lp{w_hi}_target.nc")

print("滤波完成，输出目录：", OUT_DIR)
print("WORK region (exact):", REGION_WORK_EXACT)
print("TARGET region:", REGION_TARGET)


Filtering (PyGMT grdfilter): 100%|██████████| 7/7 [01:40<00:00, 14.38s/it]


滤波完成，输出目录： E:\wjy\Gravity\SCS_Gravity\out\Outdata\filter
WORK region (exact): [111.016666666667, 116.983333333333, 14.516666666667, 18.483333333333]
TARGET region: [111, 117, 14.5, 18.5]


In [12]:
# convert_cba_to_igmas_official_style_linear.py
# -*- coding: utf-8 -*-
"""
将 CBA lon/lat/gz 三列表转换为 IGMAS+ 官方示例格式 CSV：
- 表头: "x" "y" "z" "measured z component"
- x, y 单位为 km
- 坐标原点 (0,0) = 区域左下角 (lon_min, lat_min)
- 使用简单等距圆柱近似: 
    x = (lon - lon_min) * 111.2 * cos(lat_ref)
    y = (lat - lat_min) * 111.2
"""

from pathlib import Path
import math

# ---------- Configuration ----------

# 1. Input file (three-column lon lat gz table)
IN_TXT  = r"E:\wjy\Gravity\SCS_Gravity\out\Outdata\filter\CBA_sed_expand5.0deg_5.0deg_rho2300_band_lp200_minus_lp600_target.txt"

# 2. Output file (IGMAS+ style CSV)
OUT_DIR = Path(r"E:\wjy\Gravity\SCS_Gravity\out\Outdata\igmas")
OUT_CSV = OUT_DIR / "SCS_IGMAS_Stations_CBA_band_lp200_minus_lp600_linear.csv"

# 3. Region bounds (keep identical to the region used when computing the CBA)
LON_MIN = 111.0   # west boundary longitude; x origin
LAT_MIN = 14.5   # south boundary latitude; y origin
LON_MAX = 117.0   # east boundary longitude
LAT_MAX = 18.5    # north boundary latitude

# 4. Reference latitude used for the longitude scale (deg2km_lon)
LAT_REF = 16.5    # roughly the mid latitude of the region

# 5. Observation height (km), written as the z column of every station
Z_LEVEL = 0.0


def main():
    """Convert the lon/lat/value table at IN_TXT into an IGMAS+ style station CSV.

    Each usable input row becomes ``x y z value`` where x and y are km offsets
    from (LON_MIN, LAT_MIN) under a simple equirectangular approximation and
    z is the constant Z_LEVEL. Blank lines and ``#`` comments are ignored.
    Rows that are too short, unparsable, or contain NaN/inf are skipped and
    counted, so a non-finite value never reaches the station file and dropped
    rows are reported instead of vanishing silently.
    """
    # --- A. Degree -> km conversion factors ---
    # 1 degree of latitude is taken as 111.2 km
    DEG2KM_LAT = 111.2
    # 1 degree of longitude is 111.2 * cos(reference latitude)
    DEG2KM_LON = DEG2KM_LAT * math.cos(math.radians(LAT_REF))

    print(f"[INFO] DEG2KM_LAT = {DEG2KM_LAT:.4f} km/deg")
    print(f"[INFO] DEG2KM_LON = {DEG2KM_LON:.4f} km/deg @ lat_ref={LAT_REF}°")

    in_path = Path(IN_TXT)
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    print(f"[INFO] Reading: {in_path}")
    print(f"[INFO] Writing: {OUT_CSV}")
    print(f"[INFO] Origin (0,0) = (lon={LON_MIN}E, lat={LAT_MIN}°)")

    n_used = 0
    n_skipped = 0  # rows rejected as malformed or non-finite

    with open(in_path, "r", encoding="utf-8", errors="ignore") as fin, \
         open(OUT_CSV, "w", newline="", encoding="utf-8") as fout:

        # Header in the IGMAS example format
        fout.write('"x" "y" "z" "measured z component"\n')

        for line in fin:
            line = line.strip()
            if (not line) or line.startswith("#"):
                continue

            parts = line.split()
            if len(parts) < 3:
                n_skipped += 1
                continue

            try:
                lon = float(parts[0])
                lat = float(parts[1])
                g   = float(parts[2])
            except ValueError:
                n_skipped += 1
                continue

            # float() accepts "NaN"/"inf"; such rows must not become stations.
            if not (math.isfinite(lon) and math.isfinite(lat) and math.isfinite(g)):
                n_skipped += 1
                continue

            # --- B. lon/lat -> km with the lower-left corner as origin ---
            x_km = (lon - LON_MIN) * DEG2KM_LON
            y_km = (lat - LAT_MIN) * DEG2KM_LAT

            fout.write(f"{x_km:.6f} {y_km:.6f} {Z_LEVEL:.6f} {g:.6f}\n")
            n_used += 1

    # Nominal model size, for comparison with the extent shown in IGMAS
    width_km  = (LON_MAX - LON_MIN) * DEG2KM_LON
    height_km = (LAT_MAX - LAT_MIN) * DEG2KM_LAT

    print(f"[DONE] Converted {n_used} stations.")
    if n_skipped:
        print(f"[WARN] Skipped {n_skipped} malformed or non-finite rows.")
    print(f"[INFO] 预期区域尺寸 ~ {width_km:.1f} km (E-W) × {height_km:.1f} km (N-S)")


# Run the conversion only when executed as a script (or as a notebook cell),
# not when imported.
if __name__ == "__main__":
    main()


[INFO] DEG2KM_LAT = 111.2000 km/deg
[INFO] DEG2KM_LON = 106.6208 km/deg @ lat_ref=16.5°
[INFO] Reading: E:\wjy\Gravity\SCS_Gravity\out\Outdata\filter\CBA_sed_expand5.0deg_5.0deg_rho2300_band_lp200_minus_lp600_target.txt
[INFO] Writing: E:\wjy\Gravity\SCS_Gravity\out\Outdata\igmas\SCS_IGMAS_Stations_CBA_band_lp200_minus_lp600_linear.csv
[INFO] Origin (0,0) = (lon=111.0E, lat=14.5°)
[DONE] Converted 85801 stations.
[INFO] 预期区域尺寸 ~ 639.7 km (E-W) × 444.8 km (N-S)


In [5]:
# thin_igmas_csv_auto.py
# -*- coding: utf-8 -*-
import os

# ============== Configuration ===================
# Input file
INFILE  = r"E:\wjy\Gravity\SCS_Gravity\out\Outdata\igmas\SCS_IGMAS_Stations_CBA_lp_300km_linear.csv"
# Output file
OUTFILE = r"E:\wjy\Gravity\SCS_Gravity\out\Outdata\igmas\SCS_IGMAS_Stations_CBA_lp_300km_2x2.csv"

# Thinning stride: every STEP_X-th column and every STEP_Y-th row is kept,
# so 2 x 2 keeps roughly 1/4 of the points.
STEP_X  = 2
STEP_Y  = 2
# ========================================

def detect_nx_auto(filename: str) -> int:
    """Detect the number of points per grid row (Nx) in a station file.

    The first line is treated as a header. Data rows are scanned until the
    first column (x) becomes smaller than in the previous row, which marks
    the start of the next grid row. Blank lines are ignored everywhere,
    including directly after the header.

    Parameters
    ----------
    filename:
        Path of a whitespace-separated text file whose first column is x.

    Returns
    -------
    int
        Number of data rows before x first decreases. If x never decreases,
        the number of rows scanned (the scan stops once it exceeds 100000).

    Raises
    ------
    ValueError
        If the file has no data rows, or a row's first field is not a number.
    """
    print("[INFO] 正在自动检测网格宽度 (Nx)...")
    with open(filename, "r", encoding="utf-8") as f:
        # 1. Skip the header line
        f.readline()

        # 2. First data row = first non-blank line after the header
        first_line = next((ln for ln in f if ln.strip()), None)
        if first_line is None:
            raise ValueError("数据文件为空")

        try:
            x_prev = float(first_line.split()[0])
        except ValueError as err:
            raise ValueError("第一行数据格式无法解析") from err

        count = 1

        # 3. Scan until x decreases (a new grid row has started)
        for line in f:
            parts = line.split()
            if not parts:
                continue

            x_curr = float(parts[0])

            if x_curr < x_prev:
                print(f"[INFO] 检测到换行！第一行结束于 x={x_prev}, 下一行开始于 x={x_curr}")
                return count

            x_prev = x_curr
            count += 1

            # Safety brake: this many points without a wrap means the data is
            # probably a single row or not a regular grid.
            if count > 100000:
                print("[WARN] 扫描了 10万个点仍未发现换行，假定为单行数据或非网格数据。")
                return count

    # End of file reached without a wrap: the file holds a single row
    return count


def thin_regular_grid(infile: str, outfile: str, step_x: int, step_y: int):
    """Write a thinned copy of a row-ordered regular grid file.

    The header line is copied as-is. Of the non-blank data lines, only those
    whose grid row index is a multiple of *step_y* and whose column index is a
    multiple of *step_x* are kept. The row length comes from
    ``detect_nx_auto``; if that fails, the error is printed and nothing is
    written.

    Raises
    ------
    ValueError
        If either step is not positive.
    """
    if not (step_x > 0 and step_y > 0):
        raise ValueError("步长必须为正整数")

    # 1. Row length via auto-detection
    try:
        nx = detect_nx_auto(infile)
        print(f"[SUCCESS] 自动检测结果: Nx = {nx}")
    except Exception as e:
        print(f"[ERROR] 自动检测失败: {e}")
        return

    print(f"[INFO] 开始抽稀: X方向步长={step_x}, Y方向步长={step_y}")

    kept = 0
    total_read = 0

    with open(infile, "r", encoding="utf-8") as fin, \
         open(outfile, "w", encoding="utf-8", newline="") as fout:

        # Header goes through untouched
        fout.write(fin.readline())

        data_lines = (raw for raw in fin if raw.strip())
        for total_read, raw in enumerate(data_lines, start=1):
            # Position of this point in the grid, derived from its sequence number
            row_idx, col_idx = divmod(total_read - 1, nx)
            if row_idx % step_y or col_idx % step_x:
                continue
            fout.write(raw)  # original text, line ending included
            kept += 1

    print("-" * 30)
    print("[RESULT] 处理完成")
    print(f"         原始行数: {total_read}")
    print(f"         输出行数: {kept}")
    print(f"         输出文件: {outfile}")

if __name__ == "__main__":
    # Thin only when the configured input exists; otherwise report the missing path.
    if not os.path.exists(INFILE):
        print(f"找不到文件: {INFILE}")
    else:
        thin_regular_grid(INFILE, OUTFILE, STEP_X, STEP_Y)

[INFO] 正在自动检测网格宽度 (Nx)...
[INFO] 检测到换行！第一行结束于 x=637.947479, 下一行开始于 x=1.777048
[SUCCESS] 自动检测结果: Nx = 359
[INFO] 开始抽稀: X方向步长=2, Y方向步长=2
------------------------------
[RESULT] 处理完成
         原始行数: 85801
         输出行数: 21600
         输出文件: E:\wjy\Gravity\SCS_Gravity\out\Outdata\igmas\SCS_IGMAS_Stations_CBA_lp_300km_2x2.csv


In [1]:
#裁剪网格
import pandas as pd
from pathlib import Path

# ====================== 1. Read the original IGMAS station file ======================
# Source file: whitespace-separated, first row is a quoted header
in_path = Path(r"E:\wjy\Gravity\OJP_Gravity\out\Outdata\igmas\OJP_IGMAS_Stations_CBA_lp_200km_5x5.csv")

# sep=r"\s+" is the documented replacement for the deprecated delim_whitespace=True
df = pd.read_csv(in_path, sep=r"\s+")

print("原始列名:", df.columns.tolist())
print("原始点数:", len(df))

# ====================== 2. Linear map x,y -> lon,lat (used for clipping only) ======================
# Overall extent of the OJP project: 150-174°E, 10°S-6°N
LON_MIN_FULL, LON_MAX_FULL = 150.0, 174.0
LAT_MIN_FULL, LAT_MAX_FULL = -10.0, 6.0

x_min_full, x_max_full = df["x"].min(), df["x"].max()
y_min_full, y_max_full = df["y"].min(), df["y"].max()

# A zero extent would turn the mapping below into NaN/inf without any error.
if x_max_full == x_min_full or y_max_full == y_min_full:
    raise ValueError("x/y extent is degenerate; cannot map station coordinates to lon/lat")

df["lon"] = LON_MIN_FULL + (df["x"] - x_min_full) * (LON_MAX_FULL - LON_MIN_FULL) / (x_max_full - x_min_full)
df["lat"] = LAT_MIN_FULL + (df["y"] - y_min_full) * (LAT_MAX_FULL - LAT_MIN_FULL) / (y_max_full - y_min_full)

print("映射后经度范围:", df["lon"].min(), df["lon"].max())
print("映射后纬度范围:", df["lat"].min(), df["lat"].max())

# ====================== 3. Clip to the lon/lat window configured below ======================
# Currently 151-174°E, 10°S-6°N (bounds inclusive)
lon_min_cut, lon_max_cut = 151.0, 174.0
lat_min_cut, lat_max_cut = -10.0, 6.0

mask = (
    (df["lon"] >= lon_min_cut) & (df["lon"] <= lon_max_cut) &
    (df["lat"] >= lat_min_cut) & (df["lat"] <= lat_max_cut)
)

df_clip = df.loc[mask].copy()

print("裁剪后点数:", len(df_clip))
print("裁剪后经度范围:", df_clip["lon"].min(), df_clip["lon"].max())
print("裁剪后纬度范围:", df_clip["lat"].min(), df_clip["lat"].max())

# ====================== 3.5 Shift x,y so the lower-left corner of the clip is (0,0) ======================
x0 = df_clip["x"].min()
y0 = df_clip["y"].min()

df_clip["x"] = df_clip["x"] - x0
df_clip["y"] = df_clip["y"] - y0

print(f"平移后的 x 范围: {df_clip['x'].min():.3f} ~ {df_clip['x'].max():.3f}")
print(f"平移后的 y 范围: {df_clip['y'].min():.3f} ~ {df_clip['y'].max():.3f}")

# Horizontal model size, in the same unit as the input x,y
# (NOTE(review): presumably km -- confirm against the file that produced the input)
Lx = df_clip["x"].max() - df_clip["x"].min()
Ly = df_clip["y"].max() - df_clip["y"].min()
print(f'模型水平范围约为: Lx = {Lx:.3f}, Ly = {Ly:.3f}')

# ====================== 4. Write the CSV for IGMAS import (4 columns) ======================
# Keep only the four IGMAS columns; the helper lon/lat columns are dropped
cols_igmas = ["x", "y", "z", "measured z component"]
df_igmas = df_clip[cols_igmas].copy()

out_path_igmas = Path(r"E:\wjy\Gravity\OJP_Gravity\out\Outdata\igmas\OJP_IGMAS_Stations_CBA_lp_200km_5×5_clip.csv")

# Standard CSV: comma-separated with a header row (the input was whitespace-separated)
df_igmas.to_csv(out_path_igmas, index=False)

print(f"IGMAS 输入 CSV（x,y 已归零）已生成: {out_path_igmas.resolve()}")




原始列名: ['x', 'y', 'z', 'measured z component']
原始点数: 55777
映射后经度范围: 150.0 174.0
映射后纬度范围: -10.0 6.0
裁剪后点数: 53461
裁剪后经度范围: 151.00000000037494 174.0
裁剪后纬度范围: -10.0 6.0
平移后的 x 范围: 0.000 ~ 2556.042
平移后的 y 范围: 0.000 ~ 1779.200
模型水平范围约为: Lx = 2556.042, Ly = 1779.200
IGMAS 输入 CSV（x,y 已归零）已生成: E:\wjy\Gravity\OJP_Gravity\out\Outdata\igmas\OJP_IGMAS_Stations_CBA_lp_200km_5×5_clip.csv
