In [None]:
import os
import re
from pathlib import Path
import xarray as xr
import numpy as np
from datetime import datetime

# Constants
# Sentinel compared against variable values in process_nc_file; cells equal
# to it are replaced with NaN.
# NOTE(review): presumably the product's integer fill value (-2147483647)
# after a 1e-4 scale factor — confirm against the file's metadata.
INVALID_VALUE = -214748.3647
# Matches the target file names and captures the first 8-digit field, which
# scan_and_process_latest_nc_file parses as a %Y%m%d date.
FILE_PATTERN = re.compile(r"MUL_OPER_nrt_global_allsat_phy_(\d{8})_\d{8}_T\.nc$")

def scan_and_process_latest_nc_file(base_path, output_base_path, variables_to_extract):
    """Find the most recently dated product file under *base_path* and process it.

    The directory tree is walked recursively. A file is a candidate when its
    name matches FILE_PATTERN; the first 8-digit field of the name is parsed
    as a ``%Y%m%d`` date. The candidate with the greatest date is handed to
    ``process_nc_file``. On equal dates the first one encountered is kept.

    Args:
        base_path: Root directory to search.
        output_base_path: Root directory passed through to ``process_nc_file``.
        variables_to_extract: Variable names passed through to
            ``process_nc_file``.

    Returns:
        None. Progress and diagnostics are printed to stdout.
    """
    newest_path = None
    newest_date = None

    for dirpath, _subdirs, filenames in os.walk(base_path):
        for name in filenames:
            # Cheap prefix/suffix filter before running the regex.
            if not (name.startswith("MUL_OPER_nrt_global_allsat_phy") and name.endswith(".nc")):
                continue

            found = FILE_PATTERN.match(name)
            if found is None:
                continue

            try:
                parsed = datetime.strptime(found.group(1), "%Y%m%d")
            except ValueError:
                # Eight digits that do not form a real calendar date.
                print(f"Invalid date format in file: {name}")
                continue

            if newest_date is None or parsed > newest_date:
                newest_date, newest_path = parsed, Path(dirpath) / name

    if not newest_path:
        print("No valid files found.")
        return

    print(f"Latest file: {newest_path}, Date: {newest_date.strftime('%Y-%m-%d')}")
    process_nc_file(newest_path, output_base_path, newest_date, variables_to_extract)

def process_nc_file(input_nc_file, output_base_path, date_obj, variables_to_extract):
    """Write each requested variable of a NetCDF file to its own output file.

    For every name in *variables_to_extract* that exists in the dataset, the
    values equal to INVALID_VALUE are replaced with NaN and the result is
    saved to ``<output_base_path>/<year>/<month>/<day>/<var>_nsoas/data.nc``.
    Month and day are written without zero padding. Names missing from the
    dataset are reported on stdout and skipped.

    Args:
        input_nc_file: Path of the NetCDF file to read.
        output_base_path: Root directory for the output tree; intermediate
            directories are created as needed.
        date_obj: Object exposing ``year``, ``month`` and ``day`` attributes,
            used to build the output directory.
        variables_to_extract: Iterable of variable names to export.

    Returns:
        None.
    """
    year, month, day = date_obj.year, date_obj.month, date_obj.day
    # The date part of the output path is the same for every variable.
    date_dir = Path(output_base_path) / str(year) / str(month) / str(day)

    # Open the dataset as a context manager so the underlying file handle is
    # released even if a write fails. All writes must happen inside the block
    # because xarray loads variable data lazily from the open file.
    with xr.open_dataset(input_nc_file) as ds:
        for var in variables_to_extract:
            if var not in ds.variables:
                print(f"Variable {var} not found in the dataset.")
                continue

            var_data = ds[var]
            var_data.coords['lat'] = ds['lat']
            var_data.coords['lon'] = ds['lon']
            var_data.attrs = ds[var].attrs
            # Mask the sentinel value so downstream consumers see NaN.
            var_data = var_data.where(var_data != INVALID_VALUE, np.nan)

            output_dir = date_dir / f"{var}_nsoas"
            output_dir.mkdir(parents=True, exist_ok=True)
            output_file = output_dir / "data.nc"

            var_data.to_netcdf(output_file)
            print(f"Saved {var} to {output_file}")

# Script entry point: locate the newest matching file under base_path and
# export the listed variables beneath output_base_path.
if __name__ == "__main__":
    # Directory tree searched recursively for input files.
    base_path = "/datadisk1/2024"
    # Root under which the per-date, per-variable output directories are created.
    output_base_path = "/data/fishdata"
    # Dataset variables to export; names absent from the file are reported and skipped.
    variables_to_extract = ['sla', 'err', 'adt', 'u', 'ua', 'v', 'va']
    
    scan_and_process_latest_nc_file(base_path, output_base_path, variables_to_extract)

In [None]:
import xarray as xr
