In [1]:
import os
import glob
import xarray as xr
import pandas as pd

# === 1) Locate one NetCDF file ===
folder = "/mnt/cephfs-mount/chenchen/CygnssData/201808"
nc_files = sorted(glob.glob(os.path.join(folder, "*.nc*")))

if not nc_files:
    raise FileNotFoundError(f"No .nc files found in: {folder}")

nc_path = nc_files[0]
print(f"Opening: {nc_path}")

# === 2) Open with xarray (decode times & scale where possible) ===
# Tip: if the file is large, you can add chunks={} to enable dask lazy loading.
ds = xr.open_dataset(nc_path, decode_times=True, mask_and_scale=True)
print("\n=== Dataset summary (ds) ===")
print(ds)

# === 3) Global attributes ===
print("\n=== Global attributes ===")
for k, v in ds.attrs.items():
    print(f"- {k}: {v}")

# === 4) Variables (name, dims, dtype, units, long_name) ===
def var_info(da):
    return {
        "dims": tuple(da.dims),
        "dtype": str(da.dtype),
        "units": da.attrs.get("units", ""),
        "long_name": da.attrs.get("long_name", da.attrs.get("description", "")),
        "shape": tuple(da.shape),
    }

var_rows = []
for name, da in ds.data_vars.items():
    var_rows.append({"var": name, **var_info(da)})

vars_df = pd.DataFrame(var_rows).sort_values("var").reset_index(drop=True)
print("\n=== Variables overview ===")
display(vars_df.head(30))  # show first 30; remove or change as needed

# === 5) Quick peek at commonly-used CYGNSS fields if present ===
candidate_cols = [
    "sp_lon", "sp_lat",
    "roi_major_km", "roi_minor_km",
    "sp_theta_orbit",  # specular point track angle
    "ddm_brcs", "brcs", "nbrcs",  # backscatter/normalized backscatter if available
    "ddm_timestamp_utc", "ddm_timestamp_gps_sec", "ddm_timestamp_gps_week",
]

present = [c for c in candidate_cols if c in ds.variables]
if present:
    # Assemble a small preview table (drop missing silently)
    try:
        preview = ds[present].to_dataframe().reset_index(drop=True)
    except Exception:
        # Some variables might be on different dims; try selecting first slice for those with extra dims
        slim = {}
        for c in present:
            da = ds[c]
            if da.ndim > 1:
                # heuristically take first index along non-sample dims
                indexers = {dim: 0 for dim in da.dims[:-1]}  # keep last dim if you prefer
                da = da.isel(**indexers)
            slim[c] = da
        preview = xr.Dataset(slim).to_dataframe().reset_index(drop=True)

    print("\n=== Preview of key fields (first 10 rows) ===")
    display(preview.head(10))
else:
    print("\n(No expected key columns found among:", candidate_cols, ")")

# === 6) Close when done (optional; xarray uses lazy loading) ===
# ds.close()  # uncomment if you want to explicitly close the file handle


Opening: /mnt/cephfs-mount/chenchen/CygnssData/201808/cyg01.ddmi.s20180810-214112-e20180810-235959.l1.power-brcs.a32.d33.nc

=== Dataset summary (ds) ===
<xarray.Dataset> Size: 145MB
Dimensions:                          (sample: 8304, ddm: 4, delay: 17,
                                      doppler: 11, lat_5km: 5, lon_5km: 5)
Coordinates:
  * sample                           (sample) int32 33kB 0 1 2 ... 8302 8303
  * ddm                              (ddm) int8 4B 0 1 2 3
    ddm_timestamp_utc                (sample) datetime64[ns] 66kB ...
    sp_lat                           (sample, ddm) float32 133kB ...
    sp_lon                           (sample, ddm) float32 133kB ...
Dimensions without coordinates: delay, doppler, lat_5km, lon_5km
Data variables: (12/152)
    spacecraft_id                    int16 2B ...
    spacecraft_num                   int8 1B ...
    ddm_source                       int8 1B ...
    ddm_time_type_selector           int8 1B ...
    delay_resolution       

Unnamed: 0,var,dims,dtype,units,long_name,shape
0,add_range_to_sp,"(sample, ddm)",float32,1,Additional range to specular point at DDM samp...,"(8304, 4)"
1,add_range_to_sp_pvt,"(sample, ddm)",float32,1,Additional range to specular point at PVT time,"(8304, 4)"
2,att_timestamp_gps_sec,"(sample,)",float64,second,Attitude timestamp - GPS Seconds,"(8304,)"
3,att_timestamp_gps_week,"(sample,)",int32,week,Attitude timestamp - GPS Week,"(8304,)"
4,att_timestamp_utc,"(sample,)",datetime64[ns],,Attitude timestamp - UTC,"(8304,)"
5,bb_nearest,"(sample, ddm)",float32,second,Time to most recent black body reading,"(8304, 4)"
6,bb_power_temperature_density,"(sample, ddm)",float32,Counts/Kelvin,Black Body Power Temperature Density,"(8304, 4)"
7,bit_null_offset_port,"(sample,)",float32,1,Port bit count null offset,"(8304,)"
8,bit_null_offset_starboard,"(sample,)",float32,1,Starboard bit count null offset,"(8304,)"
9,bit_null_offset_zenith,"(sample,)",float32,1,Zenith bit count null offset,"(8304,)"



=== Preview of key fields (first 10 rows) ===


Unnamed: 0,sp_lon,sp_lat,sp_theta_orbit,brcs,ddm_timestamp_utc,ddm_timestamp_gps_sec,ddm_timestamp_gps_week
0,284.69577,-36.261345,13.324582,,2018-08-10 21:41:12.000261603,510090.000262,2013
1,284.69577,-36.261345,13.324582,,2018-08-10 21:41:12.000261603,510090.000262,2013
2,284.69577,-36.261345,13.324582,,2018-08-10 21:41:12.000261603,510090.000262,2013
3,284.69577,-36.261345,13.324582,,2018-08-10 21:41:12.000261603,510090.000262,2013
4,284.69577,-36.261345,13.324582,,2018-08-10 21:41:12.000261603,510090.000262,2013
5,284.69577,-36.261345,13.324582,,2018-08-10 21:41:12.000261603,510090.000262,2013
6,284.69577,-36.261345,13.324582,,2018-08-10 21:41:12.000261603,510090.000262,2013
7,284.69577,-36.261345,13.324582,,2018-08-10 21:41:12.000261603,510090.000262,2013
8,284.69577,-36.261345,13.324582,,2018-08-10 21:41:12.000261603,510090.000262,2013
9,284.69577,-36.261345,13.324582,,2018-08-10 21:41:12.000261603,510090.000262,2013
