In [1]:
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import torch
import numpy as np
from datetime import datetime
import os

In [2]:
def load_geomag_data(netcdf_path: str) -> xr.Dataset:
    """Open a geomagnetic NetCDF file as a dask-chunked xarray dataset.

    Args:
        netcdf_path: Path to the NetCDF file to open.

    Returns:
        The dataset opened with ``chunks="auto"``. A progress message and the
        dataset summary are printed as a side effect.

    Raises:
        RuntimeError: If opening (or printing) the dataset fails. The
            underlying exception is attached as ``__cause__``.
    """
    try:
        print("loading raw data...")
        ds = xr.open_dataset(netcdf_path, chunks="auto")
        print(ds)
        return ds
    except Exception as e:
        # Chain explicitly so the traceback reports the underlying failure as
        # the direct cause instead of "during handling ... another exception".
        raise RuntimeError(f"Error loading NetCDF file {netcdf_path}: {e}") from e

# Location of the geomagnetic NetCDF file. The original machine-specific path
# stays as the default; set the GEOMAG_NETCDF_PATH environment variable to run
# the notebook on another machine without editing this cell.
geomag_path = os.environ.get(
    "GEOMAG_NETCDF_PATH",
    r"D:\earthquake-prediction\data\geomagnetic_data\supermag\all_stations_all2024.netcdf",
)
ds = load_geomag_data(geomag_path)

loading raw data...
<xarray.Dataset> Size: 7GB
Dimensions:  (block: 527040, vector: 201)
Dimensions without coordinates: block, vector
Data variables: (12/23)
    dbe_geo  (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    dbe_nez  (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    dbn_geo  (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    dbn_nez  (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    dbz_geo  (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    dbz_nez  (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    ...       ...
    time_dy  (block) int16 1MB dask.array<chunksize=(527040,), meta=np.ndarray>
    time_hr  (block) int16 1MB dask.array<chunksize=(527040,), meta=np.ndarray>
    time_mo  (block) int16 1MB dask.array<chunksize=(527040,), meta=np.ndarray>
    tim

In [3]:
# Inspect every variable stored in the dataset (dimensions, dtype, chunking
# and attributes) to see what is available before selecting a subset.
print(ds.variables)

Frozen({'dbe_geo': <xarray.Variable (block: 527040, vector: 201)> Size: 424MB
dask.array<open_dataset-dbe_geo, shape=(527040, 201), dtype=float32, chunksize=(296940, 113), chunktype=numpy.ndarray>
Attributes:
    description:  Magnetic field east component, geographic coordinates [nT], 'dbe_nez': <xarray.Variable (block: 527040, vector: 201)> Size: 424MB
dask.array<open_dataset-dbe_nez, shape=(527040, 201), dtype=float32, chunksize=(296940, 113), chunktype=numpy.ndarray>
Attributes:
    description:  Magnetic field east component, NEZ coordinates [nT], 'dbn_geo': <xarray.Variable (block: 527040, vector: 201)> Size: 424MB
dask.array<open_dataset-dbn_geo, shape=(527040, 201), dtype=float32, chunksize=(296940, 113), chunktype=numpy.ndarray>
Attributes:
    description:  Magnetic field north component, geographic coordinates [nT], 'dbn_nez': <xarray.Variable (block: 527040, vector: 201)> Size: 424MB
dask.array<open_dataset-dbn_nez, shape=(527040, 201), dtype=float32, chunksize=(296940, 113

In [20]:

# Spot-check one sample: the NEZ field components and the geographic position
# at block index 1 / vector index 1, loaded eagerly so actual values are shown.
print(
    ds[["dbn_nez", "dbe_nez", "dbz_nez", "glat", "glon"]]
    .isel(block=1, vector=1)
    .compute()
)

<xarray.Dataset> Size: 20B
Dimensions:  ()
Data variables:
    dbn_nez  float32 4B 4.452
    dbe_nez  float32 4B 1.046
    dbz_nez  float32 4B -0.7833
    glat     float32 4B 18.62
    glon     float32 4B 72.87
Attributes:
    description:  For the ground magnetometer data we gratefully acknowledge:...


In [9]:
# List only the data variables of the dataset with their dimensions and dtypes.
print(ds.data_vars)

Data variables:
    dbe_geo  (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    dbe_nez  (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    dbn_geo  (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    dbn_nez  (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    dbz_geo  (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    dbz_nez  (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    decl     (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    extent   (block) float64 4MB dask.array<chunksize=(527040,), meta=np.ndarray>
    glat     (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    glon     (block, vector) float32 424MB dask.array<chunksize=(296940, 113), meta=np.ndarray>
    id       (block, vector) <U3 1GB d

In [30]:

# Inclusive date window for the selection. Only the year/month/day fields are
# applied by the mask below, so every block of both boundary days is kept; the
# hour/minute entries are retained for a future finer-grained filter.
start_time = {"time_yr": 2024, "time_mo": 1, "time_dy": 1, "time_hr": 0, "time_mt": 0}  # Start: Jan 1, 2024
end_time = {"time_yr": 2024, "time_mo": 1, "time_dy": 7, "time_hr": 0, "time_mt": 0}    # End: Jan 7, 2024 (whole day included)

# Step 1: Select variables of interest
selected_vars = ds[["dbn_nez", "dbe_nez", "dbz_nez", "glat", "glon"]]

# Step 2: Build the time mask
# Comparing year, month and day independently only works when the window stays
# inside a single month (e.g. Jan 25 -> Feb 5 would require day >= 25 AND
# day <= 5 and match nothing). Collapse the date into one sortable integer
# (YYYYMMDD) and compare that instead.
# The time fields are small integers (int16 in the dataset summary); cast to
# int32 first so that multiplying the year by 10000 cannot overflow.
block_date = (
    ds["time_yr"].astype("int32") * 10000
    + ds["time_mo"].astype("int32") * 100
    + ds["time_dy"].astype("int32")
)
start_key = start_time["time_yr"] * 10000 + start_time["time_mo"] * 100 + start_time["time_dy"]
end_key = end_time["time_yr"] * 10000 + end_time["time_mo"] * 100 + end_time["time_dy"]
time_mask = ((block_date >= start_key) & (block_date <= end_key)).compute()

# Step 3: Apply the time mask (blocks outside the window are dropped)
selected_data = selected_vars.where(time_mask, drop=True)
# Step 4: Keep the first entry along `vector` and load the result into memory
final_data = selected_data.sel(vector=0).compute()

# Step 5: Print the result
print(final_data)

<xarray.Dataset> Size: 202kB
Dimensions:  (block: 10080)
Dimensions without coordinates: block
Data variables:
    dbn_nez  (block) float32 40kB 4.957 4.903 4.831 4.864 ... 8.531 8.304 8.188
    dbe_nez  (block) float32 40kB 2.935 2.933 2.732 ... -2.593 -2.682 -2.68
    dbz_nez  (block) float32 40kB -2.558 -2.558 -2.559 ... -3.162 -3.26 -3.263
    glat     (block) float32 40kB 43.25 43.25 43.25 43.25 ... 43.25 43.25 43.25
    glon     (block) float32 40kB 76.92 76.92 76.92 76.92 ... 76.92 76.92 76.92
Attributes:
    description:  For the ground magnetometer data we gratefully acknowledge:...


In [None]:
'''
load the geomagnetic data for the corresponding year
for each earthquake in that year's catalog:
    for i in range(0, num_stations):
        get "dbn_nez", "dbe_nez", "dbz_nez", "glat", "glon" for the time period before the earthquake
        if the earthquake's lat/lon is within 200 km of the station:
            save "dbn_nez", "dbe_nez", "dbz_nez" as a sample in the dataframe


'''