# HP Processing Pipeline - DOD Template Based

Process radar files and output to DOD-compliant NetCDF using template header structure.

**Workflow:**
0. Create the template NetCDF file from the DOD (one-time setup)
1. Provide DOD template NetCDF file path
2. Provide radar input file path  
3. Process radar data (classify, grid)
4. Write output using DOD template header with processed data

In [None]:
# Import Required Libraries
import numpy as np
import xarray as xr
from pathlib import Path
import sys
import act

# Import local hp_processing module
import hp_processing

## 0. Create NetCDF From DOD

This step only needs to run once; after the template file has been saved, skip this section.

In [None]:
# Define DOD parameters for xprecipradarhp-c1-1.3
proc = 'xprecipradarhp.c1'
version = '1.3'
# Data level is the suffix of the process name ('c1'). It is only used in the
# status message below; previously it was referenced without being defined.
level = proc.rsplit('.', 1)[-1]

# Set dimensions as per DOD spec
set_dims = {
    'time': 1,  # UNLIMITED, but we process one time slice
    'y': 160,
    'x': 160,
    'bound': 2,
}

DOD_FILL_VALUE = -9999.0
scalar_fill_dim = 'time'  # For scalar variables with time dimension

print(f"Creating DOD dataset: {proc} {level} v{version}")
print(f"Dimensions: {set_dims}")

# Build an xarray Dataset whose variables/attributes follow the DOD.
# NOTE(review): local_file=False presumably means the DOD is fetched from the
# ARM web service rather than read from disk - confirm against the ACT docs.
dod_ds = act.io.create_ds_from_arm_dod(
    proc,
    set_dims,
    version=version,
    fill_value=DOD_FILL_VALUE,
    scalar_fill_dim=scalar_fill_dim,
    local_file=False,
)


Creating DOD dataset: xprecipradarhp.c1 c1 v1.3
Dimensions: {'time': 1, 'y': 160, 'x': 160, 'bound': 2}
✓ DOD dataset created successfully
  Dimensions: {'time': 1, 'y': 160, 'x': 160}
  Variables: ['base_time', 'time_offset', 'corrected_reflectivity', 'hp_fhc', 'hp_ssc', 'lowest_height', 'lat', 'lon', 'radar_lat', 'radar_lon', 'radar_alt']
  Coordinates: ['time', 'y', 'x']
✓ DOD dataset created successfully
  Dimensions: {'time': 1, 'y': 160, 'x': 160}
  Variables: ['base_time', 'time_offset', 'corrected_reflectivity', 'hp_fhc', 'hp_ssc', 'lowest_height', 'lat', 'lon', 'radar_lat', 'radar_lon', 'radar_alt']
  Coordinates: ['time', 'y', 'x']


  print(f"  Dimensions: {dict(dod_ds.dims)}")


In [None]:
# Save DOD template to a compressed NetCDF4 file. This is a local desktop path.
dod_file_path = '/Users/bhupendra/projects/sail/vaps/hp_proc/dod_template.nc'

# Per-variable encoding: zlib compression (level 4) while keeping each
# variable's current dtype on disk.
encoding = {
    var: {
        'zlib': True,
        'complevel': 4,
        'dtype': dod_ds[var].dtype,
    }
    for var in dod_ds.data_vars
}

dod_ds.to_netcdf(dod_file_path, encoding=encoding, format='NETCDF4')
print(f"✓ DOD template saved: {dod_file_path}")
print(f"  File size: {Path(dod_file_path).stat().st_size / 1024:.1f} KB")
print(f"  Variables: {list(dod_ds.data_vars)}")
# Dataset.sizes is the name -> length mapping; using Dataset.dims as a mapping
# is deprecated and emits a FutureWarning.
print(f"  Dimensions: {dict(dod_ds.sizes)}")

✓ DOD template saved: /Users/bhupendra/projects/sail/vaps/hp_proc/dod_template.nc
  File size: 68.0 KB
  Variables: ['base_time', 'time_offset', 'corrected_reflectivity', 'hp_fhc', 'hp_ssc', 'lowest_height', 'lat', 'lon', 'radar_lat', 'radar_lon', 'radar_alt']
  Dimensions: {'time': 1, 'y': 160, 'x': 160}


  print(f"  Dimensions: {dict(dod_ds.dims)}")


## 1. Load DOD Template

Use xarray to load the DOD template structure (no need for custom function).

## 2. Process Radar Data

Use hp_processing module to classify and grid radar data.

## 3. Define Helper Function

Extract processed data from gridded dataset.

In [None]:
def process_radar_file(radar_file, season='summer'):
    """
    Process a radar file and return the gridded fields as a plain dict.

    The file is run through ``hp_processing.process_radar`` and then
    ``hp_processing.make_squire_grid``; the fields below are pulled out of
    the resulting gridded dataset.

    Parameters
    ----------
    radar_file : str or path-like
        Radar input file, passed straight to ``hp_processing.process_radar``.
    season : str, optional
        Passed to ``hp_processing.process_radar`` and used to select the
        ``hp_fhc_<season>`` field (e.g. ``'summer'`` or ``'winter'``).

    Returns
    -------
    dict
        - corrected_reflectivity: gridded reflectivity; a leading axis is
          added when the gridded field is 2-D (expected (time, y, x))
        - hp_fhc: values of ``hp_fhc_<season>``, same axis handling
        - hp_ssc: values of ``hp_semisupervised``, same axis handling
        - lowest_height: values of ``lowest_height``, same axis handling
        - lat, lon: coordinate arrays as stored in the gridded dataset
        - radar_lat, radar_lon, radar_alt: floats read from the dataset
          attributes ``radar_latitude`` / ``radar_longitude`` /
          ``radar_altitude``

        Any field or attribute missing from the gridded dataset is replaced
        by the fill value -9999, shaped like ``corrected_reflectivity``
        (its last two axes for lat/lon).
    """
    fill_value = -9999

    print(f"Reading {radar_file}...")
    radar = hp_processing.process_radar(radar_file, season)

    print("Gridding and extracting data...")
    data_ds = hp_processing.make_squire_grid(radar)

    def _with_leading_axis(values):
        # 2-D (y, x) fields get a length-1 leading axis so they match the
        # (time, y, x) layout the output template expects.
        return np.expand_dims(values, axis=0) if values.ndim == 2 else values

    dbz = _with_leading_axis(data_ds['corrected_reflectivity'].values)

    # Fallback shapes follow the reflectivity grid instead of a hard-coded
    # 160x160, so fill arrays always agree with the real data.
    full_shape = dbz.shape
    plane_shape = dbz.shape[-2:]

    def _field_or_fill(name, dtype):
        if name in data_ds:
            return _with_leading_axis(data_ds[name].values)
        return np.full(full_shape, fill_value, dtype=dtype)

    def _coord_or_fill(name):
        if name in data_ds:
            return data_ds[name].values
        return np.full(plane_shape, float(fill_value))

    def _attr_or_fill(name):
        return float(data_ds.attrs.get(name, fill_value))

    # Which hydrometeor-classification field to use depends on the season
    hp_fhc_field = f'hp_fhc_{season}'

    return {
        'corrected_reflectivity': dbz,
        'hp_fhc': _field_or_fill(hp_fhc_field, np.int16),
        'hp_ssc': _field_or_fill('hp_semisupervised', np.int16),
        'lowest_height': _field_or_fill('lowest_height', float),
        'lat': _coord_or_fill('lat'),
        'lon': _coord_or_fill('lon'),
        'radar_lat': _attr_or_fill('radar_latitude'),
        'radar_lon': _attr_or_fill('radar_longitude'),
        'radar_alt': _attr_or_fill('radar_altitude'),
    }

## 4. Define Output Writer

Write processed data to DOD-compliant NetCDF using xarray.

In [None]:
def _assign_first_or_scalar(data_array, value):
    """Store *value* in a 0-d variable, or in element 0 of an N-d variable."""
    # Work on a copy and assign it back through the ``values`` setter rather
    # than mutating whatever array ``.values`` happens to return.
    updated = np.array(data_array.values)
    if updated.ndim == 0:
        updated[...] = value
    else:
        updated[0] = value
    data_array.values = updated


def write_output_with_dod_template(dod_template, processed_data, output_file):
    """
    Write processed radar data to NetCDF using the DOD template structure.

    A deep copy of the template is filled with the processed fields and
    written to ``output_file``; the template itself is left untouched.
    Fields that are absent from the template are skipped.

    Parameters
    ----------
    dod_template : xarray.Dataset
        Template dataset providing variables, dimensions and attributes.
    processed_data : dict
        Must contain 'corrected_reflectivity', 'hp_fhc', 'hp_ssc', 'lat',
        'lon', 'radar_lat', 'radar_lon' and 'radar_alt' for every such
        variable present in the template. 'lowest_height' is optional and is
        written only when both the template and the dict provide it.
    output_file : str or path-like
        Destination NetCDF4 file.

    Returns
    -------
    xarray.Dataset
        The filled copy of the template that was written to disk.
    """
    # Create a copy of the template
    output_ds = dod_template.copy(deep=True)

    # Fields copied as-is
    for name in ('corrected_reflectivity', 'lat', 'lon'):
        if name in output_ds:
            output_ds[name].values = processed_data[name]

    # Classification fields are cast to the dtype declared by the template
    for name in ('hp_fhc', 'hp_ssc'):
        if name in output_ds:
            output_ds[name].values = processed_data[name].astype(output_ds[name].dtype)

    # Optional: dicts without 'lowest_height' are still accepted
    if 'lowest_height' in output_ds and 'lowest_height' in processed_data:
        output_ds['lowest_height'].values = processed_data['lowest_height']

    # Radar location: stored in element 0, or in the 0-d variable itself
    for name in ('radar_lat', 'radar_lon', 'radar_alt'):
        if name in output_ds:
            _assign_first_or_scalar(output_ds[name], processed_data[name])

    # Save to NetCDF with compression
    encoding = {var: {'zlib': True, 'complevel': 4} for var in output_ds.data_vars}

    output_ds.to_netcdf(output_file, encoding=encoding, format='NETCDF4')
    print(f"✓ Output saved to: {output_file}")

    return output_ds

## 5. MAIN EXECUTION - Configuration

Edit these variables with your actual file paths:

In [8]:
# EDIT THE VARIABLES BELOW BEFORE RUNNING THE REMAINING CELLS

# Season used to pick the classification field: "summer" or "winter"
season = "winter"

# DOD template NetCDF file
dod_template_file = "/ccsopen/home/braut/projects/xprecipradarhp_vap_proc/dod_template.nc"

# Radar input file
radar_input_file = "/gpfs/wolf2/arm/atm124/world-shared/gucxprecipradarcmacS2.c1/ppi/202201/gucxprecipradarcmacppiS2.c1.20220107.200317.nc"

# Output file path
output_file = "/ccsopen/home/braut/projects/xprecipradarhp_vap_proc/output_test.nc"

### Step 1: Load DOD Template

In [None]:
# Load DOD template using xarray.
# load_dataset reads everything into memory and closes the file, so no open
# NetCDF handle is kept on the template for the rest of the session.
dod_template = xr.load_dataset(dod_template_file)
print(f"✓ DOD template loaded from {Path(dod_template_file).name}")
# Dataset.sizes is the name -> length mapping; using Dataset.dims as a mapping
# is deprecated.
print(f"  Dimensions: {dict(dod_template.sizes)}")
print(f"  Variables: {list(dod_template.data_vars)}")
print(f"  Coordinates: {list(dod_template.coords)}")

✓ Header loaded from dod_template.nc


RuntimeError: NetCDF: Not a valid ID

### Step 2: Process Radar File

In [None]:
# Run the radar processing and summarize what came back
processed_data = process_radar_file(radar_input_file, season)
print("✓ Radar processing complete")
print(f"  corrected_reflectivity shape: {processed_data['corrected_reflectivity'].shape}")
print(f"  hp_fhc shape: {processed_data['hp_fhc'].shape}")
print(f"  hp_ssc shape: {processed_data['hp_ssc'].shape}")
# 'lowest_height' may be absent from the returned dict; report it only when
# present instead of failing with a KeyError.
if 'lowest_height' in processed_data:
    print(f"  lowest_height shape: {processed_data['lowest_height'].shape}")
print(f"  lat shape: {processed_data['lat'].shape}")
print(f"  radar location: ({processed_data['radar_lat']}, {processed_data['radar_lon']}, {processed_data['radar_alt']}m)")

### Step 3: Write Output Using DOD Template

In [None]:
# Fill a copy of the template with the processed fields and write it to disk
output_ds = write_output_with_dod_template(dod_template, processed_data, output_file)
print(f"✓ Output NetCDF created: {output_file}")
# Dataset.sizes is the name -> length mapping; using Dataset.dims as a mapping
# is deprecated.
print(f"  Dimensions: {dict(output_ds.sizes)}")
print(f"  Variables: {list(output_ds.data_vars)}")

## 6. Verify Output File

In [None]:
print("\n" + "="*60)
print("OUTPUT FILE VERIFICATION")
print("="*60)

# Re-open the written file to confirm its structure. The context manager
# releases the file handle even if one of the prints below raises.
with xr.open_dataset(output_file) as verify_ds:
    print(f"\nFile: {output_file}")
    # Dataset.sizes is the name -> length mapping; Dataset.dims as a mapping
    # is deprecated.
    print(f"\nDimensions: {dict(verify_ds.sizes)}")
    print("\nVariables:")
    for var_name in verify_ds.data_vars:
        var = verify_ds[var_name]
        print(f"  {var_name}: {var.shape} ({var.dtype})")

    print("\nGlobal attributes (first 5):")
    for attr in list(verify_ds.attrs)[:5]:
        print(f"  {attr}: {verify_ds.attrs[attr]}")

print("\n✓ Output file structure verified")