Extract the ground-truth measurements at x=3D from the `inflow-bc-3D` planes. This notebook assumes that `preprocess_train_data/learn_plane_directly/01_*.ipynb` has already been run, so that the inflow data has been reformatted from AMR-Wind style to netCDF.

In [1]:
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
import xarray as xr

In [2]:
### Point to data
## Input: every file in gt_dir whose name starts with 'inflow-bc-3D', in sorted order
gt_dir = Path('/scratch/orybchuk/wakedynamics/bcs-ldm/data/072415/post_processing/nc_dimensioned')
gt_files = sorted(gt_dir.glob('inflow-bc-3D*'))

## Output: directory for the extracted planes
##   Created here if missing; its parent directory must already exist
out_dir = Path('/scratch/orybchuk/wakedynamics/bcs-ldm/simulations/072415/large_campaigns/precursor/postprocessing/wide_ground_truth')
out_dir.mkdir(exist_ok=True)

In [3]:
### Helpful parameters
## Open a single upsampled 3D file purely to borrow its time axis
up_dir = Path('/scratch/orybchuk/wakedynamics/bcs-ldm/simulations/072415/large_campaigns/precursor/postprocessing/upsampled')
f_up = up_dir / '3D_0000.nc'
ds_up = xr.open_dataset(f_up)
time_up = ds_up['time']
print("Preparing to downsample GT data to the same time axis as lidar measurements.")
print("WARNING! I assume that lidar sample 0000 uses the same time axis as all other lidar samples.")

## Lidar params
z_lidar = 120.0  # m
# Lateral extents are fixed by hand here; the alternative is to read them
# from the min/max of ds_up['y'].
# Both values are applied below as offsets from the middle y coordinate.
ylo_3D, yhi_3D = -160, 150  # m

Preparing to downsample GT data to the same time axis as lidar measurements.


In [4]:
### Extract data
## For each ground-truth file: convert its time axis to datetimes, cut it down
## to the lidar time axis / height / lateral extent at the first x index, and
## write the result to out_dir as gt_<campaign_id>.nc
for icampaign, fcampaign in enumerate(gt_files):
    # Progress report every 25 campaigns
    if icampaign % 25 == 0:
        print(datetime.now(), icampaign, '...')

    # 'inflow-bc-3D' is 12 characters long, so this takes the 4 characters that
    # directly follow that prefix in the file name.
    # NOTE(review): presumably the campaign number -- confirm against the naming convention.
    campaign_id = fcampaign.name[12:16]

    # Load ground truth data.
    # The context manager closes the file once the selection has been pulled
    # into memory, so the loop does not leave one open dataset per campaign.
    with xr.open_dataset(fcampaign) as ds_full:
        # Convert the GT time axis to datetime format
        time_gt = ds_full['time'].values
        time_datetime = np.array(pd.to_datetime(time_gt, unit='s'))  # We use this same process in 01.ipynb
        ds_full['time'] = time_datetime

        # Downsample the GT data to the lidar time axis + spatial extents
        y_mid = ds_full['y'].values[len(ds_full['y'])//2]  # TODO: double check. Shift by one dx?
        y_lo, y_hi = y_mid+ylo_3D, y_mid+yhi_3D
        ds_gt = ds_full.sel(time=time_up, z=z_lidar, y=slice(y_lo, y_hi)).isel(x=0)

        # Read the selected values now, while the file is still open; after
        # this ds_gt no longer depends on the file handle.
        ds_gt = ds_gt.load()

    # Save
    ds_gt.to_netcdf(Path(out_dir, f'gt_{campaign_id}.nc'))

2024-06-12 09:03:24.828463 0 ...
2024-06-12 09:04:01.633421 25 ...
2024-06-12 09:04:33.893776 50 ...
2024-06-12 09:05:08.609690 75 ...
2024-06-12 09:05:42.037963 100 ...
2024-06-12 09:06:16.746714 125 ...
2024-06-12 09:06:49.543166 150 ...
2024-06-12 09:07:21.823148 175 ...
2024-06-12 09:07:54.244709 200 ...
2024-06-12 09:08:26.572951 225 ...
2024-06-12 09:08:59.637231 250 ...
2024-06-12 09:09:31.729764 275 ...
2024-06-12 09:10:05.707349 300 ...
2024-06-12 09:10:43.306493 325 ...


In [5]:
# Display the dataset left over from the final iteration of the extraction loop
ds_gt