## How to run saved Sup3rWind models on ERA5 input data

#### The _proper_ way to run models, at least for large domains and/or time periods, is to modify the config templates in the `sup3rwind/examples/run_configs` directory and then execute the `sup3r -c config_pipeline.json pipeline` command. Here we show a small, simple interactive run. The config below could be used to modify the `config_fwp_spatial.json` file.

In [None]:
import os
import warnings

import matplotlib.pyplot as plt
from IPython.display import HTML
from matplotlib.animation import FuncAnimation
from rex import init_logger

from sup3r.models import MultiStepGan as GanModel
from sup3r.pipeline.forward_pass import ForwardPass
from sup3r.pipeline.strategy import ForwardPassStrategy
from sup3r.postprocessing import CollectorNC
from sup3r.preprocessing import DataHandler

# Hide all CUDA devices so TensorFlow runs this small example on CPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
# Emit sup3r log messages at DEBUG level and above.
init_logger('sup3r', log_level='DEBUG')

<Logger sup3r (DEBUG)>

### Define the config for the run

#### To modify the config below for a production run over a large domain and time period, the `shape` and `time_slice` parameters should be changed while keeping `fwp_chunk_shape` the same or similar. `max_nodes` can also be changed to run across multiple HPC nodes.

In [23]:
# Root directories for the bias correction factors, the saved models, the
# exogenous data, and the ERA5 input data used in this example.
bc_dir = '/datasets/sup3rwind/bias_correction/ukraine'
# No trailing slash here: the model paths below add their own separator, so
# a trailing slash would produce '//' in the resolved model directories.
model_dir = '/datasets/sup3rwind/models/sup3rwind_models_202407'
exo_data_dir = '/datasets/sup3rwind/exogenous_data/ukraine'
era_data_dir = '/datasets/sup3rwind/era5/ukraine'

# ERA5 file used both as the forward pass input (`file_paths`) and as the
# `file_paths` entry of the topography exo handler.
era_fp = f'{era_data_dir}/era5_ukraine_2023.nc'

# One set of bias correction kwargs per wind component, keyed by feature
# name. Each feature reads its factors from `<bc_dir>/<feature>.h5`.
bc_kwargs = {
    feature: {
        'feature_name': feature,
        'bias_fp': f'{bc_dir}/{feature}.h5',
        'out_range': [-100, 100],
        'temporal_avg': False,
    }
    for feature in ['u_10m', 'v_10m', 'u_100m', 'v_100m', 'u_200m', 'v_200m']
}

# Keyword arguments passed to ForwardPassStrategy.
config = {
    'pass_workers': 1,
    'input_handler_kwargs': {
        # time period to run (20 is a single chunk)
        'time_slice': slice(0, 20),
        'chunks': {'time': 100, 'south_north': 30, 'west_east': 30},
        # (lat, lon) lower left coordinate of the domain to run.
        'target': [43.0, 22.0],
        # shape of domain to use (same as a single chunk)
        'shape': [30, 30],
    },
    'bias_correct_method': 'monthly_local_linear_bc',
    'bias_correct_kwargs': bc_kwargs,
    'max_nodes': 1,
    'input_handler_name': 'DataHandler',
    'file_paths': era_fp,
    # the full domain is split into chunks of this shape
    'fwp_chunk_shape': [30, 30, 20],
    'model_class': 'MultiStepGan',
    'model_kwargs': {
        'model_dirs': [
            f'{model_dir}/sup3rwind_wind_step1_3x_1x_14f',
            f'{model_dir}/sup3rwind_wind_step2_5x_1x_14f',
        ]
    },
    # where the chunks will be saved
    'out_pattern': './output/ukraine_chunk_{file_id}.nc',
    # spatial padding around each chunk
    'spatial_pad': 5,
    # temporal padding around each chunk
    'temporal_pad': 5,
    'exo_handler_kwargs': {
        'topography': {
            'chunks': {'south_north': 30, 'west_east': 30},
            'file_paths': era_fp,
            'source_files': f'{exo_data_dir}/ukraine_topo.nc',
        }
    },
}

### Load the model to check meta data

In [4]:
# Load the multi-step GAN from the model directories listed in the config.
model = GanModel.load(config['model_kwargs']['model_dirs'])

INFO - 2025-08-01 08:15:38,694 [base.py:183] : Loading GAN from disk in directory: /datasets/sup3rwind/models/sup3rwind_models_202407//sup3rwind_wind_step1_3x_1x_14f
INFO - 2025-08-01 08:15:38,695 [base.py:189] : Active python environment versions: 
{   'cftime': '1.6.4',
    'dask': '2024.11.2',
    'h5netcdf': '1.3.0',
    'netCDF4': '1.6.4',
    'nrel-phygnn': '0.0.34.dev3+g4d6f44d.d20250605',
    'nrel-rex': '0.2.100',
    'numpy': '1.26.4',
    'pandas': '2.2.3',
    'python': '3.11.9 (main, Apr 19 2024, 16:48:06) [GCC 11.2.0]',
    'sklearn': '1.6.1',
    'sup3r': '0.2.3.dev175+g2dec721f.d20250505',
    'tensorflow': '2.15.1',
    'xarray': '2025.1.2'}
INFO - 2025-08-01 08:15:38,697 [abstract.py:385] : Loading model from disk that was created with the following package versions: 
{ 'cftime': '1.6.4',
  'dask': '2024.11.2',
  'h5netcdf': '1.3.0',
  'netCDF4': '1.6.4',
  'nrel-phygnn': '0.0.34.dev3+g4d6f44d.d20250605',
  'nrel-rex': '0.2.100',
  'numpy': '1.26.4',
  'pandas': '2.2.

2025-08-01 08:15:38.813785: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:274] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2025-08-01 08:15:38.814560: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:129] retrieving CUDA diagnostic information for host: kd1
2025-08-01 08:15:38.814569: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:136] hostname: kd1
2025-08-01 08:15:38.814707: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:159] libcuda reported version is: 545.23.8
2025-08-01 08:15:38.814744: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:163] kernel reported version is: 545.23.8
2025-08-01 08:15:38.814752: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:241] kernel version seems to match DSO: 545.23.8


INFO - 2025-08-01 08:15:44,520 [base.py:183] : Loading GAN from disk in directory: /datasets/sup3rwind/models/sup3rwind_models_202407//sup3rwind_wind_step2_5x_1x_14f
INFO - 2025-08-01 08:15:44,521 [base.py:189] : Active python environment versions: 
{   'cftime': '1.6.4',
    'dask': '2024.11.2',
    'h5netcdf': '1.3.0',
    'netCDF4': '1.6.4',
    'nrel-phygnn': '0.0.34.dev3+g4d6f44d.d20250605',
    'nrel-rex': '0.2.100',
    'numpy': '1.26.4',
    'pandas': '2.2.3',
    'python': '3.11.9 (main, Apr 19 2024, 16:48:06) [GCC 11.2.0]',
    'sklearn': '1.6.1',
    'sup3r': '0.2.3.dev175+g2dec721f.d20250505',
    'tensorflow': '2.15.1',
    'xarray': '2025.1.2'}
INFO - 2025-08-01 08:15:44,523 [abstract.py:385] : Loading model from disk that was created with the following package versions: 
{ 'cftime': '1.6.4',
  'dask': '2024.11.2',
  'h5netcdf': '1.3.0',
  'netCDF4': '1.6.4',
  'nrel-phygnn': '0.0.34.dev3+g4d6f44d.d20250605',
  'nrel-rex': '0.2.100',
  'numpy': '1.26.4',
  'pandas': '2.2.

### Double check what the model needs as input. 

------------------
Models can ingest low-resolution data (`lr_features`), high-resolution "exogenous data" (`hr_exo_features`) like topography, and sparse observations (`obs_features`).

In [5]:
# Low-resolution input features expected by the model.
model.lr_features

['pressure_0m',
 'temperature_2m',
 'ie',
 'zust',
 'slhf',
 'd2m',
 'cape',
 'kx',
 'U_10m',
 'V_10m',
 'U_100m',
 'V_100m',
 'U_200m',
 'V_200m',
 'topography']

In [6]:
# High-resolution exogenous features (e.g. topography) used by the model.
model.hr_exo_features

[['topography'], ['topography']]

In [7]:
# Sparse observation features used by the model.
model.obs_features

[]

#### Double check model output

In [8]:
# High-resolution features produced by the model.
model.hr_out_features

['U_10m',
 'V_10m',
 'U_40m',
 'V_40m',
 'U_80m',
 'V_80m',
 'U_100m',
 'V_100m',
 'U_120m',
 'V_120m',
 'U_160m',
 'V_160m',
 'U_200m',
 'V_200m']

### Initialize the forward pass strategy 
-------

#### *This is done automatically when running `sup3r -c config_pipeline.json pipeline` according to the parameters set in `config_fwp_spatial.json`

In [9]:
# Silence all warnings while the strategy is initialized.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')
    # Build the forward pass strategy from the `config` dictionary.
    strat = ForwardPassStrategy(**config)

INFO - 2025-08-01 08:15:55,583 [utilities.py:125] : Initialized ForwardPassStrategy with:
{ 'allowed_const': None,
  'bias_correct_kwargs': { 'u_100m': { 'bias_fp': '/datasets/sup3rwind/bias_correction/ukraine/u_100m.h5',
                                       'feature_name': 'u_100m',
                                       'out_range': [-100, 100],
                                       'temporal_avg': False},
                           'u_10m': { 'bias_fp': '/datasets/sup3rwind/bias_correction/ukraine/u_10m.h5',
                                      'feature_name': 'u_10m',
                                      'out_range': [-100, 100],
                                      'temporal_avg': False},
                           'u_200m': { 'bias_fp': '/datasets/sup3rwind/bias_correction/ukraine/u_200m.h5',
                                       'feature_name': 'u_200m',
                                       'out_range': [-100, 100],
                                       'temporal_avg':

### Run forward pass for a single chunk

#### *This is also done automatically when running with `sup3r -c config_pipeline.json pipeline`, according to the parameters set in `config_fwp_spatial.json`. When this command is run, something similar to the loop below is executed on N (`max_nodes`) nodes and the output is saved to the location defined by `out_pattern`.

In [11]:
# Silence all warnings while the forward pass runs.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')

    # In this case there is only one node and one chunk
    fwp = ForwardPass(strat, node_index=0)
    for idx in strat.node_chunks[0]:
        # Fetch the input for this chunk, then run the model on it using
        # the settings stored on the strategy.
        chunk = fwp.get_input_chunk(chunk_index=idx)
        failed, out = fwp.run_chunk(
            chunk=chunk,
            model_kwargs=strat.model_kwargs,
            model_class=strat.model_class,
            allowed_const=strat.allowed_const,
            output_workers=strat.output_workers,
            invert_uv=strat.invert_uv,
            nn_fill=strat.nn_fill,
            meta=fwp.meta,
        )
        # Stop at the first failed chunk instead of silently continuing;
        # otherwise the problem would only surface later, when the expected
        # output file is missing at collection time.
        if failed:
            msg = f'Forward pass failed for chunk_index={idx}.'
            raise RuntimeError(msg)

INFO - 2025-08-01 08:20:13,715 [utilities.py:125] : Initialized ForwardPass with:
{ 'node_index': 0,
  'strategy': ForwardPassStrategy(file_paths=[ '/datasets/sup3rwind/era5/ukraine/era5_ukraine_2023.nc'],
                                  model_kwargs={ 'model_dirs': [ '/datasets/sup3rwind/models/sup3rwind_models_202407//sup3rwind_wind_step1_3x_1x_14f',
                                                                 '/datasets/sup3rwind/models/sup3rwind_models_202407//sup3rwind_wind_step2_5x_1x_14f']},
                                  fwp_chunk_shape=(30, 30, 20),
                                  spatial_pad=5,
                                  temporal_pad=5,
                                  min_width=(4, 4, 4),
                                  model_class='MultiStepGan',
                                  out_pattern='./output/uk_{file_id}.nc',
                                  input_handler_name='DataHandler',
                                  input_handler_kwargs={ 'chunks': {

### Collect the chunks into a single file

#### This isn't needed with only one chunk, but we mention it for completeness. When multiple chunks are run, the data collection step performs some QC and then stitches the chunks together. When running with `sup3r -c config_pipeline.json pipeline`, this data collection is done automatically according to the parameters set in `config_collect_nc.json`.

In [25]:
# Collect the forward pass output files listed on the strategy into a single
# netCDF file.
CollectorNC.collect(
    file_paths=strat.out_files, out_file='./output/ukraine_combined.nc'
)

INFO - 2025-08-01 08:40:46,686 [nc.py:78] : Initializing collection for file_paths=['./output/uk_000000_000000.nc']
INFO - 2025-08-01 08:40:46,687 [nc.py:89] : Collecting 1 files to ./output/ukraine_combined.nc
INFO - 2025-08-01 08:40:46,688 [base.py:70] : Loading features: all from files: ['./output/uk_000000_000000.nc']
INFO - 2025-08-01 08:40:46,787 [base.py:110] : Writing ['u_10m', 'v_10m', 'u_40m', 'v_40m', 'u_80m', 'v_80m', 'u_100m', 'v_100m', 'u_120m', 'v_120m', 'u_160m', 'v_160m', 'u_200m', 'v_200m'] to ./output/ukraine_combined.nc.tmp. Memory usage is 143.826 GB out of 269.755 GB
DEBUG - 2025-08-01 08:40:46,794 [base.py:526] : Adding latitude to ./output/ukraine_combined.nc.tmp with chunks=None
DEBUG - 2025-08-01 08:40:46,799 [base.py:526] : Adding longitude to ./output/ukraine_combined.nc.tmp with chunks=None
DEBUG - 2025-08-01 08:40:46,802 [base.py:526] : Adding time to ./output/ukraine_combined.nc.tmp with chunks=None
DEBUG - 2025-08-01 08:40:46,805 [base.py:526] : Adding u

#### Visualize output 

In [None]:
# Open the collected output file and request the 100m wind speed feature.
dh = DataHandler(
    file_paths='./output/ukraine_combined.nc', features='windspeed_100m'
)

# Single square panel; nrows/ncols default to 1.
fig, ax = plt.subplots(figsize=(5, 5), constrained_layout=True)
fig.suptitle('Wind speed over Carpathian Mountains (100m)', fontsize=12)

# Draw the first time step; later frames only swap the image data.
first_frame = dh['windspeed_100m'].isel(time=0).values
im = ax.imshow(
    first_frame, cmap='YlGnBu', origin='upper', clim=(0, 12), zorder=0
)

# Strip ticks and the axes frame so only the image and colorbar remain.
ax.set_xticks([])
ax.set_yticks([])
plt.colorbar(im, ax=ax, label='ws_100m', shrink=0.85)
ax.axis('off')


def update_frame(frame):
    """Show the wind speed field for time index ``frame`` in the image."""
    im.set_array(dh['windspeed_100m'].isel(time=frame).values)


# One animation frame per time step in the collected output.
anim = FuncAnimation(
    fig,
    update_frame,
    frames=len(dh.time),
    interval=200,
    blit=False,
)

# Close the static figure so only the animation is displayed.
plt.close(fig)

HTML(anim.to_jshtml(fps=3, default_mode='once', embed_frames=True))

INFO - 2025-08-01 08:43:01,728 [utilities.py:125] : Initialized DataHandler with:
{ 'BaseLoader': None,
  'FeatureRegistry': None,
  'cache_kwargs': None,
  'chunks': 'auto',
  'features': 'windspeed_100m',
  'file_paths': './output/ukraine_combined.nc',
  'hr_spatial_coarsen': 1,
  'interp_kwargs': None,
  'load_features': 'all',
  'nan_method_kwargs': None,
  'res_kwargs': None,
  'shape': None,
  'target': None,
  'threshold': None,
  'time_roll': 0,
  'time_shift': None,
  'time_slice': slice(None, None, None)}
DEBUG - 2025-08-01 08:43:01,730 [utilities.py:129] : Memory usage is 145.839 GB out of 269.755 GB
INFO - 2025-08-01 08:43:01,730 [base.py:224] : ['windspeed_100m'] not found in cache
INFO - 2025-08-01 08:43:01,731 [base.py:70] : Loading features: all from files: ./output/ukraine_combined.nc
INFO - 2025-08-01 08:43:01,804 [base.py:61] : Rasterizing features: all from files: ['./output/ukraine_combined.nc']
INFO - 2025-08-01 08:43:01,805 [base.py:155] : Getting raster index fo