# Combining verification results

* This notebook combines verification results on individual initializations for visualization.
* The notebook runs with `verif_config_6h.yml` in the same folder.

In [1]:
import os
import sys
import yaml
import argparse
from glob import glob
from datetime import datetime, timedelta

import numpy as np
import xarray as xr

sys.path.insert(0, os.path.realpath('../libs/'))
import verif_utils as vu
import score_utils as su

### Load config

In [2]:
# Resolve the verification config file (path is relative to the working directory)
config_name = os.path.realpath('verif_config_6h.yml')

# Parse the YAML into `conf`, the nested dict that the cells below read
# their paths and ranges from
with open(config_name, 'r') as stream:
    conf = yaml.safe_load(stream)

### RMSE and ACC

In [3]:
# Models whose verification results are combined below
model_names = ['wxformer', 'fuxi', 'IFS']

# Total number of verified initializations
# (presumably 2 per day over three years, one of them a leap year -- TODO confirm)
IND_max = 2 * (365 + 366 + 365)

# Boundaries of the per-batch result files: multiples of 100, closed off with
# IND_max so that the last batch covers the remainder
INDs = np.append(np.arange(0, IND_max, 100), IND_max)

**Check NaNs**

In [6]:
VERIF = {}

# Step 1: rebuild the full-period RMSE dataset of every model from its batch files
for model_name in model_names:
    # file names to load
    verif_lead_range = conf[model_name]['verif_lead_range']
    path_RMSE_verif = conf[model_name]['save_loc_verif']+'combined_rmse_{:04d}_{:04d}_{:03d}h_{:03d}h_{}.nc'

    # open one dataset per batch; consecutive entries of INDs are the batch bounds
    RMSE_verif = []
    for ind_start, ind_end in zip(INDs[:-1], INDs[1:]):
        filename = path_RMSE_verif.format(ind_start, ind_end, verif_lead_range[0], verif_lead_range[-1], model_name)
        RMSE_verif.append(xr.open_dataset(filename))

    # merge by concat
    ds_RMSE_verif = xr.concat(RMSE_verif, dim='days')

    # save to one dictionary for some checking
    VERIF['{}_RMSE'.format(model_name)] = ds_RMSE_verif

# Step 2: look for NaNs and, if any are found, print the suspect forecast files
for model_name in model_names:
    for var_name in ['U500', 'V500', 'T500', 'Q500', 'Z500', 't2m', 'SP']:
        # averaging over axis 1 turns any NaN within a row into a NaN for that row
        test = np.mean(np.array(VERIF[f'{model_name}_RMSE'][var_name]), axis=1)
        ind_found = np.argwhere(np.isnan(test))

        if len(ind_found) > 0:
            # gathered forecast files, restricted to the configured years
            filename_OURS = sorted(glob(conf[model_name]['save_loc_gather']+'*.nc'))

            year_range = conf[model_name]['year_range']
            years_pick = np.arange(year_range[0], year_range[1]+1, 1).astype(str)
            filename_OURS = [fn for fn in filename_OURS if any(year in fn for year in years_pick)]

            # NOTE(review): assumes the position along 'days' lines up with the
            # sorted, year-filtered file list -- confirm against the verification scripts
            for ind_check in ind_found[:, 0]:
                print('bad file: {}'.format(filename_OURS[ind_check]))

print('Checking completed')

Checking completed


**Save RMSE as netCDF**

In [7]:
VERIF = {}

for model_name in model_names:
    verif_dir = conf[model_name]['save_loc_verif']
    verif_lead_range = conf[model_name]['verif_lead_range']
    lead_first, lead_last = verif_lead_range[0], verif_lead_range[-1]

    # input: one RMSE file per batch of initializations
    path_RMSE_verif = verif_dir+'combined_rmse_{:04d}_{:04d}_{:03d}h_{:03d}h_{}.nc'
    # output: a single RMSE file per model
    path_RMSE_save = verif_dir+'RMSE_{:03d}h_{:03d}h_{}.nc'

    # open every batch; each boundary in INDs is paired with its successor
    RMSE_verif = [
        xr.open_dataset(path_RMSE_verif.format(ind_start, ind_end, lead_first, lead_last, model_name))
        for ind_start, ind_end in zip(INDs[:-1], INDs[1:])
    ]

    # stitch the batches together along the 'days' dimension
    ds_RMSE_verif = xr.concat(RMSE_verif, dim='days')

    # keep the combined dataset in memory; the plotting-data cell reads it from VERIF
    VERIF['{}_RMSE'.format(model_name)] = ds_RMSE_verif

    # write the combined dataset to netCDF
    save_name_RMSE = path_RMSE_save.format(lead_first, lead_last, model_name)
    ds_RMSE_verif.to_netcdf(save_name_RMSE)
    print('Save to {}'.format(save_name_RMSE))

Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/wxformer_6h/RMSE_006h_240h_wxformer.nc
Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/fuxi_6h/RMSE_006h_240h_fuxi.nc
Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/IFS/RMSE_006h_240h_IFS.nc


**Save ACC as netCDF**

In [8]:
# Results are added to the existing VERIF dict (it is not reset in this cell),
# so the RMSE entries stored earlier stay available next to the ACC ones.
for model_name in model_names:
    verif_dir = conf[model_name]['save_loc_verif']
    verif_lead_range = conf[model_name]['verif_lead_range']
    lead_first, lead_last = verif_lead_range[0], verif_lead_range[-1]

    # input: one ACC file per batch of initializations
    path_ACC_verif = verif_dir+'combined_acc_{:04d}_{:04d}_{:03d}h_{:03d}h_{}.nc'
    # output: a single ACC file per model
    path_ACC_save = verif_dir+'ACC_{:03d}h_{:03d}h_{}.nc'

    # open every batch; each boundary in INDs is paired with its successor
    ACC_verif = [
        xr.open_dataset(path_ACC_verif.format(ind_start, ind_end, lead_first, lead_last, model_name))
        for ind_start, ind_end in zip(INDs[:-1], INDs[1:])
    ]

    # stitch the batches together along the 'days' dimension
    ds_ACC_verif = xr.concat(ACC_verif, dim='days')

    # keep the combined dataset in memory; the plotting-data cell reads it from VERIF
    VERIF['{}_ACC'.format(model_name)] = ds_ACC_verif

    # write the combined dataset to netCDF
    save_name_ACC = path_ACC_save.format(lead_first, lead_last, model_name)
    ds_ACC_verif.to_netcdf(save_name_ACC)
    print('Save to {}'.format(save_name_ACC))

Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/wxformer_6h/ACC_006h_240h_wxformer.nc
Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/fuxi_6h/ACC_006h_240h_fuxi.nc
Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/IFS/ACC_006h_240h_IFS.nc


**Prepare data for visualization**

In [9]:
# Climatology RMSE used as a reference level in the plots
path_clim = conf['ERA5_weatherbench']['save_loc_clim']+'combined_rmse_clim_2020_2022.nc'
clim_scores = xr.open_dataset(path_clim)

model_names = ['wxformer', 'fuxi', 'IFS']
varnames_plot = ['U500', 'V500', 'T500', 'Q500', 'Z500', 't2m', 'SP']
# the number of bootstrap samples is N_boost times the length of the score array
N_boost = 2

PLOT_data = {}

for var in varnames_plot:
    for model_name in model_names:
        # ---------- RMSE: bootstrap mean with 5th / 95th quantile bounds ---------- #
        np_RMSE = np.array(VERIF['{}_RMSE'.format(model_name)][var])
        N_samples = int(N_boost*len(np_RMSE))

        mean_score, ci_lower, ci_upper = su.bootstrap_confidence_intervals(
            np_RMSE,
            num_bootstrap_samples=N_samples,
            lower_quantile=0.05,
            upper_quantile=0.95,
        )

        PLOT_data.update({
            'RMSE_{}_{}_mean'.format(model_name, var): mean_score,
            'RMSE_{}_{}_95p'.format(model_name, var): ci_upper,
            'RMSE_{}_{}_05p'.format(model_name, var): ci_lower,
        })

        # ---------- ACC: same treatment ---------- #
        np_ACC = np.array(VERIF['{}_ACC'.format(model_name)][var])
        N_samples = int(N_boost*len(np_ACC))

        mean_score, ci_lower, ci_upper = su.bootstrap_confidence_intervals(
            np_ACC,
            num_bootstrap_samples=N_samples,
            lower_quantile=0.05,
            upper_quantile=0.95,
        )

        PLOT_data.update({
            'ACC_{}_{}_mean'.format(model_name, var): mean_score,
            'ACC_{}_{}_95p'.format(model_name, var): ci_upper,
            'ACC_{}_{}_05p'.format(model_name, var): ci_lower,
        })

    # climatology RMSE of this variable, averaged over all of its elements
    rmse_clim = np.array(clim_scores[var]).mean()
    PLOT_data['RMSE_clim_{}_mean'.format(var)] = rmse_clim

# Save. np.save stores the dict as a pickled object array, so reading it back
# needs np.load(..., allow_pickle=True)[()]
save_name = conf['qsub']['plot_data_loc'] + 'scores_CREDIT_arXiv_2024_fuxi_wxformer.npy'
np.save(save_name, PLOT_data)
print(f'Save to {save_name}')

Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/PLOT_data/scores_CREDIT_arXiv_2024_fuxi_wxformer.npy


### Zonal energy spectrum

**Save ZES as netCDF**

In [12]:
VERIF = {}
# lead-time labels that appear in the ZES file names
lead_names = [24, 120, 240]

for model_name in model_names:
    verif_dir = conf[model_name]['save_loc_verif']

    # input: one ZES file per batch of initializations and per lead time
    path_ZES_verif = verif_dir+'combined_zes_{:04d}_{:04d}_{}_lead{}.nc'
    # output: a single ZES file per model and lead time
    path_ZES_save = verif_dir+'ZES_{:03d}h_{}.nc'

    for lead_name in lead_names:
        # open every batch; each boundary in INDs is paired with its successor
        ZES_verif = [
            xr.open_dataset(path_ZES_verif.format(ind_start, ind_end, model_name, lead_name))
            for ind_start, ind_end in zip(INDs[:-1], INDs[1:])
        ]

        # stitch the batches together along the 'time' dimension
        ds_ZES_verif = xr.concat(ZES_verif, dim='time')

        # keep the combined dataset in memory; the next cell reads it from VERIF
        VERIF['{}_ZES_{}'.format(model_name, lead_name)] = ds_ZES_verif

        # write the combined dataset to netCDF
        save_name_ZES = path_ZES_save.format(lead_name, model_name)
        ds_ZES_verif.to_netcdf(save_name_ZES)
        print('Save to {}'.format(save_name_ZES))

Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/wxformer_6h/ZES_024h_wxformer.nc
Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/wxformer_6h/ZES_120h_wxformer.nc
Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/wxformer_6h/ZES_240h_wxformer.nc
Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/fuxi_6h/ZES_024h_fuxi.nc
Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/fuxi_6h/ZES_120h_fuxi.nc
Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/fuxi_6h/ZES_240h_fuxi.nc
Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/IFS/ZES_024h_IFS.nc
Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/IFS/ZES_120h_IFS.nc
Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/VERIF/verif_6h/IFS/ZES_240h_IFS.nc


**Pack ERA5 ZES & prepare data for visualization**

In [13]:
# ERA5 zonal energy spectrum used as the reference
path_clim = conf['ERA5_weatherbench']['save_loc_clim']+'combined_zes_clim.nc'
clim_scores = xr.open_dataset(path_clim)

model_names = ['wxformer', 'fuxi', 'IFS']

PLOT_data = {}

# Forecast spectra: one entry per model and lead time, averaged over axis 0
for model_name in model_names:
    for lead_name in lead_names:
        ds_ZES = VERIF['{}_ZES_{}'.format(model_name, lead_name)]

        np_theta = np.array(ds_ZES['theta'])
        np_u = np.array(ds_ZES['U500'])
        np_v = np.array(ds_ZES['V500'])
        # half the sum of the U500 and V500 arrays
        # NOTE(review): presumably both are already power spectra -- confirm
        KE = 0.5*(np_u + np_v)

        PLOT_data['ZES_{}_{}_theta'.format(model_name, lead_name)] = np.mean(np_theta, axis=0)
        PLOT_data['ZES_{}_{}_KE'.format(model_name, lead_name)] = np.mean(KE, axis=0)

# ERA5 reference spectra, processed the same way
np_theta = np.array(clim_scores['theta'])
np_u = np.array(clim_scores['U500'])
np_v = np.array(clim_scores['V500'])
KE = 0.5*(np_u + np_v)

PLOT_data['ZES_ERA5_theta'] = np.mean(np_theta, axis=0)
PLOT_data['ZES_ERA5_KE'] = np.mean(KE, axis=0)

# Save. np.save stores the dict as a pickled object array, so reading it back
# needs np.load(..., allow_pickle=True)[()]
save_name = conf['qsub']['plot_data_loc'] + 'ZES_CREDIT_arXiv_2024_fuxi_wxformer.npy'
np.save(save_name, PLOT_data)
print(f'Save to {save_name}')

Save to /glade/campaign/cisl/aiml/ksha/CREDIT_arXiv/PLOT_data/ZES_CREDIT_arXiv_2024_fuxi_wxformer.npy


### Spatial correlation

**6 hourly ERA5 correlation**

In [14]:
# Number of ERA5 time steps: 4 per day (6 hourly) over 365 + 366 + 365 days
IND_max_6h = 4 * (365 + 366 + 365)

# Batch boundaries in steps of 50, closed off with IND_max_6h so that the
# last batch covers the remainder
INDs_6h = np.append(np.arange(0, IND_max_6h, 50), IND_max_6h)

In [15]:
VERIF = {}

# =========================================================== #
# 6 hourly ERA5
# one spatial-correlation file per batch; consecutive entries of INDs_6h are the bounds
path_ERA5_corr = conf['ERA5_ours']['save_loc_verif']+'spatial_corr_{:04d}_{:04d}_6h_ERA5.nc'
ERA5_corr = [
    xr.open_dataset(path_ERA5_corr.format(verif_ind_start, verif_ind_end))
    for verif_ind_start, verif_ind_end in zip(INDs_6h[:-1], INDs_6h[1:])
]

# join the batches along 'day', then average 'correlation' over the leading axis
ERA5_corr_verif = xr.concat(ERA5_corr, dim='day')
ERA5_corr_mean = np.mean(ERA5_corr_verif['correlation'].values, axis=0)
VERIF['ERA5_6h'] = ERA5_corr_mean

# np.save stores the dict as a pickled object array (load with allow_pickle=True)
np.save(conf['qsub']['plot_data_loc'] + 'CORR_CREDIT_arXiv_2024_ERA5.npy', VERIF)

**6 hourly FuXi and wxformer correlation**

In [10]:
# Restrict the spatial-correlation section to FuXi and wxformer (IFS is left out).
# This rebinds the notebook-wide `model_names`, so any cell executed afterwards
# sees this shorter list.
model_names = ['fuxi', 'wxformer']

In [13]:
# Number of model initializations: 2 per day over 365 + 366 + 365 days.
# NOTE(review): the "_6h" suffix is kept from the ERA5 cell although the factor
# here is 2, not 4 -- confirm this matches the model verification batches.
IND_max_6h = 2 * (365 + 366 + 365)

# Batch boundaries in steps of 50, closed off with IND_max_6h so that the
# last batch covers the remainder
INDs_6h = np.append(np.arange(0, IND_max_6h, 50), IND_max_6h)

In [14]:
VERIF = {}

for model_name in model_names:
    # one spatial-correlation file per batch; consecutive entries of INDs_6h are the bounds
    path_model_corr = conf[model_name]['save_loc_verif']+'spatial_corr_{:04d}_{:04d}_240h_{}.nc'

    model_corr = []
    for verif_ind_start, verif_ind_end in zip(INDs_6h[:-1], INDs_6h[1:]):
        filename = path_model_corr.format(verif_ind_start, verif_ind_end, model_name)
        model_corr.append(xr.open_dataset(filename))

    # join the batches along 'day', then average 'correlation' over the leading axis
    model_corr_verif = xr.concat(model_corr, dim='day')
    model_corr_mean = np.mean(model_corr_verif['correlation'].values, axis=0)
    VERIF[model_name] = model_corr_mean

# Save once, after every model has been added (the file content is the same as
# before; it is just no longer rewritten on each loop iteration).
# np.save stores the dict as a pickled object array (load with allow_pickle=True)
np.save(conf['qsub']['plot_data_loc'] + 'CORR_CREDIT_arXiv_2024_models.npy', VERIF)