#### Before running this notebook, please make sure to download and unpack the FTS dataset:
The Forecaster Test Set (FTS) contains both the test sequences and the corresponding forecasts of the three models used in the paper (GPTCast8x8, GPTCast16x16, and Linda).
- run the `download_data.py` script in the `data` directory.
```bash
cd data
python download_data.py
```
- unpack the downloaded `fts.tar` file in the `data` directory.
```bash
cd data
tar -xvf fts.tar
```

In [1]:
import sys
sys.path.append('..')
import os

import pickle
from tqdm import tqdm
from multiprocessing import Pool

from glob import glob
import xarray as xr

from pysteps.verification.probscores import CRPS_init, CRPS_accum
import numpy as np
from tqdm import tqdm
from matplotlib import pyplot as plt


Pysteps configuration file found at: /home/gabriele/Documents/fbk/meteo/GPTCast/.venv/lib/python3.12/site-packages/pysteps/pystepsrc



In [2]:
def reflectivity_to_rainrate(arr: np.ndarray,
                             minmax: tuple = (-20, 60),
                             a: float = 200.0,
                             b: float = 1.6,
                             min_rate: float = 0.04):
    """
    Convert radar reflectivity (dBZ) to rain rate (mm/h).

    The reflectivity is first taken out of decibels, Z = 10 ** (dBZ / 10),
    and the relation Z = a * R ** b is then inverted to obtain
    R = (Z / a) ** (1 / b). Rates below ``min_rate`` are set to 0.

    Parameters
    ----------
    arr : array-like
        Reflectivity values in dBZ. Scalars, lists and arrays are accepted;
        the input itself is never modified.
    minmax : tuple
        Not used by the computation; kept only so existing callers that pass
        it keep working.
    a, b : float
        Coefficient and exponent of the Z = a * R ** b relation.
    min_rate : float
        Rain rates strictly below this value (mm/h) are zeroed.

    Returns
    -------
    np.ndarray
        Rain rate in mm/h with the same shape as the input (a 0-d array for
        scalar input).
    """
    # asanyarray lets lists/scalars through while leaving ndarray subclasses as they are
    dbz = np.asanyarray(arr)
    Z = 10.0 ** (dbz / 10.0)  # wradlib.trafo.idecibel
    # Arithmetic on 0-d input yields a NumPy scalar, which does not support
    # item assignment; wrapping it again makes the masking below work for any shape.
    rr = np.asanyarray((Z / a) ** (1.0 / b))  # wradlib.zr.z_to_r
    rr[rr < min_rate] = 0.
    return rr

In [3]:
# Model whose FTS forecasts are verified in this run.
# Pick exactly one of: "gptcast_8x8", "gptcast_16x16", "linda".
mod_name = 'gptcast_8x8'
# mod_name = 'gptcast_16x16'
# mod_name = 'linda'

# Forecast files are read from the first directory; the CRPS tables are
# written to the second one (both relative to this notebook).
input_data_path = f'../data/fts/{mod_name}/'
output_data_path = f'../data/verification_fts/{mod_name}/'


In [4]:
# Forecast lead times 5, 10, ..., 120 (presumably minutes - confirm against the dataset)
pred_times = list(range(5, 125, 5))
# Lead times singled out for CRPS, and where each of them sits in pred_times
crps_times = [5, 15, 30, 60, 90, 120]
idx_crps_times = [idx for idx, lead in enumerate(pred_times) if lead in crps_times]

# Bounds of the spatial window selected from every file (height / width axes)
min_lat, max_lat = 0, 256
min_lon, max_lon = 0, 256

# Name of the pickle file that will hold the accumulated CRPS tables
output_fn = 'tables_verification_crps.pkl'

In [5]:
# Display the selected CRPS lead times next to their indices in pred_times
crps_times, idx_crps_times

([5, 15, 30, 60, 90, 120], [0, 2, 5, 11, 17, 23])

In [6]:
# Collect every forecast file (*.nc) of the selected model, in sorted order
file_list = sorted(glob(input_data_path + '*.nc'))
# Fail here with a clear message instead of an IndexError much later,
# when the per-file tables are merged and the list turns out to be empty.
if not file_list:
    raise FileNotFoundError(
        f"No .nc files found in {input_data_path!r}: "
        "download and unpack the FTS dataset before running this notebook."
    )
len(file_list)

197

In [7]:
def compute_crps_table_by_lead_time_for_file(fname):
    """
    Build one CRPS accumulation table per lead time for a single forecast file.

    The file is opened with xarray, cropped to the window given by the
    module-level ``min_lat``/``max_lat`` (height) and ``min_lon``/``max_lon``
    (width), and its ``ensemble`` and ``observation`` variables are converted
    to rain rate with ``reflectivity_to_rainrate``.

    Parameters
    ----------
    fname : str
        Path of the NetCDF file to process.

    Returns
    -------
    list
        One table created by ``CRPS_init`` and updated by ``CRPS_accum`` for
        each index along axis 1 of the ensemble array.
    """
    # Seeds NumPy's global RNG; nothing in this function visibly draws random
    # numbers, so this is kept only to leave the original behaviour untouched.
    np.random.seed(seed=44)
    # The context manager closes the file handle once the data has been read;
    # the arrays are materialised inside the block because xarray may load lazily.
    with xr.open_dataset(fname) as dataset:
        cropped = dataset.sel(height=slice(min_lat, max_lat), width=slice(min_lon, max_lon))
        ens = reflectivity_to_rainrate(cropped.ensemble.data)
        obs = reflectivity_to_rainrate(cropped.observation.data)
    # Assumes ens is (member, lead_time, ...) and obs is (lead_time, ...) - TODO confirm
    n_lead_times = ens.shape[1]
    tables = [CRPS_init() for _ in range(n_lead_times)]
    for i, table in enumerate(tables):
        CRPS_accum(table, ens[:, i], obs[i])
    return tables


In [8]:
# Compute the per-lead-time CRPS tables of every file in parallel.
# imap yields results in input order, so tables[k] belongs to file_list[k].
# The worker count keeps the original upper bound of 30 but never exceeds
# the number of CPUs reported for this machine.
n_workers = min(30, os.cpu_count() or 1)
with Pool(n_workers) as p:
    tables = list(tqdm(p.imap(compute_crps_table_by_lead_time_for_file, file_list), total=len(file_list)))

100%|██████████| 197/197 [02:03<00:00,  1.59it/s]


In [9]:
# Fold the tables of every other file into those of the first file,
# lead time by lead time, then keep only that accumulated list.
accumulated = tables[0]
for per_file_tables in tables[1:]:
    for lead_idx, lead_table in enumerate(per_file_tables):
        for key in ('CRPS_sum', 'n'):
            accumulated[lead_idx][key] += lead_table[key]
tables = accumulated

In [10]:
# Persist the accumulated CRPS tables as a pickle in the model's output directory
os.makedirs(output_data_path, exist_ok=True)
# The with-block closes the file even if pickling raises
with open(os.path.join(output_data_path, output_fn), "wb") as out_file:
    pickle.dump(tables, out_file)