# FFDI and Thresholding

This code is set up to process the initial bias_input_data into FFDI and to apply thresholding.

The FFDI code uses the zarr stores of the individual simulations, which requires rechunking.

Thresholding uses the FFDI files of the individual simulations

It is set up to provide both the average value for a GWL for a given RCM and the values for each year of a GWL.

In [10]:
import sys
import os

import intake
import xarray as xr
from matplotlib import pyplot as plt
import glob
import pathlib
import traceback
from datetime import datetime
from xclim.indices import (
    keetch_byram_drought_index,
    griffiths_drought_factor,
    mcarthur_forest_fire_danger_index
)
from dask.distributed import Client        
import dask
import warnings

# Needed for the GWL code
from importlib import reload
# adding folder to the system path
sys.path.insert(0, '/g/data/xv83/rxm599/acs/gwls')

import gwl

In [11]:
def setup_dask_client():
    """Apply the dask communication timeouts and create a distributed client.

    Returns:
        The ``Client`` instance, created with default arguments.
    """
    # Both the worker-connect timeout and the TCP timeout are raised to 90 s.
    comm_timeouts = {
        'distributed.comm.timeouts.connect': '90s',
        'distributed.comm.timeouts.tcp': '90s',
    }
    dask.config.set(comm_timeouts)
    return Client()

# Start Cluster
# setup_dask_client() applies the 90 s connect/TCP timeouts and creates the
# Client, so the configuration is defined in one place instead of being
# repeated here.
client = setup_dask_client()
#client

In [12]:
def extract_model_info(filepath):
    """Split a catalogue file path into the fields encoded in its file name.

    The file name (the text after the last ``/``) is cut at its first ``.``
    and the remaining stem is split on underscores. Fields are taken by
    position from that list.

    Args:
        filepath: Path whose final component has at least five
            underscore-separated fields before the first ``.``.

    Returns:
        A tuple ``([model_name, extension], RCM, GCM, ensemble, pathway)``:
        ``model_name`` is the stem before the first ``.``, ``extension`` is
        the text after the last ``.``, and ``RCM``, ``GCM``, ``pathway`` and
        ``ensemble`` are fields 1, 2, 3 and 4 of the stem respectively.

    Raises:
        IndexError: If the stem has fewer than five underscore-separated
            fields.
    """
    filename = filepath.split('/')[-1]
    # Everything before the first dot is treated as the model name.
    model_name = filename.split('.')[0]
    extension = filename.split('.')[-1]

    parts = model_name.split('_')
    # NOTE(review): in the store names used elsewhere in this file, field 1
    # looks like the institution (e.g. CSIRO, BOM) rather than the RCM --
    # confirm the intended meaning before relying on the name.
    RCM = parts[1]
    GCM = parts[2]      # e.g. EC-Earth3
    pathway = parts[3]  # e.g. ssp370
    ensemble = parts[4]  # e.g. r1i1p1f1

    return [model_name, extension], RCM, GCM, ensemble, pathway

In [13]:
def calculate_ffdi(pra, tasmaxa, pr_annual, hursmina, sfcWindmaxa):
    """Chain the three xclim fire indices and return the result as a dataset.

    The Keetch-Byram drought index feeds the Griffiths drought factor, which
    in turn feeds the McArthur forest fire danger index.

    Args:
        pra: First argument to both the drought index and the drought factor
            (presumably daily precipitation -- confirm against the caller).
        tasmaxa: Passed to the drought index and to the FFDI calculation
            (presumably daily maximum temperature -- confirm).
        pr_annual: Third argument to the drought index (presumably mean
            annual precipitation -- confirm).
        hursmina: Passed to the FFDI calculation (presumably daily minimum
            relative humidity -- confirm).
        sfcWindmaxa: Passed to the FFDI calculation (presumably daily maximum
            surface wind speed -- confirm).

    Returns:
        The FFDI result converted to a dataset with one variable, ``FFDI``.
    """
    drought_index = keetch_byram_drought_index(pra, tasmaxa, pr_annual)
    drought_factor = griffiths_drought_factor(pra, drought_index)
    ffdi = mcarthur_forest_fire_danger_index(
        drought_factor, tasmaxa, hursmina, sfcWindmaxa
    )
    return ffdi.to_dataset(name='FFDI')

In [14]:
def process_threshold(data, time_dim, syear, eyear, var, period):
    """Count the mean number of days per year above 50, 75 and 100.

    Args:
        data: Array to threshold; it must be selectable along ``time_dim``.
        time_dim: Name of the time dimension. It is used both for the period
            selection and for the reduction (the reduction previously
            hard-coded ``'time'``).
        syear: First year of the period (inclusive).
        eyear: Last year of the period (inclusive).
        var: Unused; kept so existing callers keep working.
        period: Label printed in the progress message.

    Returns:
        A tuple ``(dss, data)``: ``dss`` is a dataset with variables
        ``days50``, ``days75`` and ``days100`` (exceedance counts divided by
        the number of years in the period), and ``data`` is the persisted
        selection for ``syear``-01-01 to ``eyear``-12-31.
    """
    start = str(syear) + '-01-01'
    end = str(eyear) + '-12-31'
    # The divisor is the number of calendar years requested, not the number
    # of years actually present in the data.
    nyear = eyear - syear + 1
    inyear = 1. / nyear
    data = data.sel(**{time_dim: slice(start, end)}).persist()  # use file chunking .persist()
    print(f"Processing period {period}")

    # days over key thresholds (days/yr), merged in the order 50, 75, 100
    mean_counts = [
        (data > level).sum(time_dim).to_dataset(name=f'days{level}') * inyear
        for level in (50, 75, 100)
    ]
    dss = xr.merge(mean_counts)
    return dss, data

In [15]:
def process_thresholda(data, time_dim, syear, eyear, var, period):
    """Count days above 50, 75 and 100 as a period mean and per year.

    Args:
        data: Array to threshold; it must be selectable along ``time_dim``.
        time_dim: Name of the time dimension. It is used for the period
            selection, the yearly grouping and the reductions (the grouping
            and reductions previously hard-coded ``'time'``).
        syear: First year of the period (inclusive).
        eyear: Last year of the period (inclusive).
        var: Unused; kept so existing callers keep working.
        period: Label printed in the progress message.

    Returns:
        A tuple ``(dss, data, dsa)``: ``dss`` holds ``days50``, ``days75``
        and ``days100`` as exceedance counts divided by the number of years
        in the period; ``data`` is the persisted selection for
        ``syear``-01-01 to ``eyear``-12-31; ``dsa`` holds the same three
        variables as counts for each calendar year.
    """
    start = str(syear) + '-01-01'
    end = str(eyear) + '-12-31'
    # The divisor is the number of calendar years requested, not the number
    # of years actually present in the data.
    nyear = eyear - syear + 1
    inyear = 1. / nyear
    data = data.sel(**{time_dim: slice(start, end)}).persist()  # use file chunking .persist()
    print(f"Processing period {period}")

    levels = (50, 75, 100)
    year_key = f'{time_dim}.year'

    # days over key thresholds for every calendar year (days)
    yearly_counts = [
        (data > level).groupby(year_key).sum(time_dim).to_dataset(name=f'days{level}')
        for level in levels
    ]
    # days over key thresholds averaged over the period (days/yr)
    mean_counts = [
        (data > level).sum(time_dim).to_dataset(name=f'days{level}') * inyear
        for level in levels
    ]

    dss = xr.merge(mean_counts)
    dsa = xr.merge(yearly_counts)
    return dss, data, dsa

In [16]:
def process_GWL_levels(model_name, GCM, ensemble, pathway, ffdi_data):
    """Write FFDI threshold statistics for each global-warming level of a run.

    For every level in 1.2, 1.5, 2.0 and 3.0 the start and end year are
    looked up with ``gwl.get_GWL_syear_eyear``. Levels for which the lookup
    fails are reported and skipped. For the others, the period-mean and
    per-year threshold counts from ``process_thresholda`` are written to
    netCDF files under ``/scratch/xv83/rxm599/tmp``.

    Args:
        model_name: Stem of the FFDI zarr store, which is read from
            ``/scratch/xv83/rxm599/ffdi/<model_name>_FFDI.zarr``.
        GCM: Model name passed to the GWL lookup.
        ensemble: Ensemble member passed to the GWL lookup.
        pathway: Scenario passed to the GWL lookup.
        ffdi_data: Ignored. The FFDI data is always read from the zarr store;
            the parameter is kept so existing callers keep working.

    Returns:
        The period-mean threshold dataset of the last level in the list, or
        ``0`` if that level was skipped.
    """
    CMIP = 'CMIP6'
    GWL_levels = ['1.2', '1.5', '2.0', '3.0']
    ffdi_store = '/scratch/xv83/rxm599/ffdi/' + model_name + '_FFDI.zarr'

    ffdi = None  # opened on first use and shared by all levels
    d4 = 0
    for GWL in GWL_levels:
        print(f"Processing GWL {GWL} for model: {GCM}, ensemble: {ensemble}, pathway: {pathway}")
        try:
            start, end = gwl.get_GWL_syear_eyear(CMIP, GCM, ensemble, pathway, GWL=GWL)
        except Exception as err:
            # Skipping a level is deliberate, but report why, and let
            # KeyboardInterrupt / SystemExit propagate.
            print(f"No GWL data found for {GWL}")
            print(f"  reason: {type(err).__name__}: {err}")
            d4 = 0
            continue

        if ffdi is None:
            ffdi = xr.open_zarr(ffdi_store)

        d4, _, d4a = process_thresholda(ffdi.FFDI, 'time', start, end, 'FFDI', GWL)
        output_path = f'/scratch/xv83/rxm599/tmp/{model_name}_GWL{GWL}_ffdi_threshold.nc'
        output_path2 = f'/scratch/xv83/rxm599/tmp/{model_name}_GWL{GWL}_ffdi_thresholda.nc'

        # Report each file only after it has actually been written.
        d4.to_netcdf(output_path)
        print(f"Saved  to {output_path}")
        d4a.to_netcdf(output_path2)
        print(f"Saved  to {output_path2}")
    return d4

In [17]:
# Example code to read from command line if not interactive (just make the cell code)
def is_interactive():
    """Return True when the ``__main__`` module has no ``__file__`` attribute."""
    import __main__ as entry_module

    try:
        entry_module.__file__
    except AttributeError:
        return True
    return False


In [None]:
%%time
def main():
    """Run the GWL threshold processing for every ssp126 catalogue entry.

    Lists the ``*ssp126*.json`` files in the bias-output catalogue directory,
    starts a dask client, and calls ``process_GWL_levels`` for each file
    except the one at the skipped index. The client is closed even when a
    model raises.
    """
    warnings.filterwarnings('ignore')

    # get RCMs to process
    catalogue_path = '/g/data/ia39/catalogues/bias-output'
    mRuns = sorted(glob.glob(catalogue_path + '/*ssp126*.json'))
    # Historical runs would use the pattern '/*his*.json' instead.
    print(len(mRuns))

    # Position in the sorted list of the model that failed to convert.
    # NOTE(review): an index is fragile -- it shifts if files are added or
    # removed; consider matching on the file name instead.
    failed_index = 91

    # start dask client
    client = setup_dask_client()
    try:
        if is_interactive():
            print(f"Dashboard available at: {client.dashboard_link}")
        else:
            print(f"batch:  {client.dashboard_link}")

        print(mRuns)
        # iterate through all models
        for mindex, file in enumerate(mRuns):
            print(file)
            model_name, RCM, GCM, ensemble, pathway = extract_model_info(file)
            if mindex == failed_index:
                continue
            print(mindex)
            # The last argument is ignored by process_GWL_levels.
            process_GWL_levels(model_name[0], GCM, ensemble, pathway, 'FFDI')

        print("Processing of all catalogues is complete.")
    finally:
        # Release the client whether or not the loop finished.
        print(f"close client {client}")
        client.close()


if __name__ == "__main__":
    main()

In [None]:
# Exploratory cell: compares the mean of (hursmaxAdjust + hursminAdjust) / 2
# between 2015-2035 and 2055-2075 for one bias-corrected simulation.
# NOTE(review): in plain Python a bare `exit` is an expression with no
# effect; presumably it is meant to stop the notebook here -- confirm.
exit

#dd=xr.open_zarr('/scratch/xv83/rxm599/nobiascor/AGCD-05i_BOM_EC-Earth3_ssp370_r1i1p1f1_BARPA-R_v1-r1_day.zarr')
dd1=xr.open_zarr('/scratch/xv83/rxm599/biascor/AGCD-05i_CSIRO_EC-Earth3_ssp370_r1i1p1f1_CCAM-v2203-SN_v1-r1-ACS-QME-BARRA-R2-1980-2022_day.zarr')

# Bounds of the two periods being compared.
start1='2015'+'-01-01'
end1='2035'+'-12-31'
start2='2055'+'-01-01'
end2='2075'+'-12-31'
time_dim='time'
# Average of the two humidity variables in the store.
rhm=(dd1.hursmaxAdjust + dd1.hursminAdjust)*.5 
data1 = rhm.sel(**{time_dim: slice(start1, end1)}).persist() 
data2 = rhm.sel(**{time_dim: slice(start2, end2)}).persist() 
# Difference of the two selections; not used by the plots below.
data=data2-data1

# Later-period time mean minus earlier-period time mean.
(data2.mean('time')- data1.mean('time')).plot()

# Time mean of each period in its own panel: later period on top.
plt.subplot(2,1,1); data2.mean('time').plot()
plt.subplot(2,1,2); data1.mean('time').plot()


# Closes the module-level client created near the top of the file.
client.close()