# Drainage basin area average

In this notebook we calculate the fractional area average of Australian drainage basins for all ensemble members.

The outcome is to calculate the ensemble median of the mean hazard value for each basin.

The resulting table of basins is sorted by the ensemble-median hazard value, in descending order.

In [1]:
# navigate to correct working directory

In [2]:
cd /g/data/mn51/users/gt3409/plotting_maps/

/g/data/mn51/users/gt3409/plotting_maps


In [3]:
# import needed packages
from acs_area_statistics import acs_regional_stats, regions
import xarray as xr
import geopandas as gpd
import regionmask
import cartopy.crs as ccrs
from glob import glob

In [4]:
# Hazard variable to analyse; it selects both the data directory and the file names.
var = "TXx"

# Get the individual models for the GWL30-GWL12 change (anomalies) only.
# glob returns its matches in arbitrary order, so sort them: code that indexes
# into the list (e.g. filelist[0]) then picks the same file on every run.
filelist = sorted(glob(
    f"/g/data/ia39/ncra/heat/data/{var}/bias-corrected/individual_models/GWL-change/"
    f"{var}_AGCD-05i_*_ssp370*_v1-r1-ACS-QME-AGCD-1960-2022_GWL30-GWL12-change.nc"
))


In [5]:
# Load the polygons for the areas to average across, convert the geometry to
# lat/lon (from northings), and merge geometries that share a HydroID because
# the file contains duplicated IDs.
gdf = (
    gpd.read_file("/g/data/mn51/users/ah7841/NCBLevel2DrainageBasinGroup_gda2020_v01.gpkg")
    .to_crs("EPSG:4326")
    .dissolve(by="HydroID")
)

# Build the regionmask object from the geopandas dataframe. dissolve() moved
# HydroID into the index, so reset_index() turns it back into a column that
# `abbrevs` can refer to.
regions = regionmask.from_geopandas(
    gdf.reset_index(),
    names="Level2Name",
    abbrevs="HydroID",
    name="NCBLevel2DrainageBasinGroup_gda2020_v01",
    overlap=True,
)

  as_dt = pd.to_datetime(df[k], errors="ignore")
  as_dt = pd.to_datetime(df[k], errors="ignore")


In [6]:
# Create the mask up front so that it can be reused.
# The function can also calculate the mask itself when given a keyword, but that is slow if you are performing multiple calculations.

In [7]:
%%time
# calculate weighted mask this is a very slow part of the code and
# can be reused for any datasets using the same regions and the same lat lon
ds = xr.open_dataset(filelist[0], use_cftime = True,)
mask_frac = regions.mask_3D_frac_approx(ds)

CPU times: user 57 s, sys: 10 s, total: 1min 7s
Wall time: 1min 13s


In [8]:
# calculate the area averages for each drainage basin

dict_means = {}
for file in filelist:
    # Build the member label from fields 5, 6, 2 and 4 of the "_"-separated
    # file name (e.g. "BOM-BARPA-R-ACCESS-CM2-r4i1p1f1").
    name_parts = file.split("/")[-1].split("_")
    ensemble_member = "-".join(name_parts[i] for i in (5, 6, 2, 4))

    # Add a length-1 "ensemble_member" dimension so the per-member results
    # can be merged along it afterwards.
    ds = xr.open_dataset(file, use_cftime=True)
    ds = ds.expand_dims(dim={"ensemble_member": [ensemble_member]}, axis=0)

    # The regional statistics come back as an object that xr.merge accepts.
    dict_means[ensemble_member] = acs_regional_stats(
        ds=ds,
        var=var,
        mask=mask_frac,
        dims=("lat", "lon"),
        how=["mean"],
    )

# merge each ensemble member into one xarray dataset
ds_means = xr.merge(list(dict_means.values()))

# calculate the ensemble median per region and label it as an extra "member"
ens_median = ds_means.median(dim="ensemble_member").expand_dims(
    dim={"ensemble_member": ["ensemble_median"]}, axis=0
)
# then add it to the xr dataset
ds_means = xr.merge([ds_means, ens_median])

# Present the data as a simple table: one row per region, one column per member.
# ds_means already contains the median, so it is not merged in a second time.
df_means = ds_means.to_dataframe()[f"{var}_mean"].unstack().T

# Rank regions by ensemble median, largest first
df_means_sorted = df_means.sort_values(by="ensemble_median", ascending=False)

ensemble_member,BOM-BARPA-R-ACCESS-CM2-r4i1p1f1,BOM-BARPA-R-ACCESS-ESM1-5-r6i1p1f1,BOM-BARPA-R-CESM2-r11i1p1f1,BOM-BARPA-R-CMCC-ESM2-r1i1p1f1,BOM-BARPA-R-EC-Earth3-r1i1p1f1,BOM-BARPA-R-MPI-ESM1-2-HR-r1i1p1f1,BOM-BARPA-R-NorESM2-MM-r1i1p1f1,CSIRO-CCAM-v2203-SN-ACCESS-CM2-r4i1p1f1,CSIRO-CCAM-v2203-SN-ACCESS-ESM1-5-r6i1p1f1,CSIRO-CCAM-v2203-SN-CESM2-r11i1p1f1,CSIRO-CCAM-v2203-SN-CMCC-ESM2-r1i1p1f1,CSIRO-CCAM-v2203-SN-CNRM-ESM2-1-r1i1p1f2,CSIRO-CCAM-v2203-SN-EC-Earth3-r1i1p1f1,ensemble_median
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
113,2.777473,2.287747,2.826370,2.016964,2.199661,2.838504,3.085597,2.546352,2.604925,2.959156,1.796164,2.316178,1.858334,2.546352
112,2.899780,2.281566,2.716180,1.982643,2.127458,2.956862,3.072969,2.498098,2.379517,2.867806,1.903590,2.731863,2.130787,2.498098
131,2.550825,1.855355,3.128977,1.929803,2.223995,2.674351,2.795648,2.544012,2.496535,3.144141,1.795141,2.209961,1.858626,2.496535
119,2.651293,2.494416,2.960712,2.054279,2.037197,2.708307,2.795595,2.473882,2.254951,2.763330,1.454892,2.089825,1.576199,2.473882
111,2.466865,2.219083,3.016505,2.093789,2.031129,2.654076,2.736684,2.176746,2.674183,2.844550,2.044865,3.059814,2.130639,2.466865
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2,1.584196,1.236819,2.792983,1.173601,1.032565,1.673929,2.041197,0.513228,1.757747,1.271970,0.712468,0.948062,1.491762,1.271970
98,2.584032,1.049837,2.281479,1.205284,0.848929,2.242733,1.920146,1.215421,1.622323,1.140940,1.173360,1.426915,0.622041,1.215421
17,1.220248,1.597188,2.141106,0.892829,1.153510,1.462078,0.817142,0.143178,0.203784,1.989218,1.191936,0.270618,1.639675,1.191936
45,,,,,,,,,,,,,,


In [11]:
# Write the ranked table to CSV; the file name carries the variable name.
csv_path = f"/g/data/mn51/users/gt3409/plotting_maps/{var}_NCBLevel2DrainageBasinGroup-gda2020-v01_MME_ssp370_v1-r1-ACS-QME-AGCD-1960-2022_GWL30-GWL12-change.csv"
df_means_sorted.to_csv(csv_path)
