# Drainage basin area average

In this notebook we calculate, for every ensemble member, the area average of a hazard over each Australian drainage basin, weighting each grid cell by the fraction of the cell that lies inside the basin.

This notebook uses a custom region file (here a GeoPackage of drainage basins, read like a shapefile) to define the regions and to build a custom mask.

The result is, for each basin, the ensemble median of the basin-mean hazard value.

The basins are then ranked by this ensemble median, from highest to lowest.

In [1]:
# navigate to correct working directory

In [2]:
cd /g/data/mn51/users/gt3409/plotting_maps/

/g/data/mn51/users/gt3409/plotting_maps


In [3]:
# import needed packages
from acs_area_statistics import acs_regional_stats, get_regions
import xarray as xr
import geopandas as gpd
import regionmask
import cartopy.crs as ccrs
from glob import glob

# NOTE(review): `regions` is reassigned further down from the custom
# drainage-basin file before it is first used, so the "nrm_regions"/"australia"
# regions requested here appear to be unused in this notebook — confirm whether
# this call is still needed.
regions = get_regions(["nrm_regions", "australia"])

In [4]:
# get the individual models for GWL30-GWL12 anomalies only
var = "TXx"

# glob returns matches in arbitrary (filesystem-dependent) order. Sort them so
# that filelist[0] and the processing order are the same on every run.
filelist = sorted(glob(f"/g/data/ia39/ncra/heat/data/{var}/bias-corrected/individual_models/GWL-change/\
{var}_AGCD-05i_*_ssp370*_v1-r1-ACS-QME-AGCD-1960-2022_GWL30-GWL12-change.nc"))


In [5]:
# Load the polygons for the areas to average across, then in one chain:
#   1. convert the geometry to lat/lon (EPSG:4326) from northings
#   2. merge geometries that share a HydroID (there are duplicated IDs)
#   3. turn HydroID back into an ordinary column
gdf = (
    gpd.read_file("/g/data/mn51/users/ah7841/NCBLevel2DrainageBasinGroup_gda2020_v01.gpkg")
    .to_crs("EPSG:4326")
    .dissolve(by="HydroID")
    .reset_index()
)

# Build a regionmask object from the geopandas dataframe.
# For a different custom file, change `names`, `abbrevs` and `name` to suit.
regions = regionmask.from_geopandas(
    gdf,
    names="Level2Name",
    abbrevs="HydroID",
    name="NCBLevel2DrainageBasinGroup_gda2020_v01",
    overlap=True,
)

In [6]:
# Create your mask once, up front.
# The function can also calculate the mask itself via a keyword argument,
# but that can be slow if you are performing multiple calculations.

In [7]:
%%time
# You may bneed to request lots of memory
# For this example, make sure you request more than 24GB memory
# calculate weighted mask this is a very slow part of the code and
# can be reused for any datasets using the same regions and the same lat lon
ds = xr.open_dataset(filelist[0], use_cftime = True,)
mask_frac = regions.mask_3D_frac_approx(ds)

CPU times: user 57.7 s, sys: 37.2 s, total: 1min 34s
Wall time: 1min 38s


In [8]:
# calculate the area averages for each drainage basin

dict_means = {}
for file in filelist:
    # Build the ensemble-member label from "_"-separated fields of the file
    # name (fields 5, 6, 2 and 4, in that order),
    # e.g. "BOM-BARPA-R-ACCESS-CM2-r4i1p1f1"
    name_fields = file.split("/")[-1].split("_")
    ensemble_member = "-".join(name_fields[i] for i in (5, 6, 2, 4))

    # Open each file in a context manager so its handle is released once this
    # member is done, instead of leaving one open file per ensemble member.
    with xr.open_dataset(file, use_cftime=True) as ds_file:
        ds = ds_file.expand_dims(dim={"ensemble_member": [ensemble_member]}, axis=0)
        ds_member = acs_regional_stats(ds=ds,
                                       var=var,
                                       mask=mask_frac,
                                       dims=("lat", "lon"),
                                       how=["mean"])
        # Round to two decimal places (eg for temperature anomalies)
        ds_member[f"{var}_mean"] = ds_member[f"{var}_mean"].round(2)
        # Pull everything into memory so nothing refers to the file after it closes
        ds_member = ds_member.load()

    dict_means[ensemble_member] = ds_member

# merge each ensemble member into one xarray dataset
ds_means = xr.merge(list(dict_means.values()))

# calculate the ensemble median per region, labelled as an extra "member"
ens_median = ds_means.median(dim="ensemble_member").expand_dims(
    dim={"ensemble_member": ["ensemble_median"]}, axis=0
)
# then add it to the xr dataset (merged once; ds_means now includes the median)
ds_means = xr.merge([ds_means, ens_median])

# present the data as a simple table: one row per region, one column per member
df_means = ds_means.to_dataframe()[f"{var}_mean"].unstack().T

# Attach the basin ID and name. pandas aligns these on the index, so this
# assumes the region numbers match gdf's row index — TODO confirm if the
# regions are ever built with custom numbers.
df_means["ID"] = gdf.HydroID
df_means["name"] = gdf.Level2Name
# move the two new columns (currently last) to the front
member_columns = list(df_means.keys()[0:-2])
df_means = df_means[["ID", "name"] + member_columns]

# Rank regions by ensemble median (highest first; regions with no data sort last)
df_means_sorted = df_means.sort_values(by="ensemble_median", ascending=False)
df_means_sorted

ensemble_member,ID,name,BOM-BARPA-R-ACCESS-CM2-r4i1p1f1,BOM-BARPA-R-ACCESS-ESM1-5-r6i1p1f1,BOM-BARPA-R-CESM2-r11i1p1f1,BOM-BARPA-R-CMCC-ESM2-r1i1p1f1,BOM-BARPA-R-EC-Earth3-r1i1p1f1,BOM-BARPA-R-MPI-ESM1-2-HR-r1i1p1f1,BOM-BARPA-R-NorESM2-MM-r1i1p1f1,CSIRO-CCAM-v2203-SN-ACCESS-CM2-r4i1p1f1,CSIRO-CCAM-v2203-SN-ACCESS-ESM1-5-r6i1p1f1,CSIRO-CCAM-v2203-SN-CESM2-r11i1p1f1,CSIRO-CCAM-v2203-SN-CMCC-ESM2-r1i1p1f1,CSIRO-CCAM-v2203-SN-CNRM-ESM2-1-r1i1p1f2,CSIRO-CCAM-v2203-SN-EC-Earth3-r1i1p1f1,ensemble_median
region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
113,61116964,SALT LAKE,2.78,2.29,2.83,2.02,2.20,2.84,3.09,2.55,2.60,2.96,1.80,2.32,1.86,2.55
112,61116963,NULLARBOR,2.90,2.28,2.72,1.98,2.13,2.96,3.07,2.50,2.38,2.87,1.90,2.73,2.13,2.50
131,80908403,GASCOYNE RIVER,2.55,1.86,3.13,1.93,2.22,2.67,2.80,2.54,2.50,3.14,1.80,2.21,1.86,2.50
111,61116962,GAIRDNER,2.47,2.22,3.02,2.09,2.03,2.65,2.74,2.18,2.67,2.84,2.04,3.06,2.13,2.47
119,70411056,MOORE-HILL RIVERS,2.65,2.49,2.96,2.05,2.04,2.71,2.80,2.47,2.25,2.76,1.45,2.09,1.58,2.47
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2,12117853,WATER PARK CREEK,1.58,1.24,2.79,1.17,1.03,1.67,2.04,0.51,1.76,1.27,0.71,0.95,1.49,1.27
98,31286329,KING ISAND,2.58,1.05,2.28,1.21,0.85,2.24,1.92,1.22,1.62,1.14,1.17,1.43,0.62,1.22
17,12117868,STRADBROKE ISLAND,1.22,1.60,2.14,0.89,1.15,1.46,0.82,0.14,0.20,1.99,1.19,0.27,1.64,1.19
45,19999998,WHITSUNDAY ISLANDS,,,,,,,,,,,,,,


In [9]:
# Write the ranked table (basins ordered by ensemble median) to CSV
csv_path = f"/g/data/mn51/users/gt3409/plotting_maps/{var}_NCBLevel2DrainageBasinGroup-gda2020-v01_MME_ssp370_v1-r1-ACS-QME-AGCD-1960-2022_GWL30-GWL12-change.csv"
df_means_sorted.to_csv(csv_path)
