# Drainage basin area average

In this notebook we calculate the fractional area average of Australian drainage basins for all ensemble members.

The goal is to calculate, for each basin, the ensemble median of the area-mean hazard value.

The resulting table is sorted by the ensemble-median hazard value, from highest to lowest.

In [1]:
# navigate to correct working directory

In [2]:
cd /g/data/mn51/users/gt3409/plotting_maps/

/g/data/mn51/users/gt3409/plotting_maps


In [3]:
# import needed packages
from acs_area_statistics import acs_regional_stats, regions
import xarray as xr
import geopandas as gpd
import regionmask
import cartopy.crs as ccrs
from glob import glob

In [4]:
# get the individual models for GWL30-GWL12 anomalies only
var = "TXx"

# One file per ensemble member: any model (first wildcard) and any
# ssp370-prefixed field (second wildcard) for the chosen variable.
file_pattern = (
    f"/g/data/ia39/ncra/heat/data/{var}/bias-corrected/individual_models/GWL-change/"
    f"{var}_AGCD-05i_*_ssp370*_v1-r1-ACS-QME-AGCD-1960-2022_GWL30-GWL12-change.nc"
)

# glob() returns matches in arbitrary, filesystem-dependent order. Sorting keeps
# filelist[0] and the order in which members are processed reproducible between runs.
filelist = sorted(glob(file_pattern))


In [5]:
# Drainage basin polygons: the areas to average across
basins_path = "/g/data/mn51/users/ah7841/NCBLevel2DrainageBasinGroup_gda2020_v01.gpkg"

gdf = (
    gpd.read_file(basins_path)
    # reproject the geometry to lat lon (from northings)
    .to_crs("EPSG:4326")
    # There are duplicated IDs: merge the geometries that share a HydroID
    .dissolve(by="HydroID")
    # dissolve() moves HydroID into the index; restore it as a regular column
    .reset_index()
)

# Build a regionmask object from the geopandas dataframe.
# names / abbrevs are the gdf columns used to label each region.
regions = regionmask.from_geopandas(
    gdf,
    names="Level2Name",
    abbrevs="HydroID",
    name="NCBLevel2DrainageBasinGroup_gda2020_v01",
    overlap=True,
)

  as_dt = pd.to_datetime(df[k], errors="ignore")
  as_dt = pd.to_datetime(df[k], errors="ignore")


In [6]:
# create your mask
# You can also use a keyword for the function to calculate the mask, but if you're performing multiple calculations, this can be slow.

In [None]:
%%time
# make sure you request more than 24GB memory
# calculate weighted mask this is a very slow part of the code and
# can be reused for any datasets using the same regions and the same lat lon
ds = xr.open_dataset(filelist[0], use_cftime = True,)
mask_frac = regions.mask_3D_frac_approx(ds)

In [None]:
# calculate the area averages for each drainage basin

dict_means = {}
for file in filelist:
    # Label each member with fields 5, 6, 2 and 4 of the file name
    # (split on "_"), joined with "-".
    name_parts = file.split("/")[-1].split("_")
    ensemble_member = "-".join(name_parts[i] for i in (5, 6, 2, 4))

    ds = xr.open_dataset(file, use_cftime = True,)
    # Add a length-one leading dimension carrying the member label so the
    # per-member results can be merged along "ensemble_member" afterwards.
    ds = ds.expand_dims(dim={"ensemble_member": [ensemble_member]}, axis=0)

    # NOTE(review): despite the name, df is combined with xr.merge below, so it
    # is presumably an xarray object rather than a pandas DataFrame - confirm.
    df = acs_regional_stats(ds=ds,
                            var=var,
                            mask=mask_frac,
                            dims=("lat", "lon",),
                            how=["mean"])
    # Round to two decimal places (eg for temperature anomalies)
    df[f"{var}_mean"] = df[f"{var}_mean"].round(2)

    dict_means[ensemble_member] = df

# merge each ensemble member into one xarray dataset
ds_means = xr.merge(list(dict_means.values()))

# calculate the ensemble median per region (before the median itself is added,
# so it is computed from the real members only)
ens_median = ds_means.median(dim="ensemble_member").expand_dims(dim={"ensemble_member": ["ensemble_median"]}, axis=0)
# then add it to the xr dataset
ds_means = xr.merge([ds_means, ens_median])

# present the data as a simple table
# ds_means already contains the ensemble median, so it is converted directly
# instead of being merged with ens_median a second time.
df_means = ds_means.to_dataframe()[f"{var}_mean"].unstack().T

# These column assignments align on the index.
# NOTE(review): assumes the row index of df_means matches the row index of gdf - confirm.
df_means["ID"] = gdf.HydroID
df_means["name"] = gdf.Level2Name
# Move the two label columns (appended last) to the front
df_means = df_means[["ID", "name"] + list(df_means.keys()[0:-2])]

# Rank regions by ensemble median
df_means_sorted = df_means.sort_values(by="ensemble_median", ascending=False)
df_means_sorted

In [None]:
# Write the ranked table of basin means to a CSV file
csv_path = (
    "/g/data/mn51/users/gt3409/plotting_maps/"
    f"{var}_NCBLevel2DrainageBasinGroup-gda2020-v01_MME_ssp370"
    "_v1-r1-ACS-QME-AGCD-1960-2022_GWL30-GWL12-change.csv"
)
df_means_sorted.to_csv(csv_path)
