# Compute the Budyko precipitation recycling ratio

In [63]:
import os
import glob
import netCDF4 as nc

import matplotlib as mpl
import matplotlib.colors as colors
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib import cm
import cartopy as cart
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.mpl.ticker as cticker
from cartopy.util import add_cyclic_point
import seaborn as sns

import math
import numpy as np
import xarray as xr
import cftime
import pandas as pd

# Regridding
# import xesmf as xe


In [64]:
def find_dl(lat, lons):
    """
    Return the zonal length (m) of one grid cell at latitude ``lat``.

    The length of the parallel at ``lat`` (Earth's circumference multiplied
    by cos(lat)) is divided by the number of longitude points in ``lons``.

    Parameters
    ----------
    lat : float
        Latitude in degrees; only its absolute value is used.
    lons : scalar or sequence
        Longitudes along the parallel. A sequence contributes its length as
        the number of cells. A scalar counts as one cell, so the length of
        the whole parallel is returned.

    Returns
    -------
    float
        Length in metres.

    Raises
    ------
    ValueError
        If ``lons`` is an empty sequence.
    """
    earth_circ = 40075 * 10**3  # Earth's circumference in m

    # Count the longitudes themselves: len([lons]) is 1 whatever lons holds,
    # which would make every "cell" as long as the whole parallel.
    lon_shape = 1 if np.ndim(lons) == 0 else len(lons)
    if lon_shape == 0:
        raise ValueError("lons must contain at least one longitude")

    lat_rad = np.radians(abs(lat))
    return math.cos(lat_rad) * (earth_circ / lon_shape)  # length of pixel in m

def lon180(ds):
    """
    Remap the ``lon`` coordinate of ``ds`` with ``(lon + 180) % 360 - 180``
    and return the object sorted by the remapped longitude.

    The coordinate is overwritten on ``ds`` itself before sorting, so the
    object passed in is modified as well.
    """
    wrapped_lon = (ds.coords['lon'] + 180) % 360 - 180
    ds.coords['lon'] = wrapped_lon
    return ds.sortby(ds.lon)

def get_pixel_size(lat, lon):
    """
    Return one area value (m2) per latitude for a regular lat/lon grid.

    Entry ``k`` is the area of the spherical band between latitudes ``k`` and
    ``k + 1`` (taken in ascending order) divided by ``len(lon)``.

    Notes
    -----
    * Descending ``lat`` is reversed first; the result follows the ascending
      order and is not flipped back.
    * The last entry has no upper neighbour and stays NaN, unless the first
      and last latitudes mirror each other, in which case it copies entry 0.
    """
    ascending = lat[::-1] if lat[0] > lat[-1] else lat

    earth_radius = 6.371 * 1e6          # (m)
    deg2rad = (2 * math.pi / 360)       # degrees -> radians
    areas = np.full(len(ascending), np.nan)   # (m2)

    sines = [math.sin(deg2rad * phi) for phi in ascending]
    for k, (south, north) in enumerate(zip(sines[:-1], sines[1:])):
        areas[k] = (2 * math.pi * (1 / len(lon)) *
                    earth_radius**2 * (north - south))

    # Symmetric grid: reuse the southernmost band for the northernmost row
    if ascending[0] == -ascending[-1]:
        areas[-1] = areas[0]
    return areas

In [65]:
# Root directory of the per-model NetCDF files; paths below are built as
# <data_path>/<model>/<file name>
data_path = "F:/Data/LUMIP"


In [66]:
model = "ACCESS-ESM1-5"

# ssp370 run, opened lazily in 258-step time chunks:
#   q1 = "hus" specific humidity (kg/kg)
#   u1 = "ua"  zonal wind (m/s)
#   v1 = "va"  meridional wind (m/s)
q1, u1, v1 = (
    xr.open_dataset(
        f"{data_path}/{model}/{var}_Amon_{model}_ssp370_r1i1p1f1_gn_201501-210012.nc",
        chunks={"time": 258},
    )[var]
    for var in ("hus", "ua", "va")
)


FileNotFoundError: [Errno 2] No such file or directory: b'F:\\Data\\LUMIP\\CanESM5\\hus_Amon_CanESM5_ssp370_r1i1p1f1_gn_201501-210012.nc'

In [None]:
# ssp370-ssp126Lu run, opened lazily in 258-step time chunks:
#   q2 = "hus" specific humidity (kg/kg)
#   u2 = "ua"  zonal wind (m/s)
#   v2 = "va"  meridional wind (m/s)
q2, u2, v2 = (
    xr.open_dataset(
        f"{data_path}/{model}/{var}_Amon_{model}_ssp370-ssp126Lu_r1i1p1f1_gn_201501-210012.nc",
        chunks={"time": 258},
    )[var]
    for var in ("hus", "ua", "va")
)

In [None]:
data_path = "G:/My Drive/MPIM/data/"

# "treeFrac" from the ssp126Lu file (-> xr_aff_pft) and the ssp370 file
# (-> xr_ctl_pft); the bounds variables are dropped on read.
_tree_frac = []
for scenario in ("ssp126Lu", "ssp370"):
    # The pattern holds no wildcard, so glob returns the file itself (or an
    # empty list, in which case [0] raises IndexError).
    filepath = glob.glob(f"{data_path}xr_{scenario}_{model}_pft.nc")[0]
    _tree_frac.append(
        xr.open_dataset(
            filepath,
            drop_variables=["time_bnds", "lon_bnds", "lat_bnds"],
            engine='netcdf4',
            chunks={"time": 258},
        )["treeFrac"]
    )
xr_aff_pft, xr_ctl_pft = _tree_frac


In [None]:
data_path = "G:/My Drive/MPIM/data/"

# "evspsbl" from the ssp126Lu file (-> xr_aff) and the ssp370 file
# (-> xr_ctl); the bounds variables are dropped on read.
_evap = []
for scenario in ("ssp126Lu", "ssp370"):
    # The pattern holds no wildcard, so glob returns the file itself (or an
    # empty list, in which case [0] raises IndexError).
    filepath = glob.glob(f"{data_path}xr_{scenario}_{model}.nc")[0]
    _evap.append(
        xr.open_dataset(
            filepath,
            drop_variables=["time_bnds", "lon_bnds", "lat_bnds"],
            engine='netcdf4',
            chunks={"time": 258},
        )["evspsbl"]
    )
xr_aff, xr_ctl = _evap


In [None]:
# Difference of the 2071-2100 time-mean tree fraction: xr_aff_pft minus xr_ctl_pft
_late_period = slice("2071-01", "2100-12")
_aff_mean = xr_aff_pft.sel(time=_late_period).mean(dim="time")
_ctl_mean = xr_ctl_pft.sel(time=_late_period).mean(dim="time")
xr_delta_pft = _aff_mean - _ctl_mean

# Keep only cells where the difference exceeds 10; all other cells become NaN
# (presumably treeFrac is in percent, i.e. a 10-point increase; confirm)
treefrac_pos = xr_delta_pft.where(xr_delta_pft > 10)

In [None]:
# Evapotranspiration difference, xr_aff minus xr_ctl (kg m-2 s-1)
et = xr_aff - xr_ctl

In [None]:
# Overwrite the plev coordinate of each *2 field with that of its *1
# counterpart (presumably so the subtraction that follows aligns on identical
# level values; confirm).
q2, u2, v2 = (
    experiment.assign_coords(plev=control.plev)
    for experiment, control in ((q2, q1), (u2, u1), (v2, v1))
)

In [None]:
# Differences between the two runs (*2 minus *1), keeping latitude labels
# from -61 to 90 only
_lat_band = slice(-61, 90)
q = (q2 - q1).sel(lat=_lat_band)
u = (u2 - u1).sel(lat=_lat_band)
v = (v2 - v1).sel(lat=_lat_band)

# Replace the horizontal coordinates of the tree-fraction mask, and the
# horizontal and time coordinates of et, with those of q
treefrac_pos = treefrac_pos.assign_coords(lat=q.lat, lon=q.lon)
et = et.assign_coords(lat=q.lat, lon=q.lon, time=q.time)


In [None]:
# The three fields must share the same dimensions before they are multiplied
if not (q.dims == u.dims == v.dims):
    raise ValueError(f"q, u and v have different dimensions: {q.dims}, {u.dims}, {v.dims}")

# Pressure levels (Pa)
pressure = q.coords['plev']

# Zonal (qx) and meridional (qy) moisture flux on each pressure level
# (specific humidity times wind)
qx = q * u
qy = q * v

# Layer thickness for the vertical sum: each level is weighted by the spacing
# to the next level, and the last level reuses the previous spacing.
# The absolute value keeps the weights positive whichever way plev is ordered;
# with pressure listed from high to low (presumably the case for these files;
# confirm) a signed np.diff would flip the sign of the integrated fluxes.
dp_values = np.abs(np.diff(pressure.values))
dp_values = np.append(dp_values, dp_values[-1])

# Attach the weights to the plev dimension so that the product broadcasts by
# dimension name instead of relying on plev being the second axis
dp = xr.DataArray(dp_values, coords={"plev": pressure.values}, dims="plev")

# Vertical integration: weight each level, sum over plev and divide by the
# gravitational acceleration (m s-2), giving kg m-1 s-1
qu = (qx * dp).sum(dim='plev') / 9.81
qv = (qy * dp).sum(dim='plev') / 9.81

# Resulting vertically integrated water vapor fluxes
print(qu)
print(qv)


<xarray.DataArray (time: 1032, lat: 121, lon: 192)>
dask.array<truediv, shape=(1032, 121, 192), dtype=float64, chunksize=(258, 121, 192), chunktype=numpy.ndarray>
Coordinates:
  * time     (time) datetime64[ns] 2015-01-16T12:00:00 ... 2100-12-16T12:00:00
  * lat      (lat) float64 -60.62 -59.38 -58.12 -56.88 ... 86.88 88.12 89.38
  * lon      (lon) float64 0.9375 2.812 4.688 6.562 ... 353.4 355.3 357.2 359.1
<xarray.DataArray (time: 1032, lat: 121, lon: 192)>
dask.array<truediv, shape=(1032, 121, 192), dtype=float64, chunksize=(258, 121, 192), chunktype=numpy.ndarray>
Coordinates:
  * time     (time) datetime64[ns] 2015-01-16T12:00:00 ... 2100-12-16T12:00:00
  * lat      (lat) float64 -60.62 -59.38 -58.12 -56.88 ... 86.88 88.12 89.38
  * lon      (lon) float64 0.9375 2.812 4.688 6.562 ... 353.4 355.3 357.2 359.1


In [None]:
# Per-latitude cell area from get_pixel_size, repeated along longitude to
# give a (lat, lon) grid
_band_area = get_pixel_size(qu.lat.values, qu.lon.values)
pixel_size_grid = np.tile(_band_area[:, np.newaxis], (1, len(qu.lon)))

In [None]:
# Evaluate the lazily defined arrays so the code that follows works on
# in-memory data
qu, qv, et = (field.compute() for field in (qu, qv, et))

In [None]:
# Cells to process: the non-NaN entries of treefrac_pos, addressed through a
# stacked (lon, lat) "cell" index
_tree_stack = treefrac_pos.stack(cell=["lon", "lat"])
cells = _tree_stack.where(_tree_stack.notnull(), drop=True).cell
grids = cells.values

# Grid spacing, taken from the first two coordinate values
res_y = (treefrac_pos.lat[1] - treefrac_pos.lat[0]).values
res_x = (treefrac_pos.lon[1] - treefrac_pos.lon[0]).values

# Same stacking for the fluxes and the evapotranspiration
qu_stack, qv_stack, et_stack = (
    field.stack(cell=["lon", "lat"]) for field in (qu, qv, et)
)

In [None]:
# Result container. It must exist before variables are assigned into it;
# without this line a fresh run stops with a NameError.
xr_B = xr.Dataset()

# NaN-filled (time, cell) arrays; only the selected cells are filled in below.
# Starting from float NaN (rather than None/object) needs no dtype cast later.
xr_B["B"] = xr.DataArray(data=np.nan, coords=[qu_stack.time, qu_stack.cell], dims=["time", "cell"])
xr_B["Rr"] = xr.DataArray(data=np.nan, coords=[qu_stack.time, qu_stack.cell], dims=["time", "cell"])

# Loop-invariant coordinate values
lon_values = qu.lon.values
lat_values = qu.lat.values

for c, g in zip(cells, grids):
    locator = {'cell': c}
    cell_lon, cell_lat = g[0], g[1]

    # Lon & lat indices of the cell in the unstacked grid
    ilon = np.where(lon_values == cell_lon)[0][0]
    ilat = np.where(lat_values == cell_lat)[0][0]

    # Cell edges
    lonmin = cell_lon - res_x/2
    lonmax = cell_lon + res_x/2
    latmin = cell_lat - res_y/2
    latmax = cell_lat + res_y/2

    # Length (m) assigned to the E, W, N and S sides of the cell.
    # NOTE(review): find_dl receives a single longitude here, so it returns
    # the length of the whole parallel at that latitude rather than the
    # length of one cell side - confirm this is intended.
    dl_e = find_dl(np.mean((latmin, latmax)), lonmax)
    dl_w = find_dl(np.mean((latmin, latmax)), lonmin)
    dl_n = find_dl(latmax, np.mean((lonmin, lonmax)))
    dl_s = find_dl(latmin, np.mean((lonmin, lonmax)))

    # Moisture flux across each side (kg s-1): vertically integrated flux
    # (kg m-1 s-1) times side length (m), signed so that flow into the cell
    # is positive
    inflow_from_E = -dl_e*qu_stack.loc[locator]
    inflow_from_W = dl_w*qu_stack.loc[locator]
    inflow_from_N = -dl_n*qv_stack.loc[locator]
    inflow_from_S = dl_s*qv_stack.loc[locator]

    # Total flux into the cell: only the positive (inward) part of each side
    # counts; outward flow and NaN contribute zero
    flux_in = xr.zeros_like(inflow_from_E)
    for inflow in (inflow_from_E, inflow_from_W, inflow_from_N, inflow_from_S):
        flux_in = flux_in + inflow.where(inflow > 0, 0)

    # Budyko recycling coefficient: B = 1 + E*A / (2*F_in), Rr = 1 - 1/B
    A = pixel_size_grid[ilat, ilon]
    EA = et_stack.loc[locator] * A

    # Where flux_in is zero the division yields inf or NaN (numpy semantics)
    B = 1 + (EA.values/(2*flux_in.values))
    Rr = 1 - (1/B)

    xr_B["B"].loc[locator] = B
    xr_B["Rr"].loc[locator] = Rr

# Back from the stacked "cell" index to separate lon / lat dimensions
xr_B = xr_B.unstack()

# Write the xr_B dataset (B and Rr) to disk as a pickle, one file per model
import pickle

data_path = "G:/My Drive/MPIM/data/"
_out_file = f"{data_path}budyko_pr_{model}"

with open(_out_file, "wb") as fp:
    pickle.dump(xr_B, fp)