In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

# Set global variables
# The project root defaults to the original drive mapping. Set the
# CLIMATE_SHOCKS_PROJECT environment variable to point the notebook at a
# different root without editing this cell. Sub-folders are still joined with
# Windows separators, exactly as before.
PROJECT = os.environ.get(
    "CLIMATE_SHOCKS_PROJECT",
    r"Z:\Laboral\World Bank\Paper - Child mortality and Climate Shocks",
)
OUTPUTS = rf"{PROJECT}\Outputs"
DATA = rf"{PROJECT}\Data"
DATA_IN = rf"{DATA}\Data_in"      # raw inputs read by the loading cell
DATA_PROC = rf"{DATA}\Data_proc"
DATA_OUT = rf"{DATA}\Data_out"    # destination of the exported netCDF file

## Variable definitions

- Drought: Monthly cumulative precipitation **2.5 standard deviations below** the monthly historical average, using a 30-year period to estimate the historical average and standard deviation (SD).
- Excessive Rain: Monthly cumulative precipitation **2.5 standard deviations above** the monthly historical average, using a 30-year period to estimate the historical average and SD.
- Heatwave: More than 6 days with temperature **over 40 degrees** and 2.5 standard deviations higher than the monthly historical average, using a 30-year period to estimate the historical average and SD. In addition, the **warm spell** duration index must be higher than 6 days.
- Coldwave: More than 6 days with temperature **below 0 degrees** and 2.5 standard deviations lower than the monthly historical average, using a 30-year period to estimate the historical average and SD. In addition, the **cold spell** duration index must be higher than 6 days.
  ​

In [159]:
# Load the data
# One entry per input file; every file lives under DATA_IN\Climate Data.
_climate_files = {
    "precipitation": rf"{DATA_IN}\Climate Data\timeseries-pr-monthly-mean_cru_monthly_cru-ts4.06-timeseries_mean_1901-2021.nc",
    "frostdays": rf"{DATA_IN}\Climate Data\timeseries-fd-monthly-mean_era_monthly_era5-0.5x0.5-timeseries_mean_1950-2020.nc",
    "heatdays": rf"{DATA_IN}\Climate Data\timeseries-hd40-monthly-mean_era_monthly_era5-0.5x0.5-timeseries_mean_1950-2020.nc",
    "coldspell": rf"{DATA_IN}\Climate Data\timeseries-csdi-annual-mean_era_annual_era5-0.5x0.5-timeseries_mean_1950-2020.nc",
    "warmspell": rf"{DATA_IN}\Climate Data\timeseries-wsdi-annual-mean_era_annual_era5-0.5x0.5-timeseries_mean_1950-2020.nc",
}

# Open the files in the same order as they are listed above.
precipitation_ds = xr.open_dataset(_climate_files["precipitation"])
frostdays_ds = xr.open_dataset(_climate_files["frostdays"])
heatdays_ds = xr.open_dataset(_climate_files["heatdays"])
coldspell_ds = xr.open_dataset(_climate_files["coldspell"])
warmspell_ds = xr.open_dataset(_climate_files["warmspell"])

In [160]:
# Set datetimes to int
# Every listed variable is shifted by one day and then cast with astype(int),
# overwriting the variable inside its own dataset.
# NOTE(review): presumably these variables were decoded as timedeltas when the
# files were opened - confirm, and confirm that the integer unit and width
# produced by astype(int) are the intended ones.
for _dataset, _variable in (
    (frostdays_ds, "timeseries-fd-monthly-mean"),
    (heatdays_ds, "timeseries-hd40-monthly-mean"),
    (coldspell_ds, "timeseries-csdi-annual-mean"),
    (warmspell_ds, "timeseries-wsdi-annual-mean"),
):
    _shifted = _dataset[_variable] - np.timedelta64(1, 'D')
    _dataset[_variable] = _shifted.astype(int)

In [161]:
# Clean unused variables and dims
def _strip_bounds(ds):
    """Select entry 0 along ``bnds`` and drop the bounds-related variables.

    Uses ``drop_vars`` because dropping variables through ``Dataset.drop`` is
    deprecated. A missing name still raises, as it did before.
    """
    return ds.sel(bnds=0).drop_vars(["bnds", "lon_bnds", "lat_bnds"])


precipitation_ds = _strip_bounds(precipitation_ds)
frostdays_ds = _strip_bounds(frostdays_ds)
heatdays_ds = _strip_bounds(heatdays_ds)
# The annual spell indices are restricted to the years 1990-2020 first.
coldspell_ds = _strip_bounds(coldspell_ds.sel(year=slice(1990, 2020)))
warmspell_ds = _strip_bounds(warmspell_ds.sel(year=slice(1990, 2020)))

In [5]:
# Standardize variables
def standarize_monthly_dataset(ds, historical_timeframe=30, monthly_means=False, end_year=2020):
    """Convert a dataset to standardized anomalies over a reference window.

    The input is first restricted to the reference window. Every value is then
    turned into ``(x - mean) / (std + eps)``, where the mean and standard
    deviation are computed over that same window. Values outside the window
    are not part of the result.

    Parameters
    ----------
    ds
        Object with a ``time`` coordinate that supports ``sel``, ``mean``,
        ``std`` and ``groupby`` (an xarray Dataset in this notebook).
    historical_timeframe : int, default 30
        Number of calendar years in the reference window, counted backwards
        from ``end_year`` and including it.
    monthly_means : bool, default False
        If falsy, a single mean/std over the whole window is used. If truthy,
        mean and std are computed separately per ``time.month`` group and each
        value is compared with the statistics of its own month.
    end_year : int, default 2020
        Last calendar year of the reference window.

    Returns
    -------
    The output of ``xr.apply_ufunc`` applied to the windowed data. The callers
    in this notebook drop a ``month`` coordinate from the per-month variant.

    Raises
    ------
    ValueError
        If ``historical_timeframe`` is smaller than 1.
    """
    if historical_timeframe < 1:
        raise ValueError("historical_timeframe must be at least 1 year")

    # Label-based slicing includes both endpoints, so the first year has to be
    # end_year - historical_timeframe + 1 for the window to cover exactly
    # `historical_timeframe` calendar years (1991-2020 for the defaults).
    start_year = end_year - historical_timeframe + 1
    ds = ds.sel(time=slice(f"{start_year}", f"{end_year}"))

    if not monthly_means:
        climatology_mean = ds.mean("time")
        climatology_std = ds.std("time")
    else:
        # Build the month-of-year grouping once and reuse it for both statistics
        # and for the anomaly computation below.
        by_month = ds.groupby("time.month")
        climatology_mean = by_month.mean("time")
        climatology_std = by_month.std("time")
        ds = by_month

    # A tiny constant keeps the division finite where the std is exactly zero.
    eps = 0.000000001

    def _zscore(values, mean, std):
        return (values - mean) / (std + eps)

    stand_anomalies = xr.apply_ufunc(
        _zscore,
        ds,
        climatology_mean,
        climatology_std,
    )

    return stand_anomalies

# Anomalies relative to a single mean/std computed over the whole reference window.
std_precipitation = standarize_monthly_dataset(precipitation_ds, historical_timeframe=30, monthly_means=False)
std_heatdays = standarize_monthly_dataset(heatdays_ds, historical_timeframe=30, monthly_means=False)
std_frostdays = standarize_monthly_dataset(frostdays_ds, historical_timeframe=30, monthly_means=False)

# Anomalies relative to per-month statistics. The `month` coordinate is removed
# with drop_vars, since Dataset.drop is deprecated for dropping variables.
std_m_precipitation = standarize_monthly_dataset(precipitation_ds, historical_timeframe=30, monthly_means=True).drop_vars("month")
std_m_heatdays = standarize_monthly_dataset(heatdays_ds, historical_timeframe=30, monthly_means=True).drop_vars("month")
std_m_frostdays = standarize_monthly_dataset(frostdays_ds, historical_timeframe=30, monthly_means=True).drop_vars("month")

In [6]:
#### Construct indicators
# FIXME: heatdays/coldays are annual
# Only the two standardized precipitation series are combined and exported here.
_pr_variable = "timeseries-pr-monthly-mean"
precipitation = std_precipitation[_pr_variable].rename("standarized_precipitation")
precipitation_m = std_m_precipitation[_pr_variable].rename("standarized_precipitation_m")

# Combine the two renamed arrays by their coordinates.
climate_da = [precipitation, precipitation_m]
climate_data = xr.combine_by_coords(climate_da)

## Export data
_export_path = rf"{DATA_OUT}/Climate_shocks.nc"
climate_data.to_netcdf(_export_path)