In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Global path constants (plain Windows-style strings, built from the project root)
PROJECT = r"Z:\Laboral\World Bank\Paper - Child mortality and Climate Shocks"

# First-level folders under the project root
OUTPUTS = PROJECT + r"\Outputs"
DATA = PROJECT + r"\Data"

# Sub-folders of the data directory
DATA_IN = DATA + r"\Data_in"
DATA_PROC = DATA + r"\Data_proc"
DATA_OUT = DATA + r"\Data_out"

## Variable definitions

- Drought: Monthly cumulative precipitation **2.5 standard deviations below** the monthly historical average. Historical averages and standard deviations (SD) are estimated over 30 years.
- Excessive Rain: Monthly cumulative precipitation **2.5 standard deviations above** the monthly historical average, with historical averages and SD estimated over 30 years.
- Heatwave: More than 6 days with temperature **over 40 degrees** and 2.5 standard deviations higher than the monthly historical average, with historical averages and SD estimated over 30 years. Furthermore, the **warm spell** duration index is higher than 6 days.
- Coldwave: More than 6 days with temperature **below 0 degrees** and 2.5 standard deviations lower than the monthly historical average, with historical averages and SD estimated over 30 years. Furthermore, the **cold spell** duration index is higher than 6 days.

In [None]:
# Open the monthly precipitation NetCDF file (CRU TS 4.06, 1901-2021 per the file name)
precipitation_file = rf"{DATA_IN}\Climate Data\timeseries-pr-monthly-mean_cru_monthly_cru-ts4.06-timeseries_mean_1901-2021.nc"
precipitation_ds = xr.open_dataset(precipitation_file)

In [None]:
# Clean unused variables and dims.
# The cell overwrites `precipitation_ds`, so the clean-up is guarded on the
# presence of the "bnds" dimension: re-running the cell after it has already
# been applied is then a no-op instead of an error.
# `drop_vars` replaces the deprecated `Dataset.drop` for removing variables.
if "bnds" in precipitation_ds.dims:
    precipitation_ds = precipitation_ds.sel(bnds=0).drop_vars(["bnds", "lon_bnds", "lat_bnds"])

# Compute 3, 6 and 12 past months means.
# center=False makes each window end at the current month (trailing window).
# With xarray's default `min_periods`, the first window-1 time steps are NaN.
precipitation_ds_3 = precipitation_ds.rolling(time=3, center=False).mean()
precipitation_ds_6 = precipitation_ds.rolling(time=6, center=False).mean()
precipitation_ds_12 = precipitation_ds.rolling(time=12, center=False).mean()

In [None]:
# Standarize Variables
def standarize_monthly_dataset(ds, historical_timeframe=30, monthly_means=False, end_year=2020):
    """Return standardized anomalies of ``ds`` over a historical reference window.

    ``ds`` is first restricted along ``time`` to the window running from
    ``end_year - historical_timeframe`` through ``end_year``. The mean and
    standard deviation are estimated on that window, and the anomalies
    ``(x - mean) / std`` are returned for that same window only (time steps
    outside the window are not part of the result).

    Parameters
    ----------
    ds : xarray object with a ``time`` coordinate
        Data to standardize. Assumed to have a datetime-like ``time`` index
        so that year strings can be used as slice labels -- TODO confirm for
        new inputs.
    historical_timeframe : int, default 30
        Number of years subtracted from ``end_year`` to get the first year of
        the reference window.
    monthly_means : bool, default False
        If true, the mean and standard deviation are computed separately for
        each calendar month (grouping by ``time.month``); otherwise a single
        mean and standard deviation over the whole window are used.
    end_year : int, default 2020
        Last year of the reference window. The default reproduces the
        previously hard-coded value.

    Returns
    -------
    Same kind of xarray object as ``ds``, holding the standardized anomalies.
    """
    # Tiny offset added to the standard deviation so that series with zero
    # variance do not trigger a division by zero.
    epsilon = 0.000000001

    # NOTE(review): label-based slicing includes both end points, so the
    # default window 1990-2020 covers 31 calendar years, not 30. Left as is to
    # keep existing results unchanged -- confirm the intended window.
    start_year = end_year - historical_timeframe
    ds = ds.sel(time=slice(str(start_year), str(end_year)))

    if monthly_means:
        # One climatology per calendar month; the grouped object is passed on
        # so each time step is paired with the statistics of its own month.
        grouped = ds.groupby("time.month")
        climatology_mean = grouped.mean("time")
        climatology_std = grouped.std("time")
        ds = grouped
    else:
        climatology_mean = ds.mean("time")
        climatology_std = ds.std("time")

    def _standardize(values, mean, std):
        return (values - mean) / (std + epsilon)

    return xr.apply_ufunc(_standardize, ds, climatology_mean, climatology_std)

# Standardize the raw series and each rolling-mean series with the same settings:
# per-calendar-month statistics and a historical_timeframe of 30.
standardize_options = {"historical_timeframe": 30, "monthly_means": True}

std_precipitation = standarize_monthly_dataset(precipitation_ds, **standardize_options)
std_precipitation_3 = standarize_monthly_dataset(precipitation_ds_3, **standardize_options)
std_precipitation_6 = standarize_monthly_dataset(precipitation_ds_6, **standardize_options)
std_precipitation_12 = standarize_monthly_dataset(precipitation_ds_12, **standardize_options)

In [None]:
#### Construct indicators
# FIXME: heatdays/coldays are annual

# Name of the precipitation variable inside each standardized dataset
pr_variable = "timeseries-pr-monthly-mean"

# Pull that variable out of each dataset and give it a distinct name,
# so the four arrays can live side by side in one dataset.
precipitation = std_precipitation[pr_variable].rename("standarized_precipitation")
precipitation_3 = std_precipitation_3[pr_variable].rename("standarized_precipitation_3")
precipitation_6 = std_precipitation_6[pr_variable].rename("standarized_precipitation_6")
precipitation_12 = std_precipitation_12[pr_variable].rename("standarized_precipitation_12")

# Combine the named arrays into a single object aligned on their coordinates
climate_da = [
    precipitation,
    precipitation_3,
    precipitation_6,
    precipitation_12,
]
climate_data = xr.combine_by_coords(climate_da)

## Export data
output_file = rf"{DATA_OUT}/Climate_shocks_v2_previous_months.nc"
climate_data.to_netcdf(output_file)

In [None]:
# Load and display births_climate_20000.csv from the processed-data folder.
# Uses the DATA_PROC constant (same location as the previous hard-coded path)
# and relies on the pandas import in the notebook's first cell.
pd.read_csv(rf"{DATA_PROC}\births_climate_20000.csv")