In [1]:
import os
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm  # for notebooks
# Turn off pandas' chained-assignment warning (the library default is 'warn')
pd.set_option("mode.chained_assignment", None)
# Register tqdm's progress_* methods (e.g. progress_apply) on pandas objects
tqdm.pandas()

# Project folder layout (Windows-style paths); every location derives from PROJECT
PROJECT = r"Z:\Laboral\World Bank\Paper - Child mortality and Climate Shocks"
OUTPUTS = PROJECT + r"\Outputs"
DATA = PROJECT + r"\Data"
DATA_IN = DATA + r"\Data_in"      # raw inputs
DATA_PROC = DATA + r"\Data_proc"  # intermediate files
DATA_OUT = DATA + r"\Data_out"    # final outputs

## Function to assign climate shocks from a date and a point

In [20]:
# Open the climate-shock NetCDF file and keep its time coordinate as a NumPy array
climate_data = xr.open_dataset(rf"{DATA_OUT}/Climate_shocks.nc")
dates = climate_data["time"].values

In [None]:
def get_climate_shock(from_date, to_date, lat, lon):
    """Summarise standardized precipitation around one birth at one location.

    The global ``climate_data`` is first cut to the time slice
    ``from_date``..``to_date`` and then to the grid cell nearest to
    ``(lat, lon)``. The resulting series is split *by position* along time
    into eight consecutive windows; for each window the maximum and minimum
    of ``standarized_precipitation`` and ``standarized_precipitation_m`` are
    taken.

    Parameters
    ----------
    from_date, to_date
        Bounds of the time slice. The positional windows presumably assume
        monthly data starting 9 months before birth -- verify against the
        dataset and the caller.
    lat, lon
        Coordinates of the point to look up.

    Returns
    -------
    float or tuple
        ``np.nan`` when ``from_date`` is missing. Otherwise a 34-element
        tuple: the latitude and longitude of the matched grid cell, followed
        by four groups of eight values (one per window, in window order):
        ``standarized_precipitation`` max, ``standarized_precipitation`` min,
        ``standarized_precipitation_m`` max, ``standarized_precipitation_m`` min.
    """
    if pd.isna(from_date):
        return np.nan

    # Restrict the time range first, then snap to the nearest grid cell
    in_range = climate_data.sel(time=slice(from_date, to_date))
    cell = in_range.sel(lat=lat, lon=lon, method='nearest')

    # (start, stop) positions along the time axis, in output order
    window_bounds = (
        (0, 3),    # inutero_q1
        (3, 6),    # inutero_q2
        (6, 9),    # inutero_q3
        (9, 10),   # born_1m
        (10, 12),  # born_2to3m
        (12, 15),  # born_3to6m
        (15, 18),  # born_6to9m
        (18, 21),  # born_9to12m
    )
    windows = [cell.isel(time=slice(start, stop)) for start, stop in window_bounds]

    # Coordinates of the grid cell that was actually matched come first
    stats = [cell.lat.item(), cell.lon.item()]
    for variable in ("standarized_precipitation", "standarized_precipitation_m"):
        for reducer in ("max", "min"):
            stats.extend(
                getattr(window[variable], reducer)().item() for window in windows
            )
    return tuple(stats)


In [None]:
def get_climate_shock_old(from_date, to_date, lat, lon):
    """Return precipitation extremes over the whole date range for one point.

    The global ``climate_data`` is cut to the time slice
    ``from_date``..``to_date`` and to the grid cell nearest to ``(lat, lon)``.

    Returns ``np.nan`` when ``from_date`` is missing; otherwise the tuple
    ``(lat, lon, max_prec, min_prec, max_prec_m, min_prec_m)``, where
    ``lat``/``lon`` are the coordinates of the matched grid cell and the
    ``_m`` values come from ``standarized_precipitation_m``.
    """
    if pd.isna(from_date):
        return np.nan

    # Time range first, then the nearest grid cell
    cell = climate_data.sel(time=slice(from_date, to_date)).sel(lat=lat, lon=lon, method='nearest')

    prec = cell["standarized_precipitation"]
    prec_m = cell["standarized_precipitation_m"]

    return (
        cell.lat.item(),
        cell.lon.item(),
        prec.max().item(),
        prec.min().item(),
        prec_m.max().item(),
        prec_m.min().item(),
    )

In [None]:
%load_ext line_profiler
def get_climate_shock_prof():
    """Call ``get_climate_shock`` once with a random date and a random point.

    Helper for profiling/timing: the result of the lookup is discarded and
    nothing is returned.
    """
    # Leave out the first and last 12 entries of `dates` when drawing the date
    birth = np.random.choice(dates[12:-12])
    window_start = birth + pd.DateOffset(months=-9)
    window_end = birth + pd.DateOffset(years=1)

    rand_lat = np.random.uniform(-90, 90)
    rand_lon = np.random.uniform(-180, 180)

    get_climate_shock(window_start, window_end, rand_lat, rand_lon)

In [None]:
# Line-by-line profile of one call to the wrapper (needs the line_profiler extension loaded earlier).
# NOTE(review): `-f` names the function whose lines are timed, so this reports the wrapper only;
# add `-f get_climate_shock` to see inside the lookup itself -- confirm intent.
%lprun -f get_climate_shock_prof get_climate_shock_prof()

In [None]:
# Time repeated runs of one random-date / random-point lookup
%timeit get_climate_shock_prof()

## Open DHS data and add the shock data to the dataframe

In [64]:
# Load the DHS births file and keep the original row label as an explicit ID column
df = pd.read_stata(rf"{DATA_IN}/DHS/DHSBirthsGlobalAnalysis_04172024.dta").assign(
    ID=lambda frame: frame.index
)

### Create date variables
We consider an exposure window that runs from 9 months before birth (in utero) through the first year of life.

In [None]:
# Birth date: first day of the reported birth month and year
df["day"] = 1
df["month"] = df["chb_month"].astype(float)
df["year"] = df["chb_year"].astype(float)
df["birthdate"] = pd.to_datetime(df[["year", "month", "day"]]).to_numpy()

# Widest exposure window considered for each child
nine_months_back = pd.DateOffset(months=-9)  # start of the in-utero period
one_year_on = pd.DateOffset(years=1)         # end of the first year of life
df["from_date"] = df["birthdate"] + nine_months_back
df["to_date"] = df["birthdate"] + one_year_on

# Disabled: shorten the window for children who died before 12 months
# # Construct deathdate variable
# df["deathdate"] = df[df["child_agedeath"]<=12].progress_apply(lambda x: x["birthdate"] + pd.DateOffset(months=x["child_agedeath"]), axis=1)

# # Replace to_date with deathdate if the child died
# df["to_date"] = np.where((df["child_agedeath"]<=12) & (df["deathdate"]<df["to_date"]), df["deathdate"], df["to_date"])

# Keep only children whose whole window is covered by the climate data:
# from_date later than 1990-01-01 (climate data starts in 1990) and
# to_date earlier than 2021-01-01 (climate data ends in 2020)
in_climate_range = (df["from_date"] > "1990-01-01") & (df["to_date"] < "2021-01-01")
df = df[in_climate_range]

In [None]:
# Inspect the derived date columns next to the reported age at death
df[["day", "month", "year", "birthdate", "from_date", "to_date", "child_agedeath"]]

In [None]:
# Display the frame after the date filters
df

In [None]:
# Column names for the tuple returned by get_climate_shock. The order must match
# that tuple: matched lat/lon, then for each statistic the eight windows in order.
shock_windows = ["inutero_q1", "inutero_q2", "inutero_q3", "born_1m", "born_2to3m", "born_3to6m", "born_6to9m", "born_9to12m"]
climate_cols = ["lat_climate", "lon_climate"] + [
    f"prec_{window}{suffix}"
    for suffix in ("_max", "_min", "_m_max", "_m_min")
    for window in shock_windows
]

# Rows are processed in blocks of IDs and each block is saved to its own CSV,
# so an interrupted run can resume by skipping the files that already exist.
CHUNK_SIZE = 10_000

# `+ 1` because range() excludes its stop value: without it the row holding the
# largest ID is never processed when that ID is an exact multiple of CHUNK_SIZE.
for n in tqdm(range(0, df.ID.max() + 1, CHUNK_SIZE)):
    chunk_path = rf"{DATA_PROC}/births_climate_{n}.csv"
    if os.path.exists(chunk_path):
        print(f"births_climate_{n}.csv exists, moving to next iteration")
        continue

    chunk = df.loc[(df.ID >= n) & (df.ID < n + CHUNK_SIZE), ['ID', 'from_date', 'to_date', 'LATNUM', 'LONGNUM']].copy()
    if chunk.empty:
        # No surviving rows in this ID range (IDs are sparse after filtering)
        continue

    # One lookup per row; each result is a tuple that is then spread into columns
    climate_results = chunk[['from_date', 'to_date', 'LATNUM', 'LONGNUM']].apply(
        lambda s: get_climate_shock(s['from_date'], s['to_date'], s['LATNUM'], s['LONGNUM']),
        axis=1,
    )
    climate_results = climate_results.apply(pd.Series)
    climate_results.columns = climate_cols
    climate_results["ID"] = chunk["ID"]
    climate_results.to_csv(chunk_path)

In [None]:
# # For debugging
# chunk[['from_date', 'to_date', 'LATNUM', 'LONGNUM']].progress_apply(lambda s: get_climate_shock(s['from_date'], s['to_date'], s['LATNUM'], s['LONGNUM']), axis=1)

In [None]:
# Gather every per-chunk result file written to DATA_PROC and stack them.
# NOTE: `df` is rebound here -- from this point on it holds the stacked chunk
# results rather than the DHS frame loaded earlier.
files = [f for f in os.listdir(rf"{DATA_PROC}") if f.startswith("births_climate_")]
data = [pd.read_csv(rf"{DATA_PROC}/{file}") for file in tqdm(files)]
df = pd.concat(data)

In [None]:
# "Unnamed: 0" is the index column that read_csv picks up from the chunk files.
# errors="ignore" keeps this cell re-runnable: on a second run the column is
# already gone and a plain drop would raise KeyError.
df = df.drop(columns="Unnamed: 0", errors="ignore")
df.to_stata(rf"{DATA_PROC}\ClimateShocks_assigned.dta")

In [None]:
# Visual check: full standardized-precipitation series at the grid cell nearest
# to one row's coordinates (pyplot is already imported in the first cell).
# pos = np.random.randint(0, 500000)  # uncomment to inspect a random row instead
pos = 428380
# NOTE(review): this reads LATNUM/LONGNUM from `df`. When the notebook is run top
# to bottom, `df` has by now been rebound to the stacked chunk results, which were
# written with only the climate columns and ID -- confirm which frame is intended.
filtered2 = climate_data.standarized_precipitation.sel(lat=df.at[pos, "LATNUM"], lon=df.at[pos, "LONGNUM"], method="nearest")
filtered2.plot(figsize=(12, 2))

# Reference lines at +2 and -2
plt.axhline(2, color="red")
plt.axhline(-2, color="red")



In [226]:
# NOTE(review): the stored output of this cell is a single DataFrame, whereas the
# visible cell that builds `data` makes it a list of DataFrames -- the output
# presumably comes from an earlier kernel state; re-run to confirm.
data

Unnamed: 0,v000,v001,v002,v003,v007,...,mother_ageb_squ,mother_ageb_cub,mother_eduy_squ,mother_eduy_cub,birth_order
0,AL5,1,1,3,2008,...,560.111145,13255.964844,256.0,4096.0,1.0
1,AL5,1,1,3,2008,...,774.694397,21562.326172,256.0,4096.0,2.0
2,AL5,1,10,4,2008,...,423.673645,8720.616211,64.0,512.0,1.0
3,AL5,1,10,4,2008,...,604.340332,14856.699219,64.0,512.0,2.0
4,AL5,1,12,3,2008,...,654.506897,16744.466797,144.0,1728.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...
4780579,ZW7,99,8,3,2015,...,348.444427,6504.295410,100.0,1000.0,2.0
4780580,ZW7,99,9,1,2015,...,280.562500,4699.421875,64.0,512.0,1.0
4780581,ZW7,99,9,1,2015,...,280.562500,4699.421875,64.0,512.0,2.0
4780582,ZW7,99,9,1,2015,...,413.444458,8406.704102,64.0,512.0,3.0
