In [29]:
import os

# Locate the repository checkout from the REPO_DIR environment variable.
repo_dir = os.environ.get("REPO_DIR")
if repo_dir is None:
    # Fail early with an actionable message; otherwise os.path.join(None, ...)
    # below raises an opaque TypeError.
    raise EnvironmentError(
        "The REPO_DIR environment variable is not set; "
        "point it at the root of the repository before running this notebook."
    )

# The trailing slashes matter: later cells build file paths by plain string
# concatenation (e.g. data_dir + "preds/...").
code_dir = os.path.join(repo_dir, "code/")
data_dir = os.path.join(repo_dir, "data/")

# Run from the code directory.
# NOTE(review): presumably this is what lets the project-local imports below
# (`mosaiks`, `analysis`) resolve -- confirm.
os.chdir(code_dir)

import matplotlib.pyplot as plt
import numpy as np
import scipy.linalg
import pickle
import sklearn 
import sys
import pandas as pd
from importlib import reload

from sklearn.model_selection import KFold
from sklearn.linear_model import Ridge
import seaborn as sns

from scipy.stats import spearmanr, mode

import geopandas as gpd
import rasterio
import zarr

import gc

import warnings

from mosaiks.utils.imports import *

from mosaiks.utils.io import weighted_groupby
from affine import Affine

import cartopy.crs as ccrs
# Key prediction functions are here
from analysis.prediction_utils import (flatten_raster,rasterize_df)

# Predicting grid level HDI

In this notebook, we re-center the grid-level HDI predictions on the known sub-national (ADM1) HDI values for each year from 2012 to 2021. We use an intermediate output from the notebook `hdi_preds_at_grid_time_series.ipynb`.

In [30]:
# Label of the outcome column. The GDL `shdi` column is renamed to this label
# when the time series is loaded, and the re-centering loop merges on it.
task = "Sub-national HDI"

In [31]:
# Load the flat file of grid-level predictions and discard the outcome column
# and the previously computed re-centering columns; they are rebuilt per year
# further down.
data = (
    pd.read_pickle(data_dir + "preds/hdi_grid_predictions_flat_file.p")
    .drop(columns=[task, "adj_factor", "centered_pred", "clipped"])
)

In [32]:
# Index the grid rows by their GDL region code. The guard keeps this cell
# idempotent: re-running it once "GDLCODE" is already the index would
# otherwise raise a KeyError.
if data.index.name != "GDLCODE":
    data = data.set_index("GDLCODE")

In [33]:
# Display the grid-level prediction table (indexed by GDLCODE) as a sanity check.
data

Unnamed: 0_level_0,lon,lat,country,raw_pred_hdi,pop_count,weighted_avg_raw
GDLCODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GHAr108,-0.05,10.05,GHA,-0.214817,3984.767526,-0.048032
GHAr108,-0.05,10.15,GHA,-0.204464,3643.961173,-0.048032
GHAr108,-0.05,10.25,GHA,-0.201427,3858.296357,-0.048032
GHAr108,-0.05,10.35,GHA,-0.151808,10260.806671,-0.048032
GHAr108,-0.05,10.45,GHA,-0.184451,5938.877470,-0.048032
...,...,...,...,...,...,...
MMRr106,99.45,11.55,MMR,-0.069701,176.996441,0.022112
MMRr106,99.45,11.85,MMR,-0.065474,1149.625633,0.022112
MMRr106,99.55,11.65,MMR,-0.076085,12.338651,0.022112
MMRr106,99.55,11.75,MMR,-0.115727,122.476133,0.022112


## Merge with time series of GDL data

In [34]:
# Read the GDL sub-national HDI table and relabel its `shdi` column with the
# task name so it can be merged onto the grid predictions.
t_df = (
    pd.read_csv(data_dir + "raw/GDL_HDI/SHDI-SGDI-Total 7.0.csv", low_memory=False)
    .rename(columns={"shdi": task})
)

## Re-center preds on the known ADM1 Value

In [35]:
# For every year, shift the raw grid predictions so that they agree with the
# observed ADM1 HDI of that year, then write the result as one GeoTIFF per year.
year_range = np.arange(2012, 2022)

# Grid definition shared by all years: cell size in degrees and the bounds
# passed to rasterize_df.
# NOTE(review): the bounds are read as (lon_min, lon_max, lat_min, lat_max),
# which matches how the returned extent is used for the transform below --
# confirm against rasterize_df.
grid_delta = 0.1
grid_extent = (-180, 180, -56, 74)
# 3600 x 1300 cells for the extent and cell size above.
raster_width = int(round((grid_extent[1] - grid_extent[0]) / grid_delta))
raster_height = int(round((grid_extent[3] - grid_extent[2]) / grid_delta))

# Opening a raster for writing does not create missing directories, so make
# sure the target folder exists before the loop starts.
raster_outdir = data_dir + "preds/time_series/grid/"
os.makedirs(raster_outdir, exist_ok=True)

for year in year_range:
    print(year)

    # Observed values for this year, keyed by GDLCODE.
    t_df_year = t_df[t_df["year"] == year].set_index("GDLCODE")

    # Left merge keeps every grid row; rows whose GDLCODE has no observation
    # in this year get NaN for the task column (and for everything derived
    # from it below).
    data_year = data.merge(
        t_df_year[[task]], how="left", left_on="GDLCODE", right_index=True
    )

    data_year[task] = data_year[task].astype(float)
    # Additive offset between the observed value and `weighted_avg_raw`.
    # NOTE(review): `weighted_avg_raw` is presumably the population-weighted
    # regional mean of `raw_pred_hdi` computed upstream -- confirm.
    data_year["adj_factor"] = data_year[task] - data_year["weighted_avg_raw"]
    data_year["centered_pred"] = data_year["raw_pred_hdi"] + data_year["adj_factor"]
    # Keep the re-centered predictions inside [0, 1].
    data_year["clipped"] = np.clip(data_year["centered_pred"], 0, 1)

    ## Rasterize
    data_year = data_year.reset_index()

    raster, extent = rasterize_df(
        data_year,
        data_colname="clipped",
        grid_delta=grid_delta,
        lon_col="lon",
        lat_col="lat",
        custom_extent=grid_extent,
    )

    ####  Write grid data product as a raster

    meta = {
        'driver': 'GTiff',
        'dtype': 'float64',
        'nodata': np.nan,
        'width': raster_width,
        'height': raster_height,
        'count': 1,
        'crs': "EPSG:4326",
        # Origin at (extent[0], extent[3]); x grows and y shrinks by one cell
        # size per pixel.
        'transform': Affine(grid_delta, 0.0, extent[0],
                            0.0, -grid_delta, extent[3]),
    }

    raster_outpath = raster_outdir + f"hdi_raster_predictions_{year}.tif"

    with rasterio.open(raster_outpath, "w", **meta) as dest:
        # Add a leading band axis: the dataset is declared with count == 1.
        dest.write(np.array([raster]))


2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
