In [3]:
from pathlib import Path
import os
from aatoolbox import create_country_config, create_custom_country_config, CodAB, GeoBoundingBox, IriForecastDominant, ChirpsMonthly
import pandas as pd
import geopandas as gpd
import netCDF4 as nc
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import math
import json
import warnings

# from config.chirps import *
import datetime

### Data directory, ECMWF input file and IRI authentication
# Values already present in the environment take precedence over the defaults
# below, so the notebook can run on another machine without editing this cell.
# NOTE: the AA_DATA_DIR default is a machine-specific absolute path; export
# AA_DATA_DIR before starting Jupyter to point to your own data folder.
os.environ.setdefault('AA_DATA_DIR', "/home/daniele/Documents/CHD/Python_projects/pa-aa-toolbox-folder/")
os.environ.setdefault('FILENAME_ECMWF', './input_data_ecmwf/tcd_ecmwf_forecast_seasonal_precipitation_tercile_dominant_custom.nc')

# The IRI token is a secret and must not be stored in the notebook: export
# IRI_AUTH in the shell that launches Jupyter. A token that was ever committed
# in a notebook should be considered leaked and be regenerated.
if not os.environ.get('IRI_AUTH'):
    print('IRI_AUTH is not set: export your IRI authentication token before downloading IRI data')

## Main code

In [4]:
def _load_country_config(country_iso3):
    
    ####################################################
    #Country configuration file
    try:
        country_config = create_country_config(iso3=country_iso3)
        print('using built-in country configuration')
    except:
        try:
            filename = './config/countries/' + country_iso3 + '.yaml'
            country_config = create_custom_country_config(filename)
            print('using custom country configuration')
        except:
            print('missing country configuration')
            return
        
    return(country_config)


In [5]:
def _load_codab_data(country_config,country_iso3):
    
    ####################################################
    #Download / load boundaries data
    codab = CodAB(country_config=country_config)    
    if country_iso3 != 'tcd': #codab data for Tchad had to be manually download as it has different zip for different admin levels
        codab.download()     

    try:
        admin0_input_df = codab.load(admin_level=0)
    except:
        admin0_input_df = pd.DataFrame()
        print('no admin0 data')

    try:
        admin1_input_df = codab.load(admin_level=1)
    except:
        admin1_input_df = pd.DataFrame()
        print('no admin1 data')

    try:
        admin2_input_df = codab.load(admin_level=2)
    except:
        admin2_input_df = pd.DataFrame()
        print('no admin2 data')


    try:    
        geo_bounding_box = GeoBoundingBox.from_shape(admin0_input_df)
    except:
        geo_bounding_box = GeoBoundingBox.from_shape(admin1_input_df)

  
    return(admin0_input_df, admin1_input_df, admin2_input_df, geo_bounding_box)

In [6]:
def _load_chirps_data(country_config, geo_bounding_box):
    
    ####################################################
    #Download / load Chrips precipitation data
    chirps_monthly = ChirpsMonthly(country_config=country_config,geo_bounding_box=geo_bounding_box)  

    print('Downloading CHIRPS data...')
    chirps_monthly.download() 
    print('Download completed')
    
    print('Loading CHIRPS data...')
    chirps_monthly.process()
    chirps_monthly_data = chirps_monthly.load()
    chirps_input_df = chirps_monthly_data.to_dataframe()
    print('Data loaded')
    
    return(chirps_input_df)
    

In [7]:
def _load_iri_data(country_config, geo_bounding_box):
    
    ####################################################
    #Download / load IRI precipitation data
    iri_dominant = IriForecastDominant(country_config=country_config,geo_bounding_box=geo_bounding_box)

    print('Downloading IRI data...')
    iri_dominant.download()
    print('Download completed')
    
    print('Loading IRI data...')
    iri_dominant.process()
    iri_dominant_data = iri_dominant.load()
    iri_input_df = iri_dominant_data.to_dataframe()
    print('Data loaded')    
    
    return(iri_input_df)

In [8]:
def _load_ecmwf_data(country_iso3):
    
    ####################################################
    #Load ECMWF precipitation data (using file shared by Daniele for now)
    
    print('Loading ECMWF data...')
    if country_iso3 == 'tcd':
        filename = os.getenv('FILENAME_ECMWF')
        input_data = xr.open_dataset(filename)
        ecmwf_input_df = input_data.to_dataframe()
        print('Data loaded')
    else:
        ecmwf_input_df = pd.DataFrame()
        print('No ECMWF data for this country')
        
    return(ecmwf_input_df)



In [9]:
def _load_data(country_iso3 = 'tcd'):


    country_config = _load_country_config(country_iso3)
    admin0_input_df, admin1_input_df, admin2_input_df, geo_bounding_box = _load_codab_data(country_config,country_iso3)
    chirps_input_df = _load_chirps_data(country_config, geo_bounding_box)
    iri_input_df = _load_iri_data(country_config, geo_bounding_box)
    # ecmwf_input_df = _load_ecmwf_data(country_iso3)

    ####################################################
    #Prepare input data dictionary
    data_dict = {}
    data_dict['chirps'] = chirps_input_df 
    data_dict['iri'] = iri_input_df
    # data_dict['ecmwf'] = ecmwf_input_df
    data_dict['adm0'] = admin0_input_df
    data_dict['adm1'] = admin1_input_df
    data_dict['adm2'] = admin2_input_df

    return(data_dict)

In [11]:
def _format_data(data_dict, admin_level):

    
    ####################################################
    #Only a few adm1 regions are considered in the reference analysis. We adapt the geometry here so that the
    # "country adm0 level" is instead the union of these 5 regions

    new_df = data_dict['adm1']
    new_df = new_df[new_df['admin1Name'].isin(['Lac', 'Kanem', 'Barh-El-Gazel', 'Batha', 'Wadi Fira'])].reset_index(drop = True)

    new_geom = new_df['geometry'].iloc[0]
    for i in range(1,new_df.shape[0]):
        new_geom = new_geom.union(new_df['geometry'].iloc[i])

    data_dict['adm0']['geometry'] = new_geom
    
    ####################################################
    #Admin data can have different column names. The conditions below seems to work for most countries. 
    #The value of adm_pcode_key will be used later when performing a spatial join with precipitation data
    admin_boundaries = data_dict[admin_level]
    if admin_level == 'adm0':
        if 'ADM0_PCODE' in list(admin_boundaries.columns):
            adm_pcode_key = 'ADM0_PCODE'
        elif 'admin0Pcod' in list(admin_boundaries.columns):
            adm_pcode_key = 'admin0Pcod'
        
    elif admin_level == 'adm1':
        if 'ADM1_PCODE' in list(admin_boundaries.columns):
            adm_pcode_key = 'ADM1_PCODE'
        elif 'admin1Pcod' in list(admin_boundaries.columns):
            adm_pcode_key = 'admin1Pcod'
        
    elif admin_level == 'adm2':
        if 'ADM2_PCODE' in list(admin_boundaries.columns):
            adm_pcode_key = 'ADM2_PCODE'
        elif 'admin2Pcod' in list(admin_boundaries.columns):
            adm_pcode_key = 'admin2Pcod'
    #to do => add exceptions
    
    data_dict['adm_pcode_key'] = adm_pcode_key
    


    ####################################################
    #format precipitation NetCDF data and perform a spatial join with country boundaries
    col_name_dict = {}
    col_name_dict['chirps'] = {'x' : 'X', 'y' : 'Y', 't' : 'T', 'value' : 'precipitation'}
    col_name_dict['iri'] = {'x' : 'X', 'y' : 'Y', 't' : 'F', 'value' : 'dominant'}
    # col_name_dict['ecmwf'] = {'x' : 'longitude', 'y' : 'latitude', 't' : 'time', 'value' : 'tprate'}
    
    for data_source in ['chirps', 'iri']:#, 'ecmwf']:

        x_col = col_name_dict[data_source]['x']
        y_col = col_name_dict[data_source]['y']
        t_col = col_name_dict[data_source]['t']
        value_col = col_name_dict[data_source]['value']

        df = data_dict[data_source].reset_index()
        df.rename(columns = {x_col : 'longitude', y_col : 'latitude', value_col : 'value'}, inplace = True)
        df['date_month'] = df[t_col].apply(lambda x : x.month - 1)
        df['date_year'] = df[t_col].apply(lambda x : x.year)
        
        if data_source == 'ecmwf':
            df['value'] = df['value']*60*60*24*30*1000 #ecmwf data is on m/s instead of mm/month
          
        ##df = df.loc[(df['date_year'] >= start_year) & (df['date_year'] <=  end_year)] => to be implemented

        grid_points_df = df.groupby(['longitude','latitude'])['value'].mean().reset_index() # => artificial groupby just to retrive a grid with points
        grid_points_gdf = gpd.GeoDataFrame(grid_points_df, geometry=gpd.points_from_xy(grid_points_df.longitude, grid_points_df.latitude)).set_crs('epsg:4326')        
        grid_points_admin_df = grid_points_gdf.sjoin(admin_boundaries[[adm_pcode_key,'geometry']], how="inner")

        df = pd.merge(df, grid_points_admin_df[['longitude','latitude',adm_pcode_key]], left_on=['longitude','latitude'], right_on = ['longitude','latitude'])
 
        data_dict[data_source] = df  
    
    return(data_dict)

In [12]:
def _compute_climatology(data_dict, admin_level, start_year, end_year, drought_threshold_value):


    ####################################################
    #Prepare season definition
    month_calendar = 'JFMAMJJASOND'
    season_name = []
    season_months = []

    for i in range(0,12):
        season_name.append(month_calendar[i%12] + month_calendar[(i+1)%12] + month_calendar[(i+2)%12])
        season_months.append([i%12, (i+1)%12, (i+2)%12])


    ####################################################
    #Compute results per admin region
    admin_df = data_dict[admin_level].copy()
    adm_pcode_key = data_dict['adm_pcode_key']
    chirps_df = data_dict['chirps']

    for adm_code in list(admin_df[adm_pcode_key].unique()):   

        ####################################################
        #Aggregate data on admin level
        region_df = chirps_df[chirps_df[adm_pcode_key] == adm_code].groupby(['date_year','date_month'])['value'].mean().reset_index()


        ####################################################
        #Compute climatology and drought years per season
        drought_dict = {}
        rainy_season_precipitation = 0

        for season_index in range(0,12):

            season = season_name[season_index]

            precipitation_df = region_df[region_df['date_month'].isin(season_months[season_index])].groupby(['date_year'])['value'].mean().reset_index()
            
            precipitation_climatology_df = precipitation_df.loc[(precipitation_df['date_year'] >= start_year) & (precipitation_df['date_year'] <=  end_year)].copy()     
            average_precipitation = precipitation_climatology_df['value'].mean()
            drought_threshold = precipitation_climatology_df['value'].quantile(drought_threshold_value)

            drought_years_list_string = ', '.join(str(x) for x in list(precipitation_df.loc[precipitation_df['value'] < drought_threshold, 'date_year'].values))  

            drought_dict[season] = {}
            drought_dict[season]['average_precipitation'] = str(average_precipitation)
            drought_dict[season]['drought_threshold'] = str(drought_threshold)
            drought_dict[season]['drought_years'] = drought_years_list_string

            if average_precipitation > rainy_season_precipitation:
                rainy_season_precipitation = average_precipitation
                rainy_season = season



        ####################################################
        #Write result DataFrame
        admin_df.loc[admin_df[adm_pcode_key] == adm_code,'rainy_season'] = rainy_season
        admin_df.loc[admin_df[adm_pcode_key] == adm_code,'rainy_season_average_precipitation'] = float(drought_dict[rainy_season]['average_precipitation'])
        admin_df.loc[admin_df[adm_pcode_key] == adm_code,'rainy_season_drought_threshold'] = float(drought_dict[rainy_season]['drought_threshold'])
        admin_df.loc[admin_df[adm_pcode_key] == adm_code,'rainy_season_drought_years'] = drought_dict[rainy_season]['drought_years']
        admin_df.loc[admin_df[adm_pcode_key] == adm_code,'all_seasons_dict'] = json.dumps(drought_dict)


    
    return(admin_df)


In [13]:
####################################################
# Run parameters
country_iso3 = "tcd"

admin_level = "adm1"  # one of: adm0 / adm1 / adm2
start_year, end_year = 1982, 2020  # first and last year of the climatology window
drought_threshold_value = 1 / 3  # quantile of the seasonal precipitation used as drought threshold

In [14]:
warnings.filterwarnings('ignore')

In [None]:
# Load boundaries, CHIRPS and IRI data for the selected country
input_data_dict = _load_data(country_iso3=country_iso3)

In [16]:
####################################################
# Format the admin boundaries and the precipitation data (CHIRPS / IRI / ECMWF)
data_dict = _format_data(data_dict=input_data_dict, admin_level=admin_level)

In [17]:
####################################################

# Climatology and drought years are computed from CHIRPS data only; IRI and ECMWF are not implemented yet.
result_df = _compute_climatology(data_dict, admin_level, start_year, end_year, drought_threshold_value)

####################################################
output_file = f'./output_data/{country_iso3}_{admin_level}_drought_{start_year}_{end_year}.geojson'
# Create the output folder if needed so that the export also works on a fresh checkout
Path(output_file).parent.mkdir(parents=True, exist_ok=True)
print(output_file)
result_df.to_file(output_file, driver="GeoJSON")

./output_data/tcd_adm1_drought_1982_2020.geojson


In [18]:
# Display the per-region climatology / drought table computed above
result_df

Unnamed: 0,admin1Name,admin1Pcod,admin1RefN,admin1Na_1,admin1AltN,admin1Al_1,admin0Name,admin0Pcod,date,validOn,validTo,Shape_Leng,Shape_Area,geometry,rainy_season,rainy_season_average_precipitation,rainy_season_drought_threshold,rainy_season_drought_years,all_seasons_dict
0,Batha,TD01,Batha,BATHA,,,Tchad,TD,2016-03-15,2016-05-09,,12.859463,7.621043,"POLYGON ((19.34077 16.10815, 19.39467 16.07575...",JAS,78.63174,67.613752,"1982, 1983, 1984, 1987, 1990, 1993, 1996, 1997...","{""JFM"": {""average_precipitation"": ""0.11957657""..."
1,Borkou,TD02,Borkou,BORKOU,,,Tchad,TD,2016-03-15,2016-05-09,,15.334343,12.740493,"POLYGON ((18.89773 18.66806, 19.18366 18.64927...",JJA,11.840555,9.925083,"1982, 1983, 1984, 1985, 1987, 1990, 1993, 1995...","{""JFM"": {""average_precipitation"": ""0.011299917..."
2,Chari-Baguirmi,TD03,Chari-Baguirmi,CHARI-BAGUIRMI,,,Tchad,TD,2016-03-15,2016-05-09,,10.28266,3.880907,"POLYGON ((15.40400 12.32200, 15.44253 12.28302...",JAS,172.31296,157.221303,"1982, 1983, 1984, 1985, 1987, 1990, 1993, 1997...","{""JFM"": {""average_precipitation"": ""0.7361085"",..."
3,Guéra,TD04,Guera,GUERA,,,Tchad,TD,2016-03-15,2016-05-09,,12.081014,5.057404,"POLYGON ((19.77772 13.03076, 19.88910 12.99307...",JAS,171.0323,155.914017,"1982, 1983, 1984, 1985, 1987, 1990, 1993, 1997...","{""JFM"": {""average_precipitation"": ""0.9314673"",..."
4,Hadjer-Lamis,TD05,Hadjer-Lamis,HADJER LAMIS,,,Tchad,TD,2016-03-15,2016-05-09,,9.789328,2.39812,"POLYGON ((15.75500 13.16300, 15.86200 13.15400...",JAS,128.5919,115.938835,"1982, 1983, 1984, 1985, 1987, 1989, 1990, 1993...","{""JFM"": {""average_precipitation"": ""0.26217386""..."
5,Kanem,TD06,Kanem,KANEM,,,Tchad,TD,2016-03-15,2016-05-09,,11.907133,6.12097,"POLYGON ((17.17024 16.03506, 16.32311 15.00361...",JAS,31.117594,27.435307,"1981, 1982, 1983, 1984, 1985, 1987, 1989, 1990...","{""JFM"": {""average_precipitation"": ""0.015378298..."
6,Lac,TD07,Lac,LAC,,,Tchad,TD,2016-03-15,2016-05-09,,6.471361,1.8103,"POLYGON ((13.72910 14.51277, 13.74198 14.49744...",JAS,77.40976,67.410876,"1982, 1983, 1984, 1985, 1987, 1989, 1990, 1993...","{""JFM"": {""average_precipitation"": ""0.03702551""..."
7,Logone Occidental,TD08,Logone Occidental,LOGONE OCCIDENTAL,,,Tchad,TD,2016-03-15,2016-05-09,,3.928428,0.727334,"POLYGON ((16.38100 9.21000, 16.41100 9.18100, ...",JAS,238.53227,226.263229,"1983, 1984, 1985, 1987, 1989, 1990, 1991, 1993...","{""JFM"": {""average_precipitation"": ""1.4928312"",..."
8,Logone Oriental,TD09,Logone Oriental,LOGONE ORIENTAL,,,Tchad,TD,2016-03-15,2016-05-09,,8.194939,1.944317,"POLYGON ((16.57123 9.15131, 16.59600 9.14900, ...",JAS,252.54913,236.802109,"1984, 1985, 1986, 1987, 1989, 1990, 1991, 1993...","{""JFM"": {""average_precipitation"": ""2.5777519"",..."
9,Mandoul,TD10,Mandoul,MANDOUL,,,Tchad,TD,2016-03-15,2016-05-09,,5.997929,1.433549,"POLYGON ((17.50700 9.61600, 17.55400 9.59700, ...",JAS,236.65425,217.401983,"1984, 1987, 1989, 1990, 1991, 1993, 1995, 1997...","{""JFM"": {""average_precipitation"": ""2.9877775"",..."


## Sandbox

In [29]:
##Export grid points for visualisation
# One CSV per precipitation source. The mean is only a device to collapse the
# time dimension and retrieve one row per (longitude, latitude) grid point.
grid_export_paths = {
    'chirps': './chirps_grid_tchad.csv',
    'iri': './iri_grid_tchad.csv',
    # 'ecmwf': './ecmwf_grid_tchad.csv',
}
for source_name, csv_path in grid_export_paths.items():
    grid_points_df = data_dict[source_name].groupby(['longitude','latitude'])['value'].mean().reset_index()
    grid_points_df.to_csv(csv_path)


In [30]:
# NOTE(review): the output saved below lists adm0 columns (OBJECTID, admin0Name) although
# admin_level is set to 'adm1' earlier in the notebook - presumably left over from a run
# with admin_level = 'adm0'. Re-run the notebook from top to bottom to refresh it.
result_df

Unnamed: 0,OBJECTID,admin0Name,admin0Pcod,Shape_Leng,Shape_Area,geometry,rainy_season,rainy_season_average_precipitation,rainy_season_drought_threshold,rainy_season_drought_years,all_seasons_dict
0,1,Tchad,TD,55.887665,106.6902,"POLYGON ((19.69945 15.82167, 19.91543 15.77023...",JAS,59.3929,53.308697,"1982, 1983, 1984, 1985, 1987, 1989, 1990, 1993...","{""JFM"": {""average_precipitation"": ""0.0572793"",..."


## Climatology ECMWF
- Based on interval 1993 - 2016
- Will calculate all terciles (not only dominant)

## IRI non-dominant terciles
- Could retrieve them from the website but will have to do it manually (not using the toolbox)
- Why would they be important?

## IRI drought years
- Different options:
    - more than 50% of the territory shows the lower tercile, irrespective of probability
    - 

## Aggregating data
- How to take into account partial cells