In [1]:
from pathlib import Path
import os
from aatoolbox import create_country_config, create_custom_country_config, CodAB, GeoBoundingBox, IriForecastDominant
import pandas as pd
import geopandas as gpd
import netCDF4 as nc
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import math
import json

from config.chirps import *
import datetime

### Set the data directory and IRI authentication token
#os.environ['AA_DATA_DIR'] = 
#os.environ['IRI_AUTH'] = 


## Main code

In [2]:
def _load_country_config(country_iso3):
    
    ####################################################
    #Country configuration file
    try:
        country_config = create_country_config(iso3=country_iso3)
        print('using built-in country configuration')
    except:
        try:
            filename = './config/countries/' + country_iso3 + '.yaml'
            country_config = create_custom_country_config(filename)
            print('using custom country configuration')
        except:
            print('missing country configuration')
            return
        
    return(country_config)


In [3]:
def _load_codab_data(country_config):
    
    ####################################################
    #Download / load boundaries data
    codab = CodAB(country_config=country_config)    
    codab.download()     

    try:
        admin0_input_df = codab.load(admin_level=0)
    except:
        admin0_input_df = pd.DataFrame()
        print('no admin0 data')

    try:
        admin1_input_df = codab.load(admin_level=1)
    except:
        admin1_input_df = pd.DataFrame()
        print('no admin1 data')

    try:
        admin2_input_df = codab.load(admin_level=2)
    except:
        admin2_input_df = pd.DataFrame()
        print('no admin2 data')


    try:    
        geo_bounding_box = GeoBoundingBox.from_shape(admin0_input_df)
    except:
        geo_bounding_box = GeoBoundingBox.from_shape(admin1_input_df)

  
    return(admin0_input_df, admin1_input_df, admin2_input_df, geo_bounding_box)

In [4]:
def _load_chirps_data(country_config, geo_bounding_box):
    
    ####################################################
    #Download / load Chrips precipitation data
    chirps_monthly = ChirpsMonthly(country_config=country_config,geo_bounding_box=geo_bounding_box)  

    print('Downloading CHIRPS data...')
    chirps_monthly.download() 
    print('Download completed')
    
    print('Loading CHIRPS data...')
    chirps_monthly.process()
    chirps_monthly_data = chirps_monthly.load()
    chirps_input_df = chirps_monthly_data.to_dataframe()
    print('Data loaded')
    
    return(chirps_input_df)
    

In [5]:
def _load_iri_data(country_config, geo_bounding_box):
    
    ####################################################
    #Download / load IRI precipitation data
    iri_dominant = IriForecastDominant(country_config=country_config,geo_bounding_box=geo_bounding_box)

    print('Downloading IRI data...')
    iri_dominant.download()
    print('Download completed')
    
    print('Loading IRI data...')
    iri_dominant.process()
    iri_dominant_data = iri_dominant.load()
    iri_input_df = iri_dominant_data.to_dataframe()
    print('Data loaded')    
    
    return(iri_input_df)

In [6]:
def _load_ecmwf_data(country_iso3):
    
    ####################################################
    #Load ECMWF precipitation data (using file shared by Daniele for now)
    
    print('Loading ECMWF data...')
    if country_iso3 == 'tcd':
        filename = './data/private/processed/tcd/ecmwf/tcd_ecmwf_forecast_seasonal_precipitation_tercile_dominant_custom.nc'
        input_data = xr.open_dataset(filename)
        ecmwf_input_df = input_data.to_dataframe()
        print('Data loaded')
    else:
        ecmwf_input_df = pd.DataFrame()
        print('No ECMWF data for this country')
        
    return(ecmwf_input_df)



In [7]:
def _load_data(country_iso3 = 'tcd'):


    country_config = _load_country_config(country_iso3)
    admin0_input_df, admin1_input_df, admin2_input_df, geo_bounding_box = _load_codab_data(country_config)
    chirps_input_df = _load_chirps_data(country_config, geo_bounding_box)
    iri_input_df = _load_iri_data(country_config, geo_bounding_box)
    ecmwf_input_df = _load_ecmwf_data(country_iso3)

    ####################################################
    #Prepare input data dictionary
    data_dict = {}
    data_dict['chirps'] = chirps_input_df 
    data_dict['iri'] = iri_input_df
    data_dict['ecmwf'] = ecmwf_input_df
    data_dict['adm0'] = admin0_input_df
    data_dict['adm1'] = admin1_input_df
    data_dict['adm2'] = admin2_input_df

    return(data_dict)

In [8]:
def _format_data(data_dict, admin_level):

    
    ####################################################
    #Only a few adm1 regions are considered in the reference analysis. We adapt the geometry here so that the
    # "country adm0 level" is instead the union of these 5 regions

    new_df = data_dict['adm1']
    new_df = new_df[new_df['admin1Name'].isin(['Lac', 'Kanem', 'Barh-El-Gazel', 'Batha', 'Wadi Fira'])].reset_index(drop = True)

    new_geom = new_df['geometry'].iloc[0]
    for i in range(1,new_df.shape[0]):
        new_geom = new_geom.union(new_df['geometry'].iloc[i])

    data_dict['adm0']['geometry'] = new_geom
    
    ####################################################
    #Admin data can have different column names. The conditions below seems to work for most countries. 
    #The value of adm_pcode_key will be used later when performing a spatial join with precipitation data
    admin_boundaries = data_dict[admin_level]
    if admin_level == 'adm0':
        if 'ADM0_PCODE' in list(admin_boundaries.columns):
            adm_pcode_key = 'ADM0_PCODE'
        elif 'admin0Pcod' in list(admin_boundaries.columns):
            adm_pcode_key = 'admin0Pcod'
        
    elif admin_level == 'adm1':
        if 'ADM1_PCODE' in list(admin_boundaries.columns):
            adm_pcode_key = 'ADM1_PCODE'
        elif 'admin1Pcod' in list(admin_boundaries.columns):
            adm_pcode_key = 'admin1Pcod'
        
    elif admin_level == 'adm2':
        if 'ADM2_PCODE' in list(admin_boundaries.columns):
            adm_pcode_key = 'ADM2_PCODE'
        elif 'admin2Pcod' in list(admin_boundaries.columns):
            adm_pcode_key = 'admin2Pcod'
    #to do => add exceptions
    
    data_dict['adm_pcode_key'] = adm_pcode_key
    


    ####################################################
    #format precipitation NetCDF data and perform a spatial join with country boundaries
    col_name_dict = {}
    col_name_dict['chirps'] = {'x' : 'X', 'y' : 'Y', 't' : 'T', 'value' : 'precipitation'}
    col_name_dict['iri'] = {'x' : 'X', 'y' : 'Y', 't' : 'F', 'value' : 'dominant'}
    col_name_dict['ecmwf'] = {'x' : 'longitude', 'y' : 'latitude', 't' : 'time', 'value' : 'tprate'}
    
    for data_source in ['chirps', 'iri', 'ecmwf']:

        x_col = col_name_dict[data_source]['x']
        y_col = col_name_dict[data_source]['y']
        t_col = col_name_dict[data_source]['t']
        value_col = col_name_dict[data_source]['value']

        df = data_dict[data_source].reset_index()
        df.rename(columns = {x_col : 'longitude', y_col : 'latitude', value_col : 'value'}, inplace = True)
        df['date_month'] = df[t_col].apply(lambda x : x.month - 1)
        df['date_year'] = df[t_col].apply(lambda x : x.year)
        
          
        ##df = df.loc[(df['date_year'] >= start_year) & (df['date_year'] <=  end_year)] => to be implemented

        grid_points_df = df.groupby(['longitude','latitude'])['value'].mean().reset_index() # => artificial groupby just to retrive a grid with points
        grid_points_gdf = gpd.GeoDataFrame(grid_points_df, geometry=gpd.points_from_xy(grid_points_df.longitude, grid_points_df.latitude)).set_crs('epsg:4326')        
        grid_points_admin_df = grid_points_gdf.sjoin(admin_boundaries[[adm_pcode_key,'geometry']], how="inner")

        df = pd.merge(df, grid_points_admin_df[['longitude','latitude',adm_pcode_key]], left_on=['longitude','latitude'], right_on = ['longitude','latitude'])
 
        data_dict[data_source] = df  
    
    return(data_dict)

In [9]:
def _compute_climatology(data_dict, admin_level, start_year, end_year, drought_threshold_value):


    ####################################################
    #Prepare season definition
    month_calendar = 'JFMAMJJASOND'
    season_name = []
    season_months = []

    for i in range(0,12):
        season_name.append(month_calendar[i%12] + month_calendar[(i+1)%12] + month_calendar[(i+2)%12])
        season_months.append([i%12, (i+1)%12, (i+2)%12])


    ####################################################
    #Compute results per admin region
    admin_df = data_dict[admin_level].copy()
    adm_pcode_key = data_dict['adm_pcode_key']
    chirps_df = data_dict['chirps']

    for adm_code in list(admin_df[adm_pcode_key].unique()):   

        ####################################################
        #Aggregate data on admin level
        region_df = chirps_df[chirps_df[adm_pcode_key] == adm_code].groupby(['date_year','date_month'])['value'].mean().reset_index()


        ####################################################
        #Compute climatology and drought years per season
        drought_dict = {}
        rainy_season_precipitation = 0

        for season_index in range(0,12):

            season = season_name[season_index]

            precipitation_df = region_df[region_df['date_month'].isin(season_months[season_index])].groupby(['date_year'])['value'].mean().reset_index()
            
            precipitation_climatology_df = precipitation_df.loc[(precipitation_df['date_year'] >= start_year) & (precipitation_df['date_year'] <=  end_year)].copy()     
            average_precipitation = precipitation_climatology_df['value'].mean()
            drought_threshold = precipitation_climatology_df['value'].quantile(drought_threshold_value)

            drought_years_list_string = ', '.join(str(x) for x in list(precipitation_df.loc[precipitation_df['value'] < drought_threshold, 'date_year'].values))  

            drought_dict[season] = {}
            drought_dict[season]['average_precipitation'] = str(average_precipitation)
            drought_dict[season]['drought_threshold'] = str(drought_threshold)
            drought_dict[season]['drought_years'] = drought_years_list_string

            if average_precipitation > rainy_season_precipitation:
                rainy_season_precipitation = average_precipitation
                rainy_season = season



        ####################################################
        #Write result DataFrame
        admin_df.loc[admin_df[adm_pcode_key] == adm_code,'rainy_season'] = rainy_season
        admin_df.loc[admin_df[adm_pcode_key] == adm_code,'rainy_season_average_precipitation'] = float(drought_dict[rainy_season]['average_precipitation'])
        admin_df.loc[admin_df[adm_pcode_key] == adm_code,'rainy_season_drought_threshold'] = float(drought_dict[rainy_season]['drought_threshold'])
        admin_df.loc[admin_df[adm_pcode_key] == adm_code,'rainy_season_drought_years'] = drought_dict[rainy_season]['drought_years']
        admin_df.loc[admin_df[adm_pcode_key] == adm_code,'all_seasons_dict'] = json.dumps(drought_dict)


    
    return(admin_df)


In [10]:

####################################################
# Run parameters
country_iso3 = 'tcd'

admin_level = 'adm0' ## adm0 / adm1 / adm2
start_year = 1982  # first year of the climatology reference period (inclusive)
end_year = 2020    # last year of the climatology reference period (inclusive)
drought_threshold_value = 1/3.  # quantile below which a season counts as a drought

input_data_dict = _load_data(country_iso3)


####################################################
#format admin and precipitation (CHIRPS / IRI / ECMWF) data
# Note: _format_data modifies input_data_dict in place and returns it.
data_dict = _format_data(input_data_dict, admin_level)

####################################################

#Climatology and drought years are computed for CHIRPS data only. IRI and ECMWF are not implemented yet.
result_df = _compute_climatology(data_dict, admin_level, start_year, end_year, drought_threshold_value)

####################################################
output_dir = './output_data/'
# Create the output folder on a fresh checkout; to_file fails if it is missing.
os.makedirs(output_dir, exist_ok=True)
output_file = output_dir + country_iso3 + '_' + admin_level + '_' + 'drought' + '_' + str(start_year) + '_' + str(end_year) + '.geojson'
print(output_file)
result_df.to_file(output_file, driver="GeoJSON")



using custom country configuration
Downloading CHIRPS data...
Download completed
Loading CHIRPS data...
Data loaded
Downloading IRI data...
Download completed
Loading IRI data...
Data loaded
Loading ECMWF data...
Data loaded
./output_data/tcd_adm0_drought_1982_2020.geojson


## Sandbox

In [None]:
##Export grid points for visualisation
# One CSV per precipitation source, written next to the notebook:
# ./chirps_grid_tchad.csv, ./iri_grid_tchad.csv and ./ecmwf_grid_tchad.csv
for data_source in ['chirps', 'iri', 'ecmwf']:
    # Artificial groupby, only used to retrieve one row per grid point
    grid_points_df = (
        data_dict[data_source]
        .groupby(['longitude', 'latitude'])['value']
        .mean()
        .reset_index()
    )
    grid_points_df.to_csv('./' + data_source + '_grid_tchad.csv')


In [None]:
# Preview the formatted ECMWF frame (first rows only, not the whole table)
data_dict['ecmwf'].head()