### Trigger mechanism for Somalia **

IPC trigger design as endorsed early 2020 (no forecast period- ML1 or ML2 -was specified):   

- The projected national population in Phase 3 and above exceed 20%, AND 
- The national population in Phase 3 is projected to increase by 5 percentage points, OR 
- The projected national population in Phase 4 or above is 2.5%

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import geopandas as gpd
import seaborn as sns
import numpy as np
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.float_format', lambda x: '%.2f' % x)

In [2]:
country="somalia"

In [3]:
# load world-pop-weighted fewsnet data
# note CS_99 denotes missing values
df_fadm=pd.read_csv(f"../Data/FewsNetWorldPop/som_admin1_fewsnet_worldpop.csv",index_col=0)
adm1c="ADMIN1" 
admc="ADMIN1" #"admin1Name"

In [4]:
# extract year and month from date
df_fadm["date"] = pd.to_datetime(df_fadm["date"])
df_fadm["year"] = df_fadm["date"].dt.year
df_fadm["month"] = df_fadm["date"].dt.month

In [5]:
df_fadm

Unnamed: 0,date,ADMIN1,CS_2,CS_3,CS_4,CS_99,ML1_1,ML1_2,ML1_3,ML1_4,...,ML1_5,ML2_5,pop_CS,pop_Total_CS,pop_ML1,pop_Total_ML1,pop_ML2,pop_Total_ML2,year,month
0,2009-07-01,Awdal,20.72,0.00,0.00,463644.93,0.00,20.72,0.00,0.00,...,0.00,0.00,20.72,463686.38,20.72,463686.38,20.72,463686.38,2009,7
1,2009-07-01,Bakool,0.00,0.00,0.00,483539.54,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,483539.54,0.00,483539.54,0.00,483539.54,2009,7
2,2009-07-01,Banadir,0.00,0.00,0.00,1388087.50,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,1388087.50,0.00,1388087.50,0.00,1388087.50,2009,7
3,2009-07-01,Bari,0.00,0.00,0.00,595480.55,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,595480.55,0.00,595480.55,0.00,595480.55,2009,7
4,2009-07-01,Bay,0.00,0.00,0.00,968685.09,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,968685.09,0.00,968685.09,0.00,968685.09,2009,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
733,2020-10-01,Nugaal,119627.93,144516.70,0.00,,0.00,119627.93,144516.70,0.00,...,0.00,0.00,264144.63,528289.26,264144.63,528289.26,264144.63,528289.26,2020,10
734,2020-10-01,Sanaag,21384.41,548290.40,0.00,,0.00,21384.41,548290.40,0.00,...,0.00,0.00,569674.81,1139349.61,569674.81,1139349.61,569674.81,1139349.61,2020,10
735,2020-10-01,Sool,55848.40,254617.74,0.00,,0.00,55848.40,254617.74,0.00,...,0.00,0.00,310466.14,620932.27,310466.14,620932.27,310466.14,620932.27,2020,10
736,2020-10-01,Togdheer,676748.79,175309.57,0.00,,0.00,676748.79,175309.57,0.00,...,0.00,0.00,852058.36,1704116.72,852058.36,1704116.72,852058.36,1704116.72,2020,10


In [6]:
# list column names
df_fadm.columns    

Index(['date', 'ADMIN1', 'CS_2', 'CS_3', 'CS_4', 'CS_99', 'ML1_1', 'ML1_2',
       'ML1_3', 'ML1_4', 'ML1_99', 'ML2_1', 'ML2_2', 'ML2_3', 'ML2_4',
       'ML2_99', 'CS_1', 'CS_5', 'ML1_5', 'ML2_5', 'pop_CS', 'pop_Total_CS',
       'pop_ML1', 'pop_Total_ML1', 'pop_ML2', 'pop_Total_ML2', 'year',
       'month'],
      dtype='object')

### National trigger


In [7]:
# compute national totals
df_ntl = df_fadm.drop(['year', 'month'], axis=1).groupby(by='date', axis=0, as_index = False).sum()
df_ntl.head(10)

Unnamed: 0,date,CS_2,CS_3,CS_4,CS_99,ML1_1,ML1_2,ML1_3,ML1_4,ML1_99,...,CS_1,CS_5,ML1_5,ML2_5,pop_CS,pop_Total_CS,pop_ML1,pop_Total_ML1,pop_ML2,pop_Total_ML2
0,2009-07-01,20.72,0.0,0.0,11660248.69,0.0,20.72,0.0,0.0,11660248.69,...,0.0,0.0,0.0,0.0,20.72,11660290.14,20.72,11660290.14,20.72,11660290.14
1,2009-10-01,20.72,6801018.54,4859229.98,0.0,0.0,0.0,6801039.26,4859229.98,0.0,...,0.0,0.0,0.0,0.0,11660269.24,23320538.49,11660269.24,23320538.48,11660269.24,23320538.48
2,2010-01-01,3070512.74,4317091.06,4599493.71,0.0,0.0,3070512.74,4317073.5,4599511.25,0.0,...,0.0,0.0,0.0,0.0,11987097.5,23974195.01,11987097.49,23974194.97,11987097.53,23974195.07
3,2010-04-01,3003964.65,4292968.11,4690164.69,0.0,0.0,3003964.65,4292968.11,4690164.69,0.0,...,0.0,0.0,0.0,0.0,11987097.46,23974194.92,11987097.46,23974194.92,11987097.38,23974194.77
4,2010-07-01,6806350.66,2885028.04,2295718.78,0.0,0.0,6806350.66,2885028.04,2295718.78,0.0,...,0.0,0.0,0.0,0.0,11987097.48,23974194.96,11987097.48,23974194.96,11987097.45,23974194.9
5,2010-10-01,6347000.7,1666470.89,3973625.85,0.0,0.0,5368684.06,2920178.37,3698235.06,0.0,...,0.0,0.0,0.0,0.0,11987097.43,23974194.86,11987097.49,23974194.99,11987097.47,23974194.94
6,2011-01-01,6533249.08,1698990.03,4085611.27,0.0,0.0,4898343.83,3472567.86,3946938.71,0.0,...,0.0,0.0,0.0,0.0,12317850.38,24635700.76,12317850.4,24635700.8,12317850.41,24635700.83
7,2011-04-01,6470811.0,1761428.12,4085611.27,0.0,0.0,2654113.97,5571023.79,4092712.84,0.0,...,0.0,0.0,0.0,0.0,12317850.39,24635700.78,12317850.6,24635701.2,12317850.58,24635701.16
8,2011-07-01,1923555.7,1644052.5,6860911.08,0.0,0.0,1923555.7,1644052.5,1383822.37,0.0,...,0.0,1889331.13,7366420.05,7366420.05,12317850.42,24635700.84,12317850.62,24635701.24,12317850.62,24635701.24
9,2011-10-01,1869213.24,3094900.73,4419001.11,0.0,0.0,1861516.9,3131671.88,4451504.71,0.0,...,0.0,2934735.55,2873157.06,0.0,12317850.63,24635701.26,12317850.55,24635701.09,12317850.54,24635701.09


In [8]:
def add_percentages(df):
    # calculate percentage of population per period and phase
    for period in ["CS", "ML1", "ML2"]:
        # IPC phases goes up to 5, so define range up to 6
        for i in range(1, 6):
            c = f"{period}_{i}"
            df[f"perc_{c}"] = df[c] / df[f"pop_{period}"] * 100
        # get pop and perc in IPC3+ and IPC2-
        # 3p = IPC phase 3 or higher, 4p = IPC phase 4 or higher
        df[f"{period}_3p"] = df[[f"{period}_{i}" for i in range(3, 6)]].sum(axis=1)
        df[f"perc_{period}_3p"] = df[f"{period}_3p"] / df[f"pop_{period}"] * 100
        df[f"{period}_4p"] = df[[f"{period}_{i}" for i in range(4, 6)]].sum(axis=1)
        df[f"perc_{period}_4p"] = df[f"{period}_4p"] / df[f"pop_{period}"] * 100
    df["perc_inc_ML2_3p"] = df["perc_ML2_3p"] - df["perc_CS_3p"]
    df["perc_inc_ML1_3p"] = df["perc_ML1_3p"] - df["perc_CS_3p"]
    return df

In [9]:
df_ntl=add_percentages(df_ntl)
df_ntl.head()

Unnamed: 0,date,CS_2,CS_3,CS_4,CS_99,ML1_1,ML1_2,ML1_3,ML1_4,ML1_99,...,perc_ML2_2,perc_ML2_3,perc_ML2_4,perc_ML2_5,ML2_3p,perc_ML2_3p,ML2_4p,perc_ML2_4p,perc_inc_ML2_3p,perc_inc_ML1_3p
0,2009-07-01,20.72,0.0,0.0,11660248.69,0.0,20.72,0.0,0.0,11660248.69,...,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2009-10-01,20.72,6801018.54,4859229.98,0.0,0.0,0.0,6801039.26,4859229.98,0.0,...,0.0,58.33,41.67,0.0,11660269.24,100.0,4859229.98,41.67,0.0,0.0
2,2010-01-01,3070512.74,4317091.06,4599493.71,0.0,0.0,3070512.74,4317073.5,4599511.25,0.0,...,19.95,41.42,38.64,0.0,9596268.72,80.05,4631466.61,38.64,5.67,-0.0
3,2010-04-01,3003964.65,4292968.11,4690164.69,0.0,0.0,3003964.65,4292968.11,4690164.69,0.0,...,25.93,49.87,24.2,0.0,8878537.87,74.07,2900414.07,24.2,-0.87,0.0
4,2010-07-01,6806350.66,2885028.04,2295718.78,0.0,0.0,6806350.66,2885028.04,2295718.78,0.0,...,29.68,47.27,23.05,0.0,8429187.8,70.32,2763467.52,23.05,27.1,0.0


In [10]:
#Trigger definition:
#The projected population in phase 3 and above exceed 20%, AND
#The projected population in phase 3 is projected to increase by a further 5%, OR
#The projected population in phase 4 or above is 2.5%

def get_national_abs_criterion(row, period, phase, threshold):
    """
    Return 1 if percentage of population in row for period in phase "phase" or higher, equals or larger than. 
    Threshold should NOT be a decimal (ie 5 for 5 percent, not .05) 
    """
    # range till 6 cause 5 is max phase
    cols = [f"perc_{period}_{l}" for l in range(phase, 6)]
    if np.isnan(row[f"pop_{period}"]):
        return np.nan
    if round(row[cols].sum()) >= threshold:
        return 1
    else:
        return 0
    
def get_national_increase_criterion(row, period, phase, threshold):
    """
    Return 1 if for row percentage in >="phase" projected at Period minus percentage currently (CS) in >="phase" is expected to be larger than Threshold
    For Global IPC the population analysed in ML2 is sometimes different than in CS. That is why we work directly with percentages and not anymore with (pop period phase+ - pop CS phase+) / pop CS
    Threshold should NOT be a decimal (ie 5 for 5 percent, not .05) 
    """
    # range till 6 cause 5 is max phase
    cols__ml = [f"perc_{period}_{l}" for l in range(phase, 6)]
    cols__cs = [f"perc_CS_{l}" for l in range(phase, 6)]
    if row[["pop_CS", f"pop_{period}"]].isnull().values.any():
        return np.nan
    if row[cols__ml].sum() == 0:
        return 0
    if round(row[cols__ml].sum() - row[cols__cs].sum()) >= threshold:
        return 1
    else:
        return 0    

In [11]:
# apply criteria. Returns 1 if criterion is met.

df_ntl["natl_criterion_ML1_3_20"] = df_ntl.apply(lambda x: get_national_abs_criterion(x,"ML1",3,20),axis=1)
df_ntl["natl_criterion_ML1_3_5in"] = df_ntl.apply(lambda x: get_national_increase_criterion(x,"ML1",3,5),axis=1)
df_ntl["natl_criterion_ML1_4_2half"] = df_ntl.apply(lambda x: get_national_abs_criterion(x,"ML1",4,2.5),axis=1)

df_ntl["natl_criterion_ML2_3_20"] = df_ntl.apply(lambda x: get_national_abs_criterion(x,"ML2",3,20),axis=1)
df_ntl["natl_criterion_ML2_3_5in"] = df_ntl.apply(lambda x: get_national_increase_criterion(x,"ML2",3,5),axis=1)
df_ntl["natl_criterion_ML2_4_2half"] = df_ntl.apply(lambda x: get_national_abs_criterion(x,"ML2",4,2.5),axis=1)


In [12]:
# determine whether national trigger is met

df_ntl['national_trigger_ML1'] =  np.where((df_ntl['natl_criterion_ML1_3_20'] & df_ntl['natl_criterion_ML1_3_5in'] ) | (df_ntl['natl_criterion_ML1_4_2half'] == 1), 1, 0)
df_ntl['national_trigger_ML2'] =  np.where((df_ntl['natl_criterion_ML2_3_20'] & df_ntl['natl_criterion_ML2_3_5in'] ) | (df_ntl['natl_criterion_ML2_4_2half'] == 1), 1, 0)

In [13]:
# extract year / month per row

df_ntl["date"] = pd.to_datetime(df_ntl["date"])
df_ntl["year"] = df_ntl["date"].dt.year
df_ntl["month"] = df_ntl["date"].dt.month

In [21]:
# list years / months during which national trigger would have been met

national_activations = df_ntl.loc[(df_ntl["national_trigger_ML1"] == 1)]
national_activations['period'] = 'ML1'
national_activations_ML2 = df_ntl.loc[(df_ntl["national_trigger_ML2"] == 1)]
national_activations_ML2['period'] = 'ML2'
national_activations = national_activations.append(national_activations_ML2)

display(national_activations_ML2.round(2).groupby(['year', 'month'], as_index=False)['period','perc_CS_3p','perc_CS_4','perc_ML1_3p','perc_ML1_4', 'perc_ML2_3p','perc_ML2_4'].agg(lambda x: list(x)))


Unnamed: 0,year,month,period,perc_CS_3p,perc_CS_4,perc_ML1_3p,perc_ML1_4,perc_ML2_3p,perc_ML2_4
0,2009,10,[ML2],[100.0],[41.67],[100.0],[41.67],[100.0],[41.67]
1,2010,1,[ML2],[74.38],[38.37],[74.38],[38.37],[80.05],[38.64]
2,2010,4,[ML2],[74.94],[39.13],[74.94],[39.13],[74.07],[24.2]
3,2010,7,[ML2],[43.22],[19.15],[43.22],[19.15],[70.32],[23.05]
4,2010,10,[ML2],[47.05],[33.15],[55.21],[30.85],[55.21],[33.86]
5,2011,1,[ML2],[46.96],[33.17],[60.23],[32.04],[49.86],[25.09]
6,2011,4,[ML2],[47.47],[33.17],[78.45],[33.23],[84.89],[37.42]
7,2011,7,[ML2],[84.38],[55.7],[84.38],[11.23],[84.38],[11.23]
8,2011,10,[ML2],[84.83],[35.87],[84.89],[36.14],[76.72],[46.47]
9,2012,1,[ML2],[33.07],[9.36],[33.07],[9.36],[43.0],[9.36]


### Subnational Trigger

In [15]:
# regions that have been or were forecasted to be IPC 5
print("CS 5", df_fadm.CS_5.unique())
print("ML1 5", df_fadm.ML1_5.unique())

CS 5 [0.00000000e+00 4.63102938e+05 8.50107129e+03 1.52671182e+03
 1.72466492e+02 1.41567647e+06 3.51474983e+02 4.53750016e+05
 1.42462158e+04 1.00261858e+06 3.79671608e+02 2.11952087e+02
 1.13561157e+06 5.43193680e+02 3.27374350e+05]
ML1 5 [0.00000000e+00 5.10570230e+05 1.46448675e+06 1.02299116e+06
 5.41431094e+05 5.40856781e+05 6.27290320e+05 1.41743246e+06
 3.91416789e+05 8.49944469e+05 4.54150633e+05 5.47134062e+04
 1.00001004e+06 4.45067863e+02 1.03813641e+06 2.01005196e+02
 3.25500499e+05]


In [16]:
def add_columns(df):
    df["date"] = pd.to_datetime(df["date"])
    df["year"] = df["date"].dt.year
    df["month"] = df["date"].dt.month

    # calculate percentage of population per analysis period and level
    for period in ["CS", "ML1", "ML2"]:
        # IPC level goes up to 5, so define range up to 6
        for i in range(1, 6):
            c = f"{period}_{i}"
            df[f"perc_{c}"] = df[c] / df[f"pop_{period}"] * 100
        # get pop and perc in IPC3+ and IPC2-
        # 3p = IPC level 3 or higher, 2m = IPC level 2 or lower
        df[f"{period}_3p"] = df[[f"{period}_{i}" for i in range(3, 6)]].sum(axis=1)
        df[f"perc_{period}_3p"] = df[f"{period}_3p"] / df[f"pop_{period}"] * 100
        df[f"{period}_2m"] = df[[f"{period}_{i}" for i in range(1, 3)]].sum(axis=1)
        df[f"perc_{period}_2m"] = df[f"{period}_2m"] / df[f"pop_{period}"] * 100
    df["perc_inc_ML2_3p"] = df["perc_ML2_3p"] - df["perc_CS_3p"]
    df["perc_inc_ML1_3p"] = df["perc_ML1_3p"] - df["perc_CS_3p"]
    return df

In [17]:
df_fadm=add_columns(df_fadm)
df_fadm.head()

Unnamed: 0,date,ADMIN1,CS_2,CS_3,CS_4,CS_99,ML1_1,ML1_2,ML1_3,ML1_4,...,perc_ML2_2,perc_ML2_3,perc_ML2_4,perc_ML2_5,ML2_3p,perc_ML2_3p,ML2_2m,perc_ML2_2m,perc_inc_ML2_3p,perc_inc_ML1_3p
0,2009-07-01,Awdal,20.72,0.0,0.0,463644.93,0.0,20.72,0.0,0.0,...,100.0,0.0,0.0,0.0,0.0,0.0,20.72,100.0,0.0,0.0
1,2009-07-01,Bakool,0.0,0.0,0.0,483539.54,0.0,0.0,0.0,0.0,...,,,,,0.0,,0.0,,,
2,2009-07-01,Banadir,0.0,0.0,0.0,1388087.5,0.0,0.0,0.0,0.0,...,,,,,0.0,,0.0,,,
3,2009-07-01,Bari,0.0,0.0,0.0,595480.55,0.0,0.0,0.0,0.0,...,,,,,0.0,,0.0,,,
4,2009-07-01,Bay,0.0,0.0,0.0,968685.09,0.0,0.0,0.0,0.0,...,,,,,0.0,,0.0,,,


In [18]:
def get_trigger(row, period, phase, threshold):
    """
    Return 1 if percentage of population in row for period in phase "phase" or higher, equals or larger than. 
    Threshold should NOT be a decimal (ie 5 for 5 percent, not .05) 
    """
    # range till 6 cause 5 is max phase
    cols = [f"{period}_{l}" for l in range(phase, 6)]
    if np.isnan(row[f"pop_{period}"]):
        return np.nan
#    if round(row[cols].sum()/row[f"pop_{period}"]*100) >= threshold:
    if round(row[cols].sum()) >= threshold:
        return 1
    else:
        return 0


#def get_trigger_increase_rel(row, phase, threshold):
#    """
#    Return 1 if population in row for >="phase" at ML1 is expected to be larger than (current (CS) population in >=phase) * (1+(/100))
#    """
#    # range till 6 cause 5 is max phase
#    cols_ml1 = [f"ML1_{l}" for l in range(phase, 6)]
#    cols_cs = [f"CS_{l}" for l in range(phase, 6)]
#    if row[["pop_CS", "pop_ML1"]].isnull().values.any():
#        return np.nan
#    elif row[cols_ml1].sum() == 0:
#        return 0
#    elif row[cols_ml1].sum() > 0 and row[cols_cs].sum() == 0:
#       return 1
#    elif round((row[cols_ml1].sum() - row[cols_cs].sum())/row[cols_cs].sum() * 100) >= threshold:
#        return 1
#    else:
#        return 0
    
def get_trigger_increase(row, period, phase, threshold):
    """
    Return 1 if for row percentage in >="phase" at period minus percentage in >="phase" currently (CS) is expected to be larger than threshold
    For Global IPC the population analysed in ML2 is sometimes different than in CS. That is why we work directly with percentages and not anymore with (pop period phase+ - pop CS phase+) / pop CS
    Threshold should NOT be a decimal (ie 5 for 5 percent, not .05) 
    """
    # range till 6 cause 5 is max phase
    cols__ml = [f"perc_{period}_{l}" for l in range(phase, 6)]
    cols__cs = [f"perc_CS_{l}" for l in range(phase, 6)]
    if row[["pop_CS", f"pop_{period}"]].isnull().values.any():
        return np.nan
    if row[cols__ml].sum() == 0:
        return 0
    if round(row[cols__ml].sum() - row[cols__cs].sum()) >= threshold:
        return 1
    else:
        return 0

In [19]:
#display most recent numbers
df_fadm.loc[df_fadm.date==df_fadm.date.max(),["date",
                                              "year",
                                              "month",
                                              "ADMIN1",
                                              "perc_CS_3p",
                                              "perc_CS_4",
                                              "perc_ML1_3p",
                                              "perc_ML1_4",
                                              "perc_ML2_3p",
                                              "perc_ML2_4"]]

Unnamed: 0,date,year,month,ADMIN1,perc_CS_3p,perc_CS_4,perc_ML1_3p,perc_ML1_4,perc_ML2_3p,perc_ML2_4
720,2020-10-01,2020,10,Awdal,24.7,0.0,24.7,0.0,24.7,0.0
721,2020-10-01,2020,10,Bakool,28.77,0.0,28.77,0.0,28.77,0.0
722,2020-10-01,2020,10,Banadir,0.0,0.0,0.0,0.0,98.95,0.0
723,2020-10-01,2020,10,Bari,100.0,0.0,100.0,0.0,100.0,0.0
724,2020-10-01,2020,10,Bay,34.62,0.0,34.62,0.0,34.62,0.0
725,2020-10-01,2020,10,Galgaduud,0.0,0.0,0.0,0.0,22.82,0.0
726,2020-10-01,2020,10,Gedo,0.0,0.0,0.0,0.0,0.0,0.0
727,2020-10-01,2020,10,Hiraan,19.85,0.0,19.85,0.0,42.01,0.0
728,2020-10-01,2020,10,Lower Juba,30.63,0.0,30.63,0.0,30.63,0.0
729,2020-10-01,2020,10,Lower Shabelle,0.27,0.0,0.27,0.0,0.27,0.0


In [23]:
#Column value for row will be 1 if threshold is met and 0 if it isnt
#The projected population in phase 3 and above exceed 20%, AND
#The projected population in phase 3 is projected to increase by a further 5%, OR
#The projected population in phase 4 or above is 2.5%

df_fadm["trigger_ML1_3_20"]=df_fadm.apply(lambda x: get_trigger(x,"ML1",3,20),axis=1)
df_fadm["trigger_ML1_3_5ir"]=df_fadm.apply(lambda x: get_trigger_increase(x,"ML1",3,5),axis=1)
df_fadm["trigger_ML1_4_2half"]=df_fadm.apply(lambda x: get_trigger(x,"ML1",4,2.5),axis=1)

df_fadm["trigger_ML2_3_20"]=df_fadm.apply(lambda x: get_trigger(x,"ML2",3,20),axis=1)
df_fadm["trigger_ML2_3_5ir"]=df_fadm.apply(lambda x: get_trigger_increase(x,"ML2",3,5),axis=1)
df_fadm["trigger_ML2_4_2half"]=df_fadm.apply(lambda x: get_trigger(x,"ML2",4,2.5),axis=1)



In [24]:
#initialize dict with all the analyses
dict_fan={}

In [25]:
#analyse endorsed trigger applied at subnational level
df_an1=df_fadm.loc[((df_fadm["trigger_ML1_3_20"]==1) & (df_fadm["trigger_ML1_3_5ir"]==1)) | (df_fadm["trigger_ML1_4_2half"]==1)]

display(df_an1.groupby(['year', 'month'], as_index=False)[admc,'perc_CS_3p','perc_ML1_3p','perc_ML1_4'].agg(lambda x: list(x)))
dict_fan["an1"]={"df":df_an1,"trig_cols":["CS_3p","ML1_3p","ML1_4"],"desc":"At least 20% of ADMIN1 population in IPC3+ at ML1 AND increase by 5 percentage points in ADMIN1 pop. projected in IPC3+ compared to current state) OR (At least 2.5% of ADMIN1 population projected at IPC4+ by ML1"}


Unnamed: 0,year,month,ADMIN1,perc_CS_3p,perc_ML1_3p,perc_ML1_4
0,2009,10,"[Bakool, Banadir, Bay, Galgaduud, Gedo, Hiraan...","[100.0, 100.0, 100.0, 100.0, 100.0000000000000...","[100.0, 100.0, 100.0, 100.0, 100.0000000000000...","[100.0, 96.99964006790977, 13.71673989310948, ..."
1,2010,1,"[Awdal, Bakool, Banadir, Bay, Galgaduud, Gedo,...","[57.97915792236532, 100.0, 99.92290134516996, ...","[57.97915647352021, 100.0, 99.92290134516996, ...","[0.003683904078121831, 37.65495933105403, 96.7..."
2,2010,4,"[Bakool, Banadir, Bay, Galgaduud, Gedo, Hiraan...","[100.00000000000003, 99.92290134516996, 0.1699...","[100.00000000000003, 99.92290134516996, 0.1699...","[79.5484677298544, 96.71650337608632, 0.149013..."
3,2010,7,"[Bakool, Banadir, Bay, Galgaduud, Gedo, Lower ...","[99.99999999999999, 99.92290134178798, 0.14901...","[99.99999999999999, 99.92290134178798, 0.14901...","[90.29316921072204, 96.84993807461116, 0.12748..."
4,2010,10,"[Bakool, Banadir, Bari, Bay, Galgaduud, Gedo, ...","[100.0, 99.40470934516016, 62.82575964465262, ...","[100.0, 99.40470934516016, 79.37899600341673, ...","[36.6228422937073, 96.49617061256029, 0.0, 0.0..."
5,2011,1,"[Bakool, Banadir, Bari, Bay, Galgaduud, Gedo, ...","[100.0, 99.41951866975097, 61.94988780861057, ...","[100.0, 99.41951866975097, 99.99999999999997, ...","[37.05882329763875, 96.42689808963337, 4.83298..."
6,2011,4,"[Bakool, Banadir, Bari, Bay, Galgaduud, Gedo, ...","[100.0, 99.41951866975097, 61.94988780861057, ...","[100.0, 100.0, 99.99999999999997, 57.229989521...","[37.05882329763875, 96.65628512330969, 4.83298..."
7,2011,7,"[Bakool, Banadir, Bari, Bay, Galgaduud, Gedo, ...","[99.99999999999999, 100.0, 99.01020599249549, ...","[100.0, 100.0, 99.01020599249549, 100.0, 100.0...","[0.0, 0.0, 4.8329875882169935, 0.0, 100.0, 0.0..."
8,2011,10,"[Bakool, Banadir, Bari, Bay, Galgaduud, Gedo, ...","[100.0, 100.0, 99.01020601276747, 99.999999999...","[100.0, 100.0, 100.00000000000003, 100.0, 100....","[11.05031169012179, 96.26398738312164, 4.83373..."
9,2012,1,"[Bari, Bay, Galgaduud, Gedo, Hiraan, Lower Jub...","[25.727870274522562, 40.07870663903405, 64.471...","[25.727870274522562, 40.07870663903405, 64.471...","[5.351551942641496, 0.06942070130353525, 6.399..."


In [26]:
def col_pop(row,col,df):
    pop_col=df[df.date==row.date][col].sum()
    return pop_col

def col_perc(row,col,df):
    s=col.split("_")[0]
    return df[df.date==row.date][col].sum()/df[df.date==row.date][f"pop_{s}"].sum()*100

In [27]:
#plot all analysis in nicer format
for k in dict_fan.keys():
    d=dict_fan[k]["desc"]
    num_k=k.replace("an","")
    print(f"Analysis {num_k}: FewsNet, {d}")
    df=dict_fan[k]["df"]
    df_grouped=df.groupby(['date','year', 'month'], as_index=False)[admc].agg(lambda x: list(x))
    for c in dict_fan[k]["trig_cols"]:
        df_grouped["pop_reg"]=df_grouped.apply(lambda x: col_pop(x,"adjusted_population",df),axis=1).astype(int)
        df_grouped[f"perc_{c}_reg"]=df_grouped.apply(lambda x: col_perc(x,c,df),axis=1).round(2)
        df_grouped[f"pop_{c}_reg"]=df_grouped.apply(lambda x: col_pop(x,c,df),axis=1).astype(int)
        df_grouped[f"perc_{c}_tot"]=df_grouped.apply(lambda x: col_perc(x,c,df_fadm),axis=1).round(2)
        df_grouped[f"pop_{c}_tot"]=df_grouped.apply(lambda x: col_pop(x,c,df_fadm),axis=1).astype(int)
    dict_fan[k]["df_group"]=df_grouped
    df_grouped["ADMIN1"]=[', '.join(map(str, l)) for l in df_grouped[admc]]
    df_grouped["Trigger description"]=d
    df_grouped=df_grouped.rename(columns={"ADMIN1":"Regions triggered","pop_reg":"pop. threshold regions"})
    df_grouped_clean=df_grouped[["year","month","Regions triggered"]].set_index(['year', 'month'])
    display(df_grouped[["year","month","Regions triggered"]].set_index(['year', 'month']))
    b=df_grouped[["year","month","Regions triggered","Trigger description"]].set_index(['Trigger description','year', 'month'])

Analysis 1: FewsNet, At least 20% of ADMIN1 population in IPC3+ at ML1 AND increase by 5 percentage points in ADMIN1 pop. projected in IPC3+ compared to current state) OR (At least 2.5% of ADMIN1 population projected at IPC4+ by ML1


KeyError: 'adjusted_population'

### FewsNet, plotting characteristics of the trigger

In [None]:
def plot_regions_trig(df_trig,adm0c="admin0Name",adm1c="admin1Name",shape_path="../Data/som_adm_undp_shp/Som_Admbnda_Adm1_UNDP.shp"):
    gdf = gpd.read_file(shape_path)

    count = 1
    f, ax = plt.subplots(figsize=(12,12))
    for d in range(2009,2021):
        ax2 = plt.subplot(4, 4, count)
        gdf.plot(ax=ax2, color='#DDDDDD', edgecolor='#BBBBBB')
        regions = df_trig[adm1c].loc[df_trig['year']==d]
        if len(regions) > 0:
            gdf.loc[gdf[adm1c].isin(regions)].plot(ax=ax2, color='red')
        plt.title(f"Regions triggered {d}")
        count+=1
        ax2.axis("off")
    plt.show()

In [None]:
plot_regions_trig(dict_fan["an1"]["df"])

In [None]:
def plot_aff_dates(df_d,df_trig,col,shape_path="../Data/som_adm_undp_shp/Som_Admbnda_Adm1_UNDP.shp",title=None):
    
    num_dates=len(df_trig.date.unique())
    colp_num=2
    rows=num_dates // colp_num
    rows+=num_dates % colp_num
    position = range(1, num_dates + 1)

    gdf = gpd.read_file(shape_path)
    df_geo=gdf[["admin1Name","geometry"]].merge(df_d,on="admin1Name",how="left")
    
    colors = len(df_geo[col].unique())
    cmap = 'Blues'
    figsize = (16, 10)
    scheme = "natural_breaks" #'equal_interval' 
    fig = plt.figure(1,figsize=(16,6*rows))
    
    for i,c in enumerate(df_trig.date.unique()):
        ax = fig.add_subplot(rows,colp_num,position[i])
        df_date=df_geo[df_geo.date==c]
        if df_date[col].isnull().values.all():
            print(f"No not-NaN values for {c}")
        elif df_date[col].isnull().values.any():
            df_geo[df_geo.date==c].plot(col, ax=ax,cmap=cmap, figsize=figsize, k = colors,  legend=True,scheme=scheme,missing_kwds={"color": "lightgrey", "edgecolor": "red",
   "hatch": "///",
    "label": "Missing values"})
        else:
            df_geo[df_geo.date==c].plot(col, ax=ax,cmap=cmap, figsize=figsize, k = colors,  legend=True,scheme=scheme)
        gdf.boundary.plot(linewidth=0.2,ax=ax)

        ax.axis("off")
        
        plt.title(pd.DatetimeIndex([c])[0].to_period('M'))
        if not df_date[col].isnull().values.all():
            leg = ax.get_legend()

            for lbl in leg.get_texts():
                label_text = lbl.get_text()
                upper = label_text.split(",")[-1].rstrip(']')

                try:
                    new_text = f'{float(upper):,.2f}'
                except:
                    new_text=upper
                lbl.set_text(new_text)

    if title:
        fig.suptitle(title,fontsize=14, y=0.92)
    plt.show()

In [None]:
plot_aff_dates(df_fadm,dict_fan["an1"]["df"],"perc_ML1_4",title="Percentage of population projected in IPC4+ in ML1 for the dates the trigger is met")

#### Trigger analysis Global IPC data
One of the goals was to compare the two sources of IPC data. Below are the results on the Global IPC data with the final chosen trigger

In [None]:
df_gadm=pd.read_csv(f"../Data/GlobalIPCProcessed/{country}_globalipc_admin1{suffix}.csv")

In [None]:
glob_adm1c="ADMIN1"

In [None]:
df_gadm=add_columns(df_gadm)

In [None]:
df_gadm.head(n=3)

In [None]:
#get yes/no for different thresholds, i.e. column value for row will be 1 if threshold is met and 0 if it isnt
df_gadm["trigger_ML1_4_20"]=df_gadm.apply(lambda x: get_trigger(x,"ML1",4,20),axis=1)
df_gadm["trigger_ML1_3_30"]=df_gadm.apply(lambda x: get_trigger(x,"ML1",3,30),axis=1)
df_gadm["trigger_ML1_3_5i"]=df_gadm.apply(lambda x: get_trigger_increase(x,"ML1",3,5),axis=1)
df_gadm["trigger_ML2_4_20"]=df_gadm.apply(lambda x: get_trigger(x,"ML2",4,20),axis=1)
df_gadm["trigger_ML2_3_30"]=df_gadm.apply(lambda x: get_trigger(x,"ML2",3,30),axis=1)
df_gadm["trigger_ML2_3_5i"]=df_gadm.apply(lambda x: get_trigger_increase(x,"ML2",3,5),axis=1)

In [None]:
#initialize dict with all the analyses
dict_gan={}

In [None]:
#currently (Oct 2020) selected trigger
df_gan1=df_gadm.loc[(df_gadm["trigger_ML1_4_20"]==1) | ((df_gadm["trigger_ML1_3_30"]==1) & (df_gadm["trigger_ML1_3_5i"]==1))]
display(df_gan1.groupby(['year', 'month'], as_index=False)[glob_adm1c,'perc_ML1_4','perc_CS_3p','perc_ML1_3p'].agg(lambda x: list(x)))
dict_gan["an1"]={"df":df_gan1,"trig_cols":["ML1_3p","CS_3p","ML1_4"],"desc":"At least 20% of ADMIN1 population in IPC4+ at ML1 OR (At least 30% of ADMIN1 population projected at IPC3+  AND increase by 5 percentage points in ADMIN1 pop.  projected in IPC3+ compared to current state)"}

In [None]:
for k in dict_gan.keys():
    d=dict_gan[k]["desc"]
    num_k=k.replace("an","")
    print(f"Analysis {num_k}: GlobalIPC, {d}")
    df=dict_gan[k]["df"]
    df_grouped=df.groupby(['year', 'month'], as_index=False)[glob_adm1c].agg(lambda x: list(x))
    if df_grouped.empty:
        display(df_grouped)
    else:
        df_grouped[glob_adm1c]=[', '.join(map(str, l)) for l in df_grouped[glob_adm1c]]
        df_grouped["Trigger description"]=d
        df_grouped=df_grouped.rename(columns={glob_adm1c:"Regions triggered"})
        df_grouped_clean=df_grouped[["year","month","Regions triggered"]].set_index(['year', 'month'])
        display(df_grouped[["year","month","Regions triggered"]].set_index(['year', 'month']))
        b=df_grouped[["year","month","Regions triggered","Trigger description"]].set_index(['Trigger description','year', 'month'])