# Linking DEC Metadata with J2SR Metrics

## Step 1: Import Packages

In [2]:
import pandas as pd
import numpy as np
import pycountry

## Step 2: Import DEC Data and Create Unique Country-Year ID

In [3]:
dec_meta = pd.read_csv('dec-evaluations-data.csv', index_col = "Unique_ID", encoding = 'latin')

In [4]:
dec_meta.head()

Unnamed: 0_level_0,Abstract,Ancillary_Data,Bibliographic_Type,ContentType,Contract_Grant_Number,Credit,Date_Resource_Created,Description,Descriptors_Topical,Descriptors_Geographic,...,Publication_Date_Freeform,Related_Doc_Links,Report_Number,Series_Title,Title,Title_Translated,URI,USAID_Geography,USAID_Project_Number,Digital_Object_Identifier
Unique_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PD-AAB-495-E1,,Evaluation period: 22 Mar 1972-12 Jul 1976,Final Evaluation Report,Documents,,,7/1/1977 0:00,,Management education ~|~_©_~|~ Graduate educat...,Central America,...,13-Jul-77,,,,LOAN COMPLETION REVIEW AND REPORT,,https://dec.usaid.gov/dec/content/Detail.aspx?...,Reg Office Cent America & Panama (ROCAP),5960044,
PD-AAP-604,,Evaluation period: 25 Jul 1972-10 Apr 1979,Final Evaluation Report,Documents,,,6/1/1979 0:00,,Low cost housing ~|~_©_~|~ Minimum shelter hou...,Peru,...,6-Jun-79,,,,Urban reconstruction -- earthquake and flood z...,,https://dec.usaid.gov/dec/content/Detail.aspx?...,Peru,5270101,
PD-AAB-088-A1,,,Special Evaluation,Documents,AID/TA-C-1469,,2/1/1979 0:00,,Road construction ~|~_©_~|~ Roads ~|~_©_~|~ Ru...,Honduras,...,1-Feb-79,,,,TRIALS SELECTION CRITERIA AND EVALUATION DESIG...,,https://dec.usaid.gov/dec/content/Detail.aspx?...,Honduras,5220137,
PD-AAA-880-D1,,Evaluation period: 22 Jan 1979-9 Feb 1979,Final Evaluation Report,Documents,AID/LAC-C-1313,,2/1/1979 0:00,,Health surveys ~|~_©_~|~ Demographic research ...,El Salvador,...,9-Feb-79,,,,EVALUATION OF MULTIPURPOSE HOUSEHOLD SURVEY (E...,,https://dec.usaid.gov/dec/content/Detail.aspx?...,El Salvador,5190176,
PD-AAT-461,,Evaluation period: 1 Jan1972-31 Dec1976,Special Evaluation,Documents,AID/pha/C-1100,,7/1/1977 0:00,,Condoms ~|~_©_~|~ Sterilization (birth control...,Thailand,...,26-Jul-77,,,,REPORT OF THE SECOND EVALUATION OF THE NATIONA...,,https://dec.usaid.gov/dec/content/Detail.aspx?...,Thailand,4930283,


In [5]:
dec_meta['Date_Form'] = pd.to_datetime(dec_meta['Date_Resource_Created'], infer_datetime_format = True)

In [6]:
dec_meta['year'] = pd.DatetimeIndex(dec_meta['Date_Form']).year.astype(str)
dec_meta['year'] = dec_meta['year'].str[:-2]
dec_meta['year']

Unique_ID
PD-AAB-495-E1    1977
PD-AAP-604       1979
PD-AAB-088-A1    1979
PD-AAA-880-D1    1979
PD-AAT-461       1977
                 ... 
PA-00S-RX6       2017
PA-00S-RV5       2016
PA-00S-RSS       2016
PA-00S-RRZ       2017
PA-00S-RRS       2016
Name: year, Length: 12411, dtype: object

In [7]:
def do_fuzzy_search(country):
    try:
        result = pycountry.countries.search_fuzzy(country)
    except Exception:
        return np.nan
    else:
        return result[0].alpha_3

In [8]:
iso_map = {country: do_fuzzy_search(country) for country in dec_meta["Descriptors_Geographic"].unique()}
dec_meta["country_code"] = dec_meta["Descriptors_Geographic"].map(iso_map)

In [10]:
dec_meta['country_year'] = dec_meta['country_code'] + "_" + dec_meta['year']

## Step 3: Import J2SR Data and Create Unique Country-Year ID

In [12]:
pd.options.display.max_colwidth = 100
j2sr_data = pd.read_csv('data_records.1598319212.csv')
j2sr_data.head()

Unnamed: 0,series_id,series_name,source_name,country_id,country_name,region,income_group,year,value
0,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2007,35.153241
1,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2008,34.844764
2,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2009,35.229759
3,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2010,42.153961
4,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2011,40.856806


In [13]:
j2sr_data['roadmap_metric'] = j2sr_data['series_name'].str.contains('Roadmap Metric:')
j2sr_data['roadmap_metric']

0        True
1        True
2        True
3        True
4        True
         ... 
49853    True
49854    True
49855    True
49856    True
49857    True
Name: roadmap_metric, Length: 49858, dtype: bool

In [14]:
j2sr_iso_map = {country: do_fuzzy_search(country) for country in j2sr_data["country_name"].unique()}
j2sr_data["country_code"] = j2sr_data["country_name"].map(iso_map)

In [15]:
j2sr_data['country_year'] = j2sr_data['country_code'] + "_" + j2sr_data['year'].astype(str)
j2sr_data['country_year']

0        AFG_2007
1        AFG_2008
2        AFG_2009
3        AFG_2010
4        AFG_2011
           ...   
49853    VNM_1995
49854    YEM_1995
49855    ZAF_1995
49856    ZMB_1995
49857    ZWE_1995
Name: country_year, Length: 49858, dtype: object

In [16]:
j2sr_roadmap = j2sr_data[j2sr_data['roadmap_metric']]
j2sr_roadmap

Unnamed: 0,series_id,series_name,source_name,country_id,country_name,region,income_group,year,value,roadmap_metric,country_code,country_year
0,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2007,35.153241,True,AFG,AFG_2007
1,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2008,34.844764,True,AFG,AFG_2008
2,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2009,35.229759,True,AFG,AFG_2009
3,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2010,42.153961,True,AFG,AFG_2010
4,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2011,40.856806,True,AFG,AFG_2011
...,...,...,...,...,...,...,...,...,...,...,...,...
49853,70295,Roadmap Metric: Export Sophistication,"USAID, Journey to Self-Reliance Metrics",704,Vietnam,Asia,Lower Middle Income Country (World Bank Classification),1995,-0.915000,True,VNM,VNM_1995
49854,70295,Roadmap Metric: Export Sophistication,"USAID, Journey to Self-Reliance Metrics",887,Yemen,Middle East and North Africa,Low Income Country (World Bank Classification),1995,-1.413300,True,YEM,YEM_1995
49855,70295,Roadmap Metric: Export Sophistication,"USAID, Journey to Self-Reliance Metrics",710,South Africa,Sub-Saharan Africa,Upper Middle Income Country (World Bank Classification),1995,0.314500,True,ZAF,ZAF_1995
49856,70295,Roadmap Metric: Export Sophistication,"USAID, Journey to Self-Reliance Metrics",894,Zambia,Sub-Saharan Africa,Lower Middle Income Country (World Bank Classification),1995,-0.602400,True,ZMB,ZMB_1995


In [17]:
j2sr_roadmap = j2sr_roadmap.dropna(subset=['country_year'])
j2sr_roadmap.shape

(35286, 12)

In [20]:
j2sr_roadmap = j2sr_roadmap[j2sr_roadmap['year'] > 2009]
j2sr_roadmap['year']

3        2010
4        2011
5        2012
6        2013
7        2014
         ... 
47883    2010
47884    2010
47885    2010
47886    2010
47887    2010
Name: year, Length: 10983, dtype: int64

## Step 4: Pivot J2SR Data to Generate Columns per Indicator

In [21]:
j2sr_pivot = j2sr_roadmap.pivot_table(index = 'country_year', columns = 'series_name', values = 'value')
j2sr_pivot['iso'] = j2sr_pivot.index.str[:3]
j2sr_pivot['year'] = j2sr_pivot.index.str[4:]
j2sr_pivot

series_name,"Roadmap Metric: Biodiversity & Habitat Protections (0-100, higher is better)","Roadmap Metric: Business Environment (0-100, higher is better)","Roadmap Metric: Child Health (0-100, higher is better)","Roadmap Metric: Civil Society & Media Effectiveness (0-1, higher is better)","Roadmap Metric: Economic Gender Gap (0-1, where 0=inequality and 1=equality)",Roadmap Metric: Education Quality (years),Roadmap Metric: Export Sophistication,"Roadmap Metric: Information & Communication Technology (ICT) Adoption (1-100, higher is better)","Roadmap Metric: Liberal Democracy (0-1, higher is better)","Roadmap Metric: Open Government (0-1, higher is stronger rule of law)",Roadmap Metric: Poverty Rate ($5/Day) (percentage),"Roadmap Metric: Safety & Security (0-100, higher is better)","Roadmap Metric: Social Group Equality (0-4, higher is better)","Roadmap Metric: Tax System Effectiveness (0-1, higher is better)",iso,year
country_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
AFG_2010,,42.153961,48.949188,0.769,,,,,0.238,,,48.243911,1.861,0.34,AFG,2010
AFG_2011,,40.856806,50.954385,0.768,,,,,0.241,,,46.915305,1.861,0.31,AFG,2011
AFG_2012,,41.603012,52.948497,0.767,,,,,0.242,,,44.588927,1.861,0.28,AFG,2012
AFG_2013,,43.533872,54.929004,0.749,,,,,0.232,,,45.623433,1.402,0.24,AFG,2013
AFG_2014,,44.097498,56.898474,0.737,,,,,0.262,0.341421,,48.992330,1.322,0.23,AFG,2014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZWE_2016,,37.060849,60.254732,0.392,0.713546,,-0.9568,,0.175,0.295787,,68.004509,1.670,0.61,ZWE,2016
ZWE_2017,,39.095571,59.988293,0.434,0.710000,,-0.9524,31.70614,0.215,,78.38,65.626222,2.274,0.64,ZWE,2017
ZWE_2018,94.53,42.145474,60.749835,0.523,0.730000,6.346832,-0.9291,32.58448,0.195,0.301143,,65.229449,1.849,,ZWE,2018
ZWE_2019,,,,0.418,,,,37.36302,0.175,0.332524,,,1.866,,ZWE,2019


## Step 5: Calculate Commitment and Capacity Scores from Normalized Roadmap Indicators

#### Min-Max Scaling
URL: https://selfreliance.usaid.gov/docs/FY_2020_USAID_Journey_to_Self-Reliance_Country_Roadmap_Methodology_Guide.pdf

USAID’s country roadmaps use a min-max scaling technique to normalize all data onto a common 0.0 to
1.0 scale to facilitate visualization, comparison across metrics, and calculation of the Commitment and
Capacity indices. A country scoring 0.0 on a given metric indicates that the country recorded the least
favorable outcome globally in the raw dataset, and a country scoring 1.0 indicates that the country
recorded the most favorable outcome globally in the raw dataset. All other countries receive scores
within the 0.0-1.0 range based on where they fall between the worst and best outcomes globally,
preserving the source organization’s data distribution.

While USAID Roadmaps are only produced for low- and middle-income countries, all countries globally, 
including high-income countries, are used to establish the range of possible outcomes for each metric. 

The period of performance used to determine the range of observed outcomes is 2010 to the latest data 
available on July 1 2019 (including values that have been “carried forward” from 2006-2009 into this 
date range; see “Temporal Coverage” and “Handling Missing Data” sections below for more details)

In [None]:
# Min-Max scaling



#### Methodology from USAID J2SR FY2020 Metrics Guide
URL: https://selfreliance.usaid.gov/docs/FY_2020_USAID_Journey_to_Self-Reliance_Country_Roadmap_Methodology_Guide.pdf

Overall “Commitment” and “Capacity” composite scores are calculated using the arithmetic mean of all
available scaled components for each country. 

The Commitment Index comprises seven underlying metrics, each receiving an equal weight 
(i.e. one-seventh weighting, if all sub-components are present after imputation). 

- Liberal Democracy Index
- Open Government
- Social Group Equality
- Economic Gender Gap
- Business Environment
- Trade Freedom
- Biodiversity and Habitat Protections

The Capacity Index comprises ten underlying metrics, each receiving an equal
weighting of one-tenth in aggregation, if all sub-components are present after imputation. 

- Government Effectiveness
- Tax System Effectiveness
- Safety and Security
- Civil Society and Media Effectiveness
- Poverty Rate
- Education Quality
- Child Health
- GDP per Capita in PPP
- ICT Adoption
- Export Sophistication

If dimension components (i.e. individual metrics) of either index are missing after imputation, Commitment and
Capacity scores are still generated using an arithmetic mean of all available components, but only when
at least six of ten Capacity metrics are present and four of seven Commitment metrics are present.


In [None]:
# Create function that checks for sufficient components in Commitment and computes arithmetic mean
def comm_calc(data, ldi, og, sge, egg, be, tf, bhp):
  if # at least 4 of 7 metrics present
    comm_array = [data[ldi], data[og], data[sge], data[egg], data[be], data[tf], data[bhp]]
    comm_score = np.nanmean(comm_array, 0)
    return comm_score

In [None]:
comm_vars = ['Roadmap Metric: Biodiversity & Habitat Protections (0-100, higher is better)',
        'Roadmap Metric: Business Environment (0-100, higher is better)',
        'Roadmap Metric: Economic Gender Gap (0-1, where 0=inequality and 1=equality)',
        'Roadmap Metric: Education Quality (years)',
        'Roadmap Metric: Liberal Democracy (0-1, higher is better)',
        'Roadmap Metric: Open Government (0-1, higher is stronger rule of law)',
        'Roadmap Metric: Social Group Equality (0-4, higher is better)']
        
# Missing Trade Freedom

In [None]:
# Create function that checks for sufficient components in Capacity and computes arithmetic mean
def cap_calc(data, ge, tse, ss, csme, pr, eq, ch, gdp, ict, ep):
  if # at least 4 of 7 metrics present
    cap_array = [data[ge], data[tse], data[ss], data[csme], data[pr], data[eq], data[ch], data[gdp], data[ict], data[ep]]
    cap_score = np.nanmean(cap_array, 0)
    return cap_score

In [None]:
cap_vars = ['Roadmap Metric: Child Health (0-100, higher is better)',
       'Roadmap Metric: Civil Society & Media Effectiveness (0-1, higher is better)',
       'Roadmap Metric: Export Sophistication',
       'Roadmap Metric: Information & Communication Technology (ICT) Adoption (1-100, higher is better)',
       'Roadmap Metric: Poverty Rate ($5/Day) (percentage)',
       'Roadmap Metric: Safety & Security (0-100, higher is better)',
       'Roadmap Metric: Tax System Effectiveness (0-1, higher is better)']

# Missing GDP per Capita, Education Quality, and Gov Effectiveness

## Step 4: Link DEC and J2SR Data

In [82]:
dec_j2sr = pd.merge(dec_meta, j2sr_pivot, how='left', left_on = 'country_year', right_index=True)
dec_j2sr.head()

Unnamed: 0_level_0,Abstract,Ancillary_Data,Bibliographic_Type,ContentType,Contract_Grant_Number,Credit,Date_Resource_Created,Description,Descriptors_Topical,Descriptors_Geographic,...,Roadmap Metric: Export Sophistication,"Roadmap Metric: Information & Communication Technology (ICT) Adoption (1-100, higher is better)","Roadmap Metric: Liberal Democracy (0-1, higher is better)","Roadmap Metric: Open Government (0-1, higher is stronger rule of law)",Roadmap Metric: Poverty Rate ($5/Day) (percentage),"Roadmap Metric: Safety & Security (0-100, higher is better)","Roadmap Metric: Social Group Equality (0-4, higher is better)","Roadmap Metric: Tax System Effectiveness (0-1, higher is better)",iso,year_y
Unique_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PD-AAB-495-E1,,Evaluation period: 22 Mar 1972-12 Jul 1976,Final Evaluation Report,Documents,,,7/1/1977 0:00,,Management education ~|~_©_~|~ Graduate education ~|~_©_~|~ Limited data,Central America,...,,,,,,,,,,
PD-AAP-604,,Evaluation period: 25 Jul 1972-10 Apr 1979,Final Evaluation Report,Documents,,,6/1/1979 0:00,,Low cost housing ~|~_©_~|~ Minimum shelter housing ~|~_©_~|~ Housing ~|~_©_~|~ Earthquakes ~|~_©...,Peru,...,,,0.048,,,,1.304,,PER,1979.0
PD-AAB-088-A1,,,Special Evaluation,Documents,AID/TA-C-1469,,2/1/1979 0:00,,Road construction ~|~_©_~|~ Roads ~|~_©_~|~ Rural areas,Honduras,...,,,0.061,,,,0.696,,HND,1979.0
PD-AAA-880-D1,,Evaluation period: 22 Jan 1979-9 Feb 1979,Final Evaluation Report,Documents,AID/LAC-C-1313,,2/1/1979 0:00,,Health surveys ~|~_©_~|~ Demographic research ~|~_©_~|~ MIGRATION ~|~_©_~|~ Samples ~|~_©_~|~ Su...,El Salvador,...,,,0.041,,,,0.631,,SLV,1979.0
PD-AAT-461,,Evaluation period: 1 Jan1972-31 Dec1976,Special Evaluation,Documents,AID/pha/C-1100,,7/1/1977 0:00,,Condoms ~|~_©_~|~ Sterilization (birth control) ~|~_©_~|~ Oral contraceptives ~|~_©_~|~ Family p...,Thailand,...,,,0.083,,,,0.353,,THA,1977.0


In [83]:
dec_j2sr.to_csv('dec_j2sr_trial.csv')