# Linking DEC Metadata with J2SR Metrics

## Step 1: Import Packages

In [2]:
import pandas as pd
import numpy as np
import pycountry

## Step 2: Import DEC Data and Create Unique Country-Year ID

In [3]:
dec_meta = pd.read_csv('dec-evaluations-data.csv', index_col = "Unique_ID", encoding = 'latin')

In [4]:
dec_meta.head()

Unnamed: 0_level_0,Abstract,Ancillary_Data,Bibliographic_Type,ContentType,Contract_Grant_Number,Credit,Date_Resource_Created,Description,Descriptors_Topical,Descriptors_Geographic,...,Publication_Date_Freeform,Related_Doc_Links,Report_Number,Series_Title,Title,Title_Translated,URI,USAID_Geography,USAID_Project_Number,Digital_Object_Identifier
Unique_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PD-AAB-495-E1,,Evaluation period: 22 Mar 1972-12 Jul 1976,Final Evaluation Report,Documents,,,7/1/1977 0:00,,Management education ~|~_©_~|~ Graduate educat...,Central America,...,13-Jul-77,,,,LOAN COMPLETION REVIEW AND REPORT,,https://dec.usaid.gov/dec/content/Detail.aspx?...,Reg Office Cent America & Panama (ROCAP),5960044,
PD-AAP-604,,Evaluation period: 25 Jul 1972-10 Apr 1979,Final Evaluation Report,Documents,,,6/1/1979 0:00,,Low cost housing ~|~_©_~|~ Minimum shelter hou...,Peru,...,6-Jun-79,,,,Urban reconstruction -- earthquake and flood z...,,https://dec.usaid.gov/dec/content/Detail.aspx?...,Peru,5270101,
PD-AAB-088-A1,,,Special Evaluation,Documents,AID/TA-C-1469,,2/1/1979 0:00,,Road construction ~|~_©_~|~ Roads ~|~_©_~|~ Ru...,Honduras,...,1-Feb-79,,,,TRIALS SELECTION CRITERIA AND EVALUATION DESIG...,,https://dec.usaid.gov/dec/content/Detail.aspx?...,Honduras,5220137,
PD-AAA-880-D1,,Evaluation period: 22 Jan 1979-9 Feb 1979,Final Evaluation Report,Documents,AID/LAC-C-1313,,2/1/1979 0:00,,Health surveys ~|~_©_~|~ Demographic research ...,El Salvador,...,9-Feb-79,,,,EVALUATION OF MULTIPURPOSE HOUSEHOLD SURVEY (E...,,https://dec.usaid.gov/dec/content/Detail.aspx?...,El Salvador,5190176,
PD-AAT-461,,Evaluation period: 1 Jan1972-31 Dec1976,Special Evaluation,Documents,AID/pha/C-1100,,7/1/1977 0:00,,Condoms ~|~_©_~|~ Sterilization (birth control...,Thailand,...,26-Jul-77,,,,REPORT OF THE SECOND EVALUATION OF THE NATIONA...,,https://dec.usaid.gov/dec/content/Detail.aspx?...,Thailand,4930283,


In [5]:
dec_meta['Date_Form'] = pd.to_datetime(dec_meta['Date_Resource_Created'], infer_datetime_format = True)

In [36]:
dec_meta['year'] = pd.DatetimeIndex(dec_meta['Date_Form']).year
dec_recent = dec_meta[dec_meta['year'] > 2009]
dec_recent.shape

(2320, 36)

In [37]:
dec_recent.loc[:,'year'] = dec_recent['year'].astype(str)
dec_recent.loc[:,'year'] = dec_recent['year'].str[:-2]
dec_recent['year']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)


Unique_ID
PA-00K-SQC    2015
PA-00K-SGR    2015
PA-00K-SBZ    2015
PA-00K-SBV    2015
PA-00K-SB1    2014
              ... 
PA-00S-RX6    2017
PA-00S-RV5    2016
PA-00S-RSS    2016
PA-00S-RRZ    2017
PA-00S-RRS    2016
Name: year, Length: 2320, dtype: object

In [7]:
def do_fuzzy_search(country):
    try:
        result = pycountry.countries.search_fuzzy(country)
    except Exception:
        return np.nan
    else:
        return result[0].alpha_3

In [33]:
iso_map = {country: do_fuzzy_search(country) for country in dec_recent["Descriptors_Geographic"].unique()}
dec_recent.loc[:,"country_code"] = dec_recent["Descriptors_Geographic"].map(iso_map)

In [34]:
dec_recent.loc[:,'country_year'] = dec_recent['country_code'] + "_" + dec_recent['year']
dec_recent['country_year']

## Step 3: Import J2SR Data and Create Unique Country-Year ID

In [43]:
pd.options.display.max_colwidth = 100
j2sr_data = pd.read_csv('data_records.1598319212.csv')
j2sr_data.head()

Unnamed: 0,series_id,series_name,source_name,country_id,country_name,region,income_group,year,value
0,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2007,35.153241
1,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2008,34.844764
2,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2009,35.229759
3,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2010,42.153961
4,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2011,40.856806


In [44]:
j2sr_data = j2sr_data[j2sr_data['year'] > 2009]
j2sr_data['year']

3        2010
4        2011
5        2012
6        2013
7        2014
         ... 
47883    2010
47884    2010
47885    2010
47886    2010
47887    2010
Name: year, Length: 15592, dtype: int64

In [45]:
j2sr_data['roadmap_metric'] = j2sr_data['series_name'].str.contains('Roadmap Metric:')
j2sr_data['roadmap_metric']

3        True
4        True
5        True
6        True
7        True
         ... 
47883    True
47884    True
47885    True
47886    True
47887    True
Name: roadmap_metric, Length: 15592, dtype: bool

In [46]:
j2sr_iso_map = {country: do_fuzzy_search(country) for country in j2sr_data["country_name"].unique()}
j2sr_data["country_code"] = j2sr_data["country_name"].map(iso_map)

In [47]:
j2sr_data['country_year'] = j2sr_data['country_code'] + "_" + j2sr_data['year'].astype(str)
j2sr_data['country_year']

3        AFG_2010
4        AFG_2011
5        AFG_2012
6        AFG_2013
7        AFG_2014
           ...   
47883    VNM_2010
47884    YEM_2010
47885    ZAF_2010
47886    ZMB_2010
47887    ZWE_2010
Name: country_year, Length: 15592, dtype: object

In [50]:
j2sr_roadmap = j2sr_data[j2sr_data['roadmap_metric']]
j2sr_roadmap

Unnamed: 0,series_id,series_name,source_name,country_id,country_name,region,income_group,year,value,roadmap_metric,country_code,country_year
3,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2010,42.153961,True,AFG,AFG_2010
4,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2011,40.856806,True,AFG,AFG_2011
5,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2012,41.603012,True,AFG,AFG_2012
6,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2013,43.533872,True,AFG,AFG_2013
7,68092,"Roadmap Metric: Business Environment (0-100, higher is better)","USAID, Journey to Self-Reliance Metrics",4,Afghanistan,Asia,Low Income Country (World Bank Classification),2014,44.097498,True,AFG,AFG_2014
...,...,...,...,...,...,...,...,...,...,...,...,...
47883,70295,Roadmap Metric: Export Sophistication,"USAID, Journey to Self-Reliance Metrics",704,Vietnam,Asia,Lower Middle Income Country (World Bank Classification),2010,-0.095900,True,VNM,VNM_2010
47884,70295,Roadmap Metric: Export Sophistication,"USAID, Journey to Self-Reliance Metrics",887,Yemen,Middle East and North Africa,Low Income Country (World Bank Classification),2010,-1.286300,True,YEM,YEM_2010
47885,70295,Roadmap Metric: Export Sophistication,"USAID, Journey to Self-Reliance Metrics",710,South Africa,Sub-Saharan Africa,Upper Middle Income Country (World Bank Classification),2010,0.132100,True,ZAF,ZAF_2010
47886,70295,Roadmap Metric: Export Sophistication,"USAID, Journey to Self-Reliance Metrics",894,Zambia,Sub-Saharan Africa,Lower Middle Income Country (World Bank Classification),2010,-0.775800,True,ZMB,ZMB_2010


In [51]:
j2sr_roadmap = j2sr_roadmap.dropna(subset=['country_year'])
j2sr_roadmap.shape

(7981, 12)

## Step 4: Pivot J2SR Data to Generate Columns per Indicator

In [52]:
j2sr_pivot = j2sr_roadmap.pivot_table(index = 'country_year', columns = 'series_name', values = 'value')
j2sr_pivot['iso'] = j2sr_pivot.index.str[:3]
j2sr_pivot['year'] = j2sr_pivot.index.str[4:]
j2sr_pivot

series_name,"Roadmap Metric: Biodiversity & Habitat Protections (0-100, higher is better)","Roadmap Metric: Business Environment (0-100, higher is better)","Roadmap Metric: Child Health (0-100, higher is better)","Roadmap Metric: Civil Society & Media Effectiveness (0-1, higher is better)","Roadmap Metric: Economic Gender Gap (0-1, where 0=inequality and 1=equality)",Roadmap Metric: Education Quality (years),Roadmap Metric: Export Sophistication,"Roadmap Metric: Information & Communication Technology (ICT) Adoption (1-100, higher is better)","Roadmap Metric: Liberal Democracy (0-1, higher is better)","Roadmap Metric: Open Government (0-1, higher is stronger rule of law)",Roadmap Metric: Poverty Rate ($5/Day) (percentage),"Roadmap Metric: Safety & Security (0-100, higher is better)","Roadmap Metric: Social Group Equality (0-4, higher is better)","Roadmap Metric: Tax System Effectiveness (0-1, higher is better)",iso,year
country_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
AFG_2010,,42.153961,48.949188,0.769,,,,,0.238,,,48.243911,1.861,0.34,AFG,2010
AFG_2011,,40.856806,50.954385,0.768,,,,,0.241,,,46.915305,1.861,0.31,AFG,2011
AFG_2012,,41.603012,52.948497,0.767,,,,,0.242,,,44.588927,1.861,0.28,AFG,2012
AFG_2013,,43.533872,54.929004,0.749,,,,,0.232,,,45.623433,1.402,0.24,AFG,2013
AFG_2014,,44.097498,56.898474,0.737,,,,,0.262,0.341421,,48.992330,1.322,0.23,AFG,2014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZWE_2016,,37.060849,60.254732,0.392,0.713546,,-0.9568,,0.175,0.295787,,68.004509,1.670,0.61,ZWE,2016
ZWE_2017,,39.095571,59.988293,0.434,0.710000,,-0.9524,31.70614,0.215,,78.38,65.626222,2.274,0.64,ZWE,2017
ZWE_2018,94.53,42.145474,60.749835,0.523,0.730000,6.346832,-0.9291,32.58448,0.195,0.301143,,65.229449,1.849,,ZWE,2018
ZWE_2019,,,,0.418,,,,37.36302,0.175,0.332524,,,1.866,,ZWE,2019


## Step 5: Calculate Commitment and Capacity Scores from Normalized Roadmap Indicators

#### Min-Max Scaling
URL: https://selfreliance.usaid.gov/docs/FY_2020_USAID_Journey_to_Self-Reliance_Country_Roadmap_Methodology_Guide.pdf

USAID’s country roadmaps use a min-max scaling technique to normalize all data onto a common 0.0 to
1.0 scale to facilitate visualization, comparison across metrics, and calculation of the Commitment and
Capacity indices. A country scoring 0.0 on a given metric indicates that the country recorded the least
favorable outcome globally in the raw dataset, and a country scoring 1.0 indicates that the country
recorded the most favorable outcome globally in the raw dataset. All other countries receive scores
within the 0.0-1.0 range based on where they fall between the worst and best outcomes globally,
preserving the source organization’s data distribution.

While USAID Roadmaps are only produced for low- and middle-income countries, all countries globally, 
including high-income countries, are used to establish the range of possible outcomes for each metric. 

The period of performance used to determine the range of observed outcomes is 2010 to the latest data 
available on July 1 2019 (including values that have been “carried forward” from 2006-2009 into this 
date range; see “Temporal Coverage” and “Handling Missing Data” sections below for more details)

In [None]:
# Min-Max scaling



#### Methodology from USAID J2SR FY2020 Metrics Guide
URL: https://selfreliance.usaid.gov/docs/FY_2020_USAID_Journey_to_Self-Reliance_Country_Roadmap_Methodology_Guide.pdf

Overall “Commitment” and “Capacity” composite scores are calculated using the arithmetic mean of all
available scaled components for each country. 

The Commitment Index comprises seven underlying metrics, each receiving an equal weight 
(i.e. one-seventh weighting, if all sub-components are present after imputation). 

- Liberal Democracy Index
- Open Government
- Social Group Equality
- Economic Gender Gap
- Business Environment
- Trade Freedom
- Biodiversity and Habitat Protections

The Capacity Index comprises ten underlying metrics, each receiving an equal
weighting of one-tenth in aggregation, if all sub-components are present after imputation. 

- Government Effectiveness
- Tax System Effectiveness
- Safety and Security
- Civil Society and Media Effectiveness
- Poverty Rate
- Education Quality
- Child Health
- GDP per Capita in PPP
- ICT Adoption
- Export Sophistication

If dimension components (i.e. individual metrics) of either index are missing after imputation, Commitment and
Capacity scores are still generated using an arithmetic mean of all available components, but only when
at least six of ten Capacity metrics are present and four of seven Commitment metrics are present.


In [None]:
# Create function that checks for sufficient components in Commitment and computes arithmetic mean
def comm_calc(data, ldi, og, sge, egg, be, tf, bhp):
  if # at least 4 of 7 metrics present
    comm_array = [data[ldi], data[og], data[sge], data[egg], data[be], data[tf], data[bhp]]
    comm_score = np.nanmean(comm_array, 0)
    return comm_score

In [None]:
comm_vars = ['Roadmap Metric: Biodiversity & Habitat Protections (0-100, higher is better)',
        'Roadmap Metric: Business Environment (0-100, higher is better)',
        'Roadmap Metric: Economic Gender Gap (0-1, where 0=inequality and 1=equality)',
        'Roadmap Metric: Education Quality (years)',
        'Roadmap Metric: Liberal Democracy (0-1, higher is better)',
        'Roadmap Metric: Open Government (0-1, higher is stronger rule of law)',
        'Roadmap Metric: Social Group Equality (0-4, higher is better)']
        
# Missing Trade Freedom

In [None]:
# Create function that checks for sufficient components in Capacity and computes arithmetic mean
def cap_calc(data, ge, tse, ss, csme, pr, eq, ch, gdp, ict, ep):
  if # at least 4 of 7 metrics present
    cap_array = [data[ge], data[tse], data[ss], data[csme], data[pr], data[eq], data[ch], data[gdp], data[ict], data[ep]]
    cap_score = np.nanmean(cap_array, 0)
    return cap_score

In [None]:
cap_vars = ['Roadmap Metric: Child Health (0-100, higher is better)',
       'Roadmap Metric: Civil Society & Media Effectiveness (0-1, higher is better)',
       'Roadmap Metric: Export Sophistication',
       'Roadmap Metric: Information & Communication Technology (ICT) Adoption (1-100, higher is better)',
       'Roadmap Metric: Poverty Rate ($5/Day) (percentage)',
       'Roadmap Metric: Safety & Security (0-100, higher is better)',
       'Roadmap Metric: Tax System Effectiveness (0-1, higher is better)']

# Missing GDP per Capita, Education Quality, and Gov Effectiveness

## Step 6: Link DEC and J2SR Data

In [54]:
dec_j2sr = pd.merge(dec_recent, j2sr_pivot, how='left', left_on = 'country_year', right_index=True)
dec_j2sr.head()

Unnamed: 0_level_0,Abstract,Ancillary_Data,Bibliographic_Type,ContentType,Contract_Grant_Number,Credit,Date_Resource_Created,Description,Descriptors_Topical,Descriptors_Geographic,...,Roadmap Metric: Export Sophistication,"Roadmap Metric: Information & Communication Technology (ICT) Adoption (1-100, higher is better)","Roadmap Metric: Liberal Democracy (0-1, higher is better)","Roadmap Metric: Open Government (0-1, higher is stronger rule of law)",Roadmap Metric: Poverty Rate ($5/Day) (percentage),"Roadmap Metric: Safety & Security (0-100, higher is better)","Roadmap Metric: Social Group Equality (0-4, higher is better)","Roadmap Metric: Tax System Effectiveness (0-1, higher is better)",iso,year_y
Unique_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
PA-00K-SQC,,Submitted as: Final evaluation of the Liberia grants and solicitation mechanism (LGSM) project,Final Evaluation Report,Documents,,,3/1/2015 0:00,,Quality of care ~|~_©_~|~ Orphans and vulnerable children (OVC) ~|~_©_~|~ Human capacity develop...,Liberia,...,-1.3426,,0.489,0.481454,,63.232402,3.184,0.45,LBR,2015.0
PA-00K-SGR,,"""Submitted to: USAID/Uganda"" ~|~_©_~|~ Submitted as: SUNRISE-OVC final evaluation ~|~_©_~|~ Foot...",Final Evaluation Report,Documents,AID-OAA-A-14-00061,,9/1/2015 0:00,,Orphans and vulnerable children (OVC) ~|~_©_~|~ Health service utilization ~|~_©_~|~ Access to s...,Uganda,...,-0.5197,,0.256,0.410924,,59.464849,2.562,0.44,UGA,2015.0
PA-00K-SBZ,,"""Supporting a more collaborative environment for civil society and technology service providers ...",Special Evaluation,Documents,AID-442-TO-15-00002 ~|~_©_~|~ AID-486-I-14-00001,,11/1/2015 0:00,,Civil society ~|~_©_~|~ Technology ~|~_©_~|~ Access to resources ~|~_©_~|~ Civil society organiz...,Cambodia,...,-0.6199,,0.115,0.356455,,65.989641,1.356,0.51,KHM,2015.0
PA-00K-SBV,,Evaluated project title: Land use dynamics and adapting to climate change in West Africa ~|~_©_~...,Special Evaluation,Documents,AID-624-TO-15-00001 ~|~_©_~|~ AID-OAA-I-14-00017,,9/1/2015 0:00,,Climate change ~|~_©_~|~ Land use ~|~_©_~|~ Geological surveys ~|~_©_~|~ Natural resource manage...,West Africa ~|~_©_~|~ Africa south of Sahara ~|~_©_~|~ Niger,...,,,,,,,,,,
PA-00K-SB1,,Evaluated project title: Malagasy heniky ny fahasalamana (MAHEFA) ~|~_©_~|~ Project title: Advan...,Special Evaluation,Documents,AID-OAA-A-12-00047,,12/1/2014 0:00,,Communities ~|~_©_~|~ Community health workers ~|~_©_~|~ Cost-effectiveness ~|~_©_~|~ Water sani...,Madagascar,...,-0.6901,,0.239,0.424821,,77.959577,2.535,0.34,MDG,2014.0


In [55]:
dec_j2sr.shape

(2320, 52)

In [56]:
dec_j2sr.to_csv('dec_j2sr_trial.csv')

## Step 7: Visualizations