### Import modules and set up some helper functions

In [1]:
from lxml import etree
import pandas as pd
from collections import OrderedDict
import re
pd.options.mode.chained_assignment = None
from datetime import datetime, date

In [2]:
# Path of the IATI activity XML file that is parsed into `doc`.
XML_FILENAME = "dfid-bd.xml"
# Compared for equality against the "Managing DP" column of the AIMS export,
# so it has to match that column's spelling exactly.
DP_NAME = "Department for International Development (DFID)"
# Passed to correct_project_id() as the prefix argument for every AIMS project number.
DP_IATI_PREFIX = "" # DFID is GB-1, but the org ID is already included in project IDs in the AIMS

In [3]:
# Parse the IATI XML file once; later cells query this tree with XPath.
doc = etree.parse(XML_FILENAME)

In [4]:
try:
    _text_type = unicode  # Python 2: coerce byte strings to unicode
except NameError:
    _text_type = str  # Python 3: str is already a unicode string


def _as_text(value):
    """Coerce value to a text string, passing None through unchanged."""
    return None if value is None else _text_type(value)


def get_data(activity):
    """Extract the fields used for the AIMS comparison from one activity element.

    Parameters
    ----------
    activity : element
        An ``iati-activity`` element exposing ``find()`` (lxml / ElementTree API).

    Returns
    -------
    OrderedDict
        Keys ``iati_identifier``, ``title`` and ``start_date``, in that order.
        ``title`` is None when the element is missing or has no text;
        ``start_date`` is None when there is no ``activity-date`` of type
        ``start-planned`` or it carries no ``iso-date`` attribute.

    Raises
    ------
    AttributeError
        If the activity has no ``iati-identifier`` element.
    """
    title = activity.find("title")
    planned_start = activity.find("activity-date[@type='start-planned']")
    # Elements must be compared with `is not None`: an element without
    # children is falsy, so a plain truth test would misreport it as missing.
    title_text = _as_text(title.text) if title is not None else None
    start_date = (
        _as_text(planned_start.get("iso-date")) if planned_start is not None else None
    )
    # Build from a list of pairs: a dict literal has no guaranteed order on
    # Python 2, so wrapping one in OrderedDict does not pin the key order.
    return OrderedDict([
        # Left strict on purpose: this value becomes the index used for the join.
        ("iati_identifier", activity.find("iati-identifier").text),
        ("title", title_text),
        ("start_date", start_date),
    ])
def parse_activities(activities):
    """Lazily yield one ``get_data()`` record per activity element.

    Nothing is read from ``activities`` until the returned generator is
    iterated; records come out in the same order as the input.
    """
    for node in activities:
        record = get_data(node)
        yield record
def correct_project_id(prefix, project_id):
    """Return an AIMS project number as the string used to match IATI identifiers.

    Parameters
    ----------
    prefix : str
        Accepted for interface compatibility but not applied. Per the
        configuration comment, the AIMS project numbers for this partner
        already contain the organisation ID.
    project_id : object
        The AIMS project number; any value is rendered with ``%s``.

    Returns
    -------
    str
        ``project_id`` formatted as a string.
    """
    # One-element tuple so a tuple-valued project_id is formatted as a whole
    # instead of being unpacked as the format arguments.
    return "%s" % (project_id,)

In [5]:
# Select the activities whose hierarchy attribute is 1.
activities = doc.xpath("//iati-activity[@hierarchy='1']")
# Declaring the columns keeps the frame well-formed when no activity matches;
# without them an empty result has no "iati_identifier" column and set_index
# raises KeyError. The order (start_date before title) matches the column
# order of the recorded output further down.
iati_data = pd.DataFrame(
    list(parse_activities(activities)),
    columns=["iati_identifier", "start_date", "title"],
)
dp_iati = iati_data.set_index("iati_identifier")

In [6]:
# Load the AIMS dashboard export.
aims_data = pd.read_csv("../DashboardReport.csv")
# Pick this partner's rows and the two needed columns in a single .loc call,
# then copy. Adding a column to a chained-indexing slice only runs quietly
# while SettingWithCopyWarning is disabled; an explicit copy does not rely on that.
dp_aims = aims_data.loc[
    aims_data["Managing DP"] == DP_NAME,
    ["DP Project No", "Project Title"],
].copy()
# Derive the join key from the AIMS project number.
dp_aims["iati_identifier"] = dp_aims["DP Project No"].apply(
    lambda x: correct_project_id(DP_IATI_PREFIX, x)
)
# Index by the join key; "title" is kept generic so the join's "_aims"
# suffix can tell it apart from the IATI title.
dp_aims = dp_aims.set_index("iati_identifier").rename(columns={
        "DP Project No": "project_id_aims",
        "Project Title": "title",
    })

In [7]:
# Outer join keeps projects present in only one source; the gaps left by the
# join are labelled "NOT FOUND" and rows are ordered by identifier.
aims_plus_iati = (
    dp_aims
    .join(dp_iati, how="outer", lsuffix="_aims", rsuffix="_iati")
    .fillna("NOT FOUND")
    .sort_index()
)

### Compare this DP's projects in the AIMS and IATI

In [8]:
# Display the joined table; cells filled with "NOT FOUND" had no value in one of the sources.
aims_plus_iati

Unnamed: 0_level_0,project_id_aims,title_aims,start_date,title_iati
iati_identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
GB-1-107367,NOT FOUND,NOT FOUND,2004-09-01,Bangladesh Budget Support for Roads and Highwa...
GB-1-107368,NOT FOUND,NOT FOUND,2003-07-01,Rural Electrification Development Project
GB-1-107369,NOT FOUND,NOT FOUND,2006-01-01,"Health, Nutrition and Population Sector Progra..."
GB-1-107370,GB-1-107370,Promoting Financial Services for Poverty Reduc...,2005-02-01,Promoting Financial Services for Poverty Reduc...
GB-1-107371,NOT FOUND,NOT FOUND,2007-01-01,"Sanitation, Hygiene, Education & Water Supply ..."
GB-1-107372,NOT FOUND,NOT FOUND,2007-05-01,Joint UN Accelerating Progress towards Materna...
GB-1-107398,NOT FOUND,NOT FOUND,2001-04-01,Chars Livelihoods Programme
GB-1-107402,GB-1-107402,Economic Empowerment of the Poorest,2005-11-01,Economic Empowerment of the Poorest
GB-1-107403,NOT FOUND,NOT FOUND,2007-01-01,BRAC Challenging the Frontiers of Poverty Redu...
GB-1-107413,NOT FOUND,NOT FOUND,2007-04-01,Regulatory and Investment Systems for Enterprise


### Write output to CSV file

In [9]:
# Write the joined table, including its iati_identifier index, as UTF-8 CSV.
aims_plus_iati.to_csv("iati_projects_and_aims.csv", encoding='utf-8')

> The comparison shows that DFID's IATI data includes all of the projects found in the AIMS. In addition, a significant number of projects in the IATI data are not found in the AIMS and could potentially be added to it from the IATI data.