### Import modules and set up some helper functions

In [1]:
from lxml import etree
import pandas as pd
from collections import OrderedDict
import re
pd.options.mode.chained_assignment = None
from datetime import datetime, date

The IATI-XML file is retrieved from the IATI Datastore with the command:
    
    wget "http://datastore.iatistandard.org/api/1/access/activity.xml?reporting-org=CA-3&recipient-country=BD&stream=True" -O canada-bd.xml

In [2]:
# Path of the IATI activity XML file that is parsed further down.
XML_FILENAME = "canada-bd.xml"
# Value of the AIMS "Managing DP" column used to select this DP's rows.
DP_NAME = "Department of Foreign Affairs; Trade & Development Canada (DFATD Canada)"
# Prefix joined to each AIMS "DP Project No" (with a hyphen) to build the IATI identifier.
DP_IATI_PREFIX = "CA-3"

In [3]:
# Parse the IATI XML file into an element tree; later cells query it with XPath.
doc = etree.parse(XML_FILENAME)

In [4]:
# Prefix map so the ``xml:lang`` attribute can be addressed in path expressions.
nsmap = {"xml": "http://www.w3.org/XML/1998/namespace"}

# ``unicode`` only exists on Python 2; fall back to ``str`` so the cell also runs on Python 3.
try:
    text_type = unicode
except NameError:
    text_type = str


def get_data(activity):
    """Extract the fields used in the AIMS/IATI comparison from one activity element.

    Parameters
    ----------
    activity : element
        An ``iati-activity`` element exposing ``find`` (with ``namespaces``
        support) on its children.

    Returns
    -------
    OrderedDict
        Keys, in this order: ``iati_identifier``, ``title`` (text of the
        English ``title/narrative``), ``start_date`` (``iso-date`` of the
        ``activity-date`` with ``type='2'``) and ``aid_type`` (``code`` of
        ``default-aid-type``).

    Raises
    ------
    AttributeError
        If any of the looked-up child elements is absent, because ``find``
        then returns ``None``.
    """
    # Build from a list of pairs: a dict literal does not guarantee key order
    # on Python 2, so ``OrderedDict({...})`` would not keep the order written here.
    return OrderedDict([
        ("iati_identifier", activity.find("iati-identifier").text),
        ("title", text_type(activity.find("title/narrative[@xml:lang='en']", namespaces=nsmap).text)),
        ("start_date", text_type(activity.find("activity-date[@type='2']").get("iso-date"))),
        ("aid_type", text_type(activity.find("default-aid-type").get("code"))),
    ])
def parse_activities(activities):
    """Lazily yield one ``get_data`` record for each element of ``activities``."""
    for element in activities:
        record = get_data(element)
        yield record
def correct_project_id(prefix, project_id):
    """Return ``prefix`` and ``project_id`` joined by a single hyphen.

    Both values go through ``%s`` formatting, so non-string inputs are
    rendered with their string form.
    """
    parts = (prefix, project_id)
    return "%s-%s" % parts

In [5]:
# Every <iati-activity> element anywhere in the parsed document.
activities = doc.xpath("//iati-activity")
# One row per activity, built from the records produced by parse_activities.
iati_data = pd.DataFrame(data=parse_activities(activities))
# Key the IATI table by identifier so it can be joined against the AIMS table.
dp_iati = iati_data.set_index(keys="iati_identifier")

In [6]:
# Load the AIMS dashboard export.
aims_data = pd.read_csv("../DashboardReport.csv")

# Keep this DP's rows and the two columns of interest, derive the IATI
# identifier from the DP project number, index by it, and rename the
# remaining columns for the comparison table.
dp_aims = (
    aims_data
    .loc[aims_data["Managing DP"] == DP_NAME, ["DP Project No", "Project Title"]]
    .assign(
        iati_identifier=lambda frame: frame["DP Project No"].apply(
            lambda project_no: correct_project_id(DP_IATI_PREFIX, project_no)
        )
    )
    .set_index(["iati_identifier"])
    .rename(columns={
        "DP Project No": "project_id_aims",
        "Project Title": "title",
    })
)

In [7]:
# Outer join keeps identifiers present in either source; cells left empty by
# the join are filled with the "NOT FOUND" marker that later cells count.
aims_plus_iati = (
    dp_aims
    .join(dp_iati, how="outer", lsuffix="_aims", rsuffix="_iati")
    .fillna("NOT FOUND")
    .sort_index()
)

### Compare this DP's projects in the AIMS and IATI

In [8]:
# Show the joined table; "NOT FOUND" marks values that were empty after the outer join.
aims_plus_iati

Unnamed: 0_level_0,project_id_aims,title_aims,aid_type,start_date,title_iati
iati_identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
CA-3-A021170001,NOT FOUND,NOT FOUND,D01,2001-07-27,Legal Reform
CA-3-A031069001,NOT FOUND,NOT FOUND,D01,2004-07-29,Policy Leadership and Advocacy for Gender Equa...
CA-3-A031503001,NOT FOUND,NOT FOUND,B04,2004-03-04,Education Sector Support - Second Primary Educ...
CA-3-A032356001,NOT FOUND,NOT FOUND,C01,2005-12-15,Teaching Quality Improvement in Secondary Educ...
CA-3-A032582001,NOT FOUND,NOT FOUND,D01,2007-09-28,Adolescent Reproductive Health
CA-3-A032583001,A032583001,Strengthening Comptrollership and Oversight of...,D01,2008-03-04,Strengthening Comptrollership and Oversight of...
CA-3-A032585001,NOT FOUND,NOT FOUND,D01,2005-10-20,Life Skills and Education for Adolescent Devel...
CA-3-A032593001,NOT FOUND,NOT FOUND,B04,2008-04-16,Katalyst II: Business Services for Small Enter...
CA-3-A032610002,NOT FOUND,NOT FOUND,B04,2008-02-04,"Health, Nutrition and Population Sector Progra..."
CA-3-A032610003,NOT FOUND,NOT FOUND,B04,2006-03-27,Bangladesh Health Commodities


### Write output to CSV file

In [9]:
# Write the joined table (index included) to a UTF-8 encoded CSV file.
aims_plus_iati.to_csv("iati_projects_and_aims.csv", encoding='utf-8')

> The comparison shows that Canada's IATI data includes all of the projects found in the AIMS. There are a significant number of projects not found in the AIMS that could potentially be included from Canada's IATI data, though it is not clear on what basis we should choose or exclude projects from the AIMS. It would be helpful to have at least a couple of ways to reduce the number of projects, even if there will still need to be some manual selection.

#### Total number of projects (AIMS and IATI combined)

In [10]:
# Number of rows in the joined AIMS/IATI table.
aims_plus_iati.shape[0]

85

#### Total number of projects not found in IATI

In [11]:
# Count rows whose IATI title carries the "NOT FOUND" marker.
int((aims_plus_iati["title_iati"] == "NOT FOUND").sum())

0

#### Total number of projects not found in AIMS

In [12]:
# Count rows whose AIMS title carries the "NOT FOUND" marker.
int((aims_plus_iati["title_aims"] == "NOT FOUND").sum())

74