### Import modules and set up some helper functions

In [8]:
from lxml import etree
import pandas as pd
from collections import OrderedDict
import re
# Disable pandas' chained-assignment (SettingWithCopy) check for the whole session,
# so assigning into a frame derived from another frame neither warns nor raises.
pd.options.mode.chained_assignment = None

In [9]:
# Prefix map that lets path expressions address the reserved ``xml:lang`` attribute.
nsmap = {"xml": "http://www.w3.org/XML/1998/namespace"}
def get_data(activity):
    """Extract identifier, English title and implementing organisation from one activity.

    Args:
        activity: an ``iati-activity`` element exposing
            ``find(path, namespaces=...)`` (e.g. an lxml element).

    Returns:
        OrderedDict with the keys ``iati_identifier``, ``title`` and
        ``implementing_org``, in exactly that order. A value is ``None``
        when the corresponding child element is missing or has no text,
        so a single incomplete activity no longer aborts the whole parse.
    """
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, which keeps
    # the original text normalisation without depending on the Py2-only builtin.
    text_type = type(u"")

    def text_of(path, namespaces=None):
        # ``find`` returns None when nothing matches; ``.text`` is None for an
        # empty element. Both cases are reported as None instead of raising
        # AttributeError or producing the literal string "None".
        node = activity.find(path, namespaces=namespaces)
        if node is None or node.text is None:
            return None
        return node.text

    def as_text(value):
        return None if value is None else text_type(value)

    # Built from a list of pairs: initialising an OrderedDict from a dict
    # literal loses the intended key order on Python 2.
    return OrderedDict([
        ("iati_identifier", text_of("iati-identifier")),
        ("title", as_text(text_of("title[@xml:lang='en']", namespaces=nsmap))),
        ("implementing_org", as_text(text_of("participating-org[@role='Implementing']"))),
    ])
def parse_activities(activities):
    """Lazily yield one ``get_data`` record per element of ``activities``.

    Nothing is evaluated until the returned generator is consumed.
    """
    for element in activities:
        record = get_data(element)
        yield record
def correct_project_id(prefix, project_id):
    """Build an identifier of the form ``<prefix>-<project_id without dots>``.

    Args:
        prefix: text placed before the hyphen (e.g. ``"DE-1"``).
        project_id: project number string; every literal ``.`` is removed,
            all other characters (including hyphenated suffixes) are kept.

    Returns:
        The combined identifier string.
    """
    # Raw string: "\." in a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    return "%s-%s" % (prefix, re.sub(r"\.", "", project_id))

### Get IATI data

We extract data from the IATI Datastore using the following command:
```
wget "http://datastore.iatistandard.org/api/1/access/activity.xml?reporting-org=DE-1&recipient-country=BD&stream=True" -O bmz-20160121.xml
```

In [10]:
# Parse the downloaded Datastore export and collect every <iati-activity> element.
doc = etree.parse("bmz-20160121.xml")
activities = doc.xpath("//iati-activity")

# One row per activity, built from the records produced by parse_activities.
iati_data = pd.DataFrame(parse_activities(activities))

# Keep only the rows whose implementing organisation is GIZ and index them
# by IATI identifier.
giz_iati = (
    iati_data
    .loc[iati_data["implementing_org"] == u"Gesellschaft für internationale Zusammenarbeit (GIZ) GmbH"]
    .set_index("iati_identifier")
)

### Get AIMS data

In [11]:
# Load the AIMS dashboard export.
aims_data = pd.read_csv("../DashboardReport.csv")

# Keep the rows managed by GIZ. The literal spells "ü" as its UTF-8 bytes and
# contains a double space after it; presumably this mirrors the CSV value
# exactly, so it must not be normalised.
giz_aims = aims_data[aims_data["Managing DP"]=="Deutsche Gesellschaft f\xc3\xbcr  Internationale Zusammenarbeit (GIZ) GmbH (GIZ)"]

# Take an explicit copy of the two columns of interest: the next statement adds
# a column, and doing that on a filtered slice is a chained assignment that only
# stays quiet while the SettingWithCopy check is globally disabled.
giz_aims = giz_aims[["DP Project No", "Project Title"]].copy()

# Derive the IATI identifier from the AIMS project number and use it as index.
giz_aims["iati_identifier"] = giz_aims["DP Project No"].apply(lambda x: correct_project_id("DE-1", x))
giz_aims = giz_aims.set_index(["iati_identifier"])

# Rename the AIMS columns to the names used in the comparison table.
giz_aims = giz_aims.rename(columns={
        "DP Project No": "project_id_aims",
        "Project Title": "title",
    })

## Which projects are found in AIMS and in IATI?

In [12]:
# Outer-join both GIZ project lists on their iati_identifier index so that
# projects present on only one side are kept; the missing side is filled
# with the marker text "NOT FOUND". Overlapping column names get the
# "_aims" / "_iati" suffixes.
aims_plus_iati = (
    giz_aims
    .join(giz_iati, how="outer", lsuffix="_aims", rsuffix="_iati")
    .fillna("NOT FOUND")
    .sort_index()
)
aims_plus_iati

Unnamed: 0_level_0,project_id_aims,title_aims,implementing_org,title_iati
iati_identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
DE-1-200135012,2001.3501.2,Study and Expert Fund,Gesellschaft für internationale Zusammenarbeit...,Studies and Expert Fund
DE-1-200820720,2008.2072.0,Wetland Biodiversity Protection Project,Gesellschaft für internationale Zusammenarbeit...,Wetland Biodiversity Protection Project
DE-1-200922054,2009.2205.4,Promotion of Social and Environmental Standard...,Gesellschaft für internationale Zusammenarbeit...,Promotion of Social and Environmental Standard...
DE-1-200922401,2009.2240.1,Management of Natural Resources and Community ...,Gesellschaft für internationale Zusammenarbeit...,Management of Natural Resources and Community ...
DE-1-201022201,2010.2220.1,Sustainable Development and Biodiversity Conse...,Gesellschaft für internationale Zusammenarbeit...,Sustainable Development and Biodiversity Conse...
DE-1-201121672,2011.2167.2,Adressing Bangladesh's Demographic Challenges,Gesellschaft für internationale Zusammenarbeit...,Addressing Bangladesh's Demographic Challenges...
DE-1-201220979,2012.2097.9,Renewable Energy and Energy Efficiency Programme,Gesellschaft für internationale Zusammenarbeit...,Renewable Energy and Energy Efficiency Programme
DE-1-201220987,NOT FOUND,NOT FOUND,Gesellschaft für internationale Zusammenarbeit...,Prison and Justice Reform for Promoting Human ...
DE-1-201220987-001,2012.2098.7-001,Improvement of the real Situation of overcrowd...,NOT FOUND,NOT FOUND
DE-1-201220987-003,2012.2098.7-003,Justice Reform and Corruption Prevention (JRCP),NOT FOUND,NOT FOUND


In [13]:
# Write the combined comparison table to disk as UTF-8 encoded CSV.
aims_plus_iati.to_csv(
    "iati_projects_and_aims.csv",
    encoding="utf-8",
)