### Import modules and set up some helper functions

In [1]:
from lxml import etree
import pandas as pd
from collections import OrderedDict
import re
# Turn off pandas' chained-assignment (SettingWithCopy) check for the whole notebook.
pd.options.mode.chained_assignment = None
from datetime import datetime, date

In [2]:
# Parse the XML source file into an lxml element tree; later cells query it with XPath.
doc = etree.parse("worldbank-bd.xml")

In [3]:
# Every <budget> element anywhere in the document.
budgets = doc.xpath("//budget")
# Every <transaction> whose <transaction-type> child carries code "D" (treated as disbursements here).
disbursements = doc.xpath("//transaction[transaction-type/@code='D']")

In [4]:
def get_budget(budget):
    """Flatten one ``<budget>`` element into an ordered record.

    Parameters
    ----------
    budget : element
        A ``<budget>`` element exposing ``find``/``get``/``getparent``. Its
        parent element is expected to hold an ``<iati-identifier>`` child.

    Returns
    -------
    OrderedDict
        Keys, in this order: ``"date"`` (the ``iso-date`` attribute of
        ``<period-start>``), ``"value"`` (text of ``<value>`` as ``float``),
        ``"iati-identifier"`` (text of the parent's ``<iati-identifier>``)
        and ``"budget-type"`` (the ``type`` attribute of the budget).

    Raises
    ------
    AttributeError
        If ``<period-start>``, ``<value>`` or the parent's
        ``<iati-identifier>`` is missing.
    TypeError, ValueError
        If the ``<value>`` text is absent or is not a valid number.
    """
    # Named start_date (not date) so the local does not shadow datetime.date.
    start_date = budget.find("period-start").get("iso-date")
    amount = budget.find("value").text
    iatiidentifier = budget.getparent().find("iati-identifier").text
    budget_type = budget.get("type")
    # Build from a list of pairs: a dict literal handed to OrderedDict has
    # already lost the written key order on interpreters with unordered dicts.
    return OrderedDict([
        ("date", start_date),
        ("value", float(amount)),
        ("iati-identifier", iatiidentifier),
        ("budget-type", budget_type),
    ])
def process_budgets(budgets):
    """Lazily yield one ``get_budget`` record for each element of ``budgets``."""
    for element in budgets:
        record = get_budget(element)
        yield record

def get_transaction(transaction):
    """Flatten one ``<transaction>`` element into a plain dict.

    The result has the keys ``"date"`` (``iso-date`` attribute of
    ``<transaction-date>``), ``"value"`` (text of ``<value>`` converted to
    ``float``) and ``"iati-identifier"`` (text of the parent element's
    ``<iati-identifier>`` child).
    """
    when = transaction.find("transaction-date").get("iso-date")
    raw_amount = transaction.find("value").text
    activity = transaction.getparent()
    identifier = activity.find("iati-identifier").text

    record = {}
    record["date"] = when
    record["value"] = float(raw_amount)
    record["iati-identifier"] = identifier
    return record
def process_transactions(transactions):
    """Lazily yield one ``get_transaction`` record for each element of ``transactions``."""
    for element in transactions:
        record = get_transaction(element)
        yield record

In [5]:
def later_than_2015(budget):
    """Return True when the record's ``"date"`` is strictly after 1 January 2015.

    ``budget`` is any mapping whose ``"date"`` entry is a ``YYYY-MM-DD`` string.
    """
    parsed = datetime.strptime(budget["date"], "%Y-%m-%d")
    cutoff = datetime(2015, 1, 1)
    return parsed > cutoff
def later_than_now(budget):
    """Return True when the record's ``"date"`` lies after the current UTC time.

    ``budget`` is any mapping whose ``"date"`` entry is a ``YYYY-MM-DD`` string.
    """
    parsed = datetime.strptime(budget["date"], "%Y-%m-%d")
    current = datetime.utcnow()
    return parsed > current

In [6]:
# Flatten every budget, keep the ones dated after 1 Jan 2015, and sort the
# records before loading them into a DataFrame.
# NOTE(review): sorted() is applied to dict records; Python 3 cannot order
# dicts (TypeError), so this cell appears to rely on Python 2 comparison
# semantics -- confirm before porting.
b = process_budgets(budgets)
budget_dates = pd.DataFrame(sorted(filter(later_than_2015, b)))
# Same pipeline for the disbursement transactions.
t = process_transactions(disbursements)
transaction_dates = pd.DataFrame(sorted(filter(later_than_2015, t)))

### What are the 10 latest budget dates (where the value is greater than zero)?

In [7]:
# Budgets with a positive value, latest date first, limited to ten rows.
(
    budget_dates
    .loc[budget_dates["value"] > 0]
    .sort_values(by="date", ascending=False)
    .head(10)
)

Unnamed: 0,budget-type,date,iati-identifier,value
2718,2,2021-09-01,44000-P149553,1466666.67
1354,1,2021-09-01,44000-P149553,1466666.67
2717,2,2021-08-01,44000-P149553,1466666.67
1353,1,2021-08-01,44000-P149553,1466666.67
1352,1,2021-07-01,44000-P149553,1466666.66
2716,2,2021-07-01,44000-P149553,1466666.66
1351,1,2021-06-01,44000-P149553,2757333.34
2715,2,2021-06-01,44000-P149553,2757333.34
1349,1,2021-05-01,44000-P149553,2757333.33
2713,2,2021-05-01,44000-P149553,2757333.33


> There is good forward-looking budget data: budgets run through September 2021.

### What are the 10 most recent disbursement transaction dates?

In [8]:
# Disbursements with a positive value, latest date first, limited to ten rows.
(
    transaction_dates
    .loc[transaction_dates["value"] > 0]
    .sort_values(by="date", ascending=False)
    .head(10)
)

Unnamed: 0,date,iati-identifier,value
78,2015-09-30,44000-P132634,768087
66,2015-09-30,44000-P111272,4288481
55,2015-09-30,44000-P040712,1547158
56,2015-09-30,44000-P073886,4322309
58,2015-09-30,44000-P090807,3461183
59,2015-09-30,44000-P095965,7296842
61,2015-09-30,44000-P098151,2224824
62,2015-09-30,44000-P103999,22275025
63,2015-09-30,44000-P106161,21952044
64,2015-09-30,44000-P106216,7201356


> The most recent disbursement transactions were in September 2015

### Now let's compare projects in IATI with those in the AIMS.

In [9]:
# Prefix map so that "xml:lang" can be used inside find() path expressions.
nsmap = {"xml": "http://www.w3.org/XML/1998/namespace"}
def get_data(activity):
    """Flatten one activity element into an ordered record.

    Parameters
    ----------
    activity : element
        An element exposing ``find`` (with a ``namespaces`` keyword) whose
        children include ``<iati-identifier>``, an English ``<title>``, an
        implementing ``<participating-org>`` and a planned-start
        ``<activity-date>``.

    Returns
    -------
    OrderedDict
        Keys, in this order: ``"iati_identifier"``, ``"title"``,
        ``"implementing_org"`` and ``"start_date"`` (the ``iso-date``
        attribute of the planned-start date). All but the identifier are
        passed through ``unicode``.

    Raises
    ------
    AttributeError
        If any of the four child elements is missing (``find`` returns None).
    """
    # Build from a list of pairs: a dict literal handed to OrderedDict has
    # already lost the written key order on interpreters with unordered dicts.
    return OrderedDict([
        ("iati_identifier", activity.find("iati-identifier").text),
        ("title", unicode(activity.find("title[@xml:lang='en']", namespaces=nsmap).text)),
        ("implementing_org", unicode(activity.find("participating-org[@role='Implementing']").text)),
        ("start_date", unicode(activity.find("activity-date[@type='start-planned']").get("iso-date"))),
    ])
def parse_activities(activities):
    """Lazily yield one ``get_data`` record for each element of ``activities``."""
    for element in activities:
        record = get_data(element)
        yield record
def correct_project_id(prefix, project_id):
    """Return ``prefix`` and ``project_id`` joined by a hyphen, e.g. ``"44000-P040712"``."""
    parts = (prefix, project_id)
    return "%s-%s" % parts

In [10]:
# Every <iati-activity> element in the document.
activities = doc.xpath("//iati-activity")
# One row per activity, built from the records that parse_activities yields.
iati_data = pd.DataFrame(parse_activities(activities))
# Index by IATI identifier so the frame can be joined on it.
wb_iati = iati_data.set_index("iati_identifier")

In [11]:
# Load the AIMS export, then keep only World Bank-managed projects and the two
# columns needed for the comparison.
aims_data = pd.read_csv("../DashboardReport.csv")
wb_aims = (
    aims_data
    .loc[aims_data["Managing DP"] == "World Bank (WB)", ["DP Project No", "Project Title"]]
    # Prefix each AIMS project number with "44000" to form the join key.
    .assign(
        iati_identifier=lambda frame: frame["DP Project No"].apply(
            lambda project_no: correct_project_id("44000", project_no)
        )
    )
    .set_index(["iati_identifier"])
    .rename(columns={
        "DP Project No": "project_id_aims",
        "Project Title": "title",
    })
)

In [12]:
# Outer join keeps projects present in either source; gaps are labelled
# "NOT FOUND" and the rows are ordered by identifier.
aims_plus_iati = (
    wb_aims
    .join(wb_iati, lsuffix="_aims", rsuffix="_iati", how="outer")
    .fillna("NOT FOUND")
    .sort_index()
)
aims_plus_iati

Unnamed: 0_level_0,project_id_aims,title_aims,implementing_org,start_date,title_iati
iati_identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
44000-P040712,P040712,WATER MANAGEMENT IMPROVEMENT PROJECT,BANGLADESH WATER DEVELOPMENT BOARD (BWDB) AND ...,1999-12-14,BD: Water Management Improvement Project
44000-P062916,NOT FOUND,NOT FOUND,BANGLADESH BANK,1999-12-21,BD Central Bank Strengthening Project
44000-P071794,NOT FOUND,NOT FOUND,RURAL ELECTRIFICATION BOARD AND INFRASTRUCTURE...,2001-10-01,BD: Rural Elect. Renewable Energy Dev.
44000-P073886,P073886,EMPOWERMENT AND LIVELIHOOD IMPROVEMENT PROJECT...,SOCIAL DEVELOPMENT FOUNDATION,2007-09-30,BD: Social Investment Program Project II
44000-P078707,NOT FOUND,NOT FOUND,"MINISTRY OF POWER, ENERGY & MINERAL RESOURCES",2004-02-17,BD: Power Sector Development TA
44000-P084078,P084078,NATIONAL AGRICULTURAL TECHNOLOGY PROJECT,"MINISTRIES OF AGRICULTURE, AND FISHERIES & LIV...",2005-12-22,BD:National Agricultural Technology Proj
44000-P086791,NOT FOUND,NOT FOUND,TBD,2004-06-17,BD: Reaching Out of School Children
44000-P089382,P089382,INVESTMENT PROMOTION AND FINANCING FACILITY,BANGLADESH BANK,2005-03-31,BD: Invst Promotion Financing Facility
44000-P090807,P090807,SKILLS AND TRAINING ENHANCEMENT PROJECT,"MINISTRY OF EDUCATION, BANGLADESH",2006-04-18,BD: Skills and Trg. Enhancement Project
44000-P093988,P093988,DHAKA WATER SUPPLY AND SANITATION PROJECT,DHAKA WATER SUPPLY AND SEWERAGE AUTHORITY,2006-07-06,BD: Dhaka Water Sup & San. Project


> Where `NOT FOUND` is shown in the left-hand columns (`project_id_aims`, `title_aims`), the project was not found in the AIMS. Where `NOT FOUND` is shown in the right-hand columns (`implementing_org`, `start_date`, `title_iati`), the project was not found in IATI.