### Import modules and set up some helper functions

In [1]:
from lxml import etree
import pandas as pd
from collections import OrderedDict
import re
pd.options.mode.chained_assignment = None
from datetime import datetime, date

In [2]:
# Parse the XML file into an lxml element tree. The path is relative, so the
# file must sit in the notebook's working directory.
# NOTE(review): presumably an IATI activity export (the helpers below read
# `iati-identifier` elements) -- confirm the data source.
doc = etree.parse("worldbank-bd.xml")

In [3]:
# Every <budget> element anywhere in the document.
budgets = doc.xpath("//budget")
# Every <transaction> element that has a <transaction-type> child whose
# `code` attribute is 'D' (treated as disbursements in this notebook).
disbursements = doc.xpath("//transaction[transaction-type/@code='D']")

In [4]:
def get_budget(budget):
    """Flatten one ``<budget>`` element into an ordered record.

    Parameters
    ----------
    budget : element
        An element exposing ``find``, ``get`` and ``getparent``. It must have
        ``period-start`` and ``value`` children, and its parent must have an
        ``iati-identifier`` child.

    Returns
    -------
    OrderedDict
        Keys, in this order: ``date`` (the ``iso-date`` attribute of
        ``period-start``), ``value`` (text of ``value`` converted to float),
        ``iati-identifier`` (text of the parent's ``iati-identifier``) and
        ``budget-type`` (the ``type`` attribute, ``None`` when absent).

    Raises
    ------
    AttributeError
        If a required child element is missing (``find`` returns ``None``).
    TypeError, ValueError
        If the ``value`` text is missing or is not a valid number.
    """
    # Local names avoid `date`, which would shadow `datetime.date` imported
    # at the top of the notebook.
    period_start = budget.find("period-start").get("iso-date")
    raw_value = budget.find("value").text
    iatiidentifier = budget.getparent().find("iati-identifier").text
    budget_type = budget.get("type")
    # Build from a sequence of pairs rather than a dict literal: an
    # OrderedDict built from a plain dict only inherits that dict's order,
    # which is arbitrary on interpreters older than Python 3.7.
    return OrderedDict([
        ("date", period_start),
        ("value", float(raw_value)),
        ("iati-identifier", iatiidentifier),
        ("budget-type", budget_type),
    ])
def process_budgets(budgets):
    """Lazily yield one ``get_budget`` record per element in ``budgets``."""
    for element in budgets:
        record = get_budget(element)
        yield record

def get_transaction(transaction):
    """Flatten one ``<transaction>`` element into a plain dict.

    The result has three keys: ``date`` (the ``iso-date`` attribute of the
    ``transaction-date`` child), ``value`` (text of the ``value`` child as a
    float) and ``iati-identifier`` (text of the parent element's
    ``iati-identifier`` child).
    """
    when = transaction.find("transaction-date").get("iso-date")
    raw_amount = transaction.find("value").text
    activity = transaction.getparent()
    identifier = activity.find("iati-identifier").text
    record = dict(date=when, value=float(raw_amount))
    record["iati-identifier"] = identifier
    return record
def process_transactions(transactions):
    """Lazily yield one ``get_transaction`` record per element in ``transactions``."""
    for element in transactions:
        record = get_transaction(element)
        yield record

In [5]:
def later_than_2015(budget):
    """Return True when the record's ``date`` is strictly after 2015-01-01.

    ``budget`` is any mapping with a ``date`` entry formatted ``YYYY-MM-DD``;
    a record dated exactly 2015-01-01 is rejected.
    """
    cutoff = datetime(2015, 1, 1)
    parsed = datetime.strptime(budget["date"], "%Y-%m-%d")
    return cutoff < parsed
def later_than_now(budget):
    """Return True when the record's ``date`` is after the current UTC time.

    ``budget`` is any mapping with a ``date`` entry formatted ``YYYY-MM-DD``.
    """
    parsed = datetime.strptime(budget["date"], "%Y-%m-%d")
    return datetime.utcnow() < parsed

In [6]:
# Build one DataFrame of budgets and one of disbursements, keeping only the
# records dated after 2015-01-01.
# The records are dicts, which Python 3 refuses to order, so a bare sorted()
# raises TypeError there. Explicit sort keys keep the row order deterministic
# on any interpreter.
b = process_budgets(budgets)
budget_dates = pd.DataFrame(sorted(
    filter(later_than_2015, b),
    # `budget-type` comes from an optional attribute and may be None; fall
    # back to "" so the key tuples always stay comparable.
    key=lambda rec: (rec["budget-type"] or "", rec["date"],
                     rec["iati-identifier"], rec["value"]),
))
t = process_transactions(disbursements)
transaction_dates = pd.DataFrame(sorted(
    filter(later_than_2015, t),
    key=lambda rec: (rec["date"], rec["iati-identifier"], rec["value"]),
))

### What are the 10 latest budget dates (where the value is greater than zero)?

In [7]:
# The ten positive-valued budget rows with the latest dates. Dates are
# ISO-formatted strings, so a descending string sort is also chronological.
(
    budget_dates.loc[budget_dates["value"] > 0]
    .sort_values(by="date", ascending=False)
    .head(10)
)

Unnamed: 0,budget-type,date,iati-identifier,value
2718,2,2021-09-01,44000-P149553,1466666.67
1354,1,2021-09-01,44000-P149553,1466666.67
2717,2,2021-08-01,44000-P149553,1466666.67
1353,1,2021-08-01,44000-P149553,1466666.67
1352,1,2021-07-01,44000-P149553,1466666.66
2716,2,2021-07-01,44000-P149553,1466666.66
1351,1,2021-06-01,44000-P149553,2757333.34
2715,2,2021-06-01,44000-P149553,2757333.34
1349,1,2021-05-01,44000-P149553,2757333.33
2713,2,2021-05-01,44000-P149553,2757333.33


> There is good forward-looking budget data: the latest budget periods start in September 2021.

### What are the 10 most recent disbursement transaction dates (where the value is greater than zero)?

In [8]:
# The ten positive-valued disbursement rows with the latest dates. Dates are
# ISO-formatted strings, so a descending string sort is also chronological.
(
    transaction_dates.loc[transaction_dates["value"] > 0]
    .sort_values(by="date", ascending=False)
    .head(10)
)

Unnamed: 0,date,iati-identifier,value
78,2015-09-30,44000-P132634,768087
66,2015-09-30,44000-P111272,4288481
55,2015-09-30,44000-P040712,1547158
56,2015-09-30,44000-P073886,4322309
58,2015-09-30,44000-P090807,3461183
59,2015-09-30,44000-P095965,7296842
61,2015-09-30,44000-P098151,2224824
62,2015-09-30,44000-P103999,22275025
63,2015-09-30,44000-P106161,21952044
64,2015-09-30,44000-P106216,7201356


> The most recent disbursement transactions were in September 2015