In [1]:
from lxml import etree
from copy import deepcopy

In [2]:
# Prefix -> URI map handed to xpath() calls for the Akoma Ntoso 3.0 (CSD13) namespace.
AKN = dict(akn="http://docs.oasis-open.org/legaldocml/ns/akn/3.0/CSD13")

# Source debate file (path is relative to the notebook) and the base URI.
xml = "../data/AK-dail-2015-11-12-v2.xml"
baseURI = "http://oireachtas.ie"

# lxml-style nsmap: the None key declares AKN as the default (unprefixed) namespace.
nsmap = {
    None: AKN["akn"],
    "xsi": "http://www.w3.org/2001/XMLSchema-instance",
}

In [21]:
# Parse the debate file and keep a handle on its document element.
tree = etree.parse(xml)
root = tree.getroot()

In [22]:
# Append ". D'Fhiafraigh " to the leading text of question paragraphs that
# mention "Aire" but do not yet contain "D'Fhiafraigh". The not(contains(...))
# guard in the XPath means already-patched paragraphs are not matched again.
question_paras = root.xpath(
    './/akn:question/akn:p[contains(., "Aire") and not(contains(., "D\'Fhiafraigh"))]',
    namespaces=AKN)
for p in question_paras:
    # contains(., ...) tests the paragraph's full string value (descendants
    # included), while only the leading text node is edited here. A matching
    # paragraph that opens with a child element has p.text == None; skip it
    # rather than fail with AttributeError on None.strip().
    if p.text is None:
        continue
    p.text = p.text.strip() + ". D'Fhiafraigh "

## Questions in Akoma Ntoso

Questions are represented as a subset of the Akoma Ntoso file for the day's debate. The debate body consists of the parent debateSection for the question (along with any other questions that have been taken together).

In [13]:
# Using deepcopy to create copy because etree objects are mutable.
# NOTE(review): the export loop further down rebinds pq_root on every
# iteration, so this copy looks useful only for interactive inspection -- confirm.
pq_root = deepcopy(root)

In [43]:
# Write one stand-alone Akoma Ntoso file per parliamentary question: a copy of
# the full debate document whose debateBody holds only that question's
# debateSection and whose <references> list is pruned to the entries it uses.
pqs = root.xpath(".//akn:debateSection[@name='question'][./akn:question]", namespaces=AKN)
# NOTE(review): the [2:3] slice exports only the third matching section; it
# looks like a development limit -- widen it to export every question.
for pq in pqs[2:3]:
    pq_eId = pq.attrib["eId"]
    # Work on a private copy of the whole document so `root` is left untouched.
    pq_root = deepcopy(root)
    # Re-point the FRBRthis identifiers from the whole debate ("/main") to this question.
    for this in pq_root.xpath(".//akn:FRBRthis", namespaces=AKN):
        this.attrib['value'] = this.attrib['value'].replace("/main", "/"+pq_eId)
    # Drop the <analysis> block if present (remove(None) would raise TypeError).
    meta = pq_root.find(".//{*}meta")
    analysis = meta.find("./{*}analysis")
    if analysis is not None:
        meta.remove(analysis)
    # Swap the full debateBody for an empty one. The tag is namespace-qualified
    # so the new element lives in the AKN namespace like the rest of the tree;
    # a bare "debateBody" would create an element in no namespace.
    debate = pq_root.find("./{*}debate")
    debate.remove(pq_root.find(".//{*}debateBody"))
    dbody = etree.SubElement(debate, "{%s}debateBody" % AKN["akn"])
    # lxml moves an element when it is appended to a new parent, so appending
    # `pq` itself would strip it out of `root`. Append a copy instead.
    dbody.append(deepcopy(pq))
    # Collect the identifiers still referenced by the trimmed document.
    # ref[1:] drops the first character of each attribute value -- presumably
    # the leading "#" of an internal reference; confirm for FRBRdate/@name.
    refs = set(ref[1:] for
           ref in pq_root.xpath('''.//akn:meta//akn:FRBRdate/@name|
                                   .//akn:meta/*/@source|
                                   .//akn:debateSection/*/@as|
                                   .//akn:debateSection/*/@by|
                                   .//akn:debateSection/*/@to''',
                                namespaces=AKN))
    # Prune <references> entries nothing points at any more. Iterate over a
    # snapshot (list(...) replaces the deprecated getchildren()) because
    # children are removed during the loop.
    references = pq_root.xpath(".//akn:references", namespaces=AKN)[0]
    for tlc in list(references):
        if tlc.attrib['eId'] not in refs:
            references.remove(tlc)
    # Build "<input stem>.<eId>.xml". Only the trailing ".xml" is replaced, so
    # an "xml" elsewhere in the path is left alone and the input file can never
    # be the output target.
    stem = xml[:-len(".xml")] if xml.endswith(".xml") else xml
    out_path = stem + "." + pq_eId + ".xml"
    with open(out_path, "wb") as f:
        f.write(etree.tostring(pq_root,
                               pretty_print=True,
                               xml_declaration=True,
                               encoding="UTF-8"))