In [1]:
from lxml import etree
from copy import deepcopy

In [2]:
AKN = {"akn": "http://docs.oasis-open.org/legaldocml/ns/akn/3.0/CSD13"}
xml = "../data/AK-dail-2015-11-12-v2.xml"
baseURI = "http://oireachtas.ie"
nsmap = {None: AKN['akn'],
         'xsi':"http://www.w3.org/2001/XMLSchema-instance"}

In [3]:
root = etree.parse(xml).getroot()

## Questions in Akoma Ntoso

Questions are represented as a subset of the Akoma Ntoso file for an the day's debate. The debate body consists of the parent debateSection for the question (along with any other questions that have been taken together).

In [4]:
# Using deepcopy to create copy because etree objects are mutable.
pq_root = deepcopy(root)

In [5]:
for p in root.xpath('.//akn:question/akn:p[contains(., "Aire") and not(contains(., "D\'Fhiafraigh"))]', namespaces=AKN):
    p.text = p.text.strip() + ". D'Fhiafraigh "

In [6]:
pqs = root.xpath(".//akn:debateSection[@name='question'][./akn:question]", namespaces=AKN)
for pq in pqs[2:3]:
    print(etree.tostring(pq).decode("utf-8"))

<debateSection xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0/CSD13" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="question" eId="dbsect_6">
                    <heading>National Monuments</heading>
                    <question by="#MaureenOSullivan" to="#Minister_for_Arts,_Heritage_and_the_Gaeltacht" eId="pq_3">
                        <p eId="para_23">3<b>Deputy Maureen O&#8217;Sullivan</b> asked the <b>Minister for Arts, Heritage and the Gaeltacht</b> if she will exercise the power conferred on her under the national monument certification process to undertake the preservation of 10 Moore Street in Dublin 1, identified by the National Museum as the point of entry into the terrace by the Irish garrison from the General Post Office, the location of the first council of war after the evacuation of the General Post Office, and where five signatories spent their last night before their surrender and execution, and those other buildings on the terrace identified in

In [10]:
for pq in pqs:
    print(pq.xpath("./akn:question/akn:p//text()", namespaces=AKN))

['1', 'Deputy Seán Ó Fearghaíl', ' asked the ', 'Minister for Arts, Heritage and the Gaeltacht', ' the funding being provided for the Heritage Council in 2016; and if she will make a statement on the matter. [39716/15]']
["2. D'Fhiafraigh ", 'Deputy Pearse Doherty', ' den ', 'an Aire Ealaíon, Oidhreachta agus Gaeltachta', ' an dtabharfaidh sí soiléiriú maidir leis na céimeanna atá á nglacadh ag an Roinn chun próiseas nua tairisceana a chur ar bun maidir leis an aersheirbhís go hÁrainn; cén rannpháirtíocht a bheidh ag pobal an oileáin sa phróiseas; agus an bhfuil sí ábalta a dheimhniú gur seirbhís eitleáin sciatháin dhobhogtha amach as Aerfort Réigiúnach Chonamara a bheidh á lorg. [39632/15]']
['3', 'Deputy Maureen O’Sullivan', ' asked the ', 'Minister for Arts, Heritage and the Gaeltacht', ' if she will exercise the power conferred on her under the national monument certification process to undertake the preservation of 10 Moore Street in Dublin 1, identified by the National Museum as 

In [58]:
frbr = root.xpath(".//akn:identification", namespaces=AKN)
preface = root.xpath(".//akn:preface", namespaces=AKN)

In [89]:

meta = root.find(".//{*}meta")
meta.remove(meta.find("./{*}analysis"))
meta.remove(meta.find("./{*}references"))
root.find("./{*}debate").remove(root.find(".//{*}debateBody"))
dbody = etree.SubElement(root.find("./{*}debate"), "debateBody")
dbody.append(pqs[0])

In [None]:
refs = set(r.)

In [11]:
print(etree.tostring(r).decode("utf-8"))

<akomaNtoso xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0/CSD13" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://docs.oasis-open.org/legaldocml/ns/akn/3.0/CSD13 ./akomantoso30.xsd ">
    </akomaNtoso>


In [71]:
def new_akn_root():
    schema_loc = "http://docs.oasis-open.org/legaldocml/ns/akn/3.0/CSD13 ./akomantoso30.xsd "
    AN = etree.Element('{' + AKN['akn'] + '}akomaNtoso',           
            nsmap=nsmap,
            attrib= {"{" + AKN['akn'] + "}schemaLocation": schema_loc})
    return AN

In [86]:
pqs[1]

<Element {http://docs.oasis-open.org/legaldocml/ns/akn/3.0/CSD13}debateSection at 0x7fb04518ccc8>

In [73]:
AN = new_akn_root()
AN.append(preface[0])
print(etree.tostring(AN).decode("utf-8"))

<akomaNtoso xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://docs.oasis-open.org/legaldocml/ns/akn/3.0/CSD13" xmlns:ns0="http://docs.oasis-open.org/legaldocml/ns/akn/3.0/CSD13" ns0:schemaLocation="http://docs.oasis-open.org/legaldocml/ns/akn/3.0/CSD13 ./akomantoso30.xsd "><preface>
            <block name="title_ga">
                <docTitle>D&#205;OSP&#211;IREACHTA&#205; PARLAIMINTE</docTitle>
            </block>
            <block name="title_en">
                <docTitle>PARLIAMENTARY DEBATES</docTitle>
            </block>
            <block name="proponent_ga">
                <docProponent>D&#193;IL &#201;IREANN</docProponent>
            </block>
            <block name="status_ga">
                <docStatus>TUAIRISC OIFIGI&#218;IL</docStatus>
            </block>
            <block name="status_en">
                <docStatus>(OFFICIAL REPORT)</docStatus>
            </block>
            <block name="date_ga">
                <docDate date="2015-11-12">D&