In [29]:
from lxml import etree
import json
from dicttoxml import dicttoxml
import xmltodict

In [3]:
# Namespace prefix map handed to every namespaced XPath query in this notebook.
AKN = dict(akn="http://docs.oasis-open.org/legaldocml/ns/akn/3.0/CSD13")

# Location of the Akoma Ntoso debate record to parse (relative to the notebook).
xml = "../data/AK-dail-2015-11-12-v2.xml"

# Base URI string for the publishing site.
baseURI = "http://oireachtas.ie"

In [4]:
# Parse the debate record once and pull out the document-level values
# (house name, house URI, sitting date, record URI) that later cells reuse.
root = etree.parse(xml)

# xpath() returns a list of text nodes; later cells take element [0].
house_name = root.xpath(
    ".//akn:preface/akn:block[@name='proponent_ga']/akn:docProponent/text()",
    namespaces=AKN,
)

# "{*}" is a namespace wildcard, so these lookups match the element in any namespace.
frbr_author = root.find(".//{*}FRBRWork/{*}FRBRauthor")
house_uri = frbr_author.attrib['href']
frbr_date = root.find(".//{*}FRBRWork/{*}FRBRdate")
date = frbr_date.attrib['date']

# First @value of the work-level FRBRuri; used as the prefix of every URI built below.
frbr_uri_values = root.xpath(".//akn:FRBRWork/akn:FRBRuri/@value", namespaces=AKN)
dbr_uri = frbr_uri_values[0]

### Debate Headings


In [54]:
# Skeleton of the debate listing. The "head" part describes the request
# (chamber, date range, filters); "results" is filled in by the loop below.
# Key order is kept deliberately: the same dict is also serialised to XML.
debate_head = {
    "chamber": {"name": house_name[0], "uri": house_uri},
    "dateRange": {"start": date, "end": date},
    "query": None,
    "by": {"name": None, "uri": None},
    "as": {"name": None, "uri": None},
    "mostRecent": 1,
}
headings = {"debateList": {"head": debate_head, "results": []}}

# The outer loop is a placeholder that runs exactly once; it marks where
# iteration over several parsed XML files would go.
for dbr in range(1):
    # Only debate sections that have a direct <heading> child are listed.
    dbsects = root.xpath(".//akn:debateSection[./akn:heading]", namespaces=AKN)
    sections = []
    for dbs in dbsects:
        speech_count = len(dbs.xpath("./akn:speech", namespaces=AKN))
        # True when the section directly contains a summary, speech or paragraph.
        do_children_exist = bool(
            dbs.xpath("./akn:summary|./akn:speech|./akn:p", namespaces=AKN)
        )
        # A section nested directly inside another debateSection reports that parent.
        parent_dbs = dbs.xpath("./parent::akn:debateSection", namespaces=AKN)
        if parent_dbs:
            enclosing = parent_dbs[0]
            parent = {
                "uri": dbr_uri + "/" + enclosing.attrib['eId'],
                "heading": enclosing.find("./{*}heading").text,
            }
        else:
            parent = None
        sections.append({
            "debateSection": {
                "uri": dbr_uri + "/" + dbs.attrib['eId'],
                "heading": dbs.find("./{*}heading").text,
                "context": None,
                "speechCount": speech_count,
                "parentDebateSection": parent,
                "containsDebate": do_children_exist,
            }
        })
    record = {
        "debateRecord": {
            "uri": dbr_uri,
            "date": date,
            "debateSectionCount": len(dbsects),
            "debateSections": sections,
        }
    }
    headings['debateList']['results'].append(record)

In [55]:
# Save the debate listing as pretty-printed JSON with alphabetically sorted keys.
with open("../data/debate-list.json", "w") as json_out:
    json_out.write(json.dumps(headings, sort_keys=True, indent=2))

In [56]:
# Save the debate listing as pretty-printed XML.
# xmltodict.unparse() prefixes the document with an XML declaration announcing
# encoding="utf-8", so the file is opened explicitly as UTF-8. Relying on the
# platform default encoding could write bytes that contradict the declaration
# (or fail outright) as soon as the data contains non-ASCII characters.
with open("../data/debate-list.xml", "w", encoding="utf-8") as f:
    f.write(xmltodict.unparse(headings, pretty=True))

### Parliamentary Questions

TODO: insert dot and space after question number in AKN

TODO: "D'Fhiafraigh" missing from start of questions in Irish

In [84]:
# Skeleton of the question listing: "head" describes the request and
# "results" is filled in by the loop below.
# "by" and "to" are None here; when used, each is a dict of the form
# {"uri": AnyURI, "showAs": string}.
question_head = {
    "chamber": {"name": house_name[0], "uri": house_uri},
    "dateRange": {"start": date, "end": date},
    "query": None,
    "by": None,
    "to": None,
}
questions = {"questions": {"head": question_head, "results": []}}
# Build one result entry for every <question> element in the record.
for q in root.xpath(".//akn:question", namespaces=AKN):
    # @by / @to have their first character dropped (presumably a leading "#")
    # to obtain the eId of the matching TLCPerson / TLCRole entry.
    asker_eid = q.attrib['by'][1:]
    by_tlc = root.xpath(".//akn:TLCPerson[@eId='{}']".format(asker_eid), namespaces=AKN)[0].attrib
    role_eid = q.attrib['to'][1:]
    to_tlc = root.xpath(".//akn:TLCRole[@eId='{}']".format(role_eid), namespaces=AKN)[0].attrib

    q_eid = q.attrib['eId']
    q_uri = dbr_uri + "/" + q_eid
    # The question number is the last "_"-separated token of the eId.
    q_number = int(q_eid.split("_")[-1])

    asked_by = {"uri": by_tlc['href'], "showAs": by_tlc['showAs']}
    asked_to = {"uri": to_tlc['href'], "showAs": to_tlc['showAs']}

    # The debate section that directly contains this question.
    dbs = q.xpath("./parent::akn:debateSection", namespaces=AKN)[0]
    section_uri = dbr_uri + "/" + dbs.attrib['eId']
    section_heading = dbs.xpath("./akn:heading/text()", namespaces=AKN)[0]

    # Join every text node under the question's <p> children with single
    # spaces, then collapse double spaces in one pass.
    text_nodes = q.xpath("./akn:p//text()", namespaces=AKN)
    q_text = " ".join(text_nodes).replace("  ", " ")

    question = {
        "question": {
            "by": asked_by,
            "to": asked_to,
            "debateSection": {"heading": section_heading, "uri": section_uri},
            "uri": q_uri,
            "text": q_text,
            "questionNumber": q_number,
        }
    }
    questions['questions']['results'].append(question)

In [87]:
# Save the question listing as pretty-printed JSON with alphabetically sorted keys.
with open("../data/question-list.json", "w") as json_out:
    json_out.write(json.dumps(questions, sort_keys=True, indent=2))

### Votes