In [1]:
from lxml import etree
import json

In [2]:
# Prefix-to-namespace mapping passed as `namespaces=` to the XPath queries below.
AKN = dict(akn="http://docs.oasis-open.org/legaldocml/ns/akn/3.0/CSD13")
# Path of the XML document that gets parsed.
xml = "../data/AK-dail-2015-11-12-v2.xml"
# NOTE(review): not referenced in the cells visible here -- confirm it is still needed.
baseURI = "http://oireachtas.ie"

In [3]:
# Parse the XML file named by `xml`; the cells below query `root` via find() and xpath().
root = etree.parse(xml)

In [4]:
# The text() step makes xpath() return a list; later cells use its first
# item (house_name[0]) as the chamber name.
house_name = root.xpath(".//akn:preface/akn:block[@name='proponent_ga']/akn:docProponent/text()", namespaces=AKN)

In [5]:
# Read two attributes from the FRBRWork metadata: the author's href (used as
# the chamber URI) and the work date (used for the date range).
work_author_el = root.find(".//{*}FRBRWork/{*}FRBRauthor")
house_uri = work_author_el.attrib['href']
work_date_el = root.find(".//{*}FRBRWork/{*}FRBRdate")
date = work_date_el.attrib['date']
date

'2015-11-12'

In [6]:
# URI of the debate record: first `value` attribute found on FRBRWork/FRBRuri.
# Every section and question URI below is built by appending to it.
work_uri_values = root.xpath(".//akn:FRBRWork/akn:FRBRuri/@value", namespaces=AKN)
dbr_uri = work_uri_values[0]

### Debate Headings


In [37]:
# Header of the debate list: chamber, a single-day date range, and no query.
debate_list_head = {
    "chamber": {"name": house_name[0], "uri": house_uri},
    "dateRange": {"start": date, "end": date},
    "query": None,
}
# The one debate record of this document; its "debateSections" list starts
# empty and is populated by a later cell.
debate_record = {"uri": dbr_uri, "date": date, "debateSections": []}
headings = {
    "debateList": {
        "head": debate_list_head,
        "results": [{"debateRecord": debate_record}],
    }
}

In [None]:
# Collect one entry per debateSection that has a heading.
# The list is rebuilt from scratch and then assigned, so re-running this cell
# replaces the earlier result instead of appending duplicates to `headings`.
debate_sections = []
for dbs in root.xpath(".//akn:debateSection[./akn:heading]", namespaces=AKN):
    # Only speeches that are direct children of this section are counted.
    count = len(dbs.xpath("./akn:speech", namespaces=AKN))
    parent_dbs = dbs.xpath("./parent::akn:debateSection", namespaces=AKN)
    if len(parent_dbs) > 0:
        parent_uri = dbr_uri + "/" + parent_dbs[0].attrib['eId']
        # The XPath filter above only guarantees a heading on `dbs` itself;
        # the enclosing section may have none, in which case find() returns
        # None and the parent heading is reported as None.
        parent_heading_el = parent_dbs[0].find("./{*}heading")
        parent_heading = parent_heading_el.text if parent_heading_el is not None else None
        parent = {"uri": parent_uri, "heading": parent_heading}
    else:
        # Top-level section: no enclosing debateSection.
        parent = None
    dbs_uri = dbr_uri + "/" + dbs.attrib['eId']
    heading = dbs.find("./{*}heading").text
    data = {"uri": dbs_uri, "heading": heading, "context": None, "speechCount": count, "parentDebateSection": parent}
    debate_sections.append({"debateSection": data})
headings['debateList']['results'][0]['debateRecord']['debateSections'] = debate_sections

In [43]:
# Write the debate list as JSON with sorted keys and two-space indentation.
with open("../data/debate-list.json", "w") as out_file:
    out_file.write(json.dumps(headings, sort_keys=True, indent=2))

### Parliamentary Questions

TODO: insert dot and space after question number in AKN

TODO: "D'Fhiafraigh" missing from start of questions in Irish

In [84]:
# Envelope for the question list. The head mirrors the debate-list head and
# additionally carries "by" and "to", both left as None here.
# if by and to are used, they are dict: {"uri": AnyURI, "showAs": string}
questions = {
    "questions": {
        "head": {
            "chamber": {"name": house_name[0], "uri": house_uri},
            "dateRange": {"start": date, "end": date},
            "query": None,
            "by": None,
            "to": None,
        },
        "results": [],
    }
}
pq_results = questions['questions']['results']
for q in root.xpath(".//akn:question", namespaces=AKN):
    # The `by`/`to` attributes are stripped of their first character
    # (presumably a leading "#") and matched against TLCPerson/TLCRole eIds.
    asker_query = ".//akn:TLCPerson[@eId='{}']".format(q.attrib['by'][1:])
    asker = root.xpath(asker_query, namespaces=AKN)[0].attrib
    addressee_query = ".//akn:TLCRole[@eId='{}']".format(q.attrib['to'][1:])
    addressee = root.xpath(addressee_query, namespaces=AKN)[0].attrib

    pq_uri = dbr_uri + "/" + q.attrib['eId']
    # The question number is the part of the eId after the last underscore.
    pq_number = int(q.attrib['eId'].split("_")[-1])
    asked_by = {"uri": asker['href'], "showAs": asker['showAs']}
    asked_to = {"uri": addressee['href'], "showAs": addressee['showAs']}

    # The debateSection directly containing the question supplies the heading.
    section = q.xpath("./parent::akn:debateSection", namespaces=AKN)[0]
    section_uri = dbr_uri + "/" + section.attrib['eId']
    section_heading = section.xpath("./akn:heading/text()", namespaces=AKN)[0]

    # Join every text node under the question's <p> children, then replace
    # double spaces with single ones in one pass.
    pq_text = " ".join(q.xpath("./akn:p//text()", namespaces=AKN)).replace("  ", " ")

    pq_results.append({
        "question": {
            "by": asked_by,
            "to": asked_to,
            "debateSection": {"heading": section_heading, "uri": section_uri},
            "uri": pq_uri,
            "text": pq_text,
            "questionNumber": pq_number,
        }
    })

In [87]:
# Write the question list as JSON with sorted keys and two-space indentation.
with open("../data/question-list.json", "w") as out_file:
    out_file.write(json.dumps(questions, sort_keys=True, indent=2))

In [86]:
# Show the assembled question structure as this cell's output.
questions

{'questions': {'head': {'by': None,
   'chamber': {'name': 'DÁIL ÉIREANN', 'uri': '/ie/oireachtas/house/dail'},
   'dateRange': {'end': '2015-11-12', 'start': '2015-11-12'},
   'query': None,
   'to': None},
  'results': [{'question': {'by': {'showAs': 'Mr. Seán Ó Fearghaíl',
      'uri': '/ie/oireachtas/member/Seán-Ó-Fearghaíl.S.2000-06-09/dail/31'},
     'debateSection': {'heading': 'Heritage Council Funding',
      'uri': '/akn/ie/debateRecord/dail/2015-11-12/debate/dbsect_4'},
     'questionNumber': 1,
     'text': '1 Deputy Seán Ó Fearghaíl asked the Minister for Arts, Heritage and the Gaeltacht the funding being provided for the Heritage Council in 2016; and if she will make a statement on the matter. [39716/15]',
     'to': {'showAs': 'Minister for Arts, Heritage and the Gaeltacht',
      'uri': '/ie/oireachtas/member/Heather-Humphreys.D.2011-03-09/dail/31/minister/arts__heritage__gaeltacht'},
     'uri': '/akn/ie/debateRecord/dail/2015-11-12/debate/pq_1'}},
   {'question': {'by