In [1]:
import json
import pymongo
import re
from pprint import pprint
from collections import OrderedDict

In [2]:
# Handle to the "oir_rest" database, using the client's default connection settings.
db = pymongo.MongoClient()["oir_rest"]

In [5]:
# Load the exported records and insert them into the actsbook collection.
# The encoding is stated explicitly so that Irish-language text (fadas) is decoded
# the same way on every platform instead of depending on the locale default.
with open("BillsBook.json", encoding="utf-8") as f:
    db.actsbook.insert_many(json.load(f))

In [42]:
# Ascending index on Actno; the cell displays the name of the created index.
db["actsbook"].create_index(
    [("Actno", pymongo.ASCENDING)]
)

'Actno_1'

In [43]:
# For every actsbook record that has an act number, build an "act" sub-document
# (short/long titles, URIs, signing date) and store it on the matching bill.
for act in db.actsbook.find({"Actno": {"$exists": True}}):
    year = act["Actyear"]
    actNameGa = act["Actprefix"].strip() + " " + act['ActnameIrish'].strip() + " " + year
    actNameEn = act["Actname"].strip() + " Act " + year
    bill_uri = 'bill/' + act['Billyear'] + "/" + act["Billno"]
    act_uri = "/act/" + year + "/" + act["Actno"]

    bill = db.billsbook.find_one({"uri": bill_uri})
    if bill is None:
        # find_one returns None when nothing matches; report it and carry on
        # rather than failing with a TypeError on the first subscript below.
        print("No bill found for act {} (looked up {})".format(act_uri, bill_uri))
        continue

    # The act's long title is the bill's long title without its leading phrase.
    # A missing or null title yields None.
    actLongEn = bill.get('bill_title_long_en')
    if actLongEn is not None:
        actLongEn = actLongEn.replace("Bill entitled ", "")
    actLongGa = bill.get('bill_title_long_ga')
    if actLongGa is not None:
        actLongGa = actLongGa.replace("Bille dá ngairtear ", "")

    act_dict = {"shortTitle": {"en": actNameEn, "ga": actNameGa},
                "longTitle": {"en": actLongEn, "ga": actLongGa},
                "uri": act_uri,
                # .get(): a bill without these fields stores None instead of
                # aborting the whole loop part-way through.
                "statuteBookURI": bill.get('act_uri'),
                "dateSigned": bill.get("date_signed")}
    db.billsbook.update_one({"_id": bill["_id"]}, {"$set": {"act": act_dict}})

In [47]:
# Move each bill's flat title fields into nested {"en": ..., "ga": ...} documents.
for bill in db.billsbook.find():
    # Long titles are optional: a missing field becomes None.
    # (The original fallback for the Irish title assigned to the wrong variable,
    # leaving billLongGa undefined or carrying over the previous bill's value.)
    billLongEn = bill.get('bill_title_long_en')
    billLongGa = bill.get('bill_title_long_ga')

    # Short titles are required; a missing one raises KeyError as before.
    bill_name = {"en": bill['bill_title_sh_en'],
                 "ga": bill['bill_title_sh_ga']}
    bill_title = {"en": billLongEn,
                  "ga": billLongGa}

    db.billsbook.update_one({"_id": bill["_id"]}, {
                            "$set": {"shortTitle": bill_name,
                                     "longTitle": bill_title}})

In [49]:
# Remove the flat fields from every bill document.
db.billsbook.update_many(
    {},
    {"$unset": dict.fromkeys(
        ('act_uri',
         'bill_title_long_en',
         'bill_title_long_ga',
         'bill_title_sh_en',
         'bill_title_sh_ga',
         "date_signed"),
        "",
    )},
)

<pymongo.results.UpdateResult at 0x7f06dbeacf78>

In [63]:
# Write every non-null Irish long title to a text file, one per line.
# The encoding is explicit so that accented characters are written the same way
# on every platform instead of depending on the locale default.
with open("longtitle.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(t['longTitle']['ga']
                      for t in db.billsbook.find({"longTitle.ga": {"$ne": None}})))

In [36]:
# Spot check: show the actsbook record for act number "65" of year "2015".
db["actsbook"].find_one(dict(Actno="65", Actyear="2015"))

In [None]:
# Print the bills whose status is 'rejected_by_referendum'.
# The English short title is read from "shortTitle.en": the flat
# 'bill_title_sh_en' field is unset on every document by an earlier cell,
# so projecting it would return nothing.
for d in db.billsbook.find({"status": 'rejected_by_referendum'},
                           {"_id": 0,
                            "uri": 1,
                            "shortTitle.en": 1,
                            "events": 1}):
    pprint(d)

In [64]:
# Sample Irish long title, including the leading "Bille dá ngairtear " phrase.
titleGA = (
    "Bille dá ngairtear Acht do leasú agus do leathnú an Achta Cosanta, 1954 "
    "agus do dhéanamh socrú i dtaobh nithe gaolmhara"
)

In [73]:
# Give every bill URI a leading "/".
# The startswith guard makes the cell safe to re-run: without it, each run
# would prepend another slash to URIs that were already converted.
for bill in db.billsbook.find():
    if not bill['uri'].startswith("/"):
        db.billsbook.update_one({"_id": bill["_id"]}, {"$set": {"uri": "/" + bill['uri']}})

In [155]:
# Convert each bill's sponsorRole entries (absolute role URLs) into a "sponsors"
# list of {"sponsor": {"by": <member>, "as": <office>}} documents.
roles = []
for bill in db.billsbook.find({"sponsorRole.roleURI": {"$regex": "^http.+"}}, {"sponsorRole": 1}):
    sponsors = []
    for sp in bill["sponsorRole"]:
        role_url = sp.get("roleURI")
        if role_url is None:
            # Entries without a role URL contribute no sponsor.
            continue

        parts = role_url.split("/")
        if "minister" in role_url:
            # Office comes from the last two path segments.
            roleURI = parts[-2:]
            member_uri = "/" + "/".join(parts[3:-5])
        elif "president" in role_url:
            # Office comes from the last segment, split on "_".
            roleURI = parts[-1].split("_")
            # The original assigned this to `memberURI`, so the value read below
            # was stale (from a previous sponsor) or undefined.
            member_uri = "/" + "/".join(parts[3:-4])
        else:
            # Unrecognised role URL: record the sponsor with no office or member
            # rather than reusing the previous iteration's member URI.
            roleURI = None
            member_uri = None

        office_uri = "/office/{}/{}".format(roleURI[0], roleURI[1]) if roleURI is not None else None
        _as = {"showAs": None, "uri": office_uri}
        by = {"showAs": None, "uri": member_uri}
        sponsors.append({"sponsor": {"by": by, "as": _as}})
    db.billsbook.update_one({"_id": bill['_id']}, {"$set": {"sponsors": sponsors}})

In [None]:
# Remove the sponsorRole field from every bill document.
db["billsbook"].update_many(
    {},
    {"$unset": {"sponsorRole": ""}},
)

In [153]:
# Distinct values of the "source" field across all bills.
{doc['source'] for doc in db.billsbook.find()}

{'Government', 'Private Member', 'Private Sponsor'}

In [475]:
#{"events.status": "lapsed", "act": {"$ne": None}}

def house_and_stage(uri):
    """Derive display labels for an event URI.

    Labels are looked up in the module-level ``stages`` mapping.

    Args:
        uri: Event URI string, e.g. ``/bill/2012/34/dail/1``.

    Returns:
        A tuple ``(stage_name, house_name, house_uri)``. ``stage_name`` is None
        when the URI has no recognised status word or stage digit. ``house_name``
        and ``house_uri`` are None when the URI mentions neither "dail" nor
        "seanad".
    """
    # A status keyword anywhere in the URI takes precedence over a stage number.
    match = re.search(r"(withdrawn|deemed|lapsed|restored|finance_res)", uri)
    if match is None:
        match = re.search(r"bill/\d{4}/\w?\d+/(?:dail/|seanad/)(\d|)", uri)
    # .get(): the numbered pattern may capture an empty string (a URI ending in
    # "dail/" or "seanad/"), which has no label and previously raised KeyError.
    stage_name = stages.get(match.group(1)) if match is not None else None

    house = re.search(r"dail|seanad", uri)
    if house is None:
        return stage_name, None, None
    return stage_name, stages[house.group()], "/house/" + house.group()
    

def event_dict(progress, date, uri):
    """Build the document describing one bill event.

    Args:
        progress: Stage counter, stored as both ``stagesEntered`` and ``happenedOn``.
        date: Date of the first occurrence, stored as the only entry of ``dates``.
        uri: Event URI; its labels and chamber come from ``house_and_stage``.

    Returns:
        A dict with ``isStage`` set to True; callers may flip it afterwards.
    """
    stage_label, chamber_label, chamber_uri = house_and_stage(uri)
    record = dict(
        stagesEntered=progress,
        dates=[{"date": date}],
        isStage=True,
        happenedOn=progress,
        uri=uri,
        showAs=stage_label,
    )
    record["chamber"] = {"uri": chamber_uri, "showAs": chamber_label}
    return record

# Rebuild each bill's raw "events" into an ordered "billEvents" list of
# {"event": {...}} entries, keyed internally by event URI.
eventup = []
bills = db.billsbook.find()  # .limit(500)
for bill in bills:
    stage_dict = OrderedDict()
    progress = 0
    for event in bill['events']:
        # Normalise the URI so that it always starts with "/".
        if not event['uri'].startswith("/"):
            event['uri'] = "/" + event['uri']

        if event['status'] != "stage":
            # Non-stage events do not advance the counter and always (re)write
            # the entry for their URI.
            stage_dict[event['uri']] = event_dict(progress, event['date'], event['uri'])
            stage_dict[event['uri']]['isStage'] = False
        elif event['uri'] in stage_dict:
            # A stage seen before only gains another date.
            stage_dict[event['uri']]['dates'].append({"date": event['date']})
        else:
            # First time this stage appears: advance the counter.
            progress += 1
            stage_dict[event['uri']] = event_dict(progress, event['date'], event['uri'])

    events = [{"event": entry} for entry in stage_dict.values()]
    eventup.append(events)
    db.billsbook.update_one({"_id": bill["_id"]}, {"$set": {"billEvents": events}})

# Total number of event entries produced across all bills.
sum(map(len, eventup))

34887

In [468]:
# (stage label, chamber label) for every rebuilt event that has a stage label.
# Entries in eventup are wrapped as {"event": {...}}, so the fields are read
# through the "event" key; indexing the wrapper directly raises KeyError.
non_stages = [(e['event']["showAs"], e['event']['chamber']['showAs'])
              for bill in eventup
              for e in bill
              if e['event']['showAs'] is not None]
len(non_stages)

34886

In [494]:
# Enacted bills whose last event has stagesEntered below 9, listed as
# [uri, stagesEntered] pairs per event.
bills = db.billsbook.find({"status": "enacted"})
too_short = [
    [[e['event']['uri'], e['event']['stagesEntered']] for e in b['billEvents']]
    for b in bills
    # The truthiness check skips bills with a missing or empty billEvents list;
    # len(b.get(...)) raised TypeError when the field was absent.
    if b.get("billEvents") and b['billEvents'][-1]['event']["stagesEntered"] < 9
]

In [509]:
# Export one bill (without its raw events and _id) as a sample metadata file,
# adding hand-written "versions" and "amendmentLists" examples.
bill = db.billsbook.find_one({"uri": "/bill/2012/34"}, {"events": 0, "_id": 0})

bill['versions'] = [
    {"version": {
        "showAs": "As Initiated",
        "stages": [
            {"stage": {"uri": "/bill/2012/34/dail/1",
                       "showAs": "First Stage",
                       'chamber': {'showAs': 'Dáil', 'uri': '/house/dail'}}},
            {"stage": {"uri": "/bill/2012/34/dail/2",
                       "showAs": "Second Stage",
                       'chamber': {'showAs': 'Dáil', 'uri': '/house/dail'}}},
        ],
        "uri": "/bill/2012/34/eng@initiated",
        "formats": [
            {"content-type": "text+xml",
             "uri": "/bill/2012/34/eng@initiated.xml"},
            {"content-type": "application+pdf",
             "uri": "/bill/2012/34/eng@initiated.pdf"},
        ],
    }}
]

# "uri" and "formats" belong inside the "amendmentList" wrapper, mirroring the
# "version" wrapper above. Misplaced braces previously left them as siblings
# of "amendmentList".
bill['amendmentLists'] = [
    {"amendmentList": {
        "stage": {"uri": "/bill/2012/34/dail/3",
                  "showAs": "Committee Stage",
                  'chamber': {'showAs': 'Dáil', 'uri': '/house/dail'}},
        "uri": "/bill/2012/34/dail/3/amendmentList/@eng",
        "formats": [
            {"content-type": "text+xml",
             "uri": "/bill/2012/34/dail/3/amendmentList/@eng.xml"},
            {"content-type": "application+pdf",
             "uri": "/bill/2012/34/dail/3/amendmentList/@eng.pdf"},
        ],
    }}
]

with open("sample-bill-medata.json", "w") as f:
    json.dump(bill, f, indent=2, sort_keys=True)

In [439]:
# Display labels keyed by the tokens that appear in event URIs.
stages = {
    # numbered stages
    "1": "First Stage",
    "2": "Second Stage",
    "3": "Third Stage",
    "4": "Fourth Stage",
    "5": "Fifth Stage",
    # houses
    "dail": "Dáil",
    "seanad": "Seanad",
    # other statuses
    "deemed": "Deemed to have Passed",
    'financial-resolution': "Financial Resolutions",
    'finance_res': "Financial Resolutions",
    'lapsed': "Lapsed",
    'rejected': "Defeated or Rejected",
    'restored': "Restored to the Order Paper",
    'withdrawn': "Withdrawn",
    # orders and amendments
    '2_ord': "Order for Second Stage",
    '3_ord': "Order for Committee Stage",
    '4_ord': "Order for Report Stage",
    "seanad_amd": "Seanad Amendments",
}

In [6]:
# Show one enacted bill whose act.dateSigned is at or after the string
# "2014-01-01", leaving out the raw events and the _id.
db["billsbook"].find_one(
    {"status": "enacted", "act.dateSigned": {"$gte": "2014-01-01"}},
    {"events": 0, "_id": 0},
)

{'act': {'dateSigned': '2014-12-25',
  'longTitle': {'en': 'An Act to amend the Public Health (Tobacco) Act 2002 in order to prohibit the smoking of tobacco products in vehicles where children are present and to provide for the investigation and prosecution of such offences by members of an Garda Síochána',
   'ga': 'Acht do leasú an Achta Sláinte Poiblí (Tobac), 2002 d’fhonn toirmeasc a chur le caitheamh táirgí tobac i bhfeithiclí a bhfuil leanaí i láthair iontu agus do dhéanamh socrú maidir le comhaltaí den Gharda Síochána d’imscrúdú agus d’ionchúiseamh cionta den sórt sin'},
  'shortTitle': {'en': 'Protection of Children’s Health (Tobacco Smoke in Mechanically Propelled Vehicles) Act 2014',
   'ga': 'An tAcht um Chosaint Sláinte Leanaí (Deatach Tobac i bhFeithiclí Inneallghluaiste), 2012 athraithe ó Chosaint Sláinte Leanaí ar Dheatach Tobac, 2014'},
  'statuteBookURI': 'http://www.irishstatutebook.ie/eli/2014/act/40',
  'uri': '/act/2014/40'},
 'billEvents': [{'event': {'chamber': {