In [1]:
from IPython.display import Image

import pymongo
import requests
import json
import os
import re
import xmltodict
from pprint import pprint

In [2]:
db = pymongo.MongoClient()
datadir = "https://raw.githubusercontent.com/Oireachtas/ontology/master/data"
rest = db.oir_rest
base_uri = "http://oireachtas.ie/ie/oireachtas"

In [3]:
uriIndex = pymongo.IndexModel([("uri", pymongo.ASCENDING)], name="uri_index")
dateIndex = pymongo.IndexModel([("date", pymongo.DESCENDING)], name="date_desc_index")
eIdIndex = pymongo.IndexModel([("eId", pymongo.ASCENDING)], name="eId_index")

firstNameIndex = pymongo.IndexModel([("firstName", pymongo.ASCENDING)], name="first_name_index")
lastNameIndex = pymongo.IndexModel([("lastName", pymongo.ASCENDING)], name="last_name_index")
fullNameIndex = pymongo.IndexModel([("fullName", pymongo.ASCENDING)], name="full_name_index")

In [4]:
rest.drop_collection("members")
members = requests.get(os.path.join(datadir, "members.json")).json()
print(len(members))
for m in members:
    m['uri'] = base_uri+m.pop("eId")
    m['dateOfBirth'] = m.pop('date_of_birth')
    m['dateOfDeath'] = m.pop('date_of_death')
    m['gender'] = None
ins = rest.members.insert_many(members)
print(ins.acknowledged)
rest.members.create_indexes([uriIndex, firstNameIndex, lastNameIndex, fullNameIndex])

1695
True


['uri_index', 'first_name_index', 'last_name_index', 'full_name_index']

In [5]:
rest.drop_collection("parties")
parties = requests.get(os.path.join(datadir, "parties.json")).json()
print(len(parties))
rest.parties.insert_many(parties)

4618


<pymongo.results.InsertManyResult at 0x7fe4893a9510>

In [6]:
rest.drop_collection("service")
service = requests.get(os.path.join(datadir, "service.json")).json()
print(len(service))
rest.service.insert_many(service)
rest.service.create_indexes([eIdIndex])

6536


['eId_index']

In [7]:
rest.drop_collection("cabinets1")
rest.drop_collection("cabinets")
cabinets = requests.get(os.path.join(datadir, "government_members.json")).json()
print(len(cabinets))
rest.cabinets1.insert_many(cabinets)
rest.cabinets.create_indexes([eIdIndex])

643


['eId_index']

In [8]:
constituencies = requests.get(os.path.join(datadir, "constituencies.json")).json()

In [9]:
for c in rest.cabinets1.aggregate([
                                   {"$unwind": "$cabinets"}, 
                                    {"$sort": {"cabinets.start": 1}},
                                   
                                     ]):
    cab = c['cabinets']
    cabinet_uri = cab['cabinet'].split("ie/oireachtas")[-1]
    dateRange = {"start": cab['start'], "end": cab['end']}
    officeName = {"showAs": c['office'], "uri": base_uri+"/office"+ c['uri']}
    eId = base_uri+c['eId'] + cabinet_uri.split("/cabinet")[-1]
    office = {"cabinet": {"uri": base_uri+cabinet_uri, "showAs": ""},
             "dateRange": dateRange,
             "officeName": officeName,
             "eId": eId}
    rest.cabinets.insert_one(office)

In [10]:
rest.cabinets.find_one()

{'_id': ObjectId('57b32913a59f5b40f8dbba41'),
 'cabinet': {'showAs': '',
  'uri': 'http://oireachtas.ie/ie/oireachtas/cabinet/dail/1'},
 'dateRange': {'end': '1921-08-16', 'start': '1919-01-22'},
 'eId': 'http://oireachtas.ie/ie/oireachtas/member/George-Noble-Plunkett.D.1919-01-21/dail/1',
 'officeName': {'showAs': 'Minister for Foreign Affairs',
  'uri': 'http://oireachtas.ie/ie/oireachtas/office/minister/foreign_affairs'}}

In [11]:
for p in rest.parties.find():
    wikiParty = p['party']
    party_uri = base_uri+p['house'].replace("house", "party") + "/" + p['oir_party']
    showAs = p['oir_party'].replace("_", " ")
    #partyName = {"uri": party_uri, "showAs": showAs}
    dateRange = {"start": p['start'], "end": p['end']}
    rest.parties.update_one({"_id": p["_id"]}, {"$set": {"showAs": showAs,
                                                         "eId": base_uri+p['uri'],
                                                         "uri": party_uri,
                                                       "dateRange": dateRange,
                                                         "house": base_uri+p['house'],
                                                         "member": base_uri+p['member'],
                                                       "wikiParty": wikiParty}})
rest.parties.create_indexes([uriIndex])

['uri_index']

In [12]:
rest.parties.find_one()

{'_id': ObjectId('57b32908a59f5b40f8db8c2c'),
 'dateRange': {'end': '1922-06-16', 'start': '1921-05-24'},
 'eId': 'http://oireachtas.ie/ie/oireachtas/member/Edward-Aylward.D.1921-08-16/dail/2',
 'end': '1922-06-16',
 'house': 'http://oireachtas.ie/ie/oireachtas/house/dail/2',
 'member': 'http://oireachtas.ie/ie/oireachtas/member/Edward-Aylward.D.1921-08-16',
 'oir_party': 'Sinn_Féin',
 'party': 'Sinn_Féin',
 'showAs': 'Sinn Féin',
 'start': '1921-05-24',
 'uri': 'http://oireachtas.ie/ie/oireachtas/party/dail/2/Sinn_Féin',
 'wikiParty': 'Sinn_Féin'}

In [13]:
for s in rest.service.find():
    represents = []
    for c in s['constURI']:
        
        if not c.startswith("/"):
            c = "/"+c
        if c.startswith("/house/dail"):
            showAs = constituencies[c.split("/")[-1]]
        else:
            showAs = constituencies[c.split("/")[-1]].replace("-", " ")
        represents.append({"represent": {"uri": base_uri+c, "showAs": showAs}})
    uri = base_uri+re.sub("(dail|seanad)\.", "\g<1>/", s['serviceURI'])
    dateRange = {"start": s['periodStart'], "end": s['periodEnd']}
    house_name = db.oireachtas.wikiHouses.find_one({"_id":s['houseURI']})['houseName']
    house = {"uri": base_uri+s['houseURI'], "showAs": house_name}
    rest.service.update_one({"_id":s['_id']}, {"$set":{"uri": uri,
                                                       "eId": base_uri+s['eId'],
                                                       "dateRange": dateRange, 
                                                       "house": house,
                                                      "represents": represents}})

In [14]:
for s in rest.service.find():
    parties = rest.parties.find({"eId": s['uri']}, {"_id": 0, 
                                                    "showAs": 1,
                                                    "uri": 1,
                                                    "dateRange": 1} ).sort("dateRange.start", 1)
    offices = rest.cabinets.find({"eId": s['uri']}, {"_id": 0, "eId": 0} ).sort("dateRange.start", 1)
    rest.service.update_one({"_id": s['_id']}, 
                            {"$set": {"parties": [{"party": p} for p in parties], 
                                      "offices": [{"office": p} for p in offices]}})

In [15]:
rest.service.find_one({"parties": {"$size": 1}})

{'_id': ObjectId('57b3290ca59f5b40f8db9e37'),
 'constURI': ['house/dail/3/constituency/Cork-Mid-North-South-South-East-and-West'],
 'dateRange': {'end': '1923-08-09', 'start': '1922-09-09'},
 'eId': 'http://oireachtas.ie/ie/oireachtas/member/Daniel-Corkery.D.1921-08-16',
 'elected': '1922-06-16',
 'house': {'showAs': '3rd Dáil',
  'uri': 'http://oireachtas.ie/ie/oireachtas/house/dail/3'},
 'houseURI': '/house/dail/3',
 'offices': [],
 'parties': [{'party': {'dateRange': {'end': '1923-08-09',
     'start': '1922-06-16'},
    'showAs': 'Sinn Féin',
    'uri': 'http://oireachtas.ie/ie/oireachtas/party/dail/3/Sinn_Féin'}}],
 'periodEnd': '1923-08-09',
 'periodStart': '1922-09-09',
 'represents': [{'represent': {'showAs': 'Cork Mid, North, South, South East and West',
    'uri': 'http://oireachtas.ie/ie/oireachtas/house/dail/3/constituency/Cork-Mid-North-South-South-East-and-West'}}],
 'serviceURI': '/member/Daniel-Corkery.D.1921-08-16/dail.3',
 'uri': 'http://oireachtas.ie/ie/oireachtas/me

In [16]:
for m in rest.members.find():
    services = rest.service.find({"eId": m['uri']}, 
                                  {"_id": 0,
                                   "uri": 1,
                                   "house": 1,
                                  "offices": 1,
                                  "parties": 1,
                                  "represents": 1,
                                   "dateRange": 1
                                  }).sort("dateRange.start", 1)
    rest.members.update_one({"_id": m['_id']}, {"$set": {"memberships": [{"membership": s} for s in services]}})

In [17]:
for member in rest.members.find():
    for m in member['memberships']:
        for o in m['membership']['offices']:
            o['office']['cabinet']['showAs'] = "Government of the " + m['membership']['house']['showAs']
    
    rest.members.update_one({"_id": member['_id']}, {"$set": {"memberships": member['memberships']}})   
  

In [18]:
date = "1986-03-01"
m = rest.members.find_one({ 'fullName': 'Michael Noonan',
                    "memberships.membership.offices.office.dateRange.start": {"$lte": date},
                   "memberships.membership.offices.office.dateRange.end": {"$gte": date}},
                 {"_id": 0, 
                  "uri": 1,
                  "showAs": 1,
                  "image": 1,
                  #"memberships.membership.represents": 1,
                  "memberships.membership.offices.$.office.dateRange": 1})
for p_r in [("parties", "party"), ("offices", "office")]:
    item = m['memberships'][0]['membership'][p_r[0]]
    fil = [d for d in item if d[p_r[1]]['dateRange']['start'] <= date <= d[p_r[1]]['dateRange']['end']]
    #print(fil)
    m['memberships'][0]['membership'][p_r[0]] = fil
pprint(m)

{'image': 'http://www.oireachtas.ie//MembersImages/141.jpg',
 'memberships': [{'membership': {'dateRange': {'end': '1987-01-21',
                                               'start': '1982-12-14'},
                                 'house': {'showAs': '24th Dáil',
                                           'uri': 'http://oireachtas.ie/ie/oireachtas/house/dail/24'},
                                 'offices': [{'office': {'cabinet': {'showAs': 'Government '
                                                                               'of '
                                                                               'the '
                                                                               '24th '
                                                                               'Dáil',
                                                                     'uri': 'http://oireachtas.ie/ie/oireachtas/cabinet/dail/24'},
                                                         'date

In [20]:
def find_members():
    return rest.members.find({}, {"_id": 0,
                            "lastName": 1,
                           "showAs": 1,
                           "firstName": 1,
                           "dateOfBirth": 1,
                           "dateOfDeath": 1,
                           "image": 1,
                           "fullName": 1,
                           "uri": 1,
                           "memberships": 1,
                           "gender": 1,
                           "profession": 1})

In [23]:
with open("../data/members_service.json", "w") as f:
    json.dump([m for m in find_members()], f, indent=2, sort_keys=True)
with open("../data/members_service.xml", "w") as f:
    f.write(xmltodict.unparse({"root": {"members": [{"member": m} for m in find_members()]}}, pretty=True))

In [None]:
chairs = [m['details'] for m in members if m['details'] and "Chair" in m['details']]
for chair in chairs:    
    ch = re.search("Chair[\s\w]+\[[\d\s\w]+\]", chair)  
    print(ch.group() if ch is not None else None)