In [268]:
from IPython.display import Image

import pymongo
import requests
import json
import os
import re
import xmltodict
from pprint import pprint

In [4]:
# Client created with pymongo's default connection settings.
db = pymongo.MongoClient()
# Every collection written by this notebook lives in the "oir_rest" database.
rest = db["oir_rest"]
# Directory, relative to the notebook, that holds the source JSON files.
datadir = "../../../ontology/data"

In [297]:
def _single_field_index(field, index_name, direction=pymongo.ASCENDING):
    """Build a named single-field IndexModel on `field` in the given direction."""
    return pymongo.IndexModel([(field, direction)], name=index_name)


# Key and date indexes reused by the collection-loading cells.
uriIndex = _single_field_index("uri", "uri_index")
dateIndex = _single_field_index("date", "date_desc_index", pymongo.DESCENDING)
eIdIndex = _single_field_index("eId", "eId_index")

# Name indexes (applied to the members collection when it is loaded).
firstNameIndex = _single_field_index("firstName", "first_name_index")
lastNameIndex = _single_field_index("lastName", "last_name_index")
fullNameIndex = _single_field_index("fullName", "full_name_index")

In [69]:
# Load the member records, rename their keys to the names used by the rest of
# this notebook, and store them in the `members` collection.
#
# The collection is dropped first so that re-running this cell does not insert
# every member a second time (the parties/service/cabinets cells already reset
# their collections the same way). The indexes are recreated at the end.
rest.drop_collection("members")
with open(os.path.join(datadir, "members.json"), "r") as f:
    members = json.load(f)
for m in members:
    # "eId" becomes the member's "uri"; pop() removes the old key.
    m['uri'] = m.pop("eId")
    # snake_case source keys -> camelCase keys used in the exported documents.
    m['dateOfBirth'] = m.pop('date_of_birth')
    m['dateOfDeath'] = m.pop('date_of_death')
    # Not available in the source file; stored as an explicit null.
    m['gender'] = None
ins = rest.members.insert_many(members)
print(ins.acknowledged)
rest.members.create_indexes([uriIndex, firstNameIndex, lastNameIndex, fullNameIndex])

True


['uri_index', 'first_name_index', 'last_name_index', 'full_name_index']

In [186]:
# Rebuild the parties collection from parties.json.
rest.drop_collection("parties")
parties_path = os.path.join(datadir, "parties.json")
with open(parties_path, "r") as parties_file:
    parties = json.load(parties_file)
rest.parties.insert_many(parties)

<pymongo.results.InsertManyResult at 0x7f80800acd38>

In [101]:
# Rebuild the service collection from service.json and index it on eId.
rest.drop_collection("service")
service_path = os.path.join(datadir, "service.json")
with open(service_path, "r") as service_file:
    service = json.load(service_file)
rest.service.insert_many(service)
rest.service.create_indexes([eIdIndex])

<pymongo.results.InsertManyResult at 0x7f8066759e10>

In [289]:
# Reset both the raw collection (cabinets1) and the flattened one (cabinets).
for stale_collection in ("cabinets1", "cabinets"):
    rest.drop_collection(stale_collection)
government_path = os.path.join(datadir, "government_members.json")
with open(government_path, "r") as government_file:
    cabinets = json.load(government_file)
# The raw documents are stored in cabinets1; the eId index is created on
# `cabinets`, which the following cell fills from cabinets1.
rest.cabinets1.insert_many(cabinets)
rest.cabinets.create_indexes([eIdIndex])

<pymongo.results.InsertManyResult at 0x7f80667a2af8>

In [291]:
# Flatten cabinets1 into `cabinets`: one document per entry of each record's
# "cabinets" array, visited in ascending order of the entry's start date.
unwind_by_start = [
    {"$unwind": "$cabinets"},
    {"$sort": {"cabinets.start": 1}},
]
for holder in rest.cabinets1.aggregate(unwind_by_start):
    cab = holder['cabinets']
    # Keep only the part of the cabinet URI after the "ie/oireachtas" prefix.
    cabinet_uri = cab['cabinet'].split("ie/oireachtas")[-1]
    rest.cabinets.insert_one({
        # showAs is left empty here; a later cell fills it in on the member documents.
        "cabinet": {"uri": cabinet_uri, "showAs": ""},
        "dateRange": {"start": cab['start'], "end": cab['end']},
        "officeName": {"showAs": holder['office'], "uri": "/office" + holder['uri']},
        # The record's eId followed by whatever comes after "/cabinet" in the cabinet URI.
        "eId": holder['eId'] + cabinet_uri.split("/cabinet")[-1],
    })

In [187]:
# Add partyName / dateRange / wikiParty to every party document, then index by uri.
for party_doc in rest.parties.find():
    wiki_party = party_doc['party']
    # House URI with "house" swapped for "party", followed by the party slug.
    party_base = party_doc['house'].replace("house", "party")
    party_uri = party_base + "/" + party_doc['oir_party']
    # Display name: the slug with underscores turned into spaces.
    display_name = party_doc['oir_party'].replace("_", " ")
    added_fields = {
        "partyName": {"uri": party_uri, "showAs": display_name},
        "dateRange": {"start": party_doc['start'], "end": party_doc['end']},
        "wikiParty": wiki_party,
    }
    rest.parties.update_one({"_id": party_doc["_id"]}, {"$set": added_fields})
rest.parties.create_indexes([uriIndex])

['uri_index']

In [173]:
# Add uri / dateRange / house / represents to every service document.
#
# NOTE(review): `cons` is not defined in the visible part of this notebook. It
# is used below as a mapping from the last path segment of a constituency URI
# to its display name -- confirm where it is built before a fresh-kernel run.

# Raw strings: "\." and "\g<1>" are regex syntax, not Python string escapes.
# Written as plain strings they are invalid escape sequences, which newer
# Python versions warn about.
house_prefix = re.compile(r"(dail|seanad)\.")

for s in rest.service.find():
    represents = []
    for c in s['constURI']:
        # Normalise to a leading slash.
        if not c.startswith("/"):
            c = "/" + c
        showAs = cons[c.split("/")[-1]]
        # Names outside /house/dail have their hyphens replaced by spaces.
        if not c.startswith("/house/dail"):
            showAs = showAs.replace("-", " ")
        represents.append({"represent": {"uri": c, "showAs": showAs}})
    # Turn "dail." / "seanad." into "dail/" / "seanad/" inside the service URI.
    uri = house_prefix.sub(r"\g<1>/", s['serviceURI'])
    dateRange = {"start": s['periodStart'], "end": s['periodEnd']}
    # The house display name comes from the separate `oireachtas` database.
    house_name = db.oireachtas.wikiHouses.find_one({"_id": s['houseURI']})['houseName']
    house = {"uri": s['houseURI'], "showAs": house_name}
    rest.service.update_one({"_id": s['_id']}, {"$set": {"uri": uri,
                                                         "dateRange": dateRange,
                                                         "house": house,
                                                         "represents": represents}})

In [302]:
# Embed into each service document the parties and cabinet offices that share
# its uri, each list ordered by dateRange.start ascending.
for svc in rest.service.find():
    party_cursor = rest.parties.find(
        {"uri": svc['uri']},
        {"_id": 0, "partyName": 1, "dateRange": 1},
    ).sort("dateRange.start", 1)
    # Offices are matched on the cabinet document's eId rather than a uri field.
    office_cursor = rest.cabinets.find(
        {"eId": svc['uri']},
        {"_id": 0, "eId": 0},
    ).sort("dateRange.start", 1)
    party_entries = [{"party": doc} for doc in party_cursor]
    office_entries = [{"office": doc} for doc in office_cursor]
    rest.service.update_one(
        {"_id": svc['_id']},
        {"$set": {"parties": party_entries, "offices": office_entries}},
    )

In [336]:
# Embed into each member the service records whose eId equals the member's
# uri, ordered by dateRange.start ascending.
for member_doc in rest.members.find():
    # Only these service fields are copied into the membership entries.
    membership_fields = {
        "_id": 0,
        "uri": 1,
        "house": 1,
        "offices": 1,
        "parties": 1,
        "represents": 1,
    }
    service_cursor = rest.service.find({"eId": member_doc['uri']}, membership_fields)
    membership_entries = [
        {"membership": svc} for svc in service_cursor.sort("dateRange.start", 1)
    ]
    rest.members.update_one(
        {"_id": member_doc['_id']},
        {"$set": {"memberships": membership_entries}},
    )

In [392]:
# Fill in the cabinet display name of every office a member held, using the
# display name of the house the membership belongs to.
for member in rest.members.find():
    memberships = member['memberships']
    for entry in memberships:
        membership = entry['membership']
        for held in membership['offices']:
            held['office']['cabinet']['showAs'] = "Government of the " + membership['house']['showAs']
    # Write the modified list back as a whole.
    rest.members.update_one({"_id": member['_id']}, {"$set": {"memberships": memberships}})
  

In [372]:
# Sample lookup: fetch the member named 'Michael Noonan' whose offices have a
# start on or before `date` and an end on or after `date`, then trim the first
# returned membership's parties and offices to the entries covering `date`.
date = "1986-03-01"
query = {
    'fullName': 'Michael Noonan',
    "memberships.membership.offices.office.dateRange.start": {"$lte": date},
    "memberships.membership.offices.office.dateRange.end": {"$gte": date},
}
projection = {
    "_id": 0,
    "uri": 1,
    "showAs": 1,
    "image": 1,
    #"memberships.membership.represents": 1,
    "memberships.membership.offices.$.office.dateRange": 1,
}
m = rest.members.find_one(query, projection)
first_membership = m['memberships'][0]['membership']
for list_key, entry_key in [("parties", "party"), ("offices", "office")]:
    # `date` is a string, so the bounds are compared with string ordering.
    first_membership[list_key] = [
        entry for entry in first_membership[list_key]
        if entry[entry_key]['dateRange']['start'] <= date <= entry[entry_key]['dateRange']['end']
    ]
pprint(m)

{'image': 'http://www.oireachtas.ie//MembersImages/141.jpg',
 'memberships': [{'membership': {'house': {'showAs': '24th Dáil',
                                           'uri': '/house/dail/24'},
                                 'offices': [{'office': {'cabinet': {'showAs': '',
                                                                     'uri': '/cabinet/dail/24'},
                                                         'dateRange': {'end': '1987-03-10',
                                                                       'start': '1986-02-19'},
                                                         'officeName': {'showAs': 'Minister '
                                                                                  'for '
                                                                                  'Industry '
                                                                                  'and '
                                                                       

In [410]:
def find_members():
    """Return a cursor over all member documents, limited to the exported fields.

    The projection removes MongoDB's `_id` and keeps only the fields listed
    below; every call issues a new query against `rest.members`.
    """
    return rest.members.find({}, {"_id": 0,
                                  "lastName": 1,
                                  "showAs": 1,
                                  "firstName": 1,
                                  "dateOfBirth": 1,
                                  "dateOfDeath": 1,
                                  "image": 1,
                                  "fullName": 1,
                                  "uri": 1,
                                  "memberships": 1,
                                  "gender": 1,
                                  "profession": 1})


# Query once so the JSON and XML files are written from the same documents.
exported_members = list(find_members())

# Both files are opened explicitly as UTF-8. xmltodict.unparse() declares
# encoding="utf-8" in the XML header, so the bytes on disk must match that
# declaration whatever the platform's default text encoding is.
with open("members_service.json", "w", encoding="utf-8") as f:
    json.dump(exported_members, f, indent=2, sort_keys=True)
with open("members_service.xml", "w", encoding="utf-8") as f:
    f.write(xmltodict.unparse({"root": {"members": [{"member": m} for m in exported_members]}}, pretty=True))

In [None]:
# Print the "Chair ... [ ... ]" fragment found in each member's `details`
# text, or None when the text mentions "Chair" without that bracketed form.
#
# The pattern is a raw string: \s, \w, \d, \[ and \] are regex syntax, and as
# a plain string they are invalid escape sequences that newer Python versions
# warn about.
chair_pattern = re.compile(r"Chair[\s\w]+\[[\d\s\w]+\]")

# Skip members whose details are empty/None or never mention "Chair".
chairs = [m['details'] for m in members if m['details'] and "Chair" in m['details']]
for chair in chairs:
    ch = chair_pattern.search(chair)
    print(ch.group() if ch is not None else None)