From af5e158030a149ef54bbbcc51ba954fefe96e303 Mon Sep 17 00:00:00 2001
From: Ori Hoch
Date: Tue, 30 Jan 2018 13:27:31 +0200
Subject: [PATCH] minor cosmetic fixes to member pages, added index page with names of all members, added join-mks pipeline

---
Review notes (free-form text in this position is ignored by git am / git apply):

* This copy of the patch was recovered from a whitespace-collapsed paste.
  Line breaks, indentation and the positions of blank context lines were
  re-inferred from the hunk headers and may differ from the original commit.
* join_mks.py carries in-line review fixes, each marked with a trailing
  comment; its line count is unchanged, so the hunk header and the diffstat
  below are still accurate:
    - the position lookup dict is initialised under the name that is
      actually used later (kns_persontopositions);
    - persons without any position rows yield no positions instead of
      raising KeyError;
    - the KNOWN_MK_PERSON_IDS fallback returns an int person id, matching
      the int keys of the lookup dicts;
    - the second alternate name is built from kns_person_row only;
    - typo in the "failed to find matching person" error message.

 build_members.py             |  28 +++++----
 constants.py                 |   1 +
 join_mks.py                  | 114 +++++++++++++++++++++++++++++++++++
 pipeline-spec.yaml           |  39 +++++++++++-
 template_functions.py        |   5 +-
 templates/member_detail.html |  59 +++++++++++-------
 templates/members_index.html |  27 +++++++++
 7 files changed, 236 insertions(+), 37 deletions(-)
 create mode 100644 join_mks.py
 create mode 100644 templates/members_index.html

diff --git a/build_members.py b/build_members.py
index 79abdba..2b6666b 100644
--- a/build_members.py
+++ b/build_members.py
@@ -2,7 +2,7 @@
 from template_functions import get_jinja_env
 import logging, os, subprocess
 from datetime import datetime
-from template_functions import build_template
+from template_functions import build_template, get_context
 from constants import MEMBER_URL, POSITION_URL, MINISTRY_URL, FACTION_URL
 
 def main():
@@ -11,19 +11,25 @@ def main():
     jinja_env = get_jinja_env()
     jinja_env.filters['datetime'] = datetimeformat
 
+    all_mks = []
     for descriptor, resource in zip(datapackage["resources"], resources):
         for member in resource:
-            build_template(jinja_env,
-                           "member_detail.html",{
-                               "first_name": member["mk_individual_first_name"],
-                               "last_name": member["mk_individual_name"],
-                               "photo": member["mk_individual_photo"],
-                               "positions": sortpositions(member["positions"]),
-                               "position_url": POSITION_URL,
-                               "ministry_url":MINISTRY_URL,
-                               "faction_url": FACTION_URL},
+            build_template(jinja_env, "member_detail.html",
+                           get_context({"first_name": member["mk_individual_first_name"],
+                                        "last_name": member["mk_individual_name"],
+                                        "photo": member["mk_individual_photo"],
+                                        "positions": sortpositions(member["positions"]),
+                                        "position_url": POSITION_URL,
+                                        "ministry_url": MINISTRY_URL,
+                                        "faction_url": FACTION_URL,
+                                        "source_member_schema": descriptor["schema"],
+                                        "source_member_row": member}),
                            MEMBER_URL.format(member_id=member["mk_individual_id"]))
-
+            all_mks.append({"first_name": member["mk_individual_first_name"],
+                            "last_name": member["mk_individual_name"],
+                            "url": MEMBER_URL.format(member_id=member["mk_individual_id"])})
+    build_template(jinja_env, "members_index.html",
+                   get_context({"all_mks": sorted(all_mks, key=lambda mk: mk["first_name"])}), "members/index.html")
     if os.environ.get("SKIP_STATIC") != "1":
         subprocess.check_call(["mkdir", "-p", "dist"])
         subprocess.check_call(["cp", "-rf", "static", "dist/"])
diff --git a/constants.py b/constants.py
index 7f1dafc..ed4639a 100644
--- a/constants.py
+++ b/constants.py
@@ -6,3 +6,4 @@ FACTION_URL = "factions/{faction_id}.html"
 
 COMMITTEE_LIST_KNESSET_URL = "committees/knesset-{num}.html"
 COMMITTEES_INDEX_URL = "committees/index.html"
+MEMBERS_HOME_URL = "members/index.html"
diff --git a/join_mks.py b/join_mks.py
new file mode 100644
index 0000000..0538732
--- /dev/null
+++ b/join_mks.py
@@ -0,0 +1,114 @@
+from datapackage_pipelines.wrapper import ingest, spew
+import logging, requests
+
+
+parameters, datapackage, resources = ingest()
+aggregations = {"stats": {}}
+kns_mksitecode, kns_person = None, None
+kns_person_descriptor = None
+kns_persontopositions, kns_position = {}, None  # review fix: same (plural) name that the loader and get_person_positions use
+mk_individual_resource, mk_individual_descriptor = None, None
+
+
+mk_altnames = {}
+for mk, mk_name in zip(*requests.get("https://oknesset.org/api/knesset-data/get_all_mk_names.json").json()):
+    mk_altnames.setdefault(int(mk["id"]), set()).add(mk_name.strip())
+
+
+for descriptor, resource in zip(datapackage["resources"], resources):
+    if descriptor["name"] == "kns_mksitecode":
+        kns_mksitecode = {int(row["SiteId"]): row for row in resource}
+    elif descriptor["name"] == "kns_person":
+        kns_person = {int(row["PersonID"]): row for row in resource}
+        kns_person_descriptor = descriptor
+    elif descriptor["name"] == "mk_individual":
+        mk_individual_resource = resource
+        mk_individual_descriptor = descriptor
+    elif descriptor["name"] == "kns_position":
+        kns_position = {int(row["PositionID"]): row for row in resource}
+    elif descriptor["name"] == "kns_persontoposition":
+        kns_persontopositions = {}
+        for row in resource:
+            kns_persontopositions.setdefault(int(row["PersonID"]), []).append(row)
+    else:
+        for row in resource:
+            pass
+
+
+KNOWN_MK_PERSON_IDS = {
+    955: kns_person[30407]  # Yehuda Glick - has a mismatch in name between mk_individual and kns_person
+}
+
+
+# TODO: remove this mk matching function once this bug is fixed: https://github.com/hasadna/knesset-data/issues/147
+def find_matching_kns_person(mk):
+    for person_id, person in kns_person.items():
+        person_first, person_last, person_email = person["FirstName"].strip(), person["LastName"].strip(), person["Email"]
+        mk_first, mk_last, mk_email = mk["mk_individual_first_name"].strip(), mk["mk_individual_name"].strip(), mk["mk_individual_email"]
+        name_match = (len(person_first) > 1 and len(mk_first) > 1 and person_first == mk_first and person_last == mk_last)
+        email_match = (person_email and mk_email
+                       and len(person_email.strip()) > 5 and len(mk_email.strip()) > 5 and
+                       person_email.strip().lower() == mk_email.strip().lower())
+        if name_match or email_match:
+            return person_id, person
+    person = KNOWN_MK_PERSON_IDS.get(int(mk["mk_individual_id"]))
+    if person:
+        return int(person["PersonID"]), person  # review fix: int id, like the int keys of the lookup dicts
+    return None, None
+
+
+def get_person_positions(person_id):
+    for kns_persontoposition_row in kns_persontopositions.get(person_id, []):  # review fix: no positions -> yield nothing instead of KeyError
+        mk_position = {field: kns_persontoposition_row[field] for field in ("KnessetNum",
+                                                                            "GovMinistryID", "GovMinistryName",
+                                                                            "DutyDesc",
+                                                                            "FactionID", "FactionName",
+                                                                            "GovernmentNum",
+                                                                            "CommitteeID", "CommitteeName")}
+        if not parameters.get("filter-knesset-num") or int(mk_position["KnessetNum"]) in parameters["filter-knesset-num"]:
+            position_id = int(kns_persontoposition_row["PositionID"])
+            position = kns_position[position_id]
+            finish_date = kns_persontoposition_row["FinishDate"]
+            mk_position.update(start_date=kns_persontoposition_row["StartDate"].strftime('%Y-%m-%d %H:%M:%S'),
+                               finish_date=finish_date.strftime('%Y-%m-%d %H:%M:%S') if finish_date else None,
+                               position=position["Description"],
+                               position_id=position_id,
+                               gender={250: "f", 251: "m", 252: "o"}[int(position["GenderID"])],)
+            yield {k: v for k, v in mk_position.items() if v}
+
+
+def get_mk_individual_resource(resource):
+    for mk_individual_row in resource:
+        mk_individual_id = int(mk_individual_row["mk_individual_id"])
+        kns_person_id, kns_person_row = None, None
+        mksitecode = kns_mksitecode.get(mk_individual_id)
+        if mksitecode:
+            kns_person_id = int(mksitecode["KnsID"])
+            kns_person_row = kns_person.get(kns_person_id)
+            if not kns_person_row:
+                logging.warning("person mismatch in kns_mksitecode for mk_individual_id {}".format(mk_individual_id))
+                kns_person_id = None
+        if not kns_person_id:
+            kns_person_id, kns_person_row = find_matching_kns_person(mk_individual_row)
+        if not kns_person_id or not kns_person_row:
+            raise Exception("Failed to find matching person for mk_individual {}".format(mk_individual_id))  # review fix: typo in message
+        if parameters.get("filter-is-current") is None or kns_person_row["IsCurrent"] == parameters["filter-is-current"]:
+            mk_individual_row.update(**kns_person_row)
+            mk_individual_row["positions"] = list(get_person_positions(kns_person_id))
+            altnames = mk_altnames.setdefault(mk_individual_id, set())
+            altnames.add("{} {}".format(mk_individual_row["mk_individual_first_name"].strip(),
+                                        mk_individual_row["mk_individual_name"].strip()).strip())
+            altnames.add("{} {}".format(kns_person_row["FirstName"].strip(),
+                                        kns_person_row["LastName"].strip()).strip())  # review fix: both name parts from kns_person_row
+            mk_individual_row["altnames"] = list(altnames)
+            yield mk_individual_row
+
+
+mk_individual_descriptor["schema"]["fields"] += kns_person_descriptor["schema"]["fields"] \
+                                              + [{"name": "positions", "type": "array"},
+                                                 {"name": "altnames", "type": "array"}]
+
+
+spew(dict(datapackage, resources=[mk_individual_descriptor]),
+     [get_mk_individual_resource(mk_individual_resource)],
+     aggregations["stats"])
diff --git a/pipeline-spec.yaml b/pipeline-spec.yaml
index 7bf86cc..63030f9 100644
--- a/pipeline-spec.yaml
+++ b/pipeline-spec.yaml
@@ -141,6 +141,42 @@ download_members:
       parameters:
         out-path: data/members
 
+join-mks:
+  pipeline:
+    - run: load_resource
+      parameters:
+        url: data/members/datapackage.json
+        resource: kns_mksitecode
+    - run: load_resource
+      parameters:
+        url: data/members/datapackage.json
+        resource: kns_persontoposition
+    - run: load_resource
+      parameters:
+        url: data/members/datapackage.json
+        resource: kns_position
+    - run: load_resource
+      parameters:
+        url: data/members/datapackage.json
+        resource: kns_person
+    # join_mks iterates over this resource, it must be the last one
+    - run: load_resource
+      parameters:
+        url: data/members/datapackage.json
+        resource: mk_individual
+    - run: join_mks
+#      parameters:
+#        filter-knesset-num: [20]
+#        filter-is-current: true
+#    - run: filter
+#      parameters:
+#        resources: mk_individual
+#        in:
+#          - mk_individual_id: 109
+    - run: dump.to_path
+      parameters:
+        out-path: data/mks-joined
+
 build:
   pipeline:
     # all these tables are loaded into memory
@@ -190,9 +226,8 @@ create_members:
     # all these tables are loaded into memory
     - run: load_resource
       parameters:
-        url: data/members_aggr/datapackage.json
+        url: data/mks-joined/datapackage.json
         resource: mk_individual
-    - run: stream_remote_resources
     - run: build_members
 
 join-attendance-data:
diff --git a/template_functions.py b/template_functions.py
index 597181c..aef70ca 100644
--- a/template_functions.py
+++ b/template_functions.py
@@ -1,7 +1,7 @@
 from jinja2 import Environment, FileSystemLoader, select_autoescape
 import os, logging
 import socket, datetime
-from constants import COMMITTEES_INDEX_URL
+from constants import COMMITTEES_INDEX_URL, MEMBERS_HOME_URL
 
 
 def get_jinja_env():
@@ -30,4 +30,5 @@ def get_context(context):
     return dict(context, **{"create_hostname": socket.getfqdn(),
                             "create_time": datetime.datetime.now().strftime("%H:%M"),
                             "create_date": datetime.datetime.now().strftime("%d/%m/%Y"),
-                            "committeelist_url": COMMITTEES_INDEX_URL})
+                            "committeelist_url": COMMITTEES_INDEX_URL,
+                            "members_home_url": MEMBERS_HOME_URL})
diff --git a/templates/member_detail.html b/templates/member_detail.html
index a9b973a..8197418 100644
--- a/templates/member_detail.html
+++ b/templates/member_detail.html
@@ -1,6 +1,7 @@
 {% extends "site_base.html" %}
 
 {% block breadcrumbs %}
+
  • ח"כים וסיעות /
  • {{first_name}} {{last_name}}
  • {% endblock %} @@ -27,11 +28,12 @@

    {{first_name}} {{last_name}}

    - תפקדים + תפקידים

    -{% endblock %} \ No newline at end of file +{% endblock %} + +{% block data_sources %} + {{ super() }} +

    להלן נתוני המקור כפי שהם מופיעים במאגרי המידע של הכנסת באמצעות ממשק מידע פרלמנטרי זמין -

    +

    ח"כ

    + {% for field in source_member_schema.fields %} + {% if field.name == "positions" %} + positions: + + {% elif field.description and source_member_row[field.name] %} + {{field.description}}:

    {{source_member_row[field.name]}}

    + {% else %} + {{field.name}}:

    {{source_member_row[field.name]}}

    + {% endif %} + {% endfor %} +{% endblock %} diff --git a/templates/members_index.html b/templates/members_index.html new file mode 100644 index 0000000..fe0e018 --- /dev/null +++ b/templates/members_index.html @@ -0,0 +1,27 @@ +{% extends "site_base.html" %} + +{% block breadcrumbs %} +
  • ח"כים וסיעות
  • +{% endblock %} + +{% block divcontent %} +
    +
    +
    +
    +

    כל הח"כים

    +
    + +
    +
    +
    + +
    +
    +{% endblock %}