From af5e158030a149ef54bbbcc51ba954fefe96e303 Mon Sep 17 00:00:00 2001
From: Ori Hoch
Date: Tue, 30 Jan 2018 13:27:31 +0200
Subject: [PATCH] minor cosmetic fixes to member pages, added index page with
names of all members, added join-mks pipeline
---
build_members.py | 28 +++++----
constants.py | 1 +
join_mks.py | 114 +++++++++++++++++++++++++++++++++++
pipeline-spec.yaml | 39 +++++++++++-
template_functions.py | 5 +-
templates/member_detail.html | 59 +++++++++++-------
templates/members_index.html | 27 +++++++++
7 files changed, 236 insertions(+), 37 deletions(-)
create mode 100644 join_mks.py
create mode 100644 templates/members_index.html
diff --git a/build_members.py b/build_members.py
index 79abdba..2b6666b 100644
--- a/build_members.py
+++ b/build_members.py
@@ -2,7 +2,7 @@
from template_functions import get_jinja_env
import logging, os, subprocess
from datetime import datetime
-from template_functions import build_template
+from template_functions import build_template, get_context
from constants import MEMBER_URL, POSITION_URL, MINISTRY_URL, FACTION_URL
def main():
@@ -11,19 +11,25 @@ def main():
jinja_env = get_jinja_env()
jinja_env.filters['datetime'] = datetimeformat
+ all_mks = []
for descriptor, resource in zip(datapackage["resources"], resources):
for member in resource:
- build_template(jinja_env,
- "member_detail.html",{
- "first_name": member["mk_individual_first_name"],
- "last_name": member["mk_individual_name"],
- "photo": member["mk_individual_photo"],
- "positions": sortpositions(member["positions"]),
- "position_url": POSITION_URL,
- "ministry_url":MINISTRY_URL,
- "faction_url": FACTION_URL},
+ build_template(jinja_env, "member_detail.html",
+ get_context({"first_name": member["mk_individual_first_name"],
+ "last_name": member["mk_individual_name"],
+ "photo": member["mk_individual_photo"],
+ "positions": sortpositions(member["positions"]),
+ "position_url": POSITION_URL,
+ "ministry_url": MINISTRY_URL,
+ "faction_url": FACTION_URL,
+ "source_member_schema": descriptor["schema"],
+ "source_member_row": member}),
MEMBER_URL.format(member_id=member["mk_individual_id"]))
-
+ all_mks.append({"first_name": member["mk_individual_first_name"],
+ "last_name": member["mk_individual_name"],
+ "url": MEMBER_URL.format(member_id=member["mk_individual_id"])})
+ build_template(jinja_env, "members_index.html",
+ get_context({"all_mks": sorted(all_mks, key=lambda mk: mk["first_name"])}), "members/index.html")
if os.environ.get("SKIP_STATIC") != "1":
subprocess.check_call(["mkdir", "-p", "dist"])
subprocess.check_call(["cp", "-rf", "static", "dist/"])
diff --git a/constants.py b/constants.py
index 7f1dafc..ed4639a 100644
--- a/constants.py
+++ b/constants.py
@@ -6,3 +6,4 @@
FACTION_URL = "factions/{faction_id}.html"
COMMITTEE_LIST_KNESSET_URL = "committees/knesset-{num}.html"
COMMITTEES_INDEX_URL = "committees/index.html"
+MEMBERS_HOME_URL = "members/index.html"
diff --git a/join_mks.py b/join_mks.py
new file mode 100644
index 0000000..0538732
--- /dev/null
+++ b/join_mks.py
@@ -0,0 +1,114 @@
+from datapackage_pipelines.wrapper import ingest, spew
+import logging, requests
+
+
+# Pipeline inputs; the lookup tables below are filled from the datapackage resources further down.
+parameters, datapackage, resources = ingest()
+aggregations = {"stats": {}}
+kns_mksitecode, kns_person, kns_person_descriptor = None, None, None
+kns_persontopositions, kns_position = None, None
+mk_individual_resource, mk_individual_descriptor = None, None
+
+
+# Names known for each MK id, as returned by the oknesset API (downloaded once, at startup).
+mk_altnames = {}
+for mk, mk_name in zip(*requests.get("https://oknesset.org/api/knesset-data/get_all_mk_names.json").json()):
+    mk_altnames.setdefault(int(mk["id"]), set()).add(mk_name.strip())
+
+
+for descriptor, resource in zip(datapackage["resources"], resources):
+    if descriptor["name"] == "kns_mksitecode":
+        kns_mksitecode = {int(row["SiteId"]): row for row in resource}
+    elif descriptor["name"] == "kns_person":
+        kns_person, kns_person_descriptor = {int(row["PersonID"]): row for row in resource}, descriptor
+    elif descriptor["name"] == "mk_individual":
+        # not consumed here: get_mk_individual_resource() streams these rows later on
+        mk_individual_resource, mk_individual_descriptor = resource, descriptor
+    elif descriptor["name"] == "kns_position":
+        kns_position = {int(row["PositionID"]): row for row in resource}
+    elif descriptor["name"] == "kns_persontoposition":
+        kns_persontopositions = {}
+        for row in resource:
+            kns_persontopositions.setdefault(int(row["PersonID"]), []).append(row)
+    else:
+        for row in resource:  # consume and discard resources this step does not use
+            pass
+if any(table is None for table in (kns_mksitecode, kns_person, kns_position, kns_persontopositions, mk_individual_resource)):
+    raise Exception("join_mks requires the kns_mksitecode, kns_person, kns_position, kns_persontoposition and mk_individual resources")
+
+# Hard-coded matches: Yehuda Glick has a mismatch in name between mk_individual and kns_person
+KNOWN_MK_PERSON_IDS = {955: kns_person[30407]}
+
+
+# TODO: remove this mk matching function once this bug is fixed: https://github.com/hasadna/knesset-data/issues/147
+def find_matching_kns_person(mk):
+    """Return (person_id, kns_person row) for the mk_individual row ``mk``, or (None, None) when unmatched."""
+    mk_first, mk_last = mk["mk_individual_first_name"].strip(), mk["mk_individual_name"].strip()
+    mk_email = (mk["mk_individual_email"] or "").strip().lower()
+    for person_id, person in kns_person.items():
+        person_email = (person["Email"] or "").strip().lower()
+        # one-character first names and emails of five characters or fewer never count as a match
+        name_match = (len(mk_first) > 1 and person["FirstName"].strip() == mk_first
+                      and person["LastName"].strip() == mk_last)
+        if name_match or (len(mk_email) > 5 and person_email == mk_email):
+            return person_id, person
+    # nothing matched by name or email: fall back to the hard-coded KNOWN_MK_PERSON_IDS overrides
+    person = KNOWN_MK_PERSON_IDS.get(int(mk["mk_individual_id"]))
+    return (int(person["PersonID"]), person) if person else (None, None)
+
+
+def get_person_positions(person_id):
+    """Yield a dict per kns_persontoposition row of the person, without falsy values; honours filter-knesset-num."""
+    fields = ("KnessetNum", "GovMinistryID", "GovMinistryName", "DutyDesc", "FactionID", "FactionName",
+              "GovernmentNum", "CommitteeID", "CommitteeName")
+    knesset_nums = parameters.get("filter-knesset-num")
+    # a person without any kns_persontoposition rows simply has no positions
+    for row in kns_persontopositions.get(person_id, []):
+        if knesset_nums and int(row["KnessetNum"]) not in knesset_nums:
+            continue
+        position_id = int(row["PositionID"])
+        position, finish_date = kns_position[position_id], row["FinishDate"]
+        mk_position = {field: row[field] for field in fields}
+        mk_position.update(start_date=row["StartDate"].strftime('%Y-%m-%d %H:%M:%S'),
+                           finish_date=finish_date.strftime('%Y-%m-%d %H:%M:%S') if finish_date else None,
+                           position=position["Description"],
+                           position_id=position_id,
+                           gender={250: "f", 251: "m", 252: "o"}[int(position["GenderID"])])
+        yield {k: v for k, v in mk_position.items() if v}
+
+
+def get_mk_individual_resource(resource):
+    """Yield each mk_individual row merged with its kns_person row, plus "positions" and "altnames".
+
+    Rows whose person does not satisfy the filter-is-current parameter are skipped.
+    Raises Exception when no kns_person row can be matched to an MK."""
+    is_current = parameters.get("filter-is-current")
+    for mk_row in resource:
+        mk_id = int(mk_row["mk_individual_id"])
+        mksitecode = kns_mksitecode.get(mk_id)
+        person_id = int(mksitecode["KnsID"]) if mksitecode else None
+        person_row = kns_person.get(person_id)
+        if mksitecode and not person_row:
+            logging.warning("person mismatch in kns_mksitecode for mk_individual_id {}".format(mk_id))
+        if not person_id or not person_row:  # no usable kns_mksitecode link: match by name / email
+            person_id, person_row = find_matching_kns_person(mk_row)
+        if not person_id or not person_row:
+            raise Exception("Failed to find matching person for mk_individual {}".format(mk_id))
+        if is_current is None or person_row["IsCurrent"] == is_current:
+            mk_row.update(person_row)
+            mk_row["positions"] = list(get_person_positions(person_id))
+            altnames = mk_altnames.setdefault(mk_id, set())
+            altnames.add("{} {}".format(mk_row["mk_individual_first_name"].strip(), mk_row["mk_individual_name"].strip()).strip())
+            altnames.add("{} {}".format(person_row["FirstName"].strip(), person_row["LastName"].strip()).strip())
+            mk_row["altnames"] = sorted(altnames)  # sorted so the emitted rows are reproducible
+            yield mk_row
+
+
+# Output schema: mk_individual fields, then the kns_person fields, then the two computed arrays.
+joined_fields = kns_person_descriptor["schema"]["fields"] + [{"name": "positions", "type": "array"},
+                                                             {"name": "altnames", "type": "array"}]
+mk_individual_descriptor["schema"]["fields"].extend(joined_fields)
+
+# Emit mk_individual as the only resource; its rows come lazily from the generator.
+spew(dict(datapackage, resources=[mk_individual_descriptor]),
+     [get_mk_individual_resource(mk_individual_resource)], aggregations["stats"])
diff --git a/pipeline-spec.yaml b/pipeline-spec.yaml
index 7bf86cc..63030f9 100644
--- a/pipeline-spec.yaml
+++ b/pipeline-spec.yaml
@@ -141,6 +141,42 @@ download_members:
parameters:
out-path: data/members
+join-mks:
+ pipeline:
+ - run: load_resource
+ parameters:
+ url: data/members/datapackage.json
+ resource: kns_mksitecode
+ - run: load_resource
+ parameters:
+ url: data/members/datapackage.json
+ resource: kns_persontoposition
+ - run: load_resource
+ parameters:
+ url: data/members/datapackage.json
+ resource: kns_position
+ - run: load_resource
+ parameters:
+ url: data/members/datapackage.json
+ resource: kns_person
+ # join_mks iterates over this resource, so it must be the last one loaded
+ - run: load_resource
+ parameters:
+ url: data/members/datapackage.json
+ resource: mk_individual
+ - run: join_mks
+# parameters:
+# filter-knesset-num: [20]
+# filter-is-current: true
+# - run: filter
+# parameters:
+# resources: mk_individual
+# in:
+# - mk_individual_id: 109
+ - run: dump.to_path
+ parameters:
+ out-path: data/mks-joined
+
build:
pipeline:
# all these tables are loaded into memory
@@ -190,9 +226,8 @@ create_members:
# all these tables are loaded into memory
- run: load_resource
parameters:
- url: data/members_aggr/datapackage.json
+ url: data/mks-joined/datapackage.json
resource: mk_individual
- - run: stream_remote_resources
- run: build_members
join-attendance-data:
diff --git a/template_functions.py b/template_functions.py
index 597181c..aef70ca 100644
--- a/template_functions.py
+++ b/template_functions.py
@@ -1,7 +1,7 @@
from jinja2 import Environment, FileSystemLoader, select_autoescape
import os, logging
import socket, datetime
-from constants import COMMITTEES_INDEX_URL
+from constants import COMMITTEES_INDEX_URL, MEMBERS_HOME_URL
def get_jinja_env():
@@ -30,4 +30,5 @@ def get_context(context):
return dict(context, **{"create_hostname": socket.getfqdn(),
"create_time": datetime.datetime.now().strftime("%H:%M"),
"create_date": datetime.datetime.now().strftime("%d/%m/%Y"),
- "committeelist_url": COMMITTEES_INDEX_URL})
+ "committeelist_url": COMMITTEES_INDEX_URL,
+ "members_home_url": MEMBERS_HOME_URL})
diff --git a/templates/member_detail.html b/templates/member_detail.html
index a9b973a..8197418 100644
--- a/templates/member_detail.html
+++ b/templates/member_detail.html
@@ -1,6 +1,7 @@
{% extends "site_base.html" %}
{% block breadcrumbs %}
+ ח"כים וסיעות /
{{first_name}} {{last_name}}
{% endblock %}
@@ -27,11 +28,12 @@ {{first_name}} {{last_name}}
-
- {{position.position}}
-
-
-
-
{% if position.FactionID %}
-
- סיעה {{position.FactionName}}
-
+
+ {{position.FactionName}}
+
+ {% elif position.GovMinistryID %}
+
+ {{position.GovMinistryName}}
+
+ {% elif position.CommitteeID %}
+
+ {{position.CommitteeName}}
+
{% endif %}
-
- {% if position.GovMinistryID %}
-
- {% if position.GovMinistryID == 28 %}
- {{position.GovMinistryName}}
- {% else %}
- משרד ממשלתי : {{position.GovMinistryName}}
- {% endif %}
-
- {% endif %}
-
+
+
+ {{position.position}}
+ {% endif %}
{% endfor %}
-{% endblock %}
\ No newline at end of file
+{% endblock %}
+
+{% block data_sources %}
+ {{ super() }}
+ להלן נתוני המקור כפי שהם מופיעים במאגרי המידע של הכנסת באמצעות ממשק מידע פרלמנטרי זמין -
+ ח"כ
+ {% for field in source_member_schema.fields %}
+ {% if field.name == "positions" %}
+ positions:
+ {% for position in positions %}
+ - {{position}}
+ {% endfor %}
+ {% elif field.description and source_member_row[field.name] %}
+ {{field.description}}: {{source_member_row[field.name]}}
+ {% else %}
+ {{field.name}}: {{source_member_row[field.name]}}
+ {% endif %}
+ {% endfor %}
+{% endblock %}
diff --git a/templates/members_index.html b/templates/members_index.html
new file mode 100644
index 0000000..fe0e018
--- /dev/null
+++ b/templates/members_index.html
@@ -0,0 +1,27 @@
+{% extends "site_base.html" %}
+
+{% block breadcrumbs %}
+ ח"כים וסיעות
+{% endblock %}
+
+{% block divcontent %}
+
+{% endblock %}