# Basic Analysis

We are using KeplerGL
```
jupyter nbextension install --py --sys-prefix keplergl # can be skipped for notebook 5.3 and above
jupyter nbextension enable --py --sys-prefix keplergl # can be skipped for notebook 5.3 and above
```

In [5]:
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [6]:
import geopandas as gpd
import pathlib
import sqlite3
import pandas as pd

data_dir = pathlib.Path("..").resolve() / "data"
db_con = sqlite3.connect(str(data_dir / "ppm.gpkg"))
# Create connection and load spatialite extension
df = pd.read_sql('SELECT * FROM "ministers" AS "a" JOIN "education" AS "b" ON ("a"."school name" = "b"."school_name");', db_con)
# df = gpd.read_postgis('SELECT * FROM "ministers" AS "a" JOIN "education" AS "b" ON ("a"."school name" = "b"."school_name");', db_con, geom_col="geom")
geometry = gpd.GeoSeries.from_wkt(df.wkt)
df["Longitude"] =geometry.x
df["Latitude" ] =geometry.y
# gdf = gpd.GeoDataFrame(df, geometry=geometry, crs=4326)
# gdf.drop(columns=["geom"], inplace=True)
df.drop(columns=["geom", "wkt"], inplace=True)

In [7]:
from keplergl import KeplerGl
map = KeplerGl(height=1600)
map.add_data(data=df, name="schools")
map

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


  return df.to_dict('split')
Out of range float values are not JSON compliant
Supporting this message is deprecated in jupyter-client 7, please make sure your message is JSON-compliant
  content = self.pack(content)


KeplerGl(data={'schools': {'index': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,…

In [11]:
df_missing_high_school = pd.read_sql('SELECT * FROM "ministers" AS "a" LEFT OUTER JOIN (SELECT * FROM "education" WHERE "is_high_school" = 1) AS "b" ON ("a"."school name" = "b"."school_name") ;', db_con)

In [12]:
members_with_no_hs = df_missing_high_school[~df_missing_high_school.member.isin(df_missing_high_school [df_missing_high_school.is_high_school > 0]["member"])].sort_values(by=["member"])

In [13]:
members_with_no_hs

Unnamed: 0,fid,rowid,index,wiki link,schoollink,school name,start,district_link,group,member,...,geom,ogc_fid,school_name,school_link,is_university,is_high_school,is_alt_edu,operational_status,wkt,is_public
356,358,359,97,http://www.wikidata.org/entity/Q16732352,http://www.wikidata.org/entity/Q15574,University of Adelaide,2020-02-06T00:00:00Z,http://www.wikidata.org/entity/Q56649110,http://www.wikidata.org/entity/Q1065320,Andrew McLachlan,...,,,,,,,,,,
358,360,361,99,http://www.wikidata.org/entity/Q16732352,http://www.wikidata.org/entity/Q4824219,Australian Graduate School of Management,2020-02-06T00:00:00Z,http://www.wikidata.org/entity/Q56649110,http://www.wikidata.org/entity/Q1065320,Andrew McLachlan,...,,,,,,,,,,
357,359,360,98,http://www.wikidata.org/entity/Q16732352,http://www.wikidata.org/entity/Q160302,University of Edinburgh,2020-02-06T00:00:00Z,http://www.wikidata.org/entity/Q56649110,http://www.wikidata.org/entity/Q1065320,Andrew McLachlan,...,,,,,,,,,,
141,142,142,141,http://www.wikidata.org/entity/Q25756194,http://www.wikidata.org/entity/Q1144750,Queensland University of Technology,2016-07-02T00:00:00Z,http://www.wikidata.org/entity/Q2973569,http://www.wikidata.org/entity/Q1065320,Andrew Wallace,...,,,,,,,,,,
174,176,176,175,http://www.wikidata.org/entity/Q64216519,http://www.wikidata.org/entity/Q127990,Australian National University,2019-05-18T00:00:00Z,http://www.wikidata.org/entity/Q2973737,http://www.wikidata.org/entity/Q1065320,Anne Webster,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80,81,81,80,http://www.wikidata.org/entity/Q7970178,,Unknown,2010-08-21T00:00:00Z,http://www.wikidata.org/entity/Q550273,http://www.wikidata.org/entity/Q1065320,Warren Entsch,...,,,,,,,,,,
326,328,329,67,http://www.wikidata.org/entity/Q61983633,,Unknown,2019-03-06T00:00:00Z,http://www.wikidata.org/entity/Q56649108,http://www.wikidata.org/entity/Q1065320,Wendy Askew,...,,,,,,,,,,
224,226,226,225,http://www.wikidata.org/entity/Q112106576,http://www.wikidata.org/entity/Q1145497,Curtin University,2022-05-21T00:00:00Z,http://www.wikidata.org/entity/Q2973857,http://www.wikidata.org/entity/Q216082,Zaneta Mascarenhas,...,,,,,,,,,,
241,243,243,242,http://www.wikidata.org/entity/Q65628647,,University of South Australia,2022-05-21T00:00:00Z,http://www.wikidata.org/entity/Q2973625,,Zoe Daniel,...,,,,,,,,,,


# Get MP IDs
The parliamentary handbook has secondary school if you know MP Ids.
Lets fetch them

In [1]:
# Add mp id to all ministers
import geopandas as gpd
import pathlib
import sqlite3
import pandas as pd

data_dir = pathlib.Path("..").resolve() / "data"
db_con = sqlite3.connect(str(data_dir / "ppm.gpkg"))
# Create connection and load spatialite extension
df = pd.read_sql('SELECT * FROM "ministers"', db_con)
from pollypedagogy.utils import *
ph_ids = df["wiki link"].drop_duplicates().apply(get_ph_id_from_wikidata)
ph_ids

0         HX4
1         R36
4         83M
6       00AMR
7       00AMT
        ...  
371    300639
372    298839
373    300707
374    300644
375    300706
Name: wiki link, Length: 225, dtype: object

In [8]:
wiki_links = pd.DataFrame(df["wiki link"].drop_duplicates())


In [9]:
wiki_links["mp_ids"] = ph_ids
wiki_links

Unnamed: 0,wiki link,mp_ids
0,http://www.wikidata.org/entity/Q4932983,HX4
1,http://www.wikidata.org/entity/Q335697,R36
4,http://www.wikidata.org/entity/Q7684036,83M
6,http://www.wikidata.org/entity/Q5052776,00AMR
7,http://www.wikidata.org/entity/Q6761645,00AMT
...,...,...
371,http://www.wikidata.org/entity/Q112129143,300639
372,http://www.wikidata.org/entity/Q112149315,298839
373,http://www.wikidata.org/entity/Q112152550,300707
374,http://www.wikidata.org/entity/Q112581963,300644


In [11]:
ministers = df.merge(wiki_links, how="left")

In [12]:
ministers.to_sql("ministers", db_con, if_exists="replace", index=False)

441