# 47th Australian Parliament basic statistics
*Note:* you need to run the `download.sh` script before using this notebook, to get the data.
## Average politician in the current parliament
... is a 52-year-old man.

In [1]:
import sqlite3
import pathlib
import pandas as pd
import geopandas as gpd

# All data is read from ../data relative to this notebook; externally sourced files sit in data/external.
root_data_dir = pathlib.Path("..").resolve() / "data"
ext_data_dir = root_data_dir / "external"

gpkg = root_data_dir / "aped.gpkg"
# sqlite3.connect() creates a new, empty database when the file does not exist, which
# would only surface later as a confusing "no such table: minister" error. Fail early instead.
if not gpkg.is_file():
    raise FileNotFoundError(
        f"{gpkg} not found - the note at the top of this notebook says to run download.sh first"
    )
db_con = sqlite3.connect(gpkg)
ministers = pd.read_sql("SELECT * from minister", db_con)
divisions = gpd.read_file(ext_data_dir / "2021_ELB.gpkg", layer="2021_ELB")
# Display one row per district among the representatives (is_representative == 1) so the
# districts can be compared with `divisions` by eye; nothing is asserted here.
ministers[ministers["is_representative"].astype("bool")].drop_duplicates("district")

Unnamed: 0,id,member,party,group,district,is_senator,is_representative,graduated,mp_id,start,wiki link,district_link
0,1,Aaron Violi,Coalition,http://www.wikidata.org/entity/Q1065320,Casey,0,1,1,300147,2022-05-21T00:00:00Z,http://www.wikidata.org/entity/Q112114544,http://www.wikidata.org/entity/Q2973490
1,2,Adam Bandt,Australian Greens,http://www.wikidata.org/entity/Q781486,Melbourne,0,1,1,M3C,2010-08-21T00:00:00Z,http://www.wikidata.org/entity/Q4678672,http://www.wikidata.org/entity/Q1635847
3,4,Alex Hawke,Coalition,http://www.wikidata.org/entity/Q1065320,Mitchell,0,1,1,HWO,2007-11-24T00:00:00Z,http://www.wikidata.org/entity/Q4717151,http://www.wikidata.org/entity/Q2973776
4,5,Alicia Payne,Australian Labor Party,http://www.wikidata.org/entity/Q216082,Division of Canberra,0,1,1,144732,2019-05-18T00:00:00Z,http://www.wikidata.org/entity/Q64223413,http://www.wikidata.org/entity/Q2973482
5,6,Alison Byrnes,Australian Labor Party,http://www.wikidata.org/entity/Q216082,Cunningham,0,1,1,299145,2022-05-21T00:00:00Z,http://www.wikidata.org/entity/Q111772495,http://www.wikidata.org/entity/Q182615
...,...,...,...,...,...,...,...,...,...,...,...,...
221,222,Zali Steggall,Independent,,Warringah,0,1,1,175696,2019-05-18T00:00:00Z,http://www.wikidata.org/entity/Q145103,http://www.wikidata.org/entity/Q1074025
222,223,Zaneta Mascarenhas,Australian Labor Party,http://www.wikidata.org/entity/Q216082,Swan,0,1,1,298800,2022-05-21T00:00:00Z,http://www.wikidata.org/entity/Q112106576,http://www.wikidata.org/entity/Q2973857
223,224,Zoe Daniel,,,Goldstein,0,1,1,008CH,2022-05-21T00:00:00Z,http://www.wikidata.org/entity/Q65628647,http://www.wikidata.org/entity/Q2973625
224,225,Zoe McKenzie,Coalition,http://www.wikidata.org/entity/Q1065320,Flinders,0,1,1,124514,2022-05-21T00:00:00Z,http://www.wikidata.org/entity/Q112114762,http://www.wikidata.org/entity/Q2973570


In [3]:
from utils import get_dob_gender_from_wikidata

# Apply get_dob_gender_from_wikidata to every member's "wiki link" value.
# The following cell reads the "dob" and "genderLabel" columns of the result.
wiki_links = ministers["wiki link"]
df = wiki_links.apply(get_dob_gender_from_wikidata)


In [None]:
# Attach the looked-up date of birth (parsed to datetimes) and gender label to the members table.
ministers = ministers.assign(
    dob=pd.to_datetime(df["dob"]),
    gender_ident=df["genderLabel"],
)


In [26]:
# Head-count for every (party, gender) combination, flattened into a tidy table.
party_counts = (
    ministers
    .groupby(["party", "gender_ident"])
    .size()
    .rename("count")
    .reset_index()
)
party_counts

Unnamed: 0,party,gender_ident,count
0,Australian Greens,female,8
1,Australian Greens,male,7
2,Australian Labor Party,female,54
3,Australian Labor Party,male,49
4,Centre Alliance,female,1
5,Coalition,female,25
6,Coalition,male,63
7,Independent,female,5
8,Jacqui Lambie Network,female,2
9,Katter's Australian Party,male,1


In [36]:
# Age in whole years as of today for every member, then mean and standard deviation per party.
from datetime import date

today = date.today()
birth_dates = ministers["dob"]
# This year's birthday is still to come when the (month, day) of birth lies after today's.
birthday_pending = (birth_dates.dt.month > today.month) | (
    (birth_dates.dt.month == today.month) & (birth_dates.dt.day > today.day)
)
# Vectorised through the .dt accessor rather than a per-row lambda; a missing dob (NaT) stays NaN.
ministers["age"] = today.year - birth_dates.dt.year - birthday_pending.astype(int)
ministers.groupby(["party"]).agg({"age": ["mean", "std"]})

Unnamed: 0_level_0,age,age
Unnamed: 0_level_1,mean,std
party,Unnamed: 1_level_2,Unnamed: 2_level_2
Australian Greens,49.846154,11.356688
Australian Labor Party,52.361702,9.109668
Centre Alliance,50.0,
Coalition,52.753086,8.127932
Independent,53.0,5.567764
Jacqui Lambie Network,52.0,0.0
Katter's Australian Party,77.0,
One Nation,67.5,0.707107
United Australia Party,39.0,


In [38]:
import plotly.express as px


# Bar chart of member counts per party, coloured by gender.
fig = px.bar(
    party_counts,
    x="party",
    y="count",
    color="gender_ident",
    title="Gender breakdown of Australian politicians by party",
)
fig.show()

In [62]:
# One row per (member, high school) pair, enriched with sector, state and remoteness from the
# acara_school_locations_2022 table. All joins are inner joins, so a member with no matched
# high school does not appear in the result.
ministers_education = pd.read_sql("""
SELECT m.member, m.party, m.district, m.is_representative, m.is_senator, m.graduated,  e.school_name, a."School Sector", a.State, a."ABS Remoteness Area"
FROM minister m
JOIN minister_education me on m.id = me.minister_id
JOIN education e ON e.fid = me.education_id
JOIN education_acara ea on e.fid = ea.education_id
JOIN acara_school_locations_2022 a on ea.acara_id = a.fid
WHERE e.is_high_school = 1
ORDER BY m.id DESC
""", db_con
)
# Show a preview instead of calling DataFrame.to_clipboard(): the clipboard call needs a
# system clipboard (it raises on headless kernels) and left this cell with no visible output.
ministers_education.head()

In [44]:
# Frequency of each school sector across the joined rows (this counts rows, not distinct members).
sector_of_each_row = ministers_education["School Sector"]
sector_of_each_row.value_counts()

Government     120
Independent     75
Catholic        50
Name: School Sector, dtype: int64

In [61]:
def _overall_school_sector(sectors):
    """Collapse all "School Sector" values of one member into a single label.

    Returns "Both" when Government appears together with Independent or Catholic,
    "none" when none of the three sectors appears, and otherwise the sector that
    was attended (Independent wins over Catholic when both are present).
    """
    observed = sectors.values
    went_government = "Government" in observed
    went_independent = "Independent" in observed
    went_catholic = "Catholic" in observed

    if went_government:
        return "Both" if (went_independent or went_catholic) else "Government"
    if went_independent:
        return "Independent"
    if went_catholic:
        return "Catholic"
    return "none"


# Broadcast the per-member label back onto every row belonging to that member.
ministers_education["school_sector"] = (
    ministers_education.groupby("member")["School Sector"].transform(_overall_school_sector)
)

# Keep one row per (member, label) pair before counting members per party and label.
deduped_members = ministers_education.drop_duplicates(["member", "school_sector"])
school_sector_counts = (
    deduped_members
    .groupby(["party", "school_sector"])
    .size()
    .reset_index(name="count")
)
school_sector_counts

Unnamed: 0,school_sector,party,count
0,Both,Australian Greens,1
1,Both,Australian Labor Party,8
2,Both,Coalition,6
3,Both,One Nation,1
4,Catholic,Australian Labor Party,16
5,Catholic,Coalition,19
6,Catholic,Independent,3
7,Catholic,United Australia Party,1
8,Government,Australian Greens,8
9,Government,Australian Labor Party,48


In [60]:
# Plot the number of members per party, coloured by the school sector they attended.

members_by_sector = ministers_education.drop_duplicates(["member", "school_sector"])
school_sector_counts = (
    members_by_sector
    .groupby(["school_sector", "party"])
    .size()
    .reset_index(name="count")
)
fig = px.bar(
    school_sector_counts,
    x="party",
    y="count",
    color="school_sector",
    title="School sector breakdown of Australian politicians by party",
)
fig.show()

ministers

In [65]:
# Member counts per (party, school_sector).
# "school_sector" is a derived column: re-running the SQL cell rebuilds ministers_education
# without it, and drop_duplicates then fails with a bare KeyError. Check up front and say
# what to do instead.
if "school_sector" not in ministers_education.columns:
    raise KeyError(
        "ministers_education has no 'school_sector' column - "
        "re-run the cell that derives it from 'School Sector' first"
    )
school_sector_counts = ministers_education.drop_duplicates(["member", "school_sector"]).groupby([ "party", "school_sector"]).size().reset_index(name='count')
# Alternative view (sector on the x axis, coloured by party), left disabled as in the original:
# fig = px.bar(school_sector_counts, x="school_sector", y="count", color="party", title="School sector breakdown of Australian politicians by party")
# fig.show()

KeyError: Index(['school_sector'], dtype='object')