In [285]:
import pandas as pd, numpy as np, math, time

candidates = pd.read_csv("./candidates.csv", index_col="code")

# Expand the short party labels (e.g. "UAP") to the full names used in the output.
# The replacements are applied one after another, in this order.
party_names = {
    "Pesec": "Anthony Pesec",
    "UAP": "United Australia",
    "Anning": "Fraser Anning",
    "Sustainable": "Sustainable Australia",
}
for short_name, full_name in party_names.items():
    candidates["party"] = candidates["party"].str.replace(short_name, full_name, regex=False)

voters = pd.read_csv("./voters.csv", index_col="voter_id")

# One row per candidate from code "H" onwards, keeping the columns up to and
# including "candidate". .copy() makes this an independent frame, so adding the
# tally columns below neither warns (SettingWithCopyWarning) nor risks writing
# into a view of `candidates`.
results = candidates.loc["H":, :"candidate"].copy()
results["first"] = 0
results["Katy Gallagher"] = 0
# These three columns later receive fractional (surplus-weighted) tallies, so
# they start out as floats rather than being upcast from int mid-loop.
results["Zed Seselja"] = 0.0
results["Penny Kyburz"] = 0.0
results["exhausted votes"] = 0.0

# Shuffled copy of every ballot (frac=1 keeps all rows).
sample = voters.sample(frac=1)

# Quota = floor(ballots / 3) + 1. The remainder of the division is discarded
# *before* adding one; rounding up instead would overshoot by one vote whenever
# the ballot count is not a multiple of 3.
# NOTE(review): the divisor 3 is presumably "seats + 1" for a two-seat contest - confirm.
quota = len(sample) // 3 + 1

# first preference votes
# An above-the-line ballot is credited to the first-listed candidate of its
# party. Build that party -> candidate-code lookup once instead of filtering
# the whole `results` frame for every ballot.
lead_candidate = {}
for code, party in results["party"].items():
    lead_candidate.setdefault(party, code)

total_votes = len(sample)
start = time.time()
interval = 1
print("sorting first preference votes ...", end="\r")
# Iterate over the preference strings directly; this also stays correct if the
# voter index happens to contain duplicate ids.
for i, prefs in enumerate(sample["prefs"]):
    pref = prefs[0]
    if candidates.at[pref, "type"] == "below":
        # below the line votes: credited to the candidate that was marked
        code = pref
    else:
        # above the line votes: credited to the party's lead candidate
        code = lead_candidate[candidates.at[pref, "party"]]
    results.at[code, "first"] += 1
    # Refresh the progress line roughly once per elapsed second.
    if time.time() - interval > start:
        interval = interval + 1
        print(f"sorting first preference votes ... {(i + 1) / total_votes:.1%}", end="\r")
print("sorting first preference votes ... complete")

# surplus weight
# Fraction of each ballot credited to row "T" that carries on as surplus:
# (first preferences - quota) / first preferences.
# NOTE(review): row "T" is presumably Katy Gallagher - confirm against candidates.csv.
gallagher_first = results.at["T", "first"]
weight = (gallagher_first - quota) / gallagher_first

# Row "T" retains exactly one quota in its own column.
results.at["T", "Katy Gallagher"] = quota

# Rows "H" and "J" keep all of their own first preferences in their own columns.
for code, column in (("H", "Zed Seselja"), ("J", "Penny Kyburz")):
    results.at[code, column] = results.at[code, "first"]

def distribute(first, prefs):
    """Decide where a single ballot ends up between Seselja and Kyburz.

    Args:
        first: code of the ballot's first preference.
        prefs: the ballot's full preference string, one code per character,
            in preference order.

    Returns:
        A three-element list ``[seselja, kyburz, exhausted]`` with at most one
        entry set to 1. Ballots whose first preference is one of "A", "B",
        "H" or "J" return ``[0, 0, 0]``.
    """
    # First preferences for A/B/H/J are not redistributed at all.
    if first in "ABHJ":
        return [0, 0, 0]

    # A ballot with a single preference has nowhere else to go.
    if len(prefs) == 1:
        return [0, 0, 1]

    # In every preference after the first, "A" counts as "H" and "B" as "J".
    ranked = prefs[0] + prefs[1:].replace("A", "H").replace("B", "J")
    seselja_at = ranked.find("H")
    kyburz_at = ranked.find("J")

    if seselja_at == -1 and kyburz_at == -1:
        # Neither remaining candidate is ranked: the ballot exhausts.
        return [0, 0, 1]
    if seselja_at == -1:
        return [0, 1, 0]
    if kyburz_at == -1:
        return [1, 0, 0]

    # Both are ranked: the earlier position wins the ballot.
    return [1, 0, 0] if seselja_at < kyburz_at else [0, 1, 0]

# vote distribution
# Ballots credited to row "T" are added at the fractional surplus `weight`, so
# the destination columns must hold floats. Cast them explicitly rather than
# relying on pandas silently upcasting an integer column on the first
# fractional assignment (deprecated behaviour in recent pandas).
tally_columns = ["Zed Seselja", "Penny Kyburz", "exhausted votes"]
for column in tally_columns:
    results[column] = results[column].astype(float)

# Above-the-line ballots are attributed to the first-listed candidate of the
# party; build the party -> candidate-code lookup once, not per ballot.
lead_candidate = {}
for code, party in results["party"].items():
    lead_candidate.setdefault(party, code)

total_votes = len(sample)
start = time.time()
interval = 1
print("distributing votes ...", end="\r")
for i, prefs in enumerate(sample["prefs"]):
    first = prefs[0]
    # dist is [seselja, kyburz, exhausted], in the same order as tally_columns
    dist = distribute(first, prefs)
    if candidates.at[first, "type"] == "above":
        first = lead_candidate[candidates.at[first, "party"]]
    # Only ballots credited to row "T" move at the reduced surplus weight;
    # every other ballot counts in full.
    scale = weight if first == "T" else 1
    for column, votes in zip(tally_columns, dist):
        results.at[first, column] += votes * scale

    # Refresh the progress line roughly once per elapsed second.
    if time.time() - interval > start:
        interval = interval + 1
        print(f"distributing votes ... {(i + 1) / total_votes:.1%}", end="\r")
print("distributing votes ... complete")

results

sorting first preference votes ... complete
distributing votes ... complete


Unnamed: 0_level_0,party,candidate,first,Katy Gallagher,Zed Seselja,Penny Kyburz,exhausted votes
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
H,Liberals,SESELJA Zed,84846,0,84846.0,0.0,0.0
I,Liberals,GUNNING Robert,2650,0,1812.0,700.0,138.0
J,Greens,KYBURZ Penny,45916,0,0.0,45916.0,0.0
K,Greens,DAVIDSON Emma,1978,0,23.0,1953.0,2.0
L,Anthony Pesec,PESEC Anthony,12359,0,4000.0,7959.0,400.0
M,Anthony Pesec,KENT Gary,212,0,71.0,133.0,8.0
N,Fraser Anning,VAN DUREN Shane,2392,0,1843.0,474.0,75.0
O,Fraser Anning,BIRKETT Scott,72,0,51.0,16.0,5.0
P,United Australia,WALTER Peter,5985,0,3948.0,1860.0,177.0
Q,United Australia,HODGSON Rebecah Elen,160,0,105.0,45.0,10.0


In [286]:
# Aggregate the candidate tallies to party level. numeric_only=True keeps the
# text "candidate" column out of the result; without it, recent pandas
# concatenates the names into an extra leading column, which would shift every
# positional column index used on party_results afterwards.
party_results = results.groupby("party").sum(numeric_only=True)

# Order parties by first preferences, largest first, but keep "ungrouped" at
# the bottom. It is selected by label so the layout does not depend on where
# "ungrouped" happens to fall in the alphabetical groupby order.
is_ungrouped = party_results.index == "ungrouped"
party_results = pd.concat([
    party_results[~is_ungrouped].sort_values("first", ascending=False),
    party_results[is_ungrouped],
])
results.to_csv("./candidate_results.csv")
party_results.to_csv("./party_results.csv")
party_results

Unnamed: 0_level_0,first,Katy Gallagher,Zed Seselja,Penny Kyburz,exhausted votes
party,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labor,106351,90078,2814.022663,13171.648844,287.328493
Liberals,87496,0,86658.0,700.0,138.0
Greens,47894,0,23.0,47869.0,2.0
Anthony Pesec,12571,0,4071.0,8092.0,408.0
United Australia,6145,0,4053.0,1905.0,187.0
Sustainable Australia,4457,0,1411.0,2965.0,81.0
Fraser Anning,2464,0,1894.0,490.0,80.0
ungrouped,2853,0,1481.0,1256.0,116.0


In [287]:
import json

# Sankey diagram input: one node per party (sources), one node per destination
# column (targets), and one link for every positive party -> destination tally.
sankey_data = {"nodes": [], "links": []}
party_count = len(party_results)

# Party label attached to the first three destination columns, by position;
# any later column is labelled "exhausted".
destination_parties = ("Labor", "Liberals", "Greens")

# Source nodes are numbered 0 .. party_count - 1, in party_results row order.
for position, party_name in enumerate(party_results.index):
    sankey_data["nodes"].append(
        dict(node=position, name=party_name, party=party_name)
    )

# Target nodes follow on from the sources; the "first" column is skipped.
for offset, column in enumerate(party_results.columns[1:]):
    if offset < len(destination_parties):
        owner = destination_parties[offset]
    else:
        owner = "exhausted"
    sankey_data["nodes"].append(
        dict(node=offset + party_count, name=column, party=owner)
    )

# Links: cell (row, offset + 1) is the flow from party `row` to target `offset`.
for source in range(party_count):
    for offset in range(4):
        votes = party_results.iat[source, offset + 1]
        if votes > 0:
            sankey_data["links"].append(
                dict(source=source, target=offset + party_count, value=votes)
            )

class NpEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy scalars and arrays.

    NumPy integers, floats and booleans are converted to the matching built-in
    Python type and arrays to (nested) lists. Anything else is handed to the
    base encoder, which raises ``TypeError`` for unsupported objects.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``.

        Called by ``json`` only for objects it cannot serialise natively.

        Raises:
            TypeError: if ``obj`` is not a NumPy integer, float, boolean or array.
        """
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        # np.bool_ is not a subclass of np.integer, so it needs its own branch.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

# Serialise the Sankey structure to disk; NpEncoder converts the NumPy values.
with open("./sankey_data.json", "w") as sankey_file:
    json.dump(sankey_data, sankey_file, cls=NpEncoder)

In [288]:
# Display the party-level totals again for reference.
party_results

Unnamed: 0_level_0,first,Katy Gallagher,Zed Seselja,Penny Kyburz,exhausted votes
party,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labor,106351,90078,2814.022663,13171.648844,287.328493
Liberals,87496,0,86658.0,700.0,138.0
Greens,47894,0,23.0,47869.0,2.0
Anthony Pesec,12571,0,4071.0,8092.0,408.0
United Australia,6145,0,4053.0,1905.0,187.0
Sustainable Australia,4457,0,1411.0,2965.0,81.0
Fraser Anning,2464,0,1894.0,490.0,80.0
ungrouped,2853,0,1481.0,1256.0,116.0


In [302]:
# Share of all allocated votes (every column after "first") that ends up in
# each destination column. Only the last expression of a cell is displayed, so
# the value shown is the share for column 3 ("Penny Kyburz").
allocated_total = party_results.iloc[:, 1:].sum().sum()
party_results.iloc[:, 1].sum() / allocated_total
party_results.iloc[:, 2].sum() / allocated_total
party_results.iloc[:, 3].sum() / allocated_total
# party_results.iloc[:, 4].sum() / allocated_total

0.28290110625537435