In [1]:
import pandas as pd, math, time, json

choices = pd.read_csv(
    "2022_choices.csv",
    index_col="code"
)

votes = pd.read_csv(
    "2022_votes.csv",
    usecols=[1, 2],
    index_col=["voter id"]
)
votes = votes.sample(frac=1)

for col in ["first preferences", "Gallagher (elected)", "Gallagher (quota)", "Seselja", "Pocock", "exhausted"]:
    choices.loc[:, col] = 0

def sort_prefs(prefs):
    new_prefs = []    
    for x in prefs:
        if choices.at[x, "btl"]:
            new_prefs.append(x)
        else:
            new_prefs.append(choices[(choices["candidateName"] == choices.at[x, "candidateName"]) & (choices["btl"])].index[0])            
    return "".join(new_prefs)

def distribute(prefs):
    prefs = "".join([x for x in prefs if x in "LRT"])
    if len(prefs) == 0:
        return [0, 0, 0, 0, 1]
    pref = prefs[0]
    if pref == "L":
        prefs = "".join([x for x in prefs if x in "RT"])        
        if len(prefs) == 0:
            choices.at["L", "exhausted"] = choices.at["L", "exhausted"] + 1
            return [1, 0, 0, 0, 0]
        pref = prefs[0]
        if pref == "R":
            choices.at["L", "Seselja"] = choices.at["L", "Seselja"] + 1
            return [1, 0, 0, 0, 0]
        else:
            choices.at["L", "Pocock"] = choices.at["L", "Pocock"] + 1
            return [1, 0, 0, 0, 0]        
    elif pref == "R":
        return [0, 0, 1, 0, 0]
    else:
        return [0, 0, 0, 1, 0]

start = time.time()
interval = 2
print("sorting votes ...", end="\r")
for i, voter in enumerate(votes.index):
    votes.at[voter, "prefs"] = sort_prefs(votes.at[voter, "prefs"])
    if time.time() - interval > start:
        interval = interval + 2
        print(f"sorting votes ... {(i + 1) / len(votes):.1%}", end="\r")
print("sorting votes ... complete")
choices = choices.loc["L":].drop("btl", axis=1)

start = time.time()
interval = 2
print("analysing votes ...", end="\r")
for i, voter in enumerate(votes.index):
    prefs = votes.at[voter, "prefs"]
    first = prefs[0]
    dist = distribute(prefs)
    choices.at[first, "first preferences"] = choices.at[first, "first preferences"] + 1
    choices.loc[first, "Gallagher (elected)":] = choices.loc[first, "Gallagher (elected)":] + dist
    if time.time() - interval > start:
        interval = interval + 2
        print(f"analysing votes ... {(i + 1) / len(votes):.1%}", end="\r")
print("analysing votes ... complete")

total_votes = choices["first preferences"].sum()
quota = math.ceil(total_votes / 3) + 1
print(f"votes: {total_votes:,.0f}")
print(f"quota: {quota:,.0f}")

weight = (choices["Gallagher (elected)"].sum() - quota) / choices["Gallagher (elected)"].sum()
choices.at["L", "Gallagher (quota)"] = quota
choices.loc["L", "Seselja":] = choices.loc["L", "Seselja":] * weight
choices.to_csv("2022_results.csv")

choices_pc = choices.copy()
choices_pc.loc[:, "Gallagher (elected)":] = choices_pc.loc[:, "Gallagher (elected)":] / total_votes
choices_pc.to_csv("2022_results_pc.csv")
choices_pc.loc[:, "Gallagher (elected)":].sum()

sorting votes ... complete
analysing votes ... complete
votes: 285,217
quota: 95,074


Gallagher (elected)    0.407115
Gallagher (quota)      0.333339
Seselja                0.286047
Pocock                 0.369139
exhausted              0.011475
dtype: float64