In [25]:
import pandas as pd, math, time, json

choices = pd.read_csv(
    "2022_choices.csv",
    index_col="code"
)
choices.drop(["E", "T", "U"], inplace=True)

votes = pd.read_csv(
    "2022_votes.csv",
    usecols=[1, 2],
    index_col=["voter id"]
)
votes["prefs"] = votes["prefs"].str.replace("[ETU]", "", regex=True)
votes = votes.sample(frac=1)

for col in ["first preferences", "Gallagher (elected 1st)", "Seselja (elected 2nd)", "Goreng Goreng", "exhausted"]:
    choices.loc[:, col] = 0

def sort_prefs(prefs):
    new_prefs = []    
    for x in prefs:
        if choices.at[x, "btl"]:
            new_prefs.append(x)
        else:
            new_prefs.append(choices[(choices["candidateName"] == choices.at[x, "candidateName"]) & (choices["btl"])].index[0])            
    return "".join(new_prefs)

def distribute(prefs):
    prefs = "".join([x for x in prefs if x in "RZ"])
    if len(prefs) == 0:
        return [0, 0, 1]
    pref = prefs[0]
    if pref == "R":
        return [1, 0, 0]
    else:
        return [0, 1, 0]

start = time.time()
interval = 2
print("sorting votes ...", end="\r")
for i, voter in enumerate(votes.index):
    votes.at[voter, "prefs"] = sort_prefs(votes.at[voter, "prefs"])
    if time.time() - interval > start:
        interval = interval + 2
        print(f"sorting votes ... {(i + 1) / len(votes):.1%}", end="\r")
print("sorting votes ... complete")
choices = choices.loc["L":].drop("btl", axis=1)

start = time.time()
interval = 2
print("analysing votes ...", end="\r")
for i, voter in enumerate(votes.index):
    prefs = votes.at[voter, "prefs"]
    if len(prefs) > 0:
        first = prefs[0]
        dist = distribute(prefs)
        choices.at[first, "first preferences"] = choices.at[first, "first preferences"] + 1
        choices.loc[first, "Seselja (elected 2nd)":] = choices.loc[first, "Seselja (elected 2nd)":] + dist
    if time.time() - interval > start:
        interval = interval + 2
        print(f"analysing votes ... {(i + 1) / len(votes):.1%}", end="\r")
print("analysing votes ... complete")

total_votes = choices["first preferences"].sum()
quota = math.ceil(total_votes / 3) + 1
print(f"votes: {total_votes:,.0f}")
print(f"quota: {quota:,.0f}")

weight = (choices.at["L", "first preferences"] - quota) / choices.at["L", "first preferences"]
choices.at["L", "Gallagher (elected 1st)"] = quota
choices.loc["L", "Seselja (elected 2nd)":] = choices.loc["L", "Seselja (elected 2nd)":] * weight
choices.to_csv("2022_results_nocock.csv")

choices_pc = choices.copy()
choices_pc.loc[:, "Gallagher (elected 1st)":] = choices_pc.loc[:, "Gallagher (elected 1st)":] / total_votes
choices_pc.to_csv("2022_results_nocock_pc.csv")
choices_pc.loc[:, "Gallagher (elected 1st)":].sum()

sorting votes ... complete
analysing votes ... complete
votes: 285,032
quota: 95,012


Gallagher (elected 1st)    0.333338
Seselja (elected 2nd)      0.339599
Goreng Goreng              0.305259
exhausted                  0.021804
dtype: float64