In [49]:
import pandas as pd, time, numpy as np

# Tab-separated Senate candidates download published on results.aec.gov.au.
CANDIDATES_URL = "https://results.aec.gov.au/24310/Website/Downloads/SenateCandidatesDownload-24310.txt"

# header=1: the column names are on the second line of the file, so the first
# line is skipped. Only the first six columns are loaded.
all_candidates = pd.read_csv(
    CANDIDATES_URL,
    sep="\t",
    header=1,
    usecols=list(range(6)),
)

# Single display name per candidate, surname first, built before filtering.
all_candidates["CandidateNm"] = all_candidates["Surname"] + " " + all_candidates["GivenNm"]

# Keep only the ACT rows and the four columns used by the rest of the cell.
# The column order matters to any code that reads this frame positionally.
is_act = all_candidates["StateAb"] == "ACT"
candidates = all_candidates.loc[is_act, ["PartyAb", "PartyNm", "CandidateID", "CandidateNm"]]

# Every missing value (in any column) becomes the literal "Anthony Pesec".
# NOTE(review): presumably this fills the blank party fields of a candidate
# with no registered party, matching the "C:" special case applied to the
# ballot-paper headers -- confirm against the data.
candidates = candidates.replace(np.nan, "Anthony Pesec")

# Load columns 3..29 of the ballot-paper file (27 columns in total).
ballot_papers = pd.read_csv("act_senate_2019.csv", usecols=list(range(3, 30)))

# Every loaded column after the first three is a vote option whose header is
# split on ":" into a [letter, name] pair. The first three columns are skipped
# here; they are combined into the voter id further down.
vote_options = []
for column_header in ballot_papers.columns[3:]:
    if column_header == "C:":
        # This header has nothing after the colon, so a name is supplied
        # explicitly.
        vote_options.append(["C", "Anthony Pesec"])
    else:
        vote_options.append(column_header.split(":"))

# Lookup table with one row per vote option, keyed by a single-character code.
choices = pd.DataFrame()
choices.index.name = "code"

# Party names are fixed for the whole loop, so collect them once.
known_parties = candidates["PartyNm"].tolist()

for position, (letter, name) in enumerate(vote_options):
    # Codes run "A".."Z" for the first 26 options and continue with "a", "b",
    # ... afterwards (71 + 26 == ord("a")).
    code = chr(65 + position) if position < 26 else chr(71 + position)

    # An option whose name is a party name is a group (not below-the-line)
    # entry; any other option is looked up as an individual candidate.
    is_party_option = name in known_parties
    lookup_column = "PartyNm" if is_party_option else "CandidateNm"
    matches = candidates[candidates[lookup_column] == name]

    # The first matching candidate row supplies the party details.
    choices.at[code, "PartyNm"] = matches["PartyNm"].iat[0]
    choices.at[code, "PartyAb"] = matches["PartyAb"].iat[0]
    choices.at[code, "btl"] = not is_party_option

    if is_party_option:
        # Second option carrying this letter: the first is the party entry
        # itself, the next one is the first candidate listed under it.
        same_letter = [entry for entry in vote_options if entry[0] == letter]
        choices.at[code, "candidateName"] = same_letter[1][1]
    else:
        choices.at[code, "candidateName"] = matches["CandidateNm"].iat[0]

choices.to_csv("2019_choices.csv")

# Turn every ballot paper into a voter id plus a preference string (the option
# codes ordered by the preference number written against them) and save the
# result to 2019_votes.csv.
start = time.time()
interval = 2  # seconds of elapsed time before the next progress line
print("cleaning ballot papers ...", end="\r")

# Replace the option headers with their single-character codes so that a
# sorted row's index can be joined straight into the preference string.
ballot_papers.columns = ballot_papers.columns[:3].tolist() + choices.index.tolist()

# Collect the results in plain lists and build the frame once at the end.
# Writing each cell with votes.at[...] on a label that does not exist yet
# reallocates the whole frame, which makes the loop quadratic in the number
# of ballot papers.
total_papers = len(ballot_papers)
voter_ids = []
pref_strings = []

# Iterate by position: .iloc/.iat are positional, so this stays correct even
# if ballot_papers does not carry a default 0..n-1 index.
for pos in range(total_papers):
    # Options ordered by preference number; unmarked options (NaN) are dropped.
    ranked = ballot_papers.iloc[pos, 3:].sort_values().dropna()
    pref_strings.append("".join(ranked.index))

    # Voter id = the three leading columns, zero-padded to widths 3, 3 and 2.
    voter_ids.append(
        str(ballot_papers.iat[pos, 0]).zfill(3)
        + str(ballot_papers.iat[pos, 1]).zfill(3)
        + str(ballot_papers.iat[pos, 2]).zfill(2)
    )

    # Refresh the progress line roughly every two seconds.
    if time.time() - interval > start:
        interval = interval + 2
        print(f"cleaning ballot papers ... {(pos + 1) / total_papers:.1%}", end="\r")

# Same row labels and column order as the incremental construction produced.
votes = pd.DataFrame(
    {"voter id": voter_ids, "prefs": pref_strings},
    index=ballot_papers.index,
)
print("cleaning ballot papers ... complete")
votes.to_csv("2019_votes.csv")

cleaning ballot papers ... complete
