In [29]:
import pandas as pd
import time
pd.options.mode.chained_assignment = None

def create_party_ids(label, df):
    ids = []
    start = time.time()
    interval = 1
    elapsed = interval
    for i, index in enumerate(df.index):
        id = str(df.loc[index, "ecode"]) + "-" + str(df.loc[index, "pcode"])
        ids.append(id)
        if (time.time() - elapsed) > start:
            print(f"formatting {label} data ... {(i + 1) / len(df):.1%}", end="\r")
            elapsed = elapsed + interval
    new_df = df
    new_df["id"] = ids
    new_df.set_index("id", inplace=True)
    return new_df

def create_ids(label, df):
    new_df = df    
    ids = []
    start = time.time()
    interval = 1
    elapsed = interval
    for i, index in enumerate(df.index):
        id = str(df.loc[index, "ecode"]) + "-" + str(df.loc[index, "pcode"]) + "-" + str(df.loc[index, "ccode"])
        ids.append(id)
        if (time.time() - elapsed) > start:
            print(f"formatting {label} data ... {(i + 1) / len(df):.1%}", end="\r")
            elapsed = elapsed + interval
    new_df["id"] = ids    
    return new_df

def format_candidates(df):
    new_df = df
    party_list = []
    for candidate in df.index:
        party = str(df.loc[candidate, "ecode"]) + "-" + str(df.loc[candidate, "ccode"])
        party_list.append(parties.loc[party, "pabbrev"])    
    new_df["party"] = party_list
    new_df["cname"] = df["cname"].apply(lambda x: x.split(", ")[1] + " " + x.split(", ")[0])
    return new_df    

# BEGIN PROGRAM

print("2020 ACT ELECTION DATA FORMATTER")
print("\u00a9 Markus Mannheim (ABC Canberra)")

# read in parameters
print("\nformatting party data ...", end="\r")
parties = pd.read_csv("./data/Groups.txt")
parties = create_party_ids("party", parties)
print("formatting party data ... complete")
print("formatting candidate data ...", end="\r")
candidates = pd.read_csv("./data/Candidates.txt")
candidates = create_ids("candidate", candidates)
candidates.set_index("id", inplace=True)
candidates = format_candidates(candidates)
print("formatting candidate data ... complete")
print("formatting electorate data ...", end="\r")
electorates = pd.read_csv("./data/Electorates.txt", index_col="ecode")
print("formatting electorate data ... complete")

# read in votes
print("formatting ballot data ...", end="\r")
ballots = pd.DataFrame(columns=["pindex", "pref", "pcode", "ccode", "ecode"])
for i, electorate in enumerate(electorates.electorate):
    add_ballots = pd.read_csv(f"./data/{electorate}Total.txt", usecols=["pindex", "pref", "pcode", "ccode"])
    add_ballots["ecode"] = i + 1
    ballots = ballots.append(add_ballots)
ballots = create_ids("ballot", ballots[:100000])
print("formatting ballot data ... complete")

# read in votes
print("\ncreating individual vote files ...", end="\r")
vote_files = pd.DataFrame(index=ballots.pindex.unique(), columns=["votes", "pref", "value"])
start = time.time()
interval = 1
elapsed = interval
for i, vote in enumerate(vote_files.index):
    data = ballots[ballots.pindex == vote].sort_values("pref")
    votes = []
    for j in data.index:
        votes.append(data.loc[j, "id"])
    vote_files.loc[vote, "votes"] = votes
    vote_files.loc[vote, "pref"] = 0
    vote_files.loc[vote, "value"] = 1
    if (time.time() - start) > elapsed:
        print(f"creating individual vote files ... {(i + 1) / len(vote_files):.1%}", end="\r")
        elapsed = elapsed + interval
print("creating individual vote files ... complete")

# save data
print("\nsaving data ...", end="\r")
candidates.to_csv("./data/candidates.csv", index_label="id")
parties.to_csv("./data/parties.csv", index_label="id")
vote_files.to_csv("./data/votes.csv", index_label="id")
print("saving data ... complete")

2020 ACT ELECTION DATA FORMATTER
© Markus Mannheim (ABC Canberra)

formatting party data ... complete
formatting candidate data ... complete
formatting electorate data ... complete
formatting ballot data ... complete

creating individual vote files ... complete

saving data ... complete


In [27]:
candidates

Unnamed: 0_level_0,ecode,pcode,ccode,cname,party
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1-1-1,1,1,1,Cathy DAY,ALP
1-1-2,1,1,2,Taimus WERNER-GIBBINGS,AJP
1-1-3,1,1,3,Joy BURCH,GREEN
1-1-4,1,1,4,Mick GENTLEMAN,LIB
1-1-5,1,1,5,Brendan FORDE,LDP
...,...,...,...,...,...
5-8-4,5,8,4,Georgia PHILLIPS,SUST
5-8-5,5,8,5,Michael PETTERSSON,PROG
5-9-1,5,9,1,Helen CROSS,DLP
5-9-2,5,9,2,Fuxin LI,LIB
