In [5]:
import pandas as pd
import time

# functions
def get_electorate():
    """Prompt until the user picks an electorate (or quit) from a text menu.

    The menu lists every name in the module-level ``electorates.electorate``
    column, numbered from 1, followed by the fixed entry ``6. quit program``.

    Returns:
        int: the number entered by the user, between 1 and 6 inclusive.
        The caller treats 6 as "quit".
    """
    while True:
        print("\nWhich electorate do you wish to analyse?")
        for i, electorate in enumerate(electorates.electorate):
            print(f"{i + 1}. {electorate}")
        print("6. quit program")
        text = input("Enter a number from 1 to 6:")

        # Only the integer conversion is guarded: a bare ``except`` here would
        # also swallow EOFError / KeyboardInterrupt and trap the user in the
        # loop with no way out.
        try:
            choice = int(text)
        except ValueError:
            print("That's not a valid choice.")
            continue

        if 0 < choice < 7:
            return choice
        print("That's not a valid choice.")

def create_ids(label, df):
    """Add an ``id`` column of the form ``"<pcode>-<ccode>"`` to *df*.

    The IDs are built straight from the ``pcode`` and ``ccode`` columns, so
    the result does not depend on the dataframe's index (duplicate index
    labels are fine) or on the dtypes of any other columns.

    Args:
        label: short description of the rows, used only in the status message.
        df: dataframe with ``pcode`` and ``ccode`` columns. It is modified in
            place.

    Returns:
        The same dataframe object, now carrying the ``id`` column. The index
        is left untouched; callers that want the IDs as the index must call
        ``set_index("id")`` themselves.
    """
    # Walk the two columns in lockstep instead of looking each row up by
    # label: a row lookup builds a whole Series per row, which is slow and can
    # coerce integer codes to floats when the row is all-numeric.
    df["id"] = [f"{pcode}-{ccode}" for pcode, ccode in zip(df["pcode"], df["ccode"])]
    print(f"creating {label} IDs ... complete")
    return df
            
def format_candidates(candidate_data, party_data):
    """Build the candidate table used for counting.

    Args:
        candidate_data: dataframe with ``cname``, ``pcode`` and ``ccode``
            columns; ``cname`` values look like ``"Surname, Given"``.
        party_data: dataframe indexed by ``pcode`` with a ``pabbrev`` column.

    Returns:
        A new dataframe indexed by ``id`` (``"<pcode>-<ccode>"``) with the
        columns ``cname`` (rewritten as ``"Given Surname"``), ``party``,
        ``primary`` and ``votes`` (the last two initialised to 0).
        *candidate_data* itself is not modified.
    """

    def _given_name_first(name):
        # "Surname, Given" -> "Given Surname"; names without the separator
        # are returned unchanged rather than raising IndexError.
        parts = name.split(", ")
        if len(parts) < 2:
            return name
        return parts[1] + " " + parts[0]

    # Work on an explicit copy: assigning columns onto a slice of the caller's
    # frame triggers pandas' chained-assignment warning and has ambiguous
    # effects on the original.
    formatted_data = candidate_data[["cname", "pcode", "ccode"]].copy()
    formatted_data["votes"] = 0
    formatted_data["primary"] = 0
    formatted_data["cname"] = formatted_data["cname"].apply(_given_name_first)
    formatted_data["party"] = formatted_data["pcode"].apply(
        lambda pcode: party_data.loc[pcode, "pabbrev"]
    )
    formatted_data = create_ids("candidate", formatted_data)
    formatted_data = formatted_data.set_index("id")
    return formatted_data[["cname", "party", "primary", "votes"]]

def create_voters(ballot_data):
    """Create one record per ballot paper from per-preference rows.

    Args:
        ballot_data: dataframe with one row per marked preference and the
            columns ``pindex`` (ballot paper identifier), ``pref``
            (preference number) and ``id`` (candidate ID).

    Returns:
        A dataframe indexed by the unique ``pindex`` values, in order of first
        appearance in *ballot_data*, with object-dtype columns:

        * ``votes`` - list of candidate IDs ordered by ascending ``pref``
        * ``pref``  - initialised to 0
        * ``value`` - initialised to 1
    """
    paper_ids = ballot_data["pindex"].unique()

    # Group in a single pass over the preference-sorted rows. Filtering the
    # whole frame once per paper is quadratic, and writing results back via
    # ``voters.loc[i].votes = ...`` is chained assignment, which pandas may
    # apply to a temporary copy and silently discard.
    ordered = ballot_data.sort_values("pref", kind="stable")
    votes_by_paper = {}
    for paper_id, candidate_id in zip(ordered["pindex"], ordered["id"]):
        votes_by_paper.setdefault(paper_id, []).append(candidate_id)

    votes = pd.Series(
        [votes_by_paper[paper_id] for paper_id in paper_ids],
        index=paper_ids,
        dtype=object,
    )
    # Keep every column object-dtype, as a frame created empty and filled
    # cell by cell would be, so later code can store non-integer values.
    voters = pd.DataFrame({"votes": votes, "pref": 0, "value": 1}).astype(object)
    print("creating voter files ... complete")
    return voters

# BEGIN PROGRAM

# if __name__ == "__main__":
print("2020 ACT ELECTION SIMULATOR")
print("\u00a9 Markus Mannheim (ABC Canberra)")

# read in parameters
print("\nestablishing databases ...", end=" ")
parties = pd.read_csv("./data/Groups.txt")
candidates = pd.read_csv("./data/Candidates.txt")
electorates = pd.read_csv("./data/Electorates.txt", index_col="ecode")
print("complete")

# read in votes: one file per electorate, tagged with the electorate's menu
# number (1-based position in the electorates table)
print("reading in ballot papers ...", end=" ")
ballot_columns = ["pindex", "pref", "pcode", "ccode", "ecode"]
ballot_frames = []
for i, electorate in enumerate(electorates.electorate):
    add_ballots = pd.read_csv(
        f"./data/{electorate}Total.txt",
        usecols=["pindex", "pref", "pcode", "ccode"],
    )
    add_ballots["ecode"] = i + 1
    ballot_frames.append(add_ballots)
# Concatenate once at the end: DataFrame.append was removed in pandas 2.0 and
# re-copied the accumulated frame on every iteration. ignore_index gives the
# combined frame unique row labels.
if ballot_frames:
    ballots = pd.concat(ballot_frames, ignore_index=True)[ballot_columns]
else:
    ballots = pd.DataFrame(columns=ballot_columns)
print("complete")

# begin cycle
while True:
    electorate = get_electorate()

    # user wants to quit
    if electorate == 6:
        break

    # filter data to chosen electorate
    print(f"\nfiltering {electorates.electorate.loc[electorate]} data ...", end=" ")
    active_ballots = ballots[ballots["ecode"] == electorate]
    active_parties = parties[parties["ecode"] == electorate].set_index("pcode")
    active_candidates = candidates[candidates["ecode"] == electorate]
    print("complete")

    # create sample data to speed up calculation (fixed seed so that runs
    # are repeatable)
    print("sampling data for analysis...", end=" ")
    sample_ballots = active_ballots.sample(frac=.05, random_state=2)
    print("complete")

    # format candidates, ballots and voters
    active_candidates = format_candidates(active_candidates, active_parties)
    sample_ballots = create_ids("ballot", sample_ballots)[["pindex", "pref", "pcode", "id"]]
    voters = create_voters(sample_ballots)

    print()
    print(voters)

    # TODO: establish the quota and run the count, e.g.
    #   quota = len(voters) / 6 + 1
    #   print(f"\nquota established: {quota:.1f}")


# exit program
print("\nEnjoy your day.")
# exit()

2020 ACT ELECTION SIMULATOR
© Markus Mannheim (ABC Canberra)

establishing databases ... complete
reading in ballot papers ... complete

Which electorate do you wish to analyse?
1. Brindabella
2. Ginninderra
3. Kurrajong
4. Murrumbidgee
5. Yerrabi
6. quit program


Enter a number from 1 to 6: 3



filtering Kurrajong data ... complete
sampling data for analysis... complete
creating candidate IDs ... complete
creating ballot IDs ... complete
creating voter files ... complete

                          votes pref value
682085                    [3-2]    0     1
867179                    [2-1]    0     1
287881               [8-4, 2-5]    0     1
463744512                 [2-4]    0     1
914664     [3-2, 8-4, 8-5, 8-1]    0     1
...                         ...  ...   ...
968720350                 [4-1]    0     1
458326                    [8-2]    0     1
982056                    [8-1]    0     1
741529                    [2-5]    0     1
852140                    [9-2]    0     1

[14489 rows x 3 columns]

Which electorate do you wish to analyse?
1. Brindabella
2. Ginninderra
3. Kurrajong
4. Murrumbidgee
5. Yerrabi
6. quit program


Enter a number from 1 to 6: 6



Enjoy your day.
