In [2]:
import pandas as pd
import time
pd.options.mode.chained_assignment = None

# functions
def get_electorate():
    ''' select electorate from text menu '''       
    valid_response = True
    while valid_response:
        try:
            print("\nWhich electorate do you wish to analyse?")
            for i, electorate in enumerate(electorates.electorate):
                print(f"{i + 1}. {electorate}")
            print("6. quit program")
            text = input("Enter a number from 1 to 6:")            
            text = int(text)
            if (text > 0) and (text < 7):
                valid_response = True
                return text
            else:
                print("That's not a valid choice.")
        except:
            print("That's not a valid choice.")

def create_ids(label, df):
    ''' creates unique IDs and sets them as the dataframe index '''
    start_time = time.time()
    time_total = time_interval
    ids = []
    for i, indice in enumerate(df.index):
        ids.append(f"{df.loc[indice].pcode}-{df.loc[indice].ccode}")
        if (time.time() - start_time) > time_total:
            print(f"creating {label} IDs ... {(i + 1) / len(df):.1%}", end="\r")
            time_total = time_total + time_interval
    print(f"creating {label} IDs ... complete")    
    new_df = df
    new_df["id"] = ids
    return new_df

def format_candidates(candidate_data, party_data):
    ''' format the candidates dataframe '''
    formatted_data = candidate_data[["cname", "pcode", "ccode"]]
    formatted_data["votes"] = 0
    formatted_data["primary"] = 0
    formatted_data["cname"] = candidate_data["cname"].apply(lambda x: x.split(", ")[1] + " " + x.split(", ")[0])
    formatted_data["party"] = formatted_data["pcode"].apply(lambda x: party_data.loc[x].pabbrev)
    formatted_data = create_ids("candidate", formatted_data)
    formatted_data.set_index("id", inplace=True)
    formatted_data = formatted_data[["cname", "party", "primary", "votes"]]
    return formatted_data

def create_voters(ballot_data):
    ''' creates data for invidual votes '''
    voters = pd.DataFrame(index=ballot_data.pindex.unique(), columns=["votes", "pref", "value"])
    start_time = time.time()
    time_total = time_interval
    for i, indice in enumerate(voters.index):
        data = ballot_data[ballot_data.pindex == indice].sort_values("pref")
        votes = []
        for j in data.index:
            votes.append(data.loc[j].id)
        voters.loc[indice].votes = votes
        voters.loc[indice].pref = 0
        voters.loc[indice].value = 1
        if (time.time() - start_time) > time_total:
            print(f"creating voter files ... {(i + 1) / len(voters):.1%}", end="\r")
            time_total = time_total + time_interval
    print(f"creating voter files ... complete", )
    return voters

def check_elected():
    global count_no, candidates
    print(f"\n***** COUNT {count_no} *****")
    reached_quota = candidates[candidates["votes"] >= quota]
    if len(reached_quota) == 0:
        print("no candidates elected")
        count_no = count_no + 1
        # select eliminated candidate
        candidate = candidates.index[len(candidates) - 1]
        name = candidates.loc[candidate].cname
        party = candidates.loc[candidate].party
        primary = candidates.loc[candidate].primary
        print(f"{name} is eliminated!")
        eliminated_candidates.loc[candidate] = [name, party, primary]
        eliminate_candidate(candidate)
    else:
        candidate = reached_quota.index[0]
        name = candidates.loc[candidate].cname
        party = candidates.loc[candidate].party
        primary = candidates.loc[candidate].primary
        print(f"{name} is elected!")
        elected_candidates.loc[candidate] = [name, party, primary]
        if len(elected_candidates) < 5:
            count_no = count_no + 1
            redistribute_excess_votes(candidate)
        else:
            print("\nELECTED")
            print(elected_candidates)
            candidates = candidates[candidates.index != candidate]
            for candidate in candidates.index:
                name = candidates.loc[candidate].cname
                party = candidates.loc[candidate].party
                primary = candidates.loc[candidate].primary
                eliminated_candidates.loc[candidate] = [name, party, primary]
            print("\nELIMINATED")
            print(eliminated_candidates)
            print(f"\ntotal votes: {exhausted_votes:,.0f}")
            print(f"exhausted votes: {exhausted_votes:,.0f}")



# BEGIN PROGRAM

print("2020 ACT ELECTION SIMULATOR")
print("\u00a9 Markus Mannheim (ABC Canberra)")

# read in parameters
print("\nestablishing databases ...", end=" ")
parties = pd.read_csv("./data/Groups.txt")
candidates = pd.read_csv("./data/Candidates.txt")
electorates = pd.read_csv("./data/Electorates.txt", index_col="ecode")
print("complete")

# read in votes
print("reading in ballot papers ...", end=" ")
ballots = pd.DataFrame(columns=["pindex", "pref", "pcode", "ccode", "ecode"])
for i, electorate in enumerate(electorates.electorate):
    add_ballots = pd.read_csv(f"./data/{electorate}Total.txt", usecols=["pindex", "pref", "pcode", "ccode"])
    add_ballots["ecode"] = i + 1
    ballots = ballots.append(add_ballots)
print("complete")

# begin cycle
while True:
    electorate = get_electorate()

    # user wants to quit
    if electorate == 6:
        break

    # filter data to chosen electorate
    print(f"\nfiltering {electorates.electorate[electorate]} data ...", end=" ")
    active_ballots = ballots[ballots["ecode"] == electorate]
    active_parties = parties[parties["ecode"] == electorate].set_index("pcode")
    active_candidates = candidates[candidates["ecode"] == electorate]
    print("complete")
    
    # create sample data to speed up calculation
    sample_size = .75
    print("sampling data for analysis ...", end="\r")
    sample = pd.Series(active_ballots.pindex.unique()).sample(frac=sample_size)
    sample_ballots = pd.DataFrame(columns=active_ballots.columns)
    start_time = time.time()
    time_interval = 2
    time_total = time_interval
    for i, preference in enumerate(active_ballots.index):
        if active_ballots.loc[preference, "pindex"] in sample:
            sample_ballots.loc[preference] = active_ballots.loc[preference]            
        if (time.time() - start_time) > time_total:
            print(f"sampling data for analysis ... {(i + 1) / len(active_ballots):.1%}", end="\r")
            time_total = time_total + time_interval
    print("sampling data for analysis ... complete")

    # format candidates, ballots and voters
    active_candidates = format_candidates(active_candidates, active_parties)
    sample_ballots = create_ids("ballot", sample_ballots)
    voters = create_voters(sample_ballots)

    # establish quota
    quota = len(voters) / 6 + 1
    print(f"\nquota for {electorates.electorate[electorate]}: {quota:.1f} votes")
    
    # prepare datasets to contain elected and eliminated candidates
    print("preparing results containers ...", end=" ")
    elected_candidates = pd.DataFrame(columns=["cname", "party", "primary"])
    eliminated_candidates = pd.DataFrame(columns=["cname", "party", "primary"])
    exhausted_votes = 0
    print("complete")

    # record primary votes
    print("recording primary votes ...", end="\r")
    count_no = 1
    start_time = time.time()
    time_total = time_interval
    for i, indice in enumerate(voters.index):
        candidate = voters.loc[indice].votes[voters.loc[indice].pref]
        value = voters.loc[indice].value
        previous = active_candidates.loc[candidate, "votes"]
        active_candidates.loc[candidate, "votes"] = previous + 1
        if (time.time() - start_time) > time_total:
            print(f"recording primary votes ... {(i + 1) / len(voters):.1%}", end="\r")
            time_total = time_total + time_interval
    
    # sort candidates by primary votes
    active_candidates.sort_values("votes", ascending=False, inplace=True)
    # total formal votes in electorate
    total_votes = active_candidates.votes.sum()
    for candidate in active_candidates.index:
        active_candidates.loc[candidate, "primary"] = f"{active_candidates.loc[candidate].votes / total_votes:.1%}"    
    print("recording primary votes ... complete")
    
    # begin counting cycle
    while len(elected_candidates) < 5:
        print(f"COUNT No. {count_no}")
        
        # check if candidates elected

        count_no = count_no + 1
        break

# exit program
print("\nEnjoy your day.")

2020 ACT ELECTION SIMULATOR
Â© Markus Mannheim (ABC Canberra)

establishing databases ... complete
reading in ballot papers ... complete

Which electorate do you wish to analyse?
1. Brindabella
2. Ginninderra
3. Kurrajong
4. Murrumbidgee
5. Yerrabi
6. quit program


Enter a number from 1 to 6: 1



filtering Brindabella data ... complete
sampling data for analysis ... complete
creating candidate IDs ... complete
creating ballot IDs ... complete
creating voter files ... complete

quota for Brindabella: 307.3 votes
preparing results containers ... complete
recording primary votes ... complete

                      cname  party primary  votes
id                                               
4-1             Mark PARTON    LIB   12.2%    225
1-4          Mick GENTLEMAN    ALP   12.1%    222
1-3               Joy BURCH    ALP   11.5%    211
4-5           Nicole LAWDER    LIB   10.4%    192
4-4             Andrew WALL    LIB    7.8%    143
1-2  Taimus WERNER-GIBBINGS    ALP    7.6%    139
3-1         Johnathan DAVIS  GREEN    5.9%    108
1-5           Brendan FORDE    ALP    5.1%     93
1-1               Cathy DAY    ALP    4.6%     84
4-2           James DANIELS    LIB    4.0%     73
4-3              Jane HIATT    LIB    3.9%     72
3-3           Laura NUTTALL  GREEN    2.8%     52


Enter a number from 1 to 6: 5



filtering Yerrabi data ... complete
sampling data for analysis ... complete
creating candidate IDs ... complete
creating ballot IDs ... complete
creating voter files ... complete

quota for Yerrabi: 285.0 votes
preparing results containers ... complete
recording primary votes ... complete

                      cname  party primary  votes
id                                               
2-5            Alistair COE    LIB   18.1%    308
8-3             Suzanne ORR    ALP    8.9%    152
2-4          Leanne CASTLEY    LIB    8.6%    147
8-5      Michael PETTERSSON    ALP    8.6%    146
2-2          James MILLIGAN    LIB    8.2%    140
8-1        Deepak-Raj GUPTA    ALP    6.7%    115
8-4        Georgia PHILLIPS    ALP    6.5%    111
3-1         Andrew BRADDOCK  GREEN    6.3%    107
2-3      Jacob VADAKKEDATHU    LIB    4.2%     71
3-2            Mainul HAQUE  GREEN    3.5%     60
2-1      Krishna NADIMPALLI    LIB    3.4%     58
1-1           Bernie STRANG    DLP    2.6%     45
1-2     

Enter a number from 1 to 6: 6



Enjoy your day.


In [17]:
sample_ballots

Unnamed: 0,pindex,pref,pcode,ccode,ecode


In [59]:
sample_size = .1
print("sampling data for analysis ...", end="\r")
sample = pd.Series(active_ballots.pindex.unique()).sample(frac=sample_size)
sample_ballots = pd.DataFrame(columns=active_ballots.columns)
start_time = time.time()
time_interval = .5
time_total = time_interval
for i, pindex in enumerate(active_ballots.pindex):
    if pindex not in sample:
        pass
    if (time.time() - start_time) > time_total:
        print(f"sampling data for analysis ... {(i + 1) / len(active_ballots):.1%}", end="\r")
        time_total = time_total + time_interval
print("sampling data for analysis ... complete")
len(active_ballots.pindex)

sampling data for analysis ... complete


355375

In [35]:
sample_size = .75
print("sampling data for analysis ...", end="\r")
sample = pd.Series(active_ballots.pindex.unique()).sample(frac=sample_size)
sample_ballots = pd.DataFrame(columns=active_ballots.columns)
start_time = time.time()
time_interval = .5
time_total = time_interval
container = []
for i, pindex in enumerate(active_ballots.pindex):
    if pindex in sample:
        sample_ballots.loc[preference] = active_ballots.loc[preference]
    if (time.time() - start_time) > time_total:
        print(f"sampling data for analysis ... {(i + 1) / len(active_ballots):.1%}", end="\r")
        time_total = time_total + time_interval
print("sampling data for analysis ... complete")

sampling data for analysis ... complete
