In [21]:
# We use M to guess the TCP finalists, but M is fairly conservative about the flow of preferences to the non-leading candidate because of the way it calculates flows.
# Hence, here we actually calculate the expected flow from each of the parties to the final two candidates, using direct TCP flow data from the election.


import pandas as pd
import numpy as np

# Load the two-party-preferred (TPP) and two-candidate-preferred (TCP) flow
# downloads. header=1 takes the column names from the second line of each file.
tpp_flows, tcp_flows = (
    pd.read_csv(csv_path, header=1)
    for csv_path in (
        "raw/HouseTppFlowByStateByPartyDownload-27966.csv",
        "raw/HouseTcpFlowByStateByPartyDownload-27966.csv",
    )
)

In [22]:
# Parties that keep their own label in the TPP flow table; all others become 'OTH'.
modeled_parties = ['GRN', 'ONP', 'IND']


def handle_parties(row):
    """Collapse a row's ``PartyAb`` value onto the modelled party groups.

    Returns the abbreviation unchanged when it is listed in
    ``modeled_parties``, ``'OTH'`` for any other non-missing value, and NaN
    when the value is missing.
    """
    party = row['PartyAb']
    if pd.isna(party):
        return np.nan
    return party if party in modeled_parties else 'OTH'
# Relabel each row's party through handle_parties (missing values stay NaN).
tpp_flows['PartyAb'] = tpp_flows.apply(handle_parties, axis=1)

# Drop rows with any missing value, keep the party label and the two
# transfer-vote columns under short names, total the votes per party, and
# convert each party's totals into shares that sum to one across LNP and ALP.
tpp_flows = (
    tpp_flows
    .dropna(axis=0)
    .loc[:, [
        'PartyAb',
        'Liberal/National Coalition Transfer Votes',
        'Australian Labor Party Transfer Votes',
    ]]
    .rename(columns={
        "Liberal/National Coalition Transfer Votes": "LNP",
        "Australian Labor Party Transfer Votes": "ALP",
    })
    .groupby("PartyAb")
    .sum()
    .apply(lambda party_totals: party_totals / party_totals.sum(), axis=1)
)

In [23]:
modeled_parties = ['ALP', 'LNP', "GRN", "ONP", "IND", "OTH"]

# Raw party abbreviation -> modelled party group.
# Any abbreviation not listed here is treated as 'OTH' by handle_parties.
party_mappers = {
    'ON': 'ONP',
    'ALP': 'ALP',
    'IND': 'IND',
    'GRN': 'GRN',
    'GVIC': 'GRN',
    'LP': 'LNP',
    'NP': 'LNP',
    'CLP': 'LNP',
    'LNP': 'LNP',
    'XEN': 'IND',
    'KAP': 'IND',
}


def handle_parties(row, colname):
    """Translate the party abbreviation in ``row[colname]`` to its modelled group.

    Returns the ``party_mappers`` entry when one exists, ``'OTH'`` for any
    other non-missing value, and NaN when the value is missing.
    """
    party = row[colname]
    if pd.isna(party):
        return np.nan
    return party_mappers.get(party, 'OTH')
    
# Map the source and destination party columns onto the modelled groups;
# the column name is forwarded to handle_parties as its `colname` argument.
tcp_flows['FromPartyGroupAb'] = tcp_flows.apply(handle_parties, axis=1, colname='FromPartyGroupAb')
tcp_flows['ToPartyDisplayAb'] = tcp_flows.apply(handle_parties, axis=1, colname='ToPartyDisplayAb')

# Drop rows with any missing value, then keep only the columns needed for the flow matrix.
tcp_flows = (
    tcp_flows
    .dropna(axis=0)
    .loc[:, ['FromPartyGroupAb', 'ToPartyDisplayAb', 'TransferVotes']]
)

In [33]:
# Flow matrix from the observed TCP transfers: rows are the party the votes
# come from, columns the party they go to, values the summed transfer votes.
# fill_value=0 matters: without it, (from, to) pairs that never occur stay NaN,
# survive the normalisation below and turn every later `M @ vector` product
# into NaN (NaN * 0 is NaN).
M = tcp_flows.pivot_table(
    index=['FromPartyGroupAb'],
    columns='ToPartyDisplayAb',
    values='TransferVotes',
    aggfunc='sum',
    fill_value=0,
)

# Force the 'ONP' destination column to zero (this also creates it when absent).
M['ONP'] = 0

# Normalise each row to shares that sum to one. A row whose total is zero
# would divide 0/0, so it is kept as an all-zero row instead of NaN.
M = M.div(M.sum(axis=1), axis=0).fillna(0)

In [None]:
# Example vote-share vector used to exercise TCP() below.
# NOTE(review): presumably ordered to match the rows/columns of M
# (ONP, ALP, GRN, LNP, IND, OTH) — TODO confirm. The values sum to ~0.95, not 1.
y = np.array(
    [
        0.2906573286580627,
        0.2912008219092291,
        0.000267196539583,
        0.3460137277414765,
        0.0222447279860898,
        0.0,
    ]
)

In [43]:
# Set the 'OTH' destination column to zero (the column is created if it does
# not exist yet), so no share is recorded as flowing to 'OTH'.
# NOTE(review): M's rows are not re-normalised after this assignment — confirm
# that is intended.
M['OTH'] = 0

In [46]:
# Put rows and columns into the same fixed party order, then drop the labels
# so M becomes a plain NumPy array. This rebinds M from a DataFrame to an
# ndarray, so the cell cannot be run a second time without rebuilding M first.
M = M.loc[
    ["ONP", "ALP", "GRN", "LNP", "IND", "OTH"],
    ["ONP", "ALP", "GRN", "LNP", "IND", "OTH"],
].to_numpy()

In [47]:
# Run the elimination count: TCP keeps eliminating the smallest positive entry
# of y (redistributing through M) until at most two entries remain positive.
# NOTE(review): the recorded output of this cell is a matmul shape mismatch
# (7 vs 6) and TCP is defined in a later cell, which looks like stale kernel
# state — TODO re-run from a fresh kernel.
TCP(y, M)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 7 is different from 6)

In [35]:
def eliminate_idx(idx, y: np.ndarray, M: np.ndarray, tol=1e-4, max_iter_sub=1000):
    """Eliminate one candidate and redistribute their vote share through ``M``.

    The share held by ``idx`` is handed out in proportion to ``M @ e_idx``
    (column ``idx`` of ``M``). Whatever lands on a candidate that is already
    out of the count (an entry of ``y`` equal to zero) is passed on again, one
    such candidate per pass, until none of them holds any share or more than
    ``max_iter_sub`` passes have been made. After every pass, shares below
    ``tol`` are set to zero and the vector is rescaled to sum to one.

    Parameters
    ----------
    idx : int or str
        Position of the candidate to eliminate. A string is looked up in the
        hard-coded order ``['ALP', 'GRN', 'IND', 'LNP', 'ONP', 'OTH', 'TEAL']``.
    y : np.ndarray or pd.Series
        Vote shares; zero entries are treated as already eliminated. The
        input is not modified.
    M : np.ndarray
        Square flow matrix with one row/column per entry of ``y``.
    tol : float
        Shares strictly below this value are zeroed after each pass.
    max_iter_sub : int
        Upper bound on the number of redistribution passes.

    Returns
    -------
    np.ndarray or pd.Series
        Updated shares; a Series (with ``y``'s index) when ``y`` was a Series.

    Raises
    ------
    ValueError
        If ``idx`` is a string that is not in the hard-coded party list.
    """
    if isinstance(idx, str):
        # NOTE(review): this order (7 parties, incl. TEAL) differs from the
        # ONP/ALP/GRN/LNP/IND/OTH order the notebook uses when it builds M —
        # string lookups only make sense for vectors in this exact order.
        idx = ['ALP', 'GRN', 'IND', 'LNP', 'ONP', 'OTH', 'TEAL'].index(idx)

    is_series = isinstance(y, pd.Series)
    yhat = y.to_numpy().copy() if is_series else y.copy()

    # Candidates with no votes are already out; the requested one joins them.
    eliminated = (yhat == 0)
    eliminated[idx] = True

    eliminating_idx = idx
    counter = 0
    while np.any(yhat[eliminated] > 0):
        # Unit vector selecting the candidate being eliminated in this pass.
        flow = np.zeros_like(yhat)
        flow[eliminating_idx] = 1
        # NOTE(review): M @ e_k is COLUMN k of M, i.e. M[i, k] is read as the
        # share of k's votes flowing to i. The notebook builds M with the
        # source party on the rows (row-normalised) — confirm the orientation.
        flow = M @ flow

        # Hand out the votes of the candidate eliminated in THIS pass. The
        # earlier code used yhat[idx] here, which is already zero from the
        # second pass on, so votes parked on other eliminated candidates were
        # dropped and renormalised instead of being redistributed through M.
        yhat = yhat + flow * yhat[eliminating_idx]
        yhat[eliminating_idx] = 0

        yhat[yhat < tol] = 0
        yhat = yhat / np.nansum(yhat)

        # Any eliminated candidate that received votes must pass them on next.
        pending = np.where(eliminated & (yhat.reshape(-1) > 0))[0]
        if len(pending) == 0:
            break
        eliminating_idx = pending[0]

        counter += 1
        if counter > max_iter_sub:
            break

    if is_series:
        return pd.Series(yhat, index=y.index)
    return yhat

def eliminate_indices(indices, y: np.array, M: np.array, tol=1e-4, max_iter_sub=1000):
    """Eliminate several candidates one after another, in the order given.

    Starts from a copy of ``y`` and feeds the result of each
    ``eliminate_idx`` call into the next one; ``M``, ``tol`` and
    ``max_iter_sub`` are passed through unchanged. With an empty ``indices``
    the copy of ``y`` is returned as is.
    """
    remaining = y.copy()
    for candidate in indices:
        remaining = eliminate_idx(candidate, remaining, M, tol, max_iter_sub)
    return remaining

def eliminate_smallest(y: np.array, M: np.array, tol=1e-4, max_iter_sub=1000):
    """Eliminate the candidate holding the smallest strictly positive share.

    On ties the first matching position is used. The remaining arguments are
    forwarded to ``eliminate_idx``, whose result is returned.
    """
    smallest_positive = np.min(y[y > 0])
    matching_positions = np.where(y == smallest_positive)[0]
    return eliminate_idx(matching_positions[0], y, M, tol, max_iter_sub)

def TCP(y: np.array, M: np.array, tol=1e-4, max_iter_sub=1000):
    """Reduce ``y`` to a two-candidate count.

    Works on a copy of ``y`` and repeatedly calls ``eliminate_smallest``
    until no more than two entries are strictly positive. If ``y`` already
    has two or fewer positive entries, the copy is returned untouched.
    """
    yhat = y.copy()
    while sum(yhat > 0) > 2:
        yhat = eliminate_smallest(yhat, M, tol, max_iter_sub)
    return yhat