# Preferential Voting Neural Net: ***A First Draft***

Set up the basic imports and select the device used for training: the GPU when one is available, otherwise the CPU.

In [1]:
import torch
# Use the first CUDA device when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

  from .autonotebook import tqdm as notebook_tqdm


Handling the data

In [2]:
import torchvision
from torchvision import datasets, transforms
# MNIST loading scaffold; the flag is False, so the branch below is skipped.
not_handled_pref_data = False
if not_handled_pref_data:
    train_dataset = datasets.MNIST(
        root='./data',
        train=True,
        transform=transforms.ToTensor(),
        download=True,
    )
    test_dataset = datasets.MNIST(
        root='./data',
        train=False,
        transform=transforms.ToTensor(),
        download=True,
    )

    # Shuffled mini-batches of 10 samples for both splits.
    trainset = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=10,
        shuffle=True,
    )
    testset = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=10,
        shuffle=True,
    )

Getting primary and two-candidate-preferred (2CP) vote data for each polling booth (currently just NSW)

Extracting data from the csv file and dumping into a list

In [3]:
import csv

def getrows(url):
    """Read a CSV file and return every record in it.

    Args:
        url: Path of the CSV file; it is passed straight to ``open``.

    Returns:
        A list with one entry per CSV record (header lines included), each entry
        being the list of column strings produced by ``csv.reader``.
    """
    # newline='' leaves line-ending handling to the csv module, which matters
    # for quoted fields that contain line breaks.
    with open(url, newline='') as csv_file:
        return list(csv.reader(csv_file, delimiter=',', quotechar='"'))

Parsing the rows so that each row becomes [PollingID, PartyIdentifier, Votes]

In [4]:
# Convert rows of the csv into important data
# From CSV Format
def parseData(rows):
    """Reduce polling-place CSV rows to ``[PollingID, PartyIdentifier, Votes]`` lists.

    The first two rows are treated as header lines and dropped. The party
    identifier is picked in this order:

    1. the party abbreviation column, when it is not empty;
    2. ``"INFORMAL"`` when the party name column is exactly ``"Informal"``;
    3. the party name with all whitespace removed, when it is not empty;
    4. the candidate ID column.

    Args:
        rows: Sequence of CSV rows, each a list of column values.

    Returns:
        A list of ``[polling place ID, party identifier, votes]`` lists. Values
        are kept exactly as read (no numeric conversion). Completely empty rows,
        such as blank lines in the file, are skipped.
    """
    # Column positions within a row.
    POLLPLACEID = 3
    CANDID = 5
    PARTYAB = 11
    PARTYNAME = 12
    VOTES = 13
    MISSING = (None, '')

    parsedData = []
    for row in rows[2:]:  # Slice off the two overhead lines
        if not row:
            # csv.reader yields [] for a blank line; there is nothing to parse.
            continue

        if row[PARTYAB] not in MISSING:
            party = row[PARTYAB]
        elif row[PARTYNAME] == "Informal":
            party = "INFORMAL"
        elif row[PARTYNAME] not in MISSING:
            # ''.join(... .split()) strips every whitespace character.
            party = ''.join(row[PARTYNAME].split())
        else:
            party = row[CANDID]

        ## data = [PollingID, PartyIdentifier, Votes]
        parsedData.append([row[POLLPLACEID], party, row[VOTES]])
    return parsedData

Reformatting the data into a list where each element represents the primary/2CP votes at a given pollingPlace

In [5]:
def combineData(parsedData):
    """Merge consecutive parsed rows that share a polling ID into one dict each.

    Only *adjacent* rows with the same ID are merged, so the input is expected
    to be grouped by polling ID already. When the same party identifier appears
    more than once within a group (for example several candidates that share
    one identifier), their votes are added together rather than the later row
    replacing the earlier one.

    Args:
        parsedData: List of ``[polling ID, party identifier, votes]`` entries;
            the ID and the votes must be convertible with ``int``.

    Returns:
        A list of dicts of the form ``{'pollingID': int, <party>: int, ...}``,
        one per run of rows sharing a polling ID, in input order.

    Raises:
        IndexError: If an entry has fewer than three elements.
        ValueError: If the ID or the vote count is not an integer string.
    """
    combinedData = []
    for data_entry in parsedData:
        pollingplace = int(data_entry[0])
        party = data_entry[1]
        votes = int(data_entry[2])

        # Explicit emptiness check instead of catching IndexError on [-1], so a
        # malformed entry is no longer mistaken for "first row of the data".
        if combinedData and combinedData[-1]['pollingID'] == pollingplace:
            current = combinedData[-1]
            current[party] = current.get(party, 0) + votes
        else:
            combinedData.append({'pollingID': pollingplace, party: votes})
    return combinedData

In [6]:
def try0(x, key):
    """Look up ``key`` in ``x``, returning 0 when the lookup raises ``KeyError``."""
    try:
        value = x[key]
    except KeyError:
        value = 0
    return value

In [7]:
def convert(combinedData, parties=("LP", "NP", "ALP", "GRN", "ON", "UAPP", "AJP", "IND")):
    """Lay each polling-place dict out as a fixed-order list of vote counts.

    Args:
        combinedData: List of dicts mapping party identifiers to vote counts,
            plus a ``'pollingID'`` entry.
        parties: Party identifiers that get their own column, in column order.
            The default reproduces the original hard-coded layout.

    Returns:
        One list per input dict: a count for every entry of ``parties`` (0 when
        absent), then the summed votes of every other party, then the
        ``"INFORMAL"`` count (0 when absent).
    """
    # Keys that must not be counted as "others". Deriving this from `parties`
    # keeps the exclusion list and the column list in sync.
    not_other = set(parties) | {"pollingID", "INFORMAL"}

    Xs = []
    for x in combinedData:
        others = sum(value for key, value in x.items() if key not in not_other)
        X = [x.get(party, 0) for party in parties]
        X.append(others)
        X.append(x.get("INFORMAL", 0))
        Xs.append(X)
    return Xs

In [8]:
def normalise(Xs):
    """Scale every row so that its entries sum to 1.

    Args:
        Xs: List of numeric rows.

    Returns:
        A new list of rows. A row whose sum is non-zero is replaced by a new
        list of ``value / sum``; a row whose sum is 0 is passed through as the
        same object, since it cannot be scaled.
    """
    newXs = []
    for x in Xs:
        total = sum(x)  # computed once per row rather than once per element
        if total != 0:
            newXs.append([value / total for value in x])
        else:
            newXs.append(x)
    return newXs

In [9]:
# Primary-vote inputs: read the CSV, reduce each row to [ID, party, votes],
# merge the rows of each polling place, lay the counts out in fixed columns and
# normalise each row before converting to a tensor.
rows = getrows('PrimaryPollingPlace.csv')
parsedData = parseData(rows)
combinedData = combineData(parsedData)
Xs = convert(combinedData)
Xs = normalise(Xs)
Xs = torch.tensor(Xs) # Convert to tensor


In [10]:
# Show the first input row as a sanity check.
Xs[0]

tensor([0.3016, 0.0000, 0.3807, 0.1668, 0.0246, 0.0278, 0.0000, 0.0642, 0.0000,
        0.0342])

In [11]:
# Targets: the same pipeline as for the primary votes, applied to the 2CP file.
# NOTE: `rows`, `parsedData` and `combinedData` are rebound here, so they no
# longer refer to the primary-vote data after this cell has run.
# NOTE(review): row i of `ys` is later paired with row i of `Xs`; this assumes
# both files list the same polling places in the same order - TODO confirm.
rows = getrows('2CPPollingPlace.csv')
parsedData = parseData(rows)
combinedData = combineData(parsedData)
ys = convert(combinedData)
ys = normalise(ys)
ys = torch.tensor(ys) # Convert to tensor


Balancing:

In [12]:
import numpy as np
# Optional class balancing, switched off by the flag below.
balancing = False
if balancing:
    # Count how many rows of `ys` have their largest value in each column.
    winners = {column: 0 for column in range(10)}
    for they in ys:
        y = they.numpy()
        winner = np.where(y == max(y))[0][0]
        winners[winner] += 1

    # Replace each count by round(total / count); a count of 0 stays 0.
    total = sum(winners.values())
    for key in winners.keys():
        winners[key] = round(total / winners[key]) if winners[key] else 0

    # Repeat every (X, y) pair as many times as its winning column prescribes.
    newXs = []
    newys = []
    for index, y in enumerate(ys):
        numpyy = y.numpy()
        winner = np.where(numpyy == max(numpyy))[0][0]
        copies = winners[winner]
        newXs.extend([Xs[index].numpy()] * copies)
        newys.extend([y.numpy()] * copies)

    Xs = torch.tensor(newXs)
    ys = torch.tensor(newys)

In [13]:
# Inputs and targets should have the same number of rows; also show the first pair.
print(len(Xs))
print(len(ys))
print(Xs[0])
print(ys[0])

8479
8479
tensor([0.3016, 0.0000, 0.3807, 0.1668, 0.0246, 0.0278, 0.0000, 0.0642, 0.0000,
        0.0342])
tensor([0.3787, 0.0000, 0.6213, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000])


In [14]:
# One counter per output column, all starting at zero.
winners = dict.fromkeys(range(10), 0)
for they in ys:
    print(they)
    y = they.numpy()
    # First index at which the row reaches its maximum.
    top = max(y)
    winner = np.where(y == top)[0][0]
    winners[winner] += 1
    # Only the first row is inspected; the loop stops after one pass.
    break
print(winners)

tensor([0.3787, 0.0000, 0.6213, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000])
{0: 0, 1: 0, 2: 1, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0}


In [15]:
# Fraction of the rows held out at the end of the data set.
VAL_PCT = 0.1
val_size = int(len(Xs)*VAL_PCT)

# Split on an explicit index. Slicing with -val_size breaks when val_size is 0,
# because Xs[:-0] is empty, which would leave nothing to train on.
split_at = len(Xs) - val_size

train_X = Xs[:split_at]
train_y = ys[:split_at]

test_X = Xs[split_at:]
test_y = ys[split_at:]

print(len(train_X))
print(len(test_X))
print(len(train_y))
print(len(test_y))

7632
847
7632
847


Creating the Neural Net

In [16]:
import torch.nn as nn
import torch.nn.functional as F

In [17]:
class Net(nn.Module):
    """Fully connected network: three hidden layers of 64 units with ReLU,
    followed by a softmax over the output features.

    Args:
        in_features: Number of input features. When omitted, it is taken from
            the length of the first row of the module-level ``Xs``.
        out_features: Number of output features. When omitted, it is taken from
            the length of the first row of the module-level ``ys``.
    """

    def __init__(self, in_features=None, out_features=None) -> None:
        super().__init__()
        # Fall back to the notebook globals so that a bare Net() keeps working.
        if in_features is None:
            in_features = len(Xs[0])
        if out_features is None:
            out_features = len(ys[0])
        self.fc1 = nn.Linear(in_features, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, out_features)

    def forward(self, x):
        """Return softmax-normalised outputs for ``x``.

        The softmax runs over the last dimension, so both a batch of shape
        ``(batch, in_features)`` and a single sample of shape ``(in_features,)``
        are accepted.
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return F.softmax(x, dim=-1)

In [18]:
# Build the network, move its parameters to the selected device and show its layers.
net= Net().to(device)
print(net)

Net(
  (fc1): Linear(in_features=10, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=64, bias=True)
  (fc4): Linear(in_features=64, out_features=10, bias=True)
)


Creating the optimizer

In [19]:
import torch.optim as optim
from tqdm import tqdm

optimizer = optim.Adam(net.parameters(),lr=0.001)

EPOCHS = 1000
BATCH_SIZE = 100
loss_function = nn.MSELoss()
def train(net):
    """Train ``net`` on ``train_X`` / ``train_y`` for ``EPOCHS`` epochs.

    Batches are taken in order, ``BATCH_SIZE`` rows at a time, and the loss is
    the mean squared error between the network output and the target rows.

    Note: the module-level ``optimizer`` is used, and it was built from the
    parameters of the module-level ``net``; passing a different network here
    would compute gradients for it without updating its weights.

    Args:
        net: The network to run forward and backward passes on.
    """
    # Move the whole training set to the device once, instead of re-sending
    # every batch on every epoch.
    inputs = train_X.view(-1,len(Xs[0])).to(device)
    targets = train_y.to(device)

    for epoch in tqdm(range(EPOCHS)):
        for i in range(0,len(inputs),BATCH_SIZE):
            batch_X = inputs[i:i+BATCH_SIZE]
            batch_y = targets[i:i+BATCH_SIZE]

            net.zero_grad()
            outputs = net(batch_X)
            loss = loss_function(outputs, batch_y)
            loss.backward()
            optimizer.step()
train(net)

100%|██████████| 1000/1000 [03:18<00:00,  5.04it/s]


In [20]:
def printpred(pred):
    """Print the network output for one input row, rounded to 3 decimals.

    Args:
        pred: One input row (list or tensor of numbers) with as many entries as
            the network has input features. It is converted to float32, so
            integer inputs are accepted as well.
    """
    pred = torch.as_tensor(pred, dtype=torch.float32)
    # Inference only: no autograd graph is needed for a prediction.
    with torch.no_grad():
        # Reshape to a batch of one row, then take that single result row.
        atensor = net(pred.view(-1,len(pred)).to(device))[0]
    alist = [round(float(x),3) for x in atensor]
    print(alist)
printpred([0.377,0,0.273,0.272,0.022,0.019,0.02,0.0,0.017,0.021])

[0.448, 0.0, 0.546, 0.006, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


Testing the accuracy

In [21]:
# Convert rows of the csv into important data
# From CSV Format
def parseDataDivision(rows):
    """Reduce division-level CSV rows to ``[DivisionID, PartyIdentifier, Votes]`` lists.

    The first two rows are treated as header lines and dropped. The party
    identifier is picked in this order:

    1. the party abbreviation column, when it is not empty;
    2. ``"INFORMAL"`` when the party name column is exactly ``"Informal"``;
    3. the party name with all whitespace removed, when it is not empty;
    4. the candidate ID column.

    Args:
        rows: Sequence of CSV rows, each a list of column values.

    Returns:
        A list of ``[division ID, party identifier, votes]`` lists. Values are
        kept exactly as read (no numeric conversion). Completely empty rows,
        such as blank lines in the file, are skipped.
    """
    # Column positions within a row.
    DIVISIONID = 1
    CANDID = 3
    PARTYAB = 9
    PARTYNAME = 10
    VOTES = 16
    MISSING = (None, '')

    parsedData = []
    for row in rows[2:]:  # Slice off the two overhead lines
        if not row:
            # csv.reader yields [] for a blank line; there is nothing to parse.
            continue

        if row[PARTYAB] not in MISSING:
            party = row[PARTYAB]
        elif row[PARTYNAME] == "Informal":
            party = "INFORMAL"
        elif row[PARTYNAME] not in MISSING:
            # ''.join(... .split()) strips every whitespace character.
            party = ''.join(row[PARTYNAME].split())
        else:
            party = row[CANDID]

        ## data = [DivisionID, PartyIdentifier, Votes]
        parsedData.append([row[DIVISIONID], party, row[VOTES]])
    return parsedData

In [22]:
# Load in the Division Results From 2022
# NOTE: this cell rebinds `Xs` (now a plain list of division-level rows, no
# longer the polling-place tensor) as well as `rows`, `parsedData`,
# `combinedData` and `ys`; cells above that read these names will see the
# division data if they are re-run afterwards.

rows = getrows('PrimaryDivision.csv')
parsedData = parseDataDivision(rows)
combinedData = combineData(parsedData)
Xs = convert(combinedData)
Xs = normalise(Xs)

# Run the trained network on every division row and bring the result back to the CPU.
preds = net(torch.tensor(Xs).to(device)).to("cpu")

# Actual 2CP results for the same divisions, used as the reference below.
# NOTE(review): preds[i] is compared with ys[i], which assumes both files list
# the divisions in the same order - TODO confirm.
rows = getrows('2CPDivision.csv')
parsedData = parseDataDivision(rows)
combinedData = combineData(parsedData)
ys = convert(combinedData)
ys = normalise(ys)
ys = torch.tensor(ys)



In [23]:
# Show the first combined 2CP division record next to its normalised target row.
print(combinedData[0])
print(ys[0])

{'pollingID': 318, 'ALP': 61935, 'LP': 36459}
tensor([0.3705, 0.0000, 0.6295, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000])


In [24]:
import numpy as np
# Running totals for the three figures printed at the end of the cell.
correct_call = 0
within2pct = 0
correctand2pct = 0
for idx, they in enumerate(ys):
    pred = preds[idx].detach().numpy()
    y = they.numpy()

    # First index holding the largest value, for the actual row and the prediction.
    winner = np.where(y == max(y))[0][0]
    predWinner = np.where(pred == max(pred))[0][0]
    thisIsCorrect = bool(winner == predWinner)
    if thisIsCorrect:
        correct_call += 1

    # "Within 2%" means that no column differs by 0.02 or more.
    diff = np.absolute(pred - y)
    isWithin2pct = (max(diff) < 0.02)
    if isWithin2pct:
        within2pct += 1
        if thisIsCorrect:
            correctand2pct += 1


print(f"Number Of Correct Calls: {correct_call}")
print(f"Number Of Predictions with 2%: {within2pct}")
print(f"Number Of Correct Calls within 2%: {correctand2pct}")

Number Of Correct Calls: 146
Number Of Predictions with 2%: 79
Number Of Correct Calls within 2%: 79


With Balancing:
- Number Of Correct Calls: 138
- Number Of Predictions with 2%: 48
- Number Of Correct Calls within 2%: 47

Without Balancing:
- Number Of Correct Calls: 132
- Number Of Predictions with 2%: 70
- Number Of Correct Calls within 2%: 67