In [1]:
import os
import sys
import logging
import itertools
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
from typing import Dict, Any, List, Generator

In [2]:
# package imports
from vsim import electorate, candidates, simulation, voting_system

In [10]:
# setup helper for generating experiments (basically all sets of params to run through sim)
def generate_experiment_matrix(experiments: Dict[str, List[Any]]):
    return (dict(zip(experiments, x)) for x in itertools.product(*experiments.values()))


experiments = {
    "electorate_size": [10_000, 25_000], # not O(n^2) with KDTree, but still bad
    "clusters": [1, 2, 10, 25],
    "cluster_std": [1, 2, 3],
    "electoral_system": ["majority", "plurality", "approval"],
    "apathy_prob": [0, 0.01, 0.05],
    "candidates": np.arange(2, 10 + 1),
    "issues": np.arange(2, 25, 10, dtype=int),
    "n_approvals_per_voter": [2, 3, 5]
}

records = []
columns = [*experiments.keys(), "unweighted_fairness", "weighted_fairness"]
experiment_matrix = list(generate_experiment_matrix(experiments))

for params in tqdm(experiment_matrix):
    
    # setup objects
    voters = electorate.setup_electorate(**params)
    parties = candidates.setup_candidates(**params, electorate=voters)
    system = voting_system.setup_voting_system(name=params["electoral_system"], **params)
    
    # simulate elections
    sim = simulation.VotingSimulator(electorate=voters, candidates=parties, system=system, log=logging.getLogger())
    
    try:    
        result = sim.run()
        result_record = { 
            "unweighted_fairness": round(result.unweighted_fairness, 3),
            "weighted_fairness": round(result.weighted_fairness, 3),
            **params,
        }
        result_record = {k: v for k, v in result_record.items() if k in columns}
        records.append(result_record)
    except AssertionError as e:
        if e is AssertionError("more votes than candidates"):
            continue

  0%|                              | 4/17496 [00:00<21:15, 13.72it/s]


ZeroDivisionError: Weights sum to zero, can't be normalized

In [9]:
results_df = pd.DataFrame.from_records(records)
results_df

Unnamed: 0,unweighted_fairness,weighted_fairness,electorate_size,clusters,cluster_std,electoral_system,apathy_prob,candidates,issues,n_approvals_per_voter
0,5.732,5.722,10000,1,1,majority,0,2,2,2
1,3.353,3.33,10000,1,1,majority,0,2,2,3
2,7.388,7.388,10000,1,1,majority,0,2,2,5
3,6.018,6.018,10000,1,1,majority,0,2,12,2
4,5.83,5.814,10000,1,1,majority,0,2,12,3
5,6.344,6.344,10000,1,1,majority,0,2,12,5
6,5.836,5.836,10000,1,1,majority,0,2,22,2
7,5.801,5.801,10000,1,1,majority,0,2,22,3
8,6.382,6.376,10000,1,1,majority,0,2,22,5
9,5.307,5.307,10000,1,1,majority,0,3,2,2


##### Store data or read from already performed experiment files

In [7]:
filepath = "data/plurality-vs-majority-vs-ranked-420e25f5-2ead-4b71-8e35-3e2759137939.csv"

In [6]:
# cache data so we don't need to start from scratch
assert not os.path.isfile(filepath), "data export already exists. Use a new uuid to prevent overwriting"
results_df.to_csv(filepath, index=False)
results_df = pd.read_csv(filepath)

AssertionError: data export already exists. Use a new uuid to prevent overwriting

In [8]:
# read from cache 
results_df = pd.read_csv(filepath)

# Investigation of fairness between majority and pluralism

In [9]:
# generate correlation matrices for both different systems
majority = results_df.electoral_system == "majority"
plurality = results_df.electoral_system == "plurality"
approval = results_df.electoral_system == "approval"

In [10]:
majority_subset = results_df[majority].drop(["electoral_system"], axis=1) 
plurality_subset = results_df[plurality].drop(["electoral_system"], axis=1)
approval_subset = results_df[approval].drop(["electoral_system"], axis=1)

In [14]:
approval_subset

Unnamed: 0,electorate_size,clusters,cluster_std,apathy_prob,candidates,issues,n_approvals_per_voter
486,10000,1,1,0.00,2,2,2
487,10000,1,1,0.00,2,12,2
488,10000,1,1,0.00,2,22,2
489,10000,1,1,0.00,3,2,2
490,10000,1,1,0.00,3,2,3
...,...,...,...,...,...,...,...
16627,25000,25,3,0.05,10,12,3
16628,25000,25,3,0.05,10,12,5
16629,25000,25,3,0.05,10,22,2
16630,25000,25,3,0.05,10,22,3


In [13]:
# vector of correlation with fairness for both systems
majority_corr_record = dict(**majority_subset.corrwith(majority_subset.weighted_fairness), **{"electoral_system": "majority"})
plurality_corr_record = dict(**plurality_subset.corrwith(plurality_subset.weighted_fairness), **{"electoral_system": "plurality"})
approval_corr_record = dict(**approval_subset.corrwith(approval_subset.weighted_fairness), **{"electoral_system": "approval"})

AttributeError: 'DataFrame' object has no attribute 'weighted_fairness'

In [None]:
approval_corr_record

In [None]:
# compose to make plotting easier
corr_df = pd.DataFrame.from_records([majority_corr_record, plurality_corr_record, approval_corr_record]).drop(["clusters", "fairness"], axis=1)

In [None]:
corr_df

In [None]:
# convert to long to enable bar plot
long_corr_cols = ["measure", "value", "electoral_system"]
long_corr_df = pd.DataFrame(columns=long_corr_cols)

for system in ["majority", "plurality", "approval"]:
    for measure in corr_df.columns:   
        for value in corr_df[ corr_df.electoral_system == system ][measure]:
            row = pd.DataFrame.from_records([{ "measure": measure, "value": value, "electoral_system": system }])
            long_corr_df = pd.concat([long_corr_df, row])
        
long_corr_df = long_corr_df.reset_index().drop("index", axis=1).drop([4, 9], axis=0)

In [None]:
long_corr_df

In [None]:
# drop the odd rows with non numerical value and plot
cleaned_long_corr = long_corr_df.drop([5, 11], axis=0)

fig, ax1 = plt.subplots(1, 1, figsize=(8, 4), tight_layout=True)
sns.barplot(data=cleaned_long_corr, x="measure", y="value", hue="electoral_system", ax=ax1)
ax1.set_ylim([-0.05, 0.05])
ax1.grid(axis="y")
ax1.set_ylabel("Correlation")
fig.suptitle("Measure correlation with fairness")
plt.show()

## Verify resuts via significance testing

In [None]:
import statsmodels.api as sm

In [None]:
# reusing prior wide representation with OLS model
# see https://www.statsmodels.org/dev/generated/statsmodels.regression.linear_model.OLS.html

In [None]:
# before model, cast electoral_system to binary flag to enable regression
results_df["majority_electoral_system"] = results_df.electoral_system.map(lambda x: int(x == "majority"))

In [None]:
results_df

In [None]:
target = "fairness"
features = [c for c in results_df.columns if c not in ["fairness", "electoral_system"]]
ols = sm.OLS(endog=results_df[target], exog=results_df[features]).fit()

In [None]:
ols.summary()