In [1]:
# Make the plots and tables used in the report.
# Import dependencies and define basic parameters.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
from pathlib import Path
from os import listdir, path, makedirs
# Apply seaborn's "darkgrid" style to the plots produced by this notebook.
sns.set(style="darkgrid")
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

# Resolution (dots per inch) used when figures are written with savefig.
mpl.rcParams["savefig.dpi"] = 200
# Input directories, all resolved relative to the notebook's working directory
# (two levels up).  Each is a pathlib.Path so it can be extended with `/`.
calibrated_base_dir = Path("../../new_calibrate_full/")
uncalibrated_base_dir = Path("../../uncalibrated_data/")
calibration_curve_base_dir = Path("../../calibration_curve/")
# Pre-computed result tables (calibrated and uncalibrated variants).
calibrated_table_dir = Path("../../tables/")
uncalibrated_table_dir = Path("../../uncalibrated_tables/")

# Class to use
import sys
sys.path.append("../../python")
from auditing_setup.election_setting import Election

# Output location for the generated report assets.
# The default is the original, machine-specific path; set the AUDIT_REPORT_DIR
# environment variable to write somewhere else without editing the notebook.
import os
report_dir = Path(os.environ.get(
    'AUDIT_REPORT_DIR',
    '/Users/Dovermore/Documents/Research/AustralianElectionAuditing/ReportRelated/school_report/'))
# Sub-directories for figures and for LaTeX tables respectively.
report_figure_dir = report_dir/'figures'
report_tabular_dir = report_dir/'tabular'

In [2]:
# Display order for legend entries.  `get_position` matches a lower-cased
# legend name against these strings as prefixes, so the padding and trailing
# spaces inside each string are significant and must not be normalised.
legend_order = ['bravo    | p=0.7 ',
                'bravo    | p=0.55 ',
                'bravo    | p=0.52 ',
                'bravo    | p=0.51 ',
                'bravo_without_replacement | p=0.7 ',
                'bravo_without_replacement | p=0.55 ',
                'bravo_without_replacement | p=0.52 ',
                'bravo_without_replacement | p=0.51 ',
                'bayesian | a=1 | b=1 ',
                'bayesian_with_replacement | a=1 | b=1 ',
                'truncated_bayesian | a=1 | b=1 ',
                'clip     ', 
                'max_sprt ']

def get_position(name):
    """Return the rank of a legend name within ``legend_order``.

    The name is lower-cased and compared against each entry of
    ``legend_order`` as a prefix; the index of the first matching entry is
    returned.

    Raises:
        AssertionError: if no entry of ``legend_order`` is a prefix of the
            lower-cased name.
    """
    lowered = name.lower()
    for i, legend_pattern in enumerate(legend_order):
        if lowered.startswith(legend_pattern):
            return i
    # Raised explicitly instead of via ``assert False``: assert statements are
    # removed under ``python -O``, which would make this silently return None.
    raise AssertionError(
        "no entry in legend_order matches legend name {!r}".format(name))

def reorder_df(df):
    """Return ``df`` with its rows sorted by ``get_position`` of the index.

    Rows are selected by position rather than by label, so every input row
    appears exactly once in the result even when the index contains duplicate
    labels (label-based ``.loc`` selection would repeat such rows).  Rows with
    equal positions keep their original relative order.

    Raises:
        AssertionError: propagated from ``get_position`` for an index label
            it does not recognise.
    """
    ranks = [get_position(label) for label in df.index]
    # sorted() is stable, so ties keep their original relative order.
    row_order = sorted(range(len(ranks)), key=ranks.__getitem__)
    return df.iloc[row_order]

# Default matplotlib figure size for figures created after this point.
mpl.rcParams["figure.figsize"] = [8, 6]

def read_csv(file_path, reorder=True):
    """Load a result table whose rows are keyed by a ``legend`` column.

    Column labels that can be parsed by ``float`` are converted to floats;
    all other labels are kept as read.  When ``reorder`` is true the rows are
    re-ordered with ``reorder_df`` (keyed on ``legend``).  The returned frame
    has ``legend`` as a regular column again.
    """
    def _numeric_or_original(label):
        # Fall back to the raw label for anything float() rejects.
        try:
            return float(label)
        except Exception:
            return label

    table = pd.read_csv(file_path)
    table.columns = [_numeric_or_original(label) for label in table.columns]
    table = table.set_index('legend')
    if reorder:
        table = reorder_df(table)
    return table.reset_index()

def get_num_cols(df):
    """Return, as floats, the column labels of ``df`` that ``float`` accepts.

    Labels that cannot be converted are skipped; the order of the remaining
    labels follows ``df.columns``.
    """
    numeric_labels = []
    for label in df.columns:
        try:
            value = float(label)
        except Exception:
            # Not a numeric label (e.g. a name column) - ignore it.
            continue
        numeric_labels.append(value)
    return numeric_labels

def format_legend(ax=None, legend=None, how=0):
    """Rewrite the label texts of a legend in place.

    Args:
        ax: object whose ``legend_`` attribute holds the legend to format.
            Takes precedence over ``legend`` when given.
        legend: legend object to format when ``ax`` is None.
        how: formatting mode.
            0 - replace " | " with ", ", remove all spaces, then drop
                everything from the last comma onwards.
            1 - drop everything from the last comma onwards, then replace
                underscores with spaces.
            Any other value leaves the texts unchanged.

    Returns:
        The legend that was processed, or None when neither ``ax`` nor
        ``legend`` is given or when ``ax`` currently has no legend.
    """
    if ax is not None:
        legend_ = ax.legend_
    elif legend is not None:
        legend_ = legend
    else:
        return None

    if legend_ is None:
        # The axes has no legend attached; there is nothing to format.
        return None

    for text in legend_.texts:
        raw = text.get_text()
        if how == 0:
            text.set_text(raw.replace(" | ", ", ").replace(" ", "").rsplit(",", 1)[0])
        elif how == 1:
            text.set_text(raw.rsplit(",", 1)[0].replace("_", " "))
    return legend_

def make_legend(df, ax, outside=False, *args, **kwargs):
    """Attach a legend to ``ax`` with one label per method/parameter pair.

    Labels are ``"<basename_with_type> <parameter>"`` for every distinct
    combination of those two columns in ``df``, in order of first appearance.
    With ``outside=True`` the legend is additionally anchored at
    ``bbox_to_anchor=(1.05, 1)`` with ``loc=2`` and ``borderaxespad=0.``.
    Extra positional and keyword arguments are forwarded to ``ax.legend``.
    """
    distinct_pairs = df[['basename_with_type', 'parameter']].drop_duplicates()
    labels = [
        basename+" "+parameter
        for basename, parameter in zip(distinct_pairs['basename_with_type'],
                                       distinct_pairs['parameter'])
    ]
    if not outside:
        ax.legend(*args, labels=labels, **kwargs)
        return
    ax.legend(*args, labels=labels,
              bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.,
              **kwargs)

In [3]:
def pivot_widetable(df, names, groupers, *args, **kwargs):
    """Melt ``df`` to long form and add one derived column per grouper.

    ``df.melt`` is called with the extra positional/keyword arguments.  Then,
    for each ``(name, grouper)`` pair, a column ``name`` is added whose values
    are ``grouper`` applied to each entry of the melted ``variable`` column.
    Pairs beyond the shorter of ``names`` / ``groupers`` are ignored.
    """
    long_table = df.melt(*args, **kwargs)
    for column, grouper in zip(names, groupers):
        long_table[column] = list(map(grouper, long_table['variable']))
    return long_table

# Names of the derived columns, listed in the same order as the functions in
# the `groupers` list defined further down.
# The commas matter: without them Python concatenates the adjacent string
# literals into a single element.
names = [
    'type',
    'basename',
    'basename_with_type',
    'parameter',
    'parameter_with_type',
]

# Lookup tables keyed by the raw method prefix of a legend string, i.e. the
# text before the first '|' exactly as `group_basename` / `group_type` /
# `group_basename_with_type` extract it.  The padding and trailing spaces in
# the keys are therefore significant.

# Raw prefix -> display name of the audit method.
# NOTE(review): 'clip     ' maps to 'ClipAudit' here but to 'Clip' in
# basename_map_with_type below - confirm which label is intended.
basename_map = {'bayesian ': 'Bayesian',
                'max_sprt ': 'MaxBRAVO',
                'bravo    ': 'BRAVO',
                'clip     ': 'ClipAudit',
                'bravo_without_replacement ': 'BRAVO',
                'truncated_bayesian ': 'MartInf',
                'bayesian_with_replacement ': 'Bayesian'}

# Raw prefix -> variant/type label ('' when the method has none).
type_map = {'bayesian ': 'BetaBinomial',
            'max_sprt ': '',
            'bravo    ': 'Binomial',
            'clip     ': '',
            'bravo_without_replacement ': 'Hypergeometric',
            'truncated_bayesian ': '',
            'bayesian_with_replacement ': 'Beta'}

# Raw prefix -> display name with the type label in parentheses where present.
basename_map_with_type = {'bayesian ': 'Bayesian (BetaBinomial)',
                          'max_sprt ': 'MaxBRAVO',
                          'bravo    ': 'BRAVO (Binomial)',
                          'clip     ': 'Clip',
                          'bravo_without_replacement ': 'BRAVO (Hypergeometric)',
                          'truncated_bayesian ': 'MartInf',
                          'bayesian_with_replacement ': 'Bayesian (Beta)'}

def group_basename(name):
    """Return the ``basename_map`` entry for a legend string.

    The lookup key is the text before the first '|' in ``name`` (the whole
    string when it contains no '|'), padding spaces included.  Raises
    KeyError when that key is not in ``basename_map``.
    """
    prefix, _sep, _rest = name.partition('|')
    return basename_map[prefix]

def group_type(name):
    """Return the ``type_map`` entry for a legend string.

    The lookup key is the text before the first '|' in ``name`` (the whole
    string when it contains no '|'), padding spaces included.  Raises
    KeyError when that key is not in ``type_map``.
    """
    prefix, _sep, _rest = name.partition('|')
    return type_map[prefix]

def group_basename_with_type(name):
    """Return the ``basename_map_with_type`` entry for a legend string.

    The lookup key is the text before the first '|' in ``name`` (the whole
    string when it contains no '|'), padding spaces included.  Raises
    KeyError when that key is not in ``basename_map_with_type``.
    """
    prefix, _sep, _rest = name.partition('|')
    return basename_map_with_type[prefix]

def group_parameter(name):
    """Extract the parameter part of a '|'-separated legend string.

    The first field (before the first '|') and the last field (after the
    last '|') are discarded; the fields in between are joined with ',' and
    all spaces are removed.  A legend with fewer than three fields - including
    one with no '|' at all - yields the empty string.

    Example: ``'bayesian | a=1 | b=1 | 0.9'`` -> ``'a=1,b=1'``.
    """
    # partition/rpartition never raise, so a legend without '|' simply has an
    # empty remainder instead of failing on tuple unpacking.
    _prefix, _sep, remainder = name.partition('|')
    middle = remainder.rpartition('|')[0]
    return middle.replace('|', ',').replace(' ', '')

def group_parameter_with_type(name):
    """Return ``"<type> <parameters>"`` for a '|'-separated legend string.

    ``<type>`` is ``group_type`` applied to the first field.  ``<parameters>``
    is built from the fields strictly between the first and the last one,
    joined with ',' and with all spaces removed; it is empty when the legend
    has fewer than three fields, including when it has no '|' at all.

    Raises:
        KeyError: propagated from ``group_type`` for an unknown first field.
    """
    # partition never raises, so a legend without '|' yields an empty
    # remainder instead of failing on tuple unpacking.
    prefix, _sep, remainder = name.partition('|')
    type_label = group_type(prefix)
    middle = remainder.rpartition('|')[0]
    parameters = middle.replace('|', ',').replace(' ', '')
    return type_label+' '+parameters
    
# Functions that each derive one value from a raw legend string.  The order
# mirrors the `names` list defined above; `pivot_widetable` pairs names and
# groupers positionally when both are passed to it.
groupers = [
    group_type,
    group_basename,
    group_basename_with_type,
    group_parameter,
    group_parameter_with_type
]

def prep_legend(df):
    """Replace raw legend strings with display labels and index by them.

    Each value of the ``legend`` column becomes
    ``"<group_basename_with_type(raw)> <group_parameter(raw)>"``, and that
    column is then made the index.  ``df`` is modified in place and the same
    object is returned.
    """
    raw_legends = df['legend']
    parameter_parts = [group_parameter(raw) for raw in raw_legends]
    method_parts = [group_basename_with_type(raw) for raw in raw_legends]
    df['legend'] = [method + ' ' + parameter
                    for method, parameter in zip(method_parts, parameter_parts)]
    df.set_index('legend', inplace=True)
    return df

In [4]:
# 1. Basic 500/500 setting, without replacement: export the report tables.
# For each calibrated table, keep the columns >= 0.5 (then every second one
# of those), round to 2 decimals and write the result as a LaTeX file.

# data
election = Election(500, 500, 0.5, 1, False)

# (input CSV name, output file-name template filled with election.n)
table_specs = [
    ('power.csv', 'power{}_tabular.tex'),
    ('unconditional_mean.csv', 'unconditional_mean{}_tabular.tex'),
    ('unconditional_mean_with_recount.csv', 'unconditional_mean_with_recount{}_tabular.tex'),
]

for data_type, tex_template in table_specs:
    data_path = calibrated_table_dir/str(election)/data_type
    data = prep_legend(read_csv(data_path))
    filter_cols = [col for col in data.columns if col >= 0.5][::2]
    data.index.name = 'Audit Method'
    data[filter_cols].round(2).to_latex(report_tabular_dir/tex_template.format(election.n))

In [8]:
# Reload the calibrated power table so it can be inspected.
# NOTE(review): this uses whatever `election` was left in the kernel by an
# earlier cell (execution counts jump from 4 to 8) - confirm which setting is
# intended before relying on the output.
data_type = 'power.csv'
data_path = calibrated_table_dir/str(election)/data_type
data = prep_legend(read_csv(data_path))

In [9]:
# Display the table currently bound to `data`.
data

Unnamed: 0_level_0,0.45,0.48,0.50,0.51,0.52,0.53,0.54,0.55,0.56,0.58,0.60,0.62,0.64,0.66,0.68,0.70
legend,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
BRAVO (Binomial) p=0.7,0.011441,0.027498,0.049158,0.065682,0.087737,0.117176,0.156448,0.208667,0.277371,0.473179,0.717955,0.907886,0.984628,0.998871,0.999968,1.0
BRAVO (Binomial) p=0.55,0.000474,0.00873,0.049619,0.104544,0.198022,0.333881,0.500048,0.669167,0.811122,0.963161,0.996699,0.999876,0.999998,1.0,1.0,1.0
BRAVO (Binomial) p=0.51,0.000106,0.006112,0.049698,0.113518,0.222761,0.377037,0.555788,0.724903,0.854988,0.976675,0.998333,0.999951,0.999999,1.0,1.0,1.0
BRAVO (Hypergeometric) p=0.7,0.011454,0.027589,0.049428,0.066127,0.088448,0.118284,0.15813,0.211142,0.280865,0.478841,0.723797,0.911038,0.985395,0.998946,0.999971,1.0
BRAVO (Hypergeometric) p=0.55,0.000376,0.008146,0.049267,0.105437,0.201259,0.340012,0.50837,0.677834,0.818208,0.965515,0.997006,0.999891,0.999998,1.0,1.0,1.0
BRAVO (Hypergeometric) p=0.51,8.8e-05,0.005885,0.049243,0.113124,0.222623,0.377255,0.556278,0.725456,0.855423,0.976792,0.998345,0.999952,0.999999,1.0,1.0,1.0
"Bayesian (BetaBinomial) a=1,b=1",0.006966,0.019542,0.04957,0.085372,0.148333,0.248457,0.3871,0.550075,0.709983,0.924982,0.990756,0.999511,0.99999,1.0,1.0,1.0
"Bayesian (Beta) a=1,b=1",0.007796,0.020725,0.04901,0.081546,0.138528,0.230278,0.360496,0.51872,0.68011,0.910784,0.987989,0.999299,0.999984,1.0,1.0,1.0
"MartInf a=1,b=1",0.015602,0.029352,0.04985,0.069736,0.103565,0.160802,0.251353,0.378826,0.533162,0.822774,0.965504,0.99697,0.999892,0.999999,1.0,1.0
Clip,0.007796,0.020725,0.04901,0.081546,0.138528,0.230278,0.360496,0.51872,0.68011,0.910784,0.987989,0.999299,0.999984,1.0,1.0,1.0


In [10]:
# 1. Basic 5000/500 setting, without replacement: export the report tables.
# For each calibrated table, keep the columns >= 0.5 (then every second one
# of those), round to 2 decimals and write the result as a LaTeX file.

# data
election = Election(5000, 500, 0.5, 1, False)

# (input CSV name, output file-name template filled with election.n)
table_specs = [
    ('power.csv', 'power{}_tabular.tex'),
    ('unconditional_mean.csv', 'unconditional_mean{}_tabular.tex'),
    ('unconditional_mean_with_recount.csv', 'unconditional_mean_with_recount{}_tabular.tex'),
]

for data_type, tex_template in table_specs:
    data_path = calibrated_table_dir/str(election)/data_type
    data = prep_legend(read_csv(data_path))
    filter_cols = [col for col in data.columns if col >= 0.5][::2]
    data.index.name = 'Audit Method'
    data[filter_cols].round(2).to_latex(report_tabular_dir/tex_template.format(election.n))

In [11]:
# 1. Basic 10000/1000 setting, without replacement: export the report tables.
# For each calibrated table, keep the columns >= 0.5 (then every second one
# of those), round to 2 decimals and write the result as a LaTeX file.

# data
election = Election(10000, 1000, 0.5, 1, False)

# (input CSV name, output file-name template filled with election.n)
table_specs = [
    ('power.csv', 'power{}_tabular.tex'),
    ('unconditional_mean.csv', 'unconditional_mean{}_tabular.tex'),
    ('unconditional_mean_with_recount.csv', 'unconditional_mean_with_recount{}_tabular.tex'),
]

for data_type, tex_template in table_specs:
    data_path = calibrated_table_dir/str(election)/data_type
    data = prep_legend(read_csv(data_path))
    filter_cols = [col for col in data.columns if col >= 0.5][::2]
    data.index.name = 'Audit Method'
    data[filter_cols].round(2).to_latex(report_tabular_dir/tex_template.format(election.n))

In [12]:
# 1. Basic 20000/2000 setting, without replacement: export the report tables.
# For each calibrated table, keep the columns >= 0.5 (then every second one
# of those), round to 2 decimals and write the result as a LaTeX file.

# data
election = Election(20000, 2000, 0.5, 1, False)

# (input CSV name, output file-name template filled with election.n)
table_specs = [
    ('power.csv', 'power{}_tabular.tex'),
    ('unconditional_mean.csv', 'unconditional_mean{}_tabular.tex'),
    ('unconditional_mean_with_recount.csv', 'unconditional_mean_with_recount{}_tabular.tex'),
]

for data_type, tex_template in table_specs:
    data_path = calibrated_table_dir/str(election)/data_type
    data = prep_legend(read_csv(data_path))
    filter_cols = [col for col in data.columns if col >= 0.5][::2]
    data.index.name = 'Audit Method'
    data[filter_cols].round(2).to_latex(report_tabular_dir/tex_template.format(election.n))