# fins auditing library

Import the fins package

In [1]:
import fins as fins

In [2]:
import pandas as pd
import numpy as np

Load in relevant data. NewZealand AirBnB data and North Carolina voter registration data. 

In [3]:
nz_data = pd.read_csv('newzealand.csv')

In [4]:
burke_county = pd.read_csv('geocoded_burke_county.csv')

## New Zealand AirBnB data
Prepare pool and subsets. New Zealand data is sorted in decreasing order by reviews_per_month. For the subset we take the 50 listings with the highest reviews_per_month.

Numeric encoding of hosts groups:
    SINGLE : 0
    SMALL : 1
    PROFESSIONAL : 2

In [5]:
nz_pool_items = np.arange(0, len(nz_data))
nz_pool_scores = np.array(nz_data.reviews_per_month)
nz_pool_groups = np.array(nz_data.host)
nz_num_items = len(nz_pool_items)
k_val = 50

nz_subset_items = nz_pool_items[:k_val]
nz_subset_scores = nz_pool_scores[:k_val]
nz_subset_groups = nz_pool_groups[:k_val]

### Relevance Parity

In [6]:
grp_relrate, rp_score = fins.relevance_parity(nz_pool_items, nz_pool_scores, nz_pool_groups, nz_subset_items, nz_subset_scores, nz_subset_groups)


Relevance rate for host groups.

In [7]:
print("SINGLE: ",  grp_relrate[0], "SMALL: ",  grp_relrate[1], "PROFESSIONAL: ",  grp_relrate[2]) #group measures

SINGLE:  13.858374754310928 SMALL:  7.830223045038759 PROFESSIONAL:  6.376492763156077


Relevance parity score.

In [8]:
print("Relevance parity: ",  rp_score)

Relevance parity:  0.4601183671391582


### Qualified Parity 

In [9]:
q_val = 1.77 #qualified reviews_per_mot=nth
grp_qsrate, qp_score = fins.qualififed_parity(nz_pool_items, nz_pool_scores, nz_pool_groups, nz_subset_items, nz_subset_scores, nz_subset_groups, q_val)


Qualified selection rate for host groups.

In [10]:
print("SINGLE: ",  grp_qsrate[0], "SMALL: ",  grp_qsrate[1], "PROFESSIONAL: ",  grp_qsrate[2]) #group measures

SINGLE:  0.004140786749482402 SMALL:  0.0053639846743295016 PROFESSIONAL:  0.0042417815482502655


Qualified parity score.

In [11]:
print("Qualified parity: ",  qp_score)

Qualified parity:  0.7719609582963621


### Qualified Balance

In [12]:
grp_qprop, rb_score = fins.qualified_balance(nz_pool_items, nz_pool_scores, nz_pool_groups, nz_subset_items, nz_subset_scores, nz_subset_groups, q_val)


Qualified proportion of subset for host groups.

In [13]:
print("SINGLE: ",  grp_qprop[0], "SMALL: ",  grp_qprop[1], "PROFESSIONAL: ",  grp_qprop[2]) #group measures

SINGLE:  0.56 SMALL:  0.28 PROFESSIONAL:  0.16


Qualified balance score.

In [14]:
print("Qualified balance: ",  rb_score)

Qualified balance:  0.2857142857142857


### Calibrated Parity

In [15]:
lb_bin = np.asarray([0, 1, 5,20, 100])
ub_bin = np.asarray([1, 5, 20, 100, 200])

bin_group_selectr, cp_val = fins.calibrated_parity(nz_pool_items, nz_pool_scores, nz_pool_groups, nz_subset_items, nz_subset_scores, nz_subset_groups, lb_bin, ub_bin)

In [16]:
nz_subset_scores

array([191.14, 154.  , 102.32,  97.92,  85.5 ,  78.63,  72.58,  71.05,
        53.28,  47.7 ,  47.  ,  42.66,  42.41,  42.12,  42.  ,  40.21,
        39.25,  39.  ,  36.94,  35.75,  35.69,  35.53,  35.4 ,  35.04,
        34.5 ,  34.18,  33.48,  33.28,  32.21,  32.  ,  31.18,  30.77,
        29.  ,  28.5 ,  27.19,  26.25,  25.91,  25.54,  25.48,  24.55,
        24.48,  24.16,  23.65,  23.08,  23.  ,  22.56,  21.49,  21.29,
        19.67,  19.54])

Bin selection rates for host groups.

In [17]:
print("BIN 0 -- ", "SINGLE: ",  bin_group_selectr[0,0], "SMALL: ",  bin_group_selectr[0,1], "PROFESSIONAL: ",  bin_group_selectr[0,2]) #group measures bin 0
print("BIN 1 -- ", "SINGLE: ",  bin_group_selectr[1,0], "SMALL: ",  bin_group_selectr[1,1], "PROFESSIONAL: ",  bin_group_selectr[1,2]) #group measures bin 1
print("BIN 2 -- ", "SINGLE: ",  bin_group_selectr[2,0], "SMALL: ",  bin_group_selectr[2,1], "PROFESSIONAL: ",  bin_group_selectr[2,2]) #group measures bin 2
print("BIN 3 -- ", "SINGLE: ",  bin_group_selectr[3,0], "SMALL: ",  bin_group_selectr[3,1], "PROFESSIONAL: ",  bin_group_selectr[3,2]) #group measures bin 3


BIN 0 --  SINGLE:  0.0 SMALL:  0.0 PROFESSIONAL:  0.0
BIN 1 --  SINGLE:  0.0 SMALL:  0.0 PROFESSIONAL:  0.0
BIN 2 --  SINGLE:  0.0006561679790026247 SMALL:  0.0 PROFESSIONAL:  0.002976190476190476
BIN 3 --  SINGLE:  1.0 SMALL:  1.0 PROFESSIONAL:  1.0


Calibrated Parity score.

In [18]:
print("Calibrated parity: ",  cp_val)

Calibrated parity:  0.0


### Calibrated Balance

In [19]:
lb_bin = np.asarray([0, 1, 5,20, 100])
ub_bin = np.asarray([1, 5, 20, 100, 200])

bin_group_proportions, cb_val = fins.calibrated_balance(nz_pool_items, nz_pool_scores, nz_pool_groups, nz_subset_items, nz_subset_scores, nz_subset_groups, lb_bin, ub_bin)

Bin proportion of subset for host groups.

In [22]:
print("BIN 0 -- ", "SINGLE: ",  bin_group_proportions[0,0], "SMALL: ",  bin_group_proportions[0,1], "PROFESSIONAL: ",  bin_group_proportions[0,2]) #group measures bin 0
print("BIN 1 -- ", "SINGLE: ",  bin_group_proportions[1,0], "SMALL: ",  bin_group_proportions[1,1], "PROFESSIONAL: ",  bin_group_proportions[1,2]) #group measures bin 1
print("BIN 2 -- ", "SINGLE: ",  bin_group_proportions[2,0], "SMALL: ",  bin_group_proportions[2,1], "PROFESSIONAL: ",  bin_group_proportions[2,2]) #group measures bin 2
print("BIN 3 -- ", "SINGLE: ",  bin_group_proportions[3,0], "SMALL: ",  bin_group_proportions[3,1], "PROFESSIONAL: ",  bin_group_proportions[3,2]) #group measures bin 3


BIN 0 --  SINGLE:  0.0 SMALL:  0.0 PROFESSIONAL:  0.0
BIN 1 --  SINGLE:  0.0 SMALL:  0.0 PROFESSIONAL:  0.0
BIN 2 --  SINGLE:  0.02040816326530612 SMALL:  0.0 PROFESSIONAL:  0.02040816326530612
BIN 3 --  SINGLE:  0.5306122448979592 SMALL:  0.2653061224489796 PROFESSIONAL:  0.12244897959183673


Calibrated balance score

In [23]:
print("Calibrated balance: ",  cb_val)

Calibrated balance:  0.0


## NC Burke County voter data
Prepare pool and subsets.
Numeric encoding of Groups .

Party groups: DEMOCRAT : 0 REPUBLICAN 2 (but recoded to 1 below)

Race groups: ASIAN : 0
BLACK : 1
AMINDIAN : 2
MULTI : 3
OTHER : 4
UNDESIGNATED : 5
WHITE : 6

In [24]:
###This is the data (only dem and rep values) used to determine parity and balance
burke_county = pd.read_csv('geocoded_burke_county.csv', header=0, na_values='?')

np_voter_lat = np.array(burke_county["voter_lat"])
np_voter_long = np.array(burke_county["voter_long"])
voter_geometry = np.vstack((np_voter_lat, np_voter_long)).transpose()

np_precint_lat = np.array(burke_county["precinct_lat"])
np_precint_long = np.array(burke_county["precinct_long"])
precinct_geometry = np.vstack((np_precint_lat, np_precint_long)).transpose()
distance = np.full_like(np_precint_lat, np.Inf)
for i in range(0, len(precinct_geometry)):
    distance[i] = np.linalg.norm(precinct_geometry[i] - voter_geometry[i])


burke_county['d_to_precinct'] = distance

precincts = np.unique(burke_county['precinct_id']).tolist()

unique_race_groups = np.unique(burke_county['race_id'].to_numpy())
dems_reps = [0,2]  #will need to recode to consecutive integers 
burke_county_dems_rs = burke_county.query("party_cd == @dems_reps")
pool_items = np.arange(0,burke_county_dems_rs.shape[0])
pool_party_groups = burke_county_dems_rs['party_cd'].to_numpy()
pool_race_groups = burke_county_dems_rs['race_id'].to_numpy()
prec_i = 2 #use second precint

precinct_data = burke_county.query("precinct_id == @prec_i")
precinct_data = precinct_data.query("party_cd == @dems_reps")
subset_scores = precinct_data['d_to_precinct'].to_numpy()
subset_items = np.arange(0, precinct_data.shape[0])
subset_groups = precinct_data['party_cd'].to_numpy()
subset_race_groups = precinct_data['race_id'].to_numpy()
#Need to recode groups to consecutive integers
present_grps = np.unique(pool_party_groups)
# num_present_grps = len(present_grps)
# recoded_present_groups = np.arange(0,num_present_grps)
recoded_subset_groups = np.array([np.argwhere(present_grps == item)[0][0] for item in subset_groups.tolist()])
recoded_pool_groups = np.array([np.argwhere(present_grps == item)[0][0] for item in pool_party_groups.tolist()])




In [25]:
prec_i = 2 #use second precint

### Parity

In [26]:
grp_selectrt, par_val = fins.parity(pool_items, recoded_pool_groups, subset_items, recoded_subset_groups)

Select rate for party groups.

In [27]:
print("DEMOCRAT: ",  grp_selectrt[0], "REPUBLICAN: ",  grp_selectrt[1]) #group measures

DEMOCRAT:  0.0357609710550887 REPUBLICAN:  0.031134473197781884


Parity score

In [28]:
print("Parity: ",  par_val)

Parity:  0.8706271747995927


### Balance

In [29]:
grp_propofs, bal_val = fins.balance(recoded_pool_groups, subset_items, recoded_subset_groups)

Group proportion of the subset for party groups

In [30]:
print("DEMOCRAT: ",  grp_propofs[0], "REPUBLICAN: ",  grp_propofs[1]) #group measures

DEMOCRAT:  0.4154013015184382 REPUBLICAN:  0.5845986984815619


Balance score

In [31]:
print("Balance: ",  bal_val)

Balance:  0.7105751391465677


### Conditioned Parity
Utilize party groups, but condition on white race group (6).

In [32]:
condgrp_selectrt, cpar_val = fins.cond_parity(pool_items, recoded_pool_groups,pool_race_groups, subset_items, recoded_subset_groups, subset_race_groups,6)


Conditioned select rate for party groups.

In [33]:
print("DEMOCRAT | WHITE: ",  condgrp_selectrt[0], "REPUBLICAN |WHITE: ",  condgrp_selectrt[1]) #group measures

DEMOCRAT | WHITE:  0.03538033864038413 REPUBLICAN |WHITE:  0.03101744731411419


Conditioned parity score

In [34]:
print("Conditioned parity: ",  cpar_val)

Conditioned parity:  0.8766859930139275


### Conditioned Balance
Utilize party groups, but condition on white race group (6).

In [35]:

condgrp_props, cbal_val = fins.cond_balance(recoded_pool_groups,subset_items, recoded_subset_groups, subset_race_groups,6)

Conditioned group proportion of the subset for party groups

In [36]:
print("DEMOCRAT | WHITE: ",  condgrp_props[0], "REPUBLICAN |WHITE: ",  condgrp_props[1]) #group measures

DEMOCRAT | WHITE:  0.36082474226804123 REPUBLICAN |WHITE:  0.6391752577319587


Conditioned balance score

In [37]:
print("Conditioned balance: ",  cpar_val)

Conditioned balance:  0.8766859930139275


### Score Parity
Score parity for race groups in precint 2 (aka 0400), note that this is for the same data the is limited to dems and republicans, this does not also include inxdependents. The AIES2022 experiment does include independents. 

In [38]:
avgdist, scorep_val = fins.score_parity(subset_items, subset_scores, subset_race_groups)

Average distance to pool for racial groups.

In [39]:
print("ASIAN: ", avgdist[0], "BLACK: ", avgdist[1], "AMINDIAN : ", avgdist[2], "MULTI : ", avgdist[3], "OTHER : ", avgdist[4], "UNDESIGNATED : ", avgdist[5], "WHITE : ", avgdist[6]) #group measures

ASIAN:  0.032299593893750034 BLACK:  0.013103943314941883 AMINDIAN :  0.018822665713235855 MULTI :  0.03709304812387848 OTHER :  0.017150098003811613 UNDESIGNATED :  0.028891044362378757 WHITE :  0.029753477561497682


Score parity value

In [40]:
print("Score parity: ",  scorep_val)

Score parity:  0.353272216162421


### Score Balance
Score balance for race groups in precint 2 (aka 0400), note that this is for the same data the is limited to dems and republicans, this does not also include inxdependents. The AIES2022 experiment does include independents. 

In [41]:
totalscore, sb_val = fins.score_balance(subset_items, subset_scores, subset_race_groups)

Total distance to pool for racial groups.

In [42]:
print("ASIAN: ", totalscore[0], "BLACK: ", totalscore[1], "AMINDIAN : ", totalscore[2], "MULTI : ", totalscore[3], "OTHER : ", totalscore[4], "UNDESIGNATED : ", totalscore[5], "WHITE : ", totalscore[6]) #group measures

ASIAN:  0.35529553283125037 BLACK:  0.8779642021011062 AMINDIAN :  0.03764533142647171 MULTI :  0.07418609624775696 OTHER :  0.30870176406860905 UNDESIGNATED :  1.3289880406694228 WHITE :  23.088698587722202


Score balance value

In [43]:
print("Score balance: ",  sb_val)

Score balance:  0.001630465713926823
