# Auditing a Classifier for Fairness Based on Movement Patterns

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np

from tqdm import tqdm

### Aux functions

In [None]:
# Simplest working version of the function, without particular optimizations.
def sequential_compute_in_out_probs(labels: np.ndarray, sel_ids: np.ndarray, 
                                    tot_num_els : int, tot_sum_labels : float) -> tuple[float, float]:
    """
    Compute the mean label inside and outside a selection of objects.

    Args:
        labels: array indexed by object ID; labels[i] is the label of object i
            (0/1 labels in this notebook; real-valued labels are also summed correctly).
        sel_ids: array of integer indices into `labels` identifying the selected objects.
            Each index is counted once per occurrence, so duplicates skew both means.
        tot_num_els: total number of objects (the caller passes len(labels)).
        tot_sum_labels: sum of all labels, precomputed once by the caller.

    Returns:
        (mean_inside, mean_outside) as Python floats. A side that contains no
        objects (empty selection, or selection covering every object) yields NaN.
    """

    k = sel_ids.size
    if k == 0:
        # Nothing selected: the inside mean is undefined. Skipping `take` also avoids
        # a TypeError on empty float64 index arrays such as np.array(list(set())).
        sum_inside = 0
        mean_inside = float("nan")
    else:
        # Let NumPy pick the accumulator dtype: a forced unsigned integer accumulator
        # cannot represent fractional (or negative) label values.
        sum_inside = labels.take(sel_ids).sum()
        mean_inside = sum_inside / k

    num_outside = tot_num_els - k
    if num_outside == 0:
        # Every object is selected: the outside mean is undefined.
        mean_outside = float("nan")
    else:
        mean_outside = (tot_sum_labels - sum_inside) / num_outside

    return float(mean_inside), float(mean_outside)

### Main code

In [None]:
# Synthetic ground truth: one random binary label (0 or 1) per object.
n_objects = 100_000
labels = np.random.randint(low=0, high=2, size=n_objects)

# Load the pickled candidate table, then replace every entry of 'list_users' with a
# NumPy array built from its elements (presumably sets of user IDs -- confirm against
# the code that generates the pickle).
path_candidates = './data_simulator/huge_dataset/gencand/candidates_100_0.pkl'
candidates = pd.read_pickle(path_candidates)
candidates['list_users'] = candidates['list_users'].apply(
    lambda users: np.array(list(users))
)

# Last expression of the cell: displays the table in the notebook.
candidates


In [None]:

# Dataset-wide totals do not change between candidates, so compute them once up front.
tot_num_labels = len(labels)
sum_all_labels = labels.sum()

# Evaluate the positive rate inside/outside each candidate's user selection.
# The results are not accumulated: after the loop only the last pair remains bound.
user_lists = candidates["list_users"]
for el in tqdm(user_lists, total=len(user_lists), desc="Computing in/out probs"):
    mean_inside, mean_outside = sequential_compute_in_out_probs(
        labels=labels,
        sel_ids=el,
        tot_num_els=tot_num_labels,
        tot_sum_labels=sum_all_labels,
    )