In [None]:
import sys
from pathlib import Path

# Put the parent of the current working directory on the import path (once),
# so that the project-local `src` / `notebooks` imports used below can resolve.
parent_dir = str(Path.cwd().parent)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
import pickle

# Global plot styling for every figure in this notebook: seaborn "ticks" theme
# with serif fonts, a visible grid, thin (0.8) strokes and a closed black frame.
# The plotting context is left at its default:
# sns.set_context("paper")
sns.set_theme(
    style="ticks",
    palette="deep",
    rc={
        # text rendering
        "text.usetex": False,
        "font.family": "serif",
        # grid and stroke widths
        "axes.grid": True,
        "lines.linewidth": 0.8,
        "axes.linewidth": 0.8,
        "grid.linewidth": 0.8,
        # tick marks
        "xtick.major.width": 0.8,
        "ytick.major.width": 0.8,
        "xtick.major.size": 3.0,
        "ytick.major.size": 3.0,
        # axes frame: keep all four spines, drawn in black
        "axes.spines.top": True,
        "axes.spines.right": True,
        "axes.edgecolor": "black",
    },
)
# Called after the theme, so "Set2" replaces the colour cycle that
# `palette="deep"` installed above.
sns.set_palette("Set2")


In [None]:
from src.utils import find_root


# Root directory reported by `find_root`; the data-file paths built in the
# cells below all start from it.
# NOTE(review): how `find_root` locates the root is not visible in this notebook.
BASE_PATH = find_root()
# Dataset tag (used as a sub-directory of `output/`) and the sensitive feature
# passed to the graph-uncertainty inspection.
DATASET_TAG = 'adult'
SENSITIVE_FEAT = 'sex'

# Knowledge levels (sub-directory names under the dataset folder) and the
# classifier tags handed to the data loaders.
knows = ['low', 'med']
clfs = ['GB_no_sensitive', 'RF_no_sensitive', 'LR_no_sensitive']

# Interventions and counterfactual metrics; `metrics` is their cross product
# written as "<intervention>.<metric>", intervention-major.
interventions = ['Female_to_Male', 'Male_to_Female']
cf_metrics = ['negative_to_positive_switch_rate', 'positive_to_negative_switch_rate']
metrics = [
    '.'.join((intervention, rate_name))
    for intervention in interventions
    for rate_name in cf_metrics
]

## 1. Graph uncertainty

In [None]:
from src.causality.causal_world import  inspect_graph_uncertainty
# Load the pickled causal worlds for each knowledge level, keyed by level name.
# Files are read from <BASE_PATH>/output/<DATASET_TAG>/<know>/causal_worlds.pkl.
# NOTE(review): `pickle.load` can execute arbitrary code from the file, so these
# must be trusted, locally generated artefacts.
cws_by_know = {}
for know in knows:
    fpath = BASE_PATH / 'output' / DATASET_TAG / know / 'causal_worlds.pkl'
    with open(fpath, 'rb') as f:
        cws_by_know[know] = pickle.load(f)

In [None]:
# Size of the object unpickled for the 'low' knowledge level
# (presumably the number of causal worlds — confirm against the producer).
len(cws_by_know['low'])

In [None]:
# Low knowledge: print the graph-uncertainty report for these causal worlds
# with respect to the sensitive feature.
# NOTE(review): the report's content and format are defined by
# `inspect_graph_uncertainty` and are not visible in this notebook.
cws = cws_by_know['low']
print(inspect_graph_uncertainty(cws, SENSITIVE_FEAT))

In [None]:
# Medium knowledge: same report as for the low-knowledge worlds. Note that
# `cws` is rebound here, so it refers to the 'med' worlds after this cell runs.
cws = cws_by_know['med']
print(inspect_graph_uncertainty(cws, SENSITIVE_FEAT))

## 2. Feature variance

In [None]:
# Read the per-individual feature-variance table for each knowledge level from
# <BASE_PATH>/output/<DATASET_TAG>/<know>/counterfactuals_summary/feat_var_by_individual.csv
# and keep the resulting DataFrames keyed by level name.
feat_var_by_know = {}
for know in knows:
    fpath = BASE_PATH / 'output' / DATASET_TAG / know / 'counterfactuals_summary' / 'feat_var_by_individual.csv'
    feat_var_by_know[know] = pd.read_csv(fpath)

In [None]:
# Print a caption, then show the low-knowledge feature-variance table
# (the bare last expression is rendered as the cell output).
print('Feature var, low knowledge:')
feat_var_by_know['low']

In [None]:
# Print a caption, then show the medium-knowledge feature-variance table
# (the bare last expression is rendered as the cell output).
print('Feature var, med knowledge:')
feat_var_by_know['med']

## 3. Score variance

In [None]:
from notebooks.notebook_utils import load_score_variance_data


# Load the score-variance data for the configured knowledge levels and
# classifiers and show it as the cell output.
# NOTE(review): the result is not bound to a name here, so nothing from this
# call is available to later cells.
load_score_variance_data(BASE_PATH, DATASET_TAG, knows, clfs)

In [None]:
from notebooks.notebook_utils import load_score_variance_data
from src.plot.bar_charts import plot_score_variance


# Load the score-variance data and hand it to `plot_score_variance` together
# with a PDF path (presumably where the chart is saved, relative to the working
# directory — confirm in src/plot/bar_charts.py). The return value is kept in `g`.
df_score_var = load_score_variance_data(BASE_PATH, DATASET_TAG, knows, clfs)
g = plot_score_variance(df_score_var, './adult_charts/adult_score_variance.pdf',)


## 4. CF metrics variance

In [None]:
from notebooks.notebook_utils import load_and_tidy_fairness_metrics
from src.plot.bar_charts import plot_counterfactual_metrics


# Load the tidy fairness metrics for the configured knowledge levels and
# classifiers, then plot them for both interventions. The third argument is a
# PDF path (presumably the save location, relative to the working directory —
# confirm in src/plot/bar_charts.py).
df = load_and_tidy_fairness_metrics(BASE_PATH, DATASET_TAG, knows, clfs)
g = plot_counterfactual_metrics(df, interventions, 'adult_charts/adult_cf.pdf')
plt.show()


In [None]:
# Summarise `Rate` per (Knowledge, intervention, metric, Classifier) group:
# the mean plus an empirical 95% interval, i.e. the 2.5th and 97.5th
# percentiles of the rates observed within the group.
summary_df = (
    df.groupby(['Knowledge', 'intervention', 'metric', 'Classifier'])['Rate']
      .agg(mean_rate='mean',
           ci_low=lambda s: np.percentile(s, 2.5),    # 2.5th percentile
           ci_high=lambda s: np.percentile(s, 97.5))  # 97.5th percentile
      .reset_index()
)
summary_rounded = summary_df.round(4)

# Copying to the clipboard is only a convenience for pasting the table
# elsewhere. It needs a system clipboard, which headless machines (servers, CI)
# do not have; pandas then raises PyperclipException, a RuntimeError subclass.
# Report that instead of aborting a full "Restart & Run All".
try:
    summary_rounded.to_clipboard(sep=';')
except RuntimeError as exc:
    print(f'Clipboard unavailable, summary not copied: {exc}')

# Show the rounded table as the cell output so the result is visible either way.
summary_rounded

## 5. Counterfactuals quality

In [None]:
# Build, for each knowledge level, a table of counterfactual-quality scores:
# only the columns whose name ends in 'coverage' or 'density' are kept, an
# 'avg' column holds their row-wise mean, and rows are sorted by 'avg' from
# highest to lowest.
#
# Dedicated names are used for the intermediate frames so the global `df`
# (the fairness-metrics frame that the summary cell above reads) is not
# overwritten, which keeps earlier cells safe to re-run.
cf_quality_by_know = {}
for know in knows:
    fpath = BASE_PATH / 'output' / DATASET_TAG / know / 'counterfactuals_summary' / 'counterfactuals_quality.csv'
    quality_raw = pd.read_csv(fpath)
    cols = [col for col in quality_raw.columns
            if col.endswith(('coverage', 'density'))]
    quality = quality_raw[cols]
    # `assign` and `sort_values` return new frames, so nothing is mutated in
    # place; the mean is taken over the selected columns only.
    cf_quality_by_know[know] = (
        quality
        .assign(avg=quality.mean(axis=1))
        .sort_values(by='avg', ascending=False)
    )

In [None]:
# First five rows of the low-knowledge quality table; because the table is
# sorted by 'avg' in descending order, these are the highest-average rows.
cf_quality_by_know['low'].head()

In [None]:
# First five rows of the medium-knowledge quality table; because the table is
# sorted by 'avg' in descending order, these are the highest-average rows.
cf_quality_by_know['med'].head()