In [1]:
import os
import json

# Directory that holds the encoder result folders.
root_dir = "/scratch/yifwang/new_fairness_x_explainability/encoder_results"

# Experiment axes iterated over by the analysis cells.
models = [
    "bert",
    "roberta",
    "distilbert",
]
bias_types = [
    "race",
    "gender",
    "religion",
]
methods = [
    "Attention",
    "Saliency",
    "DeepLift",
    "InputXGradient",
    "IntegratedGradients",
    "Occlusion",
    "KernelShap",
]
debiasing_methods = [
    "no_debiasing",
    "group_balance",
    "group_class_balance",
    "cda",
    "dropout",
    "attention_entropy",
    "causal_debias",
]
training_types = [
    "all axes",
    "one axis",
]

# Keys read from each correlation entry when the result JSON files are parsed.
reliance_keys = [
    "raw",
    "max",
    "len",
    "norm",
]

# Per-bias-type count that appears as the trailing component of the result folder name.
num_examples = {
    "race": 2000,
    "gender": 2000,
    "religion": 1000,
}

In [2]:
data_type = "all" # "race"
# Plain string literals: these paths contain no placeholders, so no f-prefix is needed.
correlation_w_abs_dir = "/nethome/yifwang/fairness_x_explainability/new_fairness_explainability/analysis/correlation_w_abs"
correlation_wo_abs_dir = "/scratch/yifwang/new_fairness_x_explainability/encoder_results"

# Long-format accumulator: one list per column, all kept at the same length.
correlation_dict = {"debiasing_method": [], "explanation_method": [], "aggregation_method": [], "correlation": [], "reliance_method": []}
bias_type = "race"
model = "bert"


data_token = bias_type


def _append_correlation_rows(table, results_dir, run_name, bias, debiasing, explanation, reliance_names, name_suffix="", skip_substrings=()):
    """Load one correlation JSON file and append its entries to ``table``.

    The file is read from
    ``<results_dir>/<run_name>/<debiasing>/correlation/correlation_<explanation>_<bias>_test.json``
    and is expected to map each aggregation-method name to an object whose
    ``'abs_average'`` entry is indexable by every name in ``reliance_names``.

    Args:
        table: Dict of column lists (same keys as ``correlation_dict``); mutated in place.
        results_dir: Root directory containing the run folders.
        run_name: Name of the run folder inside ``results_dir``.
        bias: Bias type used in the JSON file name.
        debiasing: Debiasing-method folder name; also stored in the row.
        explanation: Explanation-method name used in the file name; also stored in the row.
        reliance_names: Keys to read from each ``'abs_average'`` entry; one row per key.
        name_suffix: Text appended to the aggregation-method name stored in the row.
        skip_substrings: Aggregation methods whose name contains any of these are ignored.

    Raises:
        FileNotFoundError: If the JSON file does not exist.
        KeyError: If an entry lacks ``'abs_average'`` or one of ``reliance_names``.
    """
    json_path = os.path.join(results_dir, run_name, debiasing, "correlation", f"correlation_{explanation}_{bias}_test.json")
    with open(json_path, "r") as handle:
        per_aggregation = json.load(handle)

    for aggregation, scores in per_aggregation.items():
        if any(token in aggregation for token in skip_substrings):
            continue
        for reliance in reliance_names:
            table["debiasing_method"].append(debiasing)
            table["explanation_method"].append(explanation)
            table["aggregation_method"].append(f"{aggregation}{name_suffix}")
            table["correlation"].append(scores['abs_average'][reliance])
            table["reliance_method"].append(reliance)


# The run folder name does not depend on the loop variables, so build it once.
run_name = f"{model}_civil_{data_token}_{bias_type}_test_{num_examples[bias_type]}"

for debiasing_method in debiasing_methods:
    for explanation_method in methods:
        # Results from the "wo_abs" directory keep their aggregation names unchanged.
        # NOTE(review): both sources read the 'abs_average' entry -- confirm this is
        # the intended key for the "wo_abs" results as well.
        _append_correlation_rows(
            correlation_dict,
            correlation_wo_abs_dir,
            run_name,
            bias_type,
            debiasing_method,
            explanation_method,
            reliance_keys,
        )

        # Results from the "w_abs" directory are tagged with an "_abs" suffix; entries
        # whose name contains "L2" or "attention" are skipped for this source
        # (presumably because taking absolute values does not change them -- TODO confirm).
        _append_correlation_rows(
            correlation_dict,
            correlation_w_abs_dir,
            run_name,
            bias_type,
            debiasing_method,
            explanation_method,
            reliance_keys,
            name_suffix="_abs",
            skip_substrings=("L2", "attention"),
        )

In [3]:
# Turn the accumulated column lists into a single long-format DataFrame
# (one column per key of correlation_dict) and display it.
import pandas as pd
correlation_df = pd.DataFrame.from_dict(correlation_dict, orient="columns")
correlation_df

Unnamed: 0,debiasing_method,explanation_method,aggregation_method,correlation,reliance_method
0,no_debiasing,Attention,raw_attention,0.616991,raw
1,no_debiasing,Attention,raw_attention,0.620664,max
2,no_debiasing,Attention,raw_attention,0.432899,len
3,no_debiasing,Attention,raw_attention,0.628654,norm
4,no_debiasing,Attention,attention_rollout,0.585958,raw
...,...,...,...,...,...
527,causal_debias,KernelShap,ShapleyValue,0.143727,norm
528,causal_debias,KernelShap,ShapleyValue_abs,0.140331,raw
529,causal_debias,KernelShap,ShapleyValue_abs,0.094774,max
530,causal_debias,KernelShap,ShapleyValue_abs,0.212852,len


In [4]:
# For one reliance key, print the mean correlation per aggregation method for
# every (debiasing method, explanation method) pair, restricted to the
# explanation methods listed in methods_with_directions.
reliance_method = "raw"
methods_with_directions = ["Saliency", "DeepLift", "InputXGradient", "IntegratedGradients", "Occlusion"]

# This mask is the same for every iteration, so compute it once up front.
reliance_mask = correlation_df["reliance_method"] == reliance_method

for debiasing_method in debiasing_methods:
    print("\n\n")
    print(f"Debiasing Method: {debiasing_method}")
    print("=" * 50)
    debiasing_mask = correlation_df["debiasing_method"] == debiasing_method
    for explanation_method in methods_with_directions:
        explanation_mask = correlation_df["explanation_method"] == explanation_method
        selected_rows = correlation_df[debiasing_mask & explanation_mask & reliance_mask]
        per_aggregation_mean = selected_rows.groupby("aggregation_method")["correlation"].mean()
        avg_corr_df = per_aggregation_mean.reset_index()

        print(avg_corr_df)
        print("\n")




Debiasing Method: no_debiasing
  aggregation_method  correlation
0        Saliency_L2     0.495682
1      Saliency_mean     0.063675
2  Saliency_mean_abs     0.317893


  aggregation_method  correlation
0        DeepLift_L2     0.501819
1      DeepLift_mean     0.060396
2  DeepLift_mean_abs     0.260645


        aggregation_method  correlation
0        InputXGradient_L2     0.489937
1      InputXGradient_mean     0.296884
2  InputXGradient_mean_abs     0.381399


             aggregation_method  correlation
0        IntegratedGradients_L2     0.566025
1      IntegratedGradients_mean     0.453126
2  IntegratedGradients_mean_abs     0.510584


  aggregation_method  correlation
0          Occlusion     0.621426
1      Occlusion_abs     0.657727





Debiasing Method: group_balance
  aggregation_method  correlation
0        Saliency_L2     0.600200
1      Saliency_mean     0.179951
2  Saliency_mean_abs     0.428645


  aggregation_method  correlation
0        DeepLift_L2     0.555252
1