# Algorithm performance on credit card data under unobserved confounding (Figure 5)

In this notebook, we plot the utility achieved by the algorithms presented in the paper on instances generated from real credit card data, when the probabilities $P(y=1\,|\,x)$ are affected by unobserved confounding. The resulting plots are presented in Figure 5.

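**Required script execution:** `confounding.sh` (run it before executing this notebook; the cells below read JSON result files from `./outputs/optimal/`).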

In [1]:
import os

# Run the notebook from the parent of the directory it was started in: the cells
# below import `lib` and read/write `./outputs` and `./figures` relative to the
# working directory.
# The starting directory is remembered the first time this cell runs, so that
# re-running the cell does not climb one directory higher on every execution.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, os.pardir))

In [2]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from matplotlib import container
plt.style.use('default')
import seaborn as sns
import glob
import json
from lib import utils
from matplotlib.colors import LogNorm
import matplotlib.ticker as mtick
import copy
import scipy
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset
# Use seaborn's "paper" plotting context with font sizes scaled by a factor of 4.
sns.set_context("paper", font_scale=4)

In [3]:
# Experiment identifiers and constants shared by the cells of this notebook.
name = "confounding"
datas = "credit"
cost_method = 'max_percentile_shift'
gamma = 0.8563985631883158
final_df = []

# Nine-colour "Set2" palette; the first six colours are assigned to the algorithms.
cols = sns.color_palette("Set2", 9)

# One row per algorithm: (display label, line style, marker).
# The row order determines which palette colour each algorithm receives.
_algorithm_styles = (
    ("Non-Strategic", "-.", "v"),
    ("Bruteforce", "-", "s"),
    ("Threshold", "-", "^"),
    ("Iterative", "--", "o"),
    ("Iterative (Optimized)", "-.", "P"),
    ("DP", ":", "D"),
)

col_dict = {label: cols[position] for position, (label, _, _) in enumerate(_algorithm_styles)}
linestyles_dict = {label: line_style for label, line_style, _ in _algorithm_styles}
markers_dict = {label: marker for label, _, marker in _algorithm_styles}

In [4]:
# Load the experiment results: one JSON file per (algorithm, confounding level, seed).
pattern = "./outputs/optimal/{name}_*_data_{datas}_cost_{cost_method}_*_config.json".format(
    name=name, datas=datas, cost_method=cost_method)

# glob returns matches in arbitrary order; sort them so the row order is reproducible.
files = sorted(glob.glob(pattern))
if not files:
    # Fail here with an actionable message instead of a KeyError in a later cell.
    raise FileNotFoundError(
        "No result files match {!r}; run confounding.sh first.".format(pattern))

data = []
for fi in files:
    # Parse only the file name, so the directory part can never shift the
    # positional fields: [1] algorithm, [9] confounding level, [11] seed.
    parts = os.path.basename(fi).split('_')
    algo = parts[1]
    level = parts[9]
    seed = parts[11]

    if algo not in ('thres', 'iterative'):
        # Only these two result formats are understood. Skipping avoids recording
        # an undefined utility, or one left over from the previously read file.
        continue

    with open(fi, "r") as file:
        obj = json.load(file)

    if algo == 'thres':
        data.append({'Algorithm': algo, 'level': level, 'seed': seed,
                     'utility': obj['strategic_threshold']})
    else:
        # The iterative runs also store the utility of the non-strategic baseline,
        # which is recorded as a separate pseudo-algorithm.
        data.append({'Algorithm': algo, 'level': level, 'seed': seed,
                     'utility': obj['strategic']})
        data.append({'Algorithm': 'non_strategic', 'level': level, 'seed': seed,
                     'utility': obj['non_strategic']})

df_standard = pd.DataFrame(data)

In [5]:
# Replace the short algorithm identifiers parsed from the file names with the
# display labels used as keys of the colour / marker / line-style dictionaries.
# The replacements are applied one after the other, in this order.
for raw_name, display_name in (("non_strategic", "Non-Strategic"),
                               ("thres", "Threshold"),
                               ("iterative", "Iterative")):
    df_standard['Algorithm'] = df_standard['Algorithm'].str.replace(raw_name, display_name)

In [6]:
# Sanity check: distinct confounding levels found in the result files
# (still strings at this point, exactly as parsed from the file names).
pd.unique(df_standard['level'])

array(['0.15', '0.55', '0.65', '0.45', '0.30', '0.95', '0.40', '0.10',
       '0.50', '0.20', '0.60', '0.90', '0.35', '0.80', '0.05', '0.75',
       '0.25', '0.00', '0.70', '0.85', '1.00'], dtype=object)

### Utility plot under confounding

In [7]:
# Figure styling is delegated to the project helper in lib.utils.
utils.latexify(8,5, font_scale=3.0)
ax = plt.gca()

# Work on a numeric, sorted copy: df_standard is left untouched, so this cell
# (and the earlier ones) can be re-run in any order with the same result.
df = df_standard.assign(
    level=pd.to_numeric(df_standard['level']),
    utility=pd.to_numeric(df_standard['utility']),
).sort_values(["Algorithm"])

algorithms=["Non-Strategic", "Threshold", "Iterative"]

# For every algorithm, collect (level, mean utility, std of utility) across seeds,
# ordered by increasing level. np.std is the population standard deviation (ddof=0).
levels=sorted(df['level'].unique())
lines={}
for alg in algorithms:
    alg_rows = df[df['Algorithm'] == alg]
    lines[alg] = []
    for x_val in levels:
        utilities = alg_rows.loc[alg_rows['level'] == x_val, 'utility'].to_numpy()
        lines[alg].append((x_val, np.mean(utilities), np.std(utilities)))

new_handles = []
for alg in algorithms:
    x_vals = [point[0] for point in lines[alg]]
    y_vals = [point[1] for point in lines[alg]]
    std_vals = [point[2] for point in lines[alg]]
    # errorbar returns (data line, cap lines, bar lines); only the data line is needed.
    h, _, _ = ax.errorbar(x_vals, y_vals, yerr=std_vals,
                          linestyle=linestyles_dict[alg], marker=markers_dict[alg],
                          label=alg, markersize=9, color=col_dict[alg], linewidth=2)

    # Legend entry: a copy of the data line with the line itself drawn in white,
    # so that only the coloured marker is visible next to the label.
    handle = copy.copy(h)
    handle.set_color('w')
    handle.set_label(alg)
    handle.set_markerfacecolor(col_dict[alg])
    handle.set_markeredgecolor(col_dict[alg])
    handle.set_markersize(9)
    handle.set_markeredgewidth(1)
    new_handles.append(handle)

ax.legend(handles=new_handles, shadow=None, loc='upper right', labelspacing=0.1, handletextpad=0.3)
ax.set_ylabel("Utility, $u(\\pi,\\gamma)$")
ax.set_xlabel("$\\lambda$")
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.set_ylim([0.015, 0.035])

# Make sure the output directory exists before writing the figure.
os.makedirs("./figures/optimal", exist_ok=True)
plt.savefig("./figures/optimal/confounding.pdf", bbox_inches='tight', )
# Clear the figure so its contents do not leak into the next plot.
plt.clf()

### Plot of the PDF of $P_V(y=1 \,|\, \mathbf{x} \rightarrow \mathbf{x'})$

In [8]:
def confounding_to_beta(level, min_alphabeta=0.1, max_alphabeta=1.5):
    """Map a confounding level in [0, 1] to a Beta distribution.

    The shape parameters are interpolated linearly between ``min_alphabeta``
    and ``max_alphabeta``:

    * ``level == 0``   -> Beta(max_alphabeta, min_alphabeta)
    * ``level == 0.5`` -> Beta(max_alphabeta, max_alphabeta) (symmetric)
    * ``level == 1``   -> Beta(min_alphabeta, max_alphabeta)

    Parameters
    ----------
    level : float
        Confounding level; must satisfy ``0 <= level <= 1``.
    min_alphabeta : float, optional
        Smallest value a shape parameter can take (reached at the extremes).
    max_alphabeta : float, optional
        Largest value a shape parameter can take.

    Returns
    -------
    tuple
        ``(dist, alpha, beta)`` where ``dist`` is the frozen
        ``scipy.stats.beta(alpha, beta)`` distribution.

    Raises
    ------
    AssertionError
        If ``level`` lies outside ``[0, 1]``.
    """
    # A bare `import scipy` does not load the `stats` submodule on every SciPy
    # version, so import it explicitly where it is used.
    from scipy import stats

    assert 0 <= level <= 1, 'level must be between 0 and 1'
    if level <= 0.5:
        # Lower half: alpha stays at its maximum while beta grows from min to max.
        # At level == 0.5 the interpolation yields exactly max_alphabeta.
        alpha = max_alphabeta
        beta = (1 - 2 * level) * min_alphabeta + 2 * level * max_alphabeta
    else:
        # Upper half: beta stays at its maximum while alpha shrinks from max to min.
        alpha = (2 * level - 1) * min_alphabeta + (2 - 2 * level) * max_alphabeta
        beta = max_alphabeta
    return (stats.beta(alpha, beta), alpha, beta)

# Evaluation grid for the densities, covering the whole support [0, 1].
x_values = np.linspace(0, 1, 1000)

# Confounding levels to illustrate and their (distribution, alpha, beta) triples.
levels = [0.1, 0.3, 0.5, 0.7, 0.9]
distributions = [confounding_to_beta(level) for level in levels]

# Figure styling is delegated to the project helper in lib.utils.
utils.latexify(8,5, font_scale=3.0)
ax = plt.gca()
linestyles = ["-.", "-", ":", "--", "-"]
for ind, (level, (dist, _alpha, _beta)) in enumerate(zip(levels, distributions)):
    pdf_values = dist.pdf(x_values)
    label = f"$\\lambda={level:.1f}$"
    # Rescale every curve to a maximum of 1 so the shapes are comparable.
    # The two end points are excluded from the maximum because a Beta density
    # can be infinite at 0 or 1 when a shape parameter is below 1.
    maxv = pdf_values[1:-1].max()
    pdf_values /= maxv
    ax.plot(x_values, pdf_values, label=label, color=cols[ind], linewidth=2, linestyle=linestyles[ind])
ax.legend(loc='right', bbox_to_anchor=(1.35, 0.5))
ax.set_xlabel("$P_V(y=1 \\,|\\, \\mathbf{x} \\rightarrow \\mathbf{x'})$")
ax.set_ylabel('Probability density')
ax.set_yticks([0, 1], labels=["0", "Max."])
ax.set_xticks([0, 1], labels=["$P(y=1 \\,|\\, \\mathbf{x})$", "$P(y=1 \\,|\\, \\mathbf{x'})$"])
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

# Make sure the output directory exists before writing the figure.
os.makedirs("./figures/optimal", exist_ok=True)
plt.savefig("./figures/optimal/confounding_distributions.pdf", bbox_inches='tight', )
# Clear the figure so its contents do not leak into a later plot.
plt.clf()