# Algorithm performance on credit card data under misspecifications (Figure 4)

In this notebook we produce plots that compare the utility achieved by the algorithms presented in the paper on instances generated from real data, when the probabilities $P(y=1|x)$ and the costs $c(x_i, x_j)$ are misspecified. The resulting plots are presented in Figure 4.

**Required script execution:** noisy.sh

In [1]:
import os

# Move the working directory one level up; the relative paths used later
# (./outputs/optimal, ./figures/optimal) are resolved against it.
# The flag makes the cell safe to re-run in the same kernel: without it every
# execution would climb one more directory level.
if "_PROJECT_ROOT" not in globals():
    os.chdir("../")
    _PROJECT_ROOT = os.getcwd()

In [2]:
# Standard-library, third-party and project imports for the cells below.
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
from matplotlib import container
# Select matplotlib's 'default' style sheet.
plt.style.use('default')
import seaborn as sns
import glob
import json
# Project helpers; utils.latexify is called before each figure is drawn.
from lib import utils
from matplotlib.colors import LogNorm
import matplotlib.ticker as mtick
import copy
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes
from mpl_toolkits.axes_grid1.inset_locator import mark_inset
# Use seaborn's "paper" plotting context with font_scale=4.
sns.set_context("paper", font_scale=4)

In [3]:
# Experiment identifiers; name, datas and cost_method are substituted into the
# glob patterns that locate the result files.
name = "noisy"
datas = "credit"
gamma = 0.8563985631883158
cost_method = 'max_percentile_shift'
final_df = []

# Per-algorithm plot styling. Each row is (algorithm, line style, marker);
# the row position selects the colour from the Set2 palette.
cols = sns.color_palette("Set2", 9)
_algorithm_styles = [
    ("Non-Strategic", "-.", "v"),
    ("Bruteforce", "-", "s"),
    ("Threshold", "-", "^"),
    ("Iterative", "--", "o"),
    ("Iterative (Optimized)", "-.", "P"),
    ("DP", ":", "D"),
]
col_dict = {row[0]: cols[position] for position, row in enumerate(_algorithm_styles)}
linestyles_dict = {alg_name: line_style for alg_name, line_style, _marker in _algorithm_styles}
markers_dict = {alg_name: marker for alg_name, _line_style, marker in _algorithm_styles}

#### Noisy P(y|x) utility plot

In [4]:
# Collect one row per (algorithm, beta, seed) from the JSON result files of the
# noisy-P(y|x) experiment. Sorting makes the row order deterministic.
files = sorted(glob.glob("./outputs/optimal/{name}_*_data_{datas}_cost_{cost_method}_noisy_pyx_*_config.json".format(name=name, datas=datas, cost_method=cost_method)))
if not files:
    raise FileNotFoundError("No noisy P(y|x) result files found in ./outputs/optimal/; run noisy.sh first.")
data = []

for fi in files:
    # The file name is split on '_' and read positionally: token 1 is the
    # algorithm tag, token 11 the value stored as beta and token 13 the seed.
    # NOTE(review): these offsets presumably depend on the naming scheme used
    # by noisy.sh and on the number of underscores in cost_method -- verify if
    # either changes. The basename is used so directory names cannot shift them.
    parts = os.path.basename(fi).split('_')
    algo = parts[1]
    beta = parts[11]
    seed = parts[13]

    with open(fi, "r") as file:
        obj = json.load(file)

    if algo == 'thres':
        data.append({'Algorithm': algo, 'beta': beta, 'seed': seed, 'utility': obj['strategic_threshold']})
    elif algo == 'iterative':
        # The iterative result files also carry the non-strategic utility,
        # which is recorded as a separate row.
        data.append({'Algorithm': algo, 'beta': beta, 'seed': seed, 'utility': obj['strategic']})
        data.append({'Algorithm': 'non_strategic', 'beta': beta, 'seed': seed, 'utility': obj['non_strategic']})
    # Files of any other algorithm are skipped: no utility key is known for
    # them, and reusing the value read from the previous file would be wrong.

df_standard = pd.DataFrame(data)

In [5]:
# Replace the short algorithm tags taken from the file names with the display
# names that key the colour / marker / line-style dictionaries.
df_standard['Algorithm'] = (
    df_standard['Algorithm']
    .str.replace("non_strategic", "Non-Strategic")
    .str.replace("thres", "Threshold")
    .str.replace("iterative", "Iterative")
)

In [6]:
# Project helper called before drawing (presumably sets figure size / fonts -- verify).
utils.latexify(8,5, font_scale=3.0)

# Work on a numeric copy so that df_standard itself is left untouched.
df = df_standard.assign(beta=pd.to_numeric(df_standard['beta']),
                        utility=pd.to_numeric(df_standard['utility']))
algorithms=["Non-Strategic", "Threshold", "Iterative"]

# For every algorithm and beta value, aggregate the utilities of all matching
# rows into (beta, mean, population standard deviation).
betas=sorted(df['beta'].unique())
lines={}
for alg in algorithms:
    lines[alg]=[]
    for x_val in betas:
        utilities = df.loc[(df['Algorithm']==alg) & (df['beta']==x_val), 'utility'].to_numpy()
        lines[alg].append((x_val, np.mean(utilities), np.std(utilities)))

ax=plt.subplot(111)
new_handles = []
for alg in algorithms:
    # lines[alg] is already ordered by beta because betas is sorted.
    x_vals=[point[0] for point in lines[alg]]
    y_vals=[point[1] for point in lines[alg]]
    std_vals=[point[2] for point in lines[alg]]
    h, _caplines, _barlines = ax.errorbar(x_vals, y_vals, yerr=std_vals,
                                          linestyle=linestyles_dict[alg], marker=markers_dict[alg],
                                          label=alg, markersize=9, color=col_dict[alg], linewidth=2)

    # Legend entry: a copy of the data line recoloured white, with the marker
    # kept in the algorithm's colour.
    handle = copy.copy(h)
    handle.set_color('w')
    handle.set_label(alg)
    handle.set_markerfacecolor(col_dict[alg])
    handle.set_markeredgecolor(col_dict[alg])
    handle.set_markersize(9)
    handle.set_markeredgewidth(1)
    new_handles.append(handle)

plt.legend(handles=new_handles, shadow=None, loc='upper right', labelspacing=0.1, handletextpad=0.3)
plt.ylabel("Utility, $u(\\pi,\\gamma)$")
plt.xlabel("$\\phi$")
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.set_xscale('log')
ax.set_ylim([0, 0.05])
plt.xticks(ticks=betas, labels=betas, rotation = 30)
# Create the output directory if needed so savefig does not fail on a fresh checkout.
os.makedirs("./figures/optimal", exist_ok=True)
plt.savefig("./figures/optimal/noisy_pyx.pdf", bbox_inches='tight', )
plt.clf()

#### Noisy cost utility plot

In [7]:
# Collect one row per (algorithm, beta, seed) from the JSON result files of the
# noisy-cost experiment. Sorting makes the row order deterministic.
files = sorted(glob.glob("./outputs/optimal/{name}_*_data_{datas}_cost_{cost_method}_noisy_cost_*_config.json".format(name=name, datas=datas, cost_method=cost_method)))
if not files:
    raise FileNotFoundError("No noisy cost result files found in ./outputs/optimal/; run noisy.sh first.")
data = []

for fi in files:
    # The file name is split on '_' and read positionally: token 1 is the
    # algorithm tag, token 11 the value stored as beta and token 13 the seed.
    # NOTE(review): these offsets presumably depend on the naming scheme used
    # by noisy.sh and on the number of underscores in cost_method -- verify if
    # either changes. The basename is used so directory names cannot shift them.
    parts = os.path.basename(fi).split('_')
    algo = parts[1]
    beta = parts[11]
    seed = parts[13]

    with open(fi, "r") as file:
        obj = json.load(file)

    if algo == 'thres':
        data.append({'Algorithm': algo, 'beta': beta, 'seed': seed, 'utility': obj['strategic_threshold']})
    elif algo == 'iterative':
        # The iterative result files also carry the non-strategic utility,
        # which is recorded as a separate row.
        data.append({'Algorithm': algo, 'beta': beta, 'seed': seed, 'utility': obj['strategic']})
        data.append({'Algorithm': 'non_strategic', 'beta': beta, 'seed': seed, 'utility': obj['non_strategic']})
    # Files of any other algorithm are skipped: no utility key is known for
    # them, and reusing the value read from the previous file would be wrong.

df_standard = pd.DataFrame(data)

In [8]:
# Replace the short algorithm tags taken from the file names with the display
# names that key the colour / marker / line-style dictionaries.
df_standard['Algorithm'] = (
    df_standard['Algorithm']
    .str.replace("non_strategic", "Non-Strategic")
    .str.replace("thres", "Threshold")
    .str.replace("iterative", "Iterative")
)

In [9]:
# Project helper called before drawing (presumably sets figure size / fonts -- verify).
utils.latexify(8,5, font_scale=3.0)

# Work on a numeric copy so that df_standard itself is left untouched.
df = df_standard.assign(beta=pd.to_numeric(df_standard['beta']),
                        utility=pd.to_numeric(df_standard['utility']))
algorithms=["Non-Strategic", "Threshold", "Iterative"]

# For every algorithm and beta value, aggregate the utilities of all matching
# rows into (beta, mean, population standard deviation).
betas=sorted(df['beta'].unique())
lines={}
for alg in algorithms:
    lines[alg]=[]
    for x_val in betas:
        utilities = df.loc[(df['Algorithm']==alg) & (df['beta']==x_val), 'utility'].to_numpy()
        lines[alg].append((x_val, np.mean(utilities), np.std(utilities)))

ax=plt.subplot(111)
new_handles = []
for alg in algorithms:
    # lines[alg] is already ordered by beta because betas is sorted.
    x_vals=[point[0] for point in lines[alg]]
    y_vals=[point[1] for point in lines[alg]]
    std_vals=[point[2] for point in lines[alg]]
    h, _caplines, _barlines = ax.errorbar(x_vals, y_vals, yerr=std_vals,
                                          linestyle=linestyles_dict[alg], marker=markers_dict[alg],
                                          label=alg, markersize=9, color=col_dict[alg], linewidth=2)

    # Legend entry: a copy of the data line recoloured white, with the marker
    # kept in the algorithm's colour.
    handle = copy.copy(h)
    handle.set_color('w')
    handle.set_label(alg)
    handle.set_markerfacecolor(col_dict[alg])
    handle.set_markeredgecolor(col_dict[alg])
    handle.set_markersize(9)
    handle.set_markeredgewidth(1)
    new_handles.append(handle)

plt.legend(handles=new_handles, shadow=None, loc='upper right', labelspacing=0.1, handletextpad=0.3)
plt.ylabel("Utility, $u(\\pi,\\gamma)$")
plt.xlabel("$\\phi$")
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.set_xscale('log')
ax.set_ylim([0.015, 0.042])
plt.xticks(ticks=betas, labels=betas, rotation = 30)
# Create the output directory if needed so savefig does not fail on a fresh checkout.
os.makedirs("./figures/optimal", exist_ok=True)
plt.savefig("./figures/optimal/noisy_cost.pdf", bbox_inches='tight', )
plt.clf()