In [None]:
import re
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Global plot styling applied before any figure is drawn.
sns.set_palette("Set2")
# NOTE(review): set_theme() takes its own palette argument, so it presumably
# resets the palette chosen on the previous line -- confirm which one is intended.
sns.set_theme(style="whitegrid", font_scale=2.25)
# Print the hex codes of seaborn's "muted" palette for reference.
print(sns.color_palette("muted").as_hex())

In [None]:
def add_star(p):
    """Map a p-value to its significance marker.

    Returns '***' when p < 0.001, '**' when p < 0.01, '*' when p < 0.05,
    and a single space when none of the comparisons holds.
    """
    # Strictest threshold first, so the first match is the strongest marker.
    for cutoff, marker in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if p < cutoff:
            return marker
    return ' '

## Tendency Patterns (BaseGap)

In [None]:
# Load the BaseGap results for both rating dimensions
# (_o = offensiveness file, _p = politeness file).
basegap_o = pd.read_csv("lme_results/offensive_basegap_ci_results2.csv")
basegap_p = pd.read_csv("lme_results/polite_basegap_ci_results2.csv")

# Capitalize the group labels in place; they are used as x-axis categories.
for frame in (basegap_o, basegap_p):
    frame['label'] = frame['label'].map(lambda text: text.capitalize())

# Gender panels: only the 'Woman' rows are kept.
basegap_o_gender, basegap_p_gender = [
    frame[frame['label'].eq('Woman')] for frame in (basegap_o, basegap_p)
]

# Ethnicity panels: only the 'Black' and 'Asian' rows are kept.
options = ['Black', 'Asian']
basegap_o_ethnicity, basegap_p_ethnicity = [
    frame.loc[frame['label'].isin(options)] for frame in (basegap_o, basegap_p)
]

In [None]:
# Four-panel bar chart of the BaseGap coefficients. Rows are the rating
# dimension (offensiveness, politeness); columns are the identity axis
# (gender, ethnicity). The gender column is narrower because it shows a
# single category.
fig, axes = plt.subplots(2, 2, figsize=(29, 14),
                         gridspec_kw={'width_ratios': [1.3, 2], 'height_ratios': [1, 1]})

y_label = "Change of Prediction Error"

# One entry per panel:
# (axis, data, title, y-label, reference-group note, note position, y-limits)
basegap_panels = [
    (axes[0, 0], basegap_o_gender, "Offensiveness, Gender", y_label,
     "reference=Man", (-0.5, 0.002), (-0.1, 0.05)),
    (axes[0, 1], basegap_o_ethnicity, "Offensiveness, Ethnicity", "",
     "reference=White", (-0.5, -0.03), (-0.15, 0.4)),
    (axes[1, 0], basegap_p_gender, "Politeness, Gender", y_label,
     "reference=Man", (-0.5, 0.002), (-0.1, 0.05)),
    (axes[1, 1], basegap_p_ethnicity, "Politeness, Ethnicity", "",
     "reference=White", (-0.5, -0.03), (-0.15, 0.4)),
]

for ax, panel_df, title, ylabel, note, note_xy, ylim in basegap_panels:
    sns.barplot(data=panel_df, ax=ax,
                x='label', y='coef', hue='model',
                palette='deep', alpha=0.85)
    ax.set(title=title, xlabel="", ylabel=ylabel)
    # Per-axes legends are dropped in favour of one figure-level legend below.
    ax.get_legend().remove()
    ax.grid(False)
    # Dashed zero line marks "no difference from the reference group".
    ax.axhline(y=0, color='black', linestyle='dashed', alpha=.5)
    ax.annotate(note, note_xy, fontsize=17)
    ax.set_ylim(*ylim)

# Shared legend: handles come from the first panel, display names are fixed.
# NOTE(review): the names are matched to the handles purely by position, so
# this assumes the 'model' values appear in the data in this order -- confirm.
# The display names are kept in their own variable so the notebook-level
# `labels` DataFrame used by the score-distribution cell is never overwritten.
handles = axes[0, 0].get_legend_handles_labels()[0]
model_names = ["FLAN-T5", "FLAN-UL2", "Tulu2-7B", "Tulu2-13B", "GPT3.5", "GPT4",
               "Llama3.1-8B", "Mistral0.3-7B", "Qwen2.5-7B"]
fig.legend(handles, model_names, loc='upper center', ncol=len(model_names))

plt.subplots_adjust(hspace=0.3, wspace=0.2)

plt.savefig("plots/plot_v3_base_gap.pdf", format='pdf')

## Effect of Adding Identity in Prompt (AddGap)

In [None]:
# Load the AddGap results for both rating dimensions
# (_o = offensiveness file, _p = politeness file).
addgap_o = pd.read_csv("lme_results/offensive_addgap_ci_results2.csv")
addgap_p = pd.read_csv("lme_results/polite_addgap_ci_results2.csv")

# Capitalize the group labels in place; they are used as x-axis categories.
for frame in (addgap_o, addgap_p):
    frame['label'] = frame['label'].map(lambda text: text.capitalize())

# Gender panels keep the 'Man' and 'Woman' rows.
options = ['Man', 'Woman']
addgap_o_gender, addgap_p_gender = [
    frame[frame['label'].isin(options)] for frame in (addgap_o, addgap_p)
]

# Ethnicity panels keep the 'White', 'Black' and 'Asian' rows.
options = ['White', 'Black', 'Asian']
addgap_o_ethnicity, addgap_p_ethnicity = [
    frame.loc[frame['label'].isin(options)] for frame in (addgap_o, addgap_p)
]

In [None]:
# Four-panel point plot of the AddGap coefficients with significance stars.
# Rows are the rating dimension (offensiveness, politeness); columns are the
# identity axis (gender, ethnicity).
fig, axes = plt.subplots(2, 2, figsize=(29, 13))

y_label = "Change of Prediction Error"

# One entry per panel, in axes.flat order: (data, title, y-label).
# The same list drives both the plotting and the p-value annotation, so each
# panel is always starred with the p-values of the frame it actually shows.
addgap_panels = [
    (addgap_o_gender, "Offensiveness, Gender", y_label),
    (addgap_o_ethnicity, "Offensiveness, Ethnicity", ""),
    (addgap_p_gender, "Politeness, Gender", y_label),
    (addgap_p_ethnicity, "Politeness, Ethnicity", ""),
]

for ax, (panel_df, title, ylabel) in zip(axes.flat, addgap_panels):
    sns.pointplot(data=panel_df, ax=ax,
                  x='label', y='coef', hue='model',
                  # Error bars span the min and max 'coef' within each group.
                  errorbar=(lambda x: (x.min(), x.max())),
                  palette='deep', linestyles='none', dodge=.4)
    ax.set(title=title, xlabel="", ylabel=ylabel)
    # Per-axes legends are dropped in favour of one figure-level legend below.
    ax.get_legend().remove()
    ax.grid(False)
    # Dashed zero line marks "no change in prediction error".
    ax.axhline(y=0, color='black', linestyle='dashed', alpha=.5)

# Shared legend: handles come from the first panel, display names are fixed.
# NOTE(review): the names are matched to the handles purely by position, so
# this assumes the 'model' values appear in the data in this order -- confirm.
# The display names are kept in their own variable so the notebook-level
# `labels` DataFrame used by the score-distribution cell is never overwritten.
handles = axes[0, 0].get_legend_handles_labels()[0]
model_names = ["FLAN-T5", "FLAN-UL2", "Tulu2-7B", "Tulu2-13B", "GPT3.5", "GPT4",
               "Llama3.1-8B", "Mistral0.3-7B", "Qwen2.5-7B"]
fig.legend(handles, model_names, loc='upper center', ncol=len(model_names))

# Annotate every plotted point with its significance marker.
n_models = len(model_names)
for ax, (panel_df, _title, _ylabel) in zip(axes.flat, addgap_panels):
    # One p-value per (model, label) pair: the first row of each pair is used.
    pvalues = (panel_df
               .drop_duplicates(subset=['model', 'label'], keep='first')['p']
               .to_list())

    # Point coordinates are read back from the first n_models collections on
    # the axes, concatenated in collection order.
    # NOTE(review): p-values and points are paired by position, which assumes
    # the frame's (model, label) row order matches the plotting order -- confirm.
    offsets = [coll.get_offsets() for coll in list(ax.collections)[:n_models]]
    x_values = np.ma.concatenate([pts[:, 0] for pts in offsets]).tolist()
    y_values = np.ma.concatenate([pts[:, 1] for pts in offsets]).tolist()

    sigs = [add_star(p) for p in pvalues]
    for j, sig in enumerate(sigs):
        # Small offset keeps the rotated stars from overlapping the marker.
        ax.annotate(sig, (x_values[j]+0.005, y_values[j]+0.005),
                    fontsize=15, rotation=90)

plt.subplots_adjust(hspace=0.3)

plt.savefig("plots/plot_v3_add_gap.pdf", format='pdf')

## Distributions of Scores

In [None]:
# Read the tab-separated result tables for each rating dimension.
results_o = pd.read_table("../LLM_pred/offensive_results_w.tsv")
results_p = pd.read_table("../LLM_pred/polite_results_w.tsv")

# Reshape the columns from 'label' through 'asian_score' to long format
# (one row per group/score pair) and tag each frame with its dimension.
label_o = (
    results_o.loc[:, 'label':'asian_score']
    .melt(var_name='group', value_name='score')
    .assign(dimension='Offensiveness')
)
label_p = (
    results_p.loc[:, 'label':'asian_score']
    .melt(var_name='group', value_name='score')
    .assign(dimension='Politeness')
)

# Stack both dimensions into the single frame used by the violin plot.
labels = pd.concat([label_o, label_p], ignore_index=True)

In [None]:
# Violin plots of the score distribution per group, one facet per dimension.
# NOTE(review): the tick names below are applied by position, so they assume
# the 'group' categories appear in this order -- confirm against the data.
group_names = ["US Population", "Man", "Woman", "White", "Black", "Asian"]

p1 = sns.catplot(
    data=labels,
    kind='violin',
    x='group',
    y='score',
    col='dimension',
    sharey=False,
    height=3,
    aspect=1.7,
)
(p1.set_axis_labels("", "Scores")
   .set_xticklabels(group_names)
   .set_titles("{col_name}"))
p1.savefig("plots/plot_v1_score_dist.pdf", format='pdf')

## Robustness Check

In [None]:
# Load the robustness-check correlations and split them by rating dimension.
corr_robust = pd.read_csv("../LLM_pred/corr_robust_l.csv")
dimension_col = corr_robust['dimension']
corr_robust_o = corr_robust.loc[dimension_col.eq('offensiveness')]
corr_robust_p = corr_robust.loc[dimension_col.eq('politeness')]

In [None]:
# Side-by-side bar charts of the correlation per model and prompt variant,
# one panel per rating dimension.
fig, axes = plt.subplots(1, 2, figsize=(21, 5.5))

# Hex colours handed to seaborn as the hue palette.
col1 = ['#4878d0', '#ee854a', '#6acc64', '#dc7ec0', '#797979', '#d5bb67', '#82c6e2']

# NOTE(review): tick names are applied by position, so they assume the
# 'model' values appear in the data in this order -- confirm.
model_tick_names = ["FLAN-T5", "FLAN-UL2", "Tulu2-7B", "Tulu2-13B"]

robust_panels = [(corr_robust_o, "Offensiveness"), (corr_robust_p, "Politeness")]
for ax, (panel_df, title) in zip(axes, robust_panels):
    sns.barplot(data=panel_df,
                x='model', y='corr', hue='prompt',
                palette=col1, ax=ax)
    ax.set(title=title, xlabel="", ylabel="Correlation Coefficient")
    ax.set_xticklabels(labels=model_tick_names)
    # Per-axes legends are dropped in favour of one figure-level legend below.
    ax.get_legend().remove()

# Shared legend built from the first panel's handles. The display names live
# in their own variable so the notebook-level `labels` DataFrame used by the
# score-distribution cell is not overwritten when this cell runs.
handles1 = axes[0].get_legend_handles_labels()[0]
prompt_names = ['Prompt 1', 'Prompt 2', 'Prompt 3']
fig.legend(handles1, prompt_names, loc='upper center', ncol=3)

# NOTE(review): this figure is written to "../results/" while the other
# figures in this notebook go to "plots/" -- confirm the path is intentional.
plt.savefig("../results/plot_v1_robust.pdf", format='pdf')