In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import mixedlm


## Descriptives

### Distributions of POPQUORN datasets

In [None]:
# Load the offensiveness and politeness result tables (tab-separated files).
# NOTE(review): the invalid-response cell further down reads `*_results_w2.tsv`;
# confirm that the `_w` files are the intended version for these descriptives.
offensive = pd.read_csv('../LLM_pred/offensive_results_w.tsv', sep='\t')
polite = pd.read_csv('../LLM_pred/polite_results_w.tsv', sep='\t')

In [None]:
# Summary statistics for the offensiveness table, restricted to the columns from
# 'label' through 'asian_score' (a label-based .loc slice includes both endpoints).
offensive.loc[:, 'label':'asian_score'].describe()

Unnamed: 0,label,man_score,woman_score,white_score,black_score,asian_score
count,1500.0,1483.0,1488.0,1500.0,1003.0,708.0
mean,1.880857,1.879838,1.89286,1.865309,2.095563,1.90572
std,0.756187,0.861071,0.89259,0.790384,1.269715,1.147322
min,1.0,1.0,1.0,1.0,1.0,1.0
25%,1.285714,1.166667,1.0,1.2,1.0,1.0
50%,1.7,1.666667,1.666667,1.714286,2.0,1.5
75%,2.285714,2.333333,2.333333,2.258929,3.0,2.5
max,4.75,5.0,5.0,5.0,5.0,5.0


In [None]:
# Summary statistics for the politeness table, restricted to the columns from
# 'label' through 'asian_score' (a label-based .loc slice includes both endpoints).
polite.loc[:, 'label':'asian_score'].describe()

Unnamed: 0,label,man_score,woman_score,white_score,black_score,asian_score
count,3718.0,3660.0,3681.0,3717.0,2222.0,1327.0
mean,3.304835,3.317881,3.307059,3.303973,3.403255,3.196307
std,0.910449,0.955605,1.069658,0.946495,1.226647,1.181742
min,1.0,1.0,1.0,1.0,1.0,1.0
25%,2.714286,2.666667,2.5,2.666667,3.0,2.0
50%,3.428571,3.4,3.4,3.4,3.5,3.0
75%,4.0,4.0,4.0,4.0,4.333333,4.0
max,5.0,5.0,5.0,5.0,5.0,5.0


### Percentage of invalid responses

In [2]:
labels = ['base', 'male', 'female', 'white', 'black', 'asian']
invalid = {'offensive': {}, 'polite': {}}

# Prefixes of the per-model prediction columns; the rates below are stored in this order.
model_prefixes = ['flant5_xxl', 'flan_ul2', 'tulu2_7b', 'tulu2_13b', 'gpt35', 'gpt4',
                  'llama31_8b', 'mistral03_7b', 'qwen25_7b']
# Rename the score columns so that every identity label has a matching `<label>_score` column.
score_renames = {'label': 'base_score', 'man_score': 'male_score', 'woman_score': 'female_score'}

for task in invalid:
    task_df = pd.read_table(f'../LLM_pred/{task}_results_w2.tsv').rename(columns=score_renames)

    for label in labels:
        # Only rows that have a score for this label enter the denominator.
        subset_df = task_df.dropna(subset=[f'{label}_score'])
        prediction_cols = [f'{prefix}_{label}' for prefix in model_prefixes]
        # Fraction of missing (NaN) predictions per model column, counted here as invalid responses.
        col = subset_df[prediction_cols].isna().sum() / subset_df.shape[0]
        # Stored as a plain array: the model names are re-attached by position when displayed.
        invalid[task][label] = col.round(3).values

In [3]:
# Invalid-response rates for the offensiveness task: one row per model, one column per label.
# The row names are attached by position, so they must follow the order used when the
# rates were computed.
pd.DataFrame.from_dict(invalid['offensive']).set_axis(
    ['flant5_xxl', 'flan_ul2', 'tulu2_7b', 'tulu2_13b', 'gpt35', 'gpt4',
     'llama31_8b', 'mistral_7b', 'qwen25_7b'],
    axis=0,
)

Unnamed: 0,base,male,female,white,black,asian
flant5_xxl,0.0,0.001,0.001,0.001,0.001,0.0
flan_ul2,0.0,0.0,0.0,0.0,0.002,0.001
tulu2_7b,0.075,0.022,0.036,0.063,0.143,0.153
tulu2_13b,0.02,0.032,0.032,0.034,0.2,0.13
gpt35,0.013,0.04,0.169,0.231,0.711,0.448
gpt4,0.0,0.0,0.0,0.0,0.0,0.0
llama31_8b,0.004,0.006,0.005,0.009,0.009,0.007
mistral_7b,0.043,0.035,0.039,0.135,0.243,0.137
qwen25_7b,0.007,0.006,0.007,0.009,0.013,0.01


In [4]:
# Invalid-response rates for the politeness task: one row per model, one column per label.
# The row names are attached by position, so they must follow the order used when the
# rates were computed.
pd.DataFrame.from_dict(invalid['polite']).set_axis(
    ['flant5_xxl', 'flan_ul2', 'tulu2_7b', 'tulu2_13b', 'gpt35', 'gpt4',
     'llama31_8b', 'mistral_7b', 'qwen25_7b'],
    axis=0,
)

Unnamed: 0,base,male,female,white,black,asian
flant5_xxl,0.0,0.0,0.0,0.0,0.0,0.0
flan_ul2,0.0,0.001,0.001,0.001,0.001,0.001
tulu2_7b,0.028,0.016,0.027,0.029,0.131,0.072
tulu2_13b,0.017,0.026,0.026,0.033,0.097,0.04
gpt35,0.001,0.001,0.001,0.003,0.065,0.002
gpt4,0.0,0.0,0.0,0.0,0.0,0.0
llama31_8b,0.0,0.0,0.001,0.001,0.002,0.002
mistral_7b,0.004,0.005,0.007,0.009,0.036,0.013
qwen25_7b,0.003,0.004,0.005,0.006,0.007,0.007


## Tendency to Which Identity (BaseGap)

In [9]:
# For each task and model, fit one mixed linear model per identity category on the
# absolute gap of the prompt == 0 rows, and export the coefficient, standard error
# and p-value of every non-reference label.
# The first entry of each category list is used as the reference level of the contrast.
gender = ['man', 'woman']
ethnicity = ['white', 'asian', 'black']
cats = [gender, ethnicity]

for task in ['offensive', 'polite']:
    gap_df = pd.read_table(f'../LLM_pred/{task}_gap_l2.tsv')
    # Keep only the prompt == 0 rows without missing values and analyse |gap|.
    gap_df = gap_df[gap_df['prompt']==0].dropna()
    gap_df = gap_df.assign(gap=gap_df['gap'].abs())

    models = gap_df['model'].unique()
    stat_results = []

    for model in models:
        for cat in cats:
            df = gap_df[(gap_df['model']==model) & (gap_df['label'].isin(cat))]
            reference = cat[0]  # 'man' for gender, 'white' for ethnicity
            # Treatment-coded label effect with `id` as the grouping variable.
            lmm = mixedlm(data=df, formula=f"gap ~ C(label, Treatment(reference='{reference}'))",
                          groups=df['id'])
            result = lmm.fit()

            # Position 0 is the intercept; positions 1..len(cat)-1 are read as the contrasts
            # for cat[1:]. `.iloc` makes the positional access explicit: integer keys on a
            # label-indexed Series are deprecated in recent pandas.
            # NOTE(review): this assumes the fitted coefficients come out in the same order
            # as cat[1:] and that every label of `cat` is present in `df` -- confirm.
            for i in range(1, len(cat)):
                stat_results.append({'model':model, 'label':cat[i], 'coef':round(result.params.iloc[i],3),
                                     'se':round(result.bse.iloc[i],3), 'p':round(result.pvalues.iloc[i],3)})

    # A confidence-interval variant of this export was previously toggled by hand: it stored
    # result.conf_int()[0] or result.conf_int()[1] at the same position as 'coef' and wrote
    # to f'lme_results/{task}_basegap_ci_results2.csv'.
    # NOTE(review): assumes the `lme_results/` directory already exists.
    pd.DataFrame.from_records(stat_results).to_csv(f'lme_results/{task}_basegap_se_results2.csv', index=False)

## Effect of Adding Identity Prompt (AddGap)

In [2]:
# For each task, model and identity label, fit a mixed linear model of the absolute gap
# on `prompt` and export the coefficient, standard error and p-value of the `prompt` term.
# NOTE(review): `labels` below rebinds the list of the same name defined in the
# invalid-response cell; re-run that cell before reusing it after this one.
for task in ['offensive', 'polite']:
    gap_df = pd.read_table(f'../LLM_pred/{task}_gap_l2.tsv')
    gap_df['id'] = gap_df['id'].astype('int')

    models = gap_df['model'].unique()
    labels = gap_df['label'].unique()
    stat_results = []

    for model in models:
        for label in labels:
            # Rows of this model/label without missing values; `.assign` builds a new frame
            # instead of writing into a filtered slice of `gap_df`.
            df = (gap_df[(gap_df['model']==model) & (gap_df['label']==label)]
                  .dropna()
                  .assign(gap=lambda d: d['gap'].abs()))
            # `id` is the grouping variable of the mixed model.
            lmm = mixedlm(data=df, formula='gap ~ prompt', groups=df['id'])
            result = lmm.fit()
            # Position 0 is the intercept and position 1 the `prompt` term. `.iloc` makes the
            # positional access explicit: integer keys on a label-indexed Series are
            # deprecated in recent pandas.
            stat_results.append({'model':model, 'label':label, 'coef':round(result.params.iloc[1],3),
                                 'se':round(result.bse.iloc[1],3), 'p':round(result.pvalues.iloc[1],3)})

    # A confidence-interval variant of this export was previously toggled by hand: it stored
    # result.conf_int()[0][1] or result.conf_int()[1][1] as 'coef' and wrote to
    # f'lme_results/{task}_addgap_ci_results2.csv'.
    # NOTE(review): assumes the `lme_results/` directory already exists.
    pd.DataFrame.from_records(stat_results).to_csv(f'lme_results/{task}_addgap_se_results2.csv', index=False)
