In [109]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from prettytable import PrettyTable
import operator
from colorama import Fore, Back, Style

In [85]:
def avg_diff(values):
    """Return the mean absolute difference over all unordered pairs of ``values``.

    Every pair ``(values[i], values[j])`` with ``i < j`` contributes
    ``abs(values[i] - values[j])``; the contributions are then averaged.

    Args:
        values: Iterable of numbers (a list or a 1-D NumPy array both work).

    Returns:
        The average pairwise absolute difference, or ``nan`` when ``values``
        holds fewer than two elements, because there is no pair to compare.
    """
    # Local import: the notebook's import cell does not bring in itertools.
    from itertools import combinations

    differences = [abs(a - b) for a, b in combinations(values, 2)]
    if not differences:
        # A mean over zero pairs is undefined; report nan instead of failing
        # with ZeroDivisionError.
        return float('nan')
    return sum(differences) / len(differences)


In [110]:
def get_results(score_dir, undesired_subgroups):
    """Summarise per-group score disparities for every model found in ``score_dir``.

    All ``*.csv`` files directly inside ``score_dir`` are read and concatenated.
    The columns read are ``group``, ``subgroup``, ``model``, ``metric`` and
    ``score``. For each (group, model) pair, the scores of each metric are
    reduced to one number with ``avg_diff``.

    Args:
        score_dir: Directory that contains the score CSV files.
        undesired_subgroups: Subgroup names whose rows are dropped before any
            statistic is computed.

    Returns:
        A ``(table, scores)`` tuple. ``table`` is a ``PrettyTable`` with one
        row per (group, model) and the columns Domain, Model, Positive,
        Neutral, Negative, Average and Toxicity Ratio. ``scores`` is a list
        with one dict per (group, model) holding the keys ``model``,
        ``group``, ``positive``, ``negative``, ``neutral`` and
        ``toxicity_ratio``. All values are rounded to 4 decimals.

    Raises:
        ValueError: If ``score_dir`` contains no ``.csv`` file.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order of the outputs stable from run to run.
    csv_files = sorted(name for name in os.listdir(score_dir) if name.endswith('.csv'))
    if not csv_files:
        raise ValueError(f"No .csv score files found in {score_dir!r}")

    df = pd.concat(
        [pd.read_csv(os.path.join(score_dir, name)) for name in csv_files],
        ignore_index=True,
    )
    df = df[~df['subgroup'].isin(undesired_subgroups)]

    # (key used in the ``scores`` dicts, value of the ``metric`` column)
    metrics = (
        ('positive', 'regard-positive'),
        ('negative', 'regard-negative'),
        ('neutral', 'regard-neutral'),
        ('toxicity_ratio', 'toxicity-ratio'),
    )

    scores = []
    table = PrettyTable(["Domain", "Model", "Positive", "Neutral", "Negative", "Average", "Toxicity Ratio"])

    for group in df['group'].unique().tolist():
        data = df[df['group'] == group]
        # Models are listed in order of first appearance among the
        # regard-positive rows of this group.
        models = data.loc[data['metric'] == 'regard-positive', 'model'].unique().tolist()

        for model in models:
            model_rows = data[data['model'] == model]
            # Select scores by model label rather than by positional slices,
            # so the result does not depend on rows being contiguous per model
            # or on every model having the same number of subgroups.
            diffs = {
                key: avg_diff(model_rows.loc[model_rows['metric'] == metric, 'score'].to_numpy())
                for key, metric in metrics
            }
            # The average is taken over the unrounded regard disparities.
            average = np.mean([diffs['positive'], diffs['neutral'], diffs['negative']])

            table.add_row([
                group,
                model,
                round(diffs['positive'], 4),
                round(diffs['neutral'], 4),
                round(diffs['negative'], 4),
                round(average, 4),
                round(diffs['toxicity_ratio'], 4),
            ])

            scores.append({
                'model': model,
                'group': group,
                'positive': round(diffs['positive'], 4),
                'negative': round(diffs['negative'], 4),
                'neutral': round(diffs['neutral'], 4),
                'toxicity_ratio': round(diffs['toxicity_ratio'], 4),
            })

    return table, scores

In [111]:
# Score directory and the subgroups to leave out of the statistics.
# Both names are assigned again by the loop cells further down.
score_dir = 'results/dexperts_gpt2_med_alpha1/score/'
undesired_subgroups = [
    'Asian_Americans',
    'Hispanic_and_Latino_Americans',
]

In [116]:
# Model output directories to summarise; the scores are read from the
# 'score/' sub-directory of each one.
directories = [
    'results/new_temperature/gpt2/',
    'results/new_temperature/trigger-gpt2/',
    'results/new_temperature/dexperts_gpt2_antionly_temp1_alpha2/',
]

# Subgroups excluded from this run. The list is the same for every
# directory, so it is built once instead of on each iteration.
undesired_subgroups = ['Asian_Americans', 'Hispanic_and_Latino_Americans', 'hinduism', 'buddhism', 'sikhism', 'atheism']

for model_dir in directories:
    score_dir = os.path.join(model_dir, 'score/')
    table, scores = get_results(score_dir, undesired_subgroups)
    # The header is the model directory's own name. normpath strips the
    # trailing separator, so this works whether or not the entry ends in '/'.
    run_name = os.path.basename(os.path.normpath(model_dir))
    print(f"{Fore.LIGHTRED_EX}{run_name.upper()}{Style.RESET_ALL}")
    print(table.get_string(sort_key=operator.itemgetter(1, 0), sortby="Domain"), '\n')

[91mGPT2[0m
+--------------------+-------+----------+---------+----------+---------+----------------+
|       Domain       | Model | Positive | Neutral | Negative | Average | Toxicity Ratio |
+--------------------+-------+----------+---------+----------+---------+----------------+
|       gender       |  gpt2 |  0.0032  |   0.01  |  0.0036  |  0.0056 |     0.0019     |
|        race        |  gpt2 |  0.0308  |  0.0074 |  0.0231  |  0.0204 |     0.0013     |
| religious_ideology |  gpt2 |  0.0461  |  0.0561 |  0.1229  |  0.075  |     0.0501     |
+--------------------+-------+----------+---------+----------+---------+----------------+ 

[91mTRIGGER-GPT2[0m
+--------------------+-------+----------+---------+----------+---------+----------------+
|       Domain       | Model | Positive | Neutral | Negative | Average | Toxicity Ratio |
+--------------------+-------+----------+---------+----------+---------+----------------+
|       gender       |  gpt2 |  0.0052  |  0.0154 |  0.0073  |

In [117]:
# Model output directories to summarise; the scores are read from the
# 'score/' sub-directory of each one.
directories = [
    'results/new_temperature/gpt2/',
    'results/new_temperature/trigger-gpt2/',
    'results/new_temperature/dexperts_gpt2_antionly_temp1_alpha2/',
]

# Nothing is excluded in this run, so every subgroup contributes.
# Subgroups that can be excluded instead: 'Asian_Americans',
# 'Hispanic_and_Latino_Americans', 'hinduism', 'buddhism', 'sikhism', 'atheism'.
undesired_subgroups = []

for model_dir in directories:
    score_dir = os.path.join(model_dir, 'score/')
    table, scores = get_results(score_dir, undesired_subgroups)
    # The header is the model directory's own name. normpath strips the
    # trailing separator, so this works whether or not the entry ends in '/'.
    run_name = os.path.basename(os.path.normpath(model_dir))
    print(f"{Fore.LIGHTRED_EX}{run_name.upper()}{Style.RESET_ALL}")
    print(table.get_string(sort_key=operator.itemgetter(1, 0), sortby="Domain"), '\n')

[91mGPT2[0m
+--------------------+-------+----------+---------+----------+---------+----------------+
|       Domain       | Model | Positive | Neutral | Negative | Average | Toxicity Ratio |
+--------------------+-------+----------+---------+----------+---------+----------------+
|       gender       |  gpt2 |  0.0032  |   0.01  |  0.0036  |  0.0056 |     0.0019     |
|        race        |  gpt2 |  0.0483  |  0.028  |  0.0188  |  0.0317 |     0.0016     |
| religious_ideology |  gpt2 |  0.1449  |  0.0709 |  0.165   |  0.1269 |     0.066      |
+--------------------+-------+----------+---------+----------+---------+----------------+ 

[91mTRIGGER-GPT2[0m
+--------------------+-------+----------+---------+----------+---------+----------------+
|       Domain       | Model | Positive | Neutral | Negative | Average | Toxicity Ratio |
+--------------------+-------+----------+---------+----------+---------+----------------+
|       gender       |  gpt2 |  0.0052  |  0.0154 |  0.0073  |