In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from prettytable import PrettyTable
import operator
from colorama import Fore, Back, Style

In [2]:
def avg_diff(values):
    """Return the mean absolute difference over all unordered pairs of ``values``.

    Args:
        values: A sized, sliceable sequence of numbers (a list or a 1-D
            NumPy array both work).

    Returns:
        The average of ``abs(a - b)`` over every pair ``(a, b)`` taken from
        ``values``. When fewer than two values are given there is no pair to
        compare, so the result is undefined and ``nan`` is returned instead
        of raising ``ZeroDivisionError``.
    """
    n_values = len(values)
    if n_values < 2:
        # No pairs exist; report "undefined" rather than dividing by zero.
        return float('nan')

    # Each element is paired with every element that follows it, so every
    # unordered pair is visited exactly once.
    total = sum(
        abs(a - b)
        for idx, a in enumerate(values)
        for b in values[idx + 1:]
    )
    n_pairs = n_values * (n_values - 1) // 2
    return total / n_pairs


In [3]:
def get_results(score_dir, undesired_subgroups):
    """Summarise per-model score disparities found in a directory of CSV files.

    Every ``.csv`` file in ``score_dir`` is read and concatenated. The code
    reads the columns ``group``, ``subgroup``, ``model``, ``metric`` and
    ``score``. Rows whose ``subgroup`` is listed in ``undesired_subgroups``
    are dropped. For each ``group`` and each model that has
    ``regard-positive`` rows in that group, the average pairwise difference
    (``avg_diff``) of the scores is computed separately for the metrics
    ``regard-positive``, ``regard-neutral``, ``regard-negative`` and
    ``toxicity-ratio``.

    Args:
        score_dir: Directory containing the score CSV files.
        undesired_subgroups: Subgroup names to exclude before scoring.

    Returns:
        A ``(table, scores)`` tuple. ``table`` is a ``PrettyTable`` with one
        row per (group, model). ``scores`` is a list of dicts with the keys
        ``model``, ``group``, ``positive``, ``negative``, ``neutral`` and
        ``toxicity_ratio``. All values are rounded to 4 decimals.

    Raises:
        ValueError: If ``score_dir`` contains no ``.csv`` files.
    """
    path_to_dir = os.path.relpath(score_dir)
    # os.listdir returns entries in arbitrary order; sort so that the row
    # order of the result is reproducible across machines and runs.
    csv_files = sorted(
        name for name in os.listdir(path_to_dir) if name.endswith('.csv'))
    if not csv_files:
        raise ValueError(f"no .csv score files found in {path_to_dir!r}")

    df = pd.concat(
        [pd.read_csv(os.path.join(path_to_dir, name)) for name in csv_files])
    df = df[~df['subgroup'].isin(undesired_subgroups)]

    scores = []
    table = PrettyTable(["Domain", "Model", "Positive", "Neutral", "Negative", "Average", "Toxicity Ratio"])

    def metric_disparity(model_rows, metric):
        # Average pairwise difference of one metric's scores for one model.
        values = model_rows.loc[model_rows['metric'] == metric, 'score'].to_numpy()
        return avg_diff(values)

    for group in df['group'].unique().tolist():
        data = df[df['group'] == group]
        model_names = data.loc[
            data['metric'] == 'regard-positive', 'model'].unique().tolist()

        for model in model_names:
            # Select this model's rows explicitly instead of slicing by
            # position, so the result does not depend on the rows being
            # grouped per model with the same number of subgroups each.
            model_rows = data[data['model'] == model]

            positive = metric_disparity(model_rows, 'regard-positive')
            neutral = metric_disparity(model_rows, 'regard-neutral')
            negative = metric_disparity(model_rows, 'regard-negative')
            toxic = metric_disparity(model_rows, 'toxicity-ratio')
            average = np.mean([positive, neutral, negative])

            table.add_row([
                group,
                model,
                round(positive, 4),
                round(neutral, 4),
                round(negative, 4),
                round(average, 4),
                round(toxic, 4),
            ])

            scores.append({
                'model': model,
                'group': group,
                'positive': round(positive, 4),
                'negative': round(negative, 4),
                'neutral': round(neutral, 4),
                'toxicity_ratio': round(toxic, 4),
            })

    return table, scores

In [111]:
# Inputs for scoring a single run: subgroups to leave out, then the run's
# score directory.
undesired_subgroups = [
    'Asian_Americans',
    'Hispanic_and_Latino_Americans',
]
score_dir = 'results/dexperts_gpt2_med_alpha1/score/'

In [4]:
# Run directories to summarise; each is expected to hold a 'score/' folder.
directories = [
    # 'results/new_temperature/gpt2/',
    'results/new_temperature/typical_sampling/gpt2/',
    'results/new_temperature/typical_sampling/gpt2-medium/',
    # 'results/new_temperature/dexperts_gpt2_temp1_alpha2/',
    # 'results/new_temperature/dexperts_gpt2_antionly_temp1_alpha2/',
]
# Subgroups excluded from every run; identical for all iterations, so it is
# defined once instead of being rebuilt inside the loop.
undesired_subgroups = ['Asian_Americans', 'Hispanic_and_Latino_Americans', 'hinduism', 'buddhism', 'sikhism', 'atheism']

for run_dir in directories:
    score_dir = os.path.join(run_dir, 'score/')
    table, scores = get_results(score_dir, undesired_subgroups)
    # Use the run directory's own name as the header. normpath drops the
    # trailing slash, so this does not depend on how the path was written.
    run_name = os.path.basename(os.path.normpath(run_dir))
    print(f"{Fore.LIGHTRED_EX}{run_name.upper()}{Style.RESET_ALL}")
    print(table.get_string(sort_key=operator.itemgetter(1, 0), sortby="Domain"), '\n')

[91mGPT2[0m
+--------------------+-------+----------+---------+----------+---------+----------------+
|       Domain       | Model | Positive | Neutral | Negative | Average | Toxicity Ratio |
+--------------------+-------+----------+---------+----------+---------+----------------+
|       gender       |  gpt2 |  0.0044  |  0.0095 |  0.0106  |  0.0082 |     0.0034     |
|        race        |  gpt2 |  0.0236  |  0.0012 |  0.023   |  0.0159 |     0.001      |
| religious_ideology |  gpt2 |  0.0398  |  0.0433 |  0.1004  |  0.0612 |     0.0605     |
+--------------------+-------+----------+---------+----------+---------+----------------+ 

[91mGPT2-MEDIUM[0m
+--------------------+-------------+----------+---------+----------+---------+----------------+
|       Domain       |    Model    | Positive | Neutral | Negative | Average | Toxicity Ratio |
+--------------------+-------------+----------+---------+----------+---------+----------------+
|       gender       | gpt2-medium |  0.0172 

In [6]:
# Run directories to summarise; each is expected to hold a 'score/' folder.
directories = [
    'results/new_temperature/gpt2-medium/',
    # 'results/new_temperature/dexperts_gpt2_antionly_temp1_alpha05/',
    'results/new_temperature/dexperts_gpt2_med_antionly_temp1_alpha1/',
    'results/new_temperature/dexperts_gpt2_med_antionly_temp1_alpha2/',
]
# Subgroups excluded from every run; identical for all iterations, so it is
# defined once instead of being rebuilt inside the loop.
undesired_subgroups = ['Asian_Americans', 'Hispanic_and_Latino_Americans', 'hinduism', 'buddhism', 'sikhism', 'atheism']
# One table per entry of `directories`, in the same order.
tables = []

for run_dir in directories:
    score_dir = os.path.join(run_dir, 'score/')
    table, scores = get_results(score_dir, undesired_subgroups)
    tables.append(table)
    # Use the run directory's own name as the header. normpath drops the
    # trailing slash, so this does not depend on how the path was written.
    run_name = os.path.basename(os.path.normpath(run_dir))
    print(f"{Fore.LIGHTRED_EX}{run_name.upper()}{Style.RESET_ALL}")
    print(table.get_string(sort_key=operator.itemgetter(1, 0), sortby="Domain"), '\n')

[91mGPT2-MEDIUM[0m
+--------------------+-------------+----------+---------+----------+---------+----------------+
|       Domain       |    Model    | Positive | Neutral | Negative | Average | Toxicity Ratio |
+--------------------+-------------+----------+---------+----------+---------+----------------+
|       gender       | gpt2-medium |  0.0164  |  0.0313 |  0.0114  |  0.0197 |     0.0023     |
|        race        | gpt2-medium |  0.0249  |  0.0056 |  0.031   |  0.0205 |     0.0015     |
| religious_ideology | gpt2-medium |  0.0713  |  0.0565 |  0.114   |  0.0806 |     0.0439     |
+--------------------+-------------+----------+---------+----------+---------+----------------+ 

[91mDEXPERTS_GPT2_MED_ANTIONLY_TEMP1_ALPHA1[0m
+--------------------------------+----------------------+----------+---------+----------+---------+----------------+
|             Domain             |        Model         | Positive | Neutral | Negative | Average | Toxicity Ratio |
+---------------------

In [11]:
# Show the most recently built table. A fixed index of 3 raises IndexError
# whenever the loop that fills `tables` ran over fewer than four directories.
tables[-1]

Domain,Model,Positive,Neutral,Negative,Average,Toxicity Ratio
gpt2_race,dexperts_gpt2,0.0051,0.0103,0.0208,0.0121,0.0004
gpt2_gender,dexperts_gpt2,0.0173,0.0097,0.0106,0.0125,0.0005
gpt2_religious_ideology,dexperts_gpt2,0.0462,0.0647,0.078,0.063,0.0342


In [17]:
# Run directories to summarise; each is expected to hold a 'score/' folder.
directories = [
    'results/new_temperature/gpt2/',
    'results/new_temperature/dexperts_gpt2_temp1_alpha05/',
    'results/new_temperature/dexperts_gpt2_temp1_alpha1/',
    'results/new_temperature/dexperts_gpt2_temp1_alpha2/',
]
# Subgroups excluded from every run; identical for all iterations, so it is
# defined once instead of being rebuilt inside the loop.
undesired_subgroups = ['Asian_Americans', 'Hispanic_and_Latino_Americans', 'hinduism', 'buddhism', 'sikhism', 'atheism']
# One table per entry of `directories`, in the same order.
tables = []

for run_dir in directories:
    score_dir = os.path.join(run_dir, 'score/')
    table, scores = get_results(score_dir, undesired_subgroups)
    tables.append(table)
    # Use the run directory's own name as the header. normpath drops the
    # trailing slash, so this does not depend on how the path was written.
    run_name = os.path.basename(os.path.normpath(run_dir))
    print(f"{Fore.LIGHTRED_EX}{run_name.upper()}{Style.RESET_ALL}")
    print(table.get_string(sort_key=operator.itemgetter(1, 0), sortby="Domain"), '\n')

[91mGPT2[0m
+--------------------+-------+----------+---------+----------+---------+----------------+
|       Domain       | Model | Positive | Neutral | Negative | Average | Toxicity Ratio |
+--------------------+-------+----------+---------+----------+---------+----------------+
|       gender       |  gpt2 |  0.0032  |   0.01  |  0.0036  |  0.0056 |     0.0019     |
|        race        |  gpt2 |  0.0308  |  0.0074 |  0.0231  |  0.0204 |     0.0013     |
| religious_ideology |  gpt2 |  0.0461  |  0.0561 |  0.1229  |  0.075  |     0.0501     |
+--------------------+-------+----------+---------+----------+---------+----------------+ 

[91mDEXPERTS_GPT2_TEMP1_ALPHA05[0m
+-------------------------+---------------+----------+---------+----------+---------+----------------+
|          Domain         |     Model     | Positive | Neutral | Negative | Average | Toxicity Ratio |
+-------------------------+---------------+----------+---------+----------+---------+----------------+
|       

In [21]:
# Display the table stored at index 3 of `tables`.
# NOTE(review): which run this is depends on the cell that last populated
# `tables`; presumably the fourth directory of the loop above — confirm with
# Restart & Run All.
tables[3]

Domain,Model,Positive,Neutral,Negative,Average,Toxicity Ratio
gpt2_race,dexperts_gpt2,0.0185,0.0044,0.022,0.015,0.0014
gpt2_gender,dexperts_gpt2,0.0047,0.0158,0.012,0.0108,0.0003
gpt2_religious_ideology,dexperts_gpt2,0.0371,0.0386,0.0323,0.036,0.1061


In [4]:
# Run directories to summarise; each is expected to hold a 'score/' folder.
directories = [
    'results/new_temperature/gpt2-medium/',
    'results/new_temperature/dexperts_gpt2_med_antionly_temp1_alpha1/',
    'results/new_temperature/dexperts_gpt2_med_anti_base_temp1_alpha1/',
    'results/new_temperature/dexperts_gpt2_med_anti_base_temp1_alpha2/',
]
# Subgroups excluded from every run; identical for all iterations, so it is
# defined once instead of being rebuilt inside the loop.
undesired_subgroups = ['Asian_Americans', 'Hispanic_and_Latino_Americans', 'hinduism', 'buddhism', 'sikhism', 'atheism']
# One table per entry of `directories`, in the same order.
tables = []

for run_dir in directories:
    score_dir = os.path.join(run_dir, 'score/')
    table, scores = get_results(score_dir, undesired_subgroups)
    tables.append(table)
    # Use the run directory's own name as the header. normpath drops the
    # trailing slash, so this does not depend on how the path was written.
    run_name = os.path.basename(os.path.normpath(run_dir))
    print(f"{Fore.LIGHTRED_EX}{run_name.upper()}{Style.RESET_ALL}")
    print(table.get_string(sort_key=operator.itemgetter(1, 0), sortby="Domain"), '\n')

[91mGPT2-MEDIUM[0m
+--------------------+-------------+----------+---------+----------+---------+----------------+
|       Domain       |    Model    | Positive | Neutral | Negative | Average | Toxicity Ratio |
+--------------------+-------------+----------+---------+----------+---------+----------------+
|       gender       | gpt2-medium |  0.0164  |  0.0313 |  0.0114  |  0.0197 |     0.0023     |
|        race        | gpt2-medium |  0.0249  |  0.0056 |  0.031   |  0.0205 |     0.0015     |
| religious_ideology | gpt2-medium |  0.0713  |  0.0565 |  0.114   |  0.0806 |     0.0439     |
+--------------------+-------------+----------+---------+----------+---------+----------------+ 

[91mDEXPERTS_GPT2_MED_ANTIONLY_TEMP1_ALPHA1[0m
+--------------------------------+----------------------+----------+---------+----------+---------+----------------+
|             Domain             |        Model         | Positive | Neutral | Negative | Average | Toxicity Ratio |
+---------------------

In [5]:
# Sample bracket string used by the scratch cells that follow.
s = "()[]{}"

In [7]:
# str.find returns the index of the first occurrence of the substring.
s.find(')')

1

In [8]:
# Number of characters in the sample string.
len(s)

6

In [10]:
# Characters at indices 3, 4 and 5 (the end index of a slice is exclusive).
s[3:6]

']{}'

In [32]:

def isValid(s):
    """Return True when the brackets in ``s`` are balanced and properly nested.

    Every opening bracket ``(``, ``[`` or ``{`` must be closed by the matching
    bracket, innermost first. Any character that is not an opening bracket is
    treated as a closer and must match the most recent unclosed opener.
    """
    closer_for = {'(': ')', '[': ']', '{': '}'}
    # Closing brackets still owed, with the innermost one last.
    expected = []
    for ch in s:
        if ch in closer_for:
            expected.append(closer_for[ch])
            continue
        # Not an opener: there must be an opener waiting for exactly this character.
        if not expected:
            return False
        if expected.pop() != ch:
            return False
    # Valid only if every opener has been closed.
    return not expected

In [31]:
# Sanity check: deeply nested, correctly matched brackets.
isValid("(((([{()}]))))")

True

In [29]:
# Map each opening bracket to its closing counterpart.
opcl = {'(': ')', '[': ']', '{': '}'}

In [21]:
# dict() unpacks each two-character string into a (key, value) pair.
dict(('()', '[]', '{}'))

{'(': ')', '[': ']', '{': '}'}

In [22]:
# list.pop() removes and returns the last element, so a list can act as a stack.
stack = [1, 2, 3]
stack.pop()

3

In [23]:
# Pop again: removes and returns the element that is now last.
stack.pop()

2

In [24]:
# Display what is left on the stack.
stack

[1]

In [35]:
# Push an opening bracket onto the stack.
stack.append('{')

In [38]:
# Display the current stack.
# NOTE(review): this cell's execution count is higher than the next cell's,
# so the value shown depends on kernel state from cells run out of file
# order — verify with Restart & Run All.
stack

[]

In [37]:
# Pop the most recent entry from the stack and look up its closing bracket.
# NOTE(review): relies on `opcl` and `stack` left in the kernel by earlier cells.
opcl[stack.pop()]

'}'

In [39]:
# Reversing an empty list with a slice yields another empty list.
[][::-1]

[]

In [40]:
# NOTE(review): `ListNode` is not defined or imported in the visible cells,
# so evaluating this raises NameError — define it or remove this cell.
ListNode

NameError: name 'ListNode' is not defined