# SBIC

In [1]:
import json, os, re
import pandas as pd
import numpy as np
from glob import glob

In [2]:
TASK = "css_data/sbic"
baseline = f"{TASK}/T5-finetune-sbic_predict.json"
with open(baseline, "r") as infile:
    base_json = json.load(infile)
sbic = pd.read_csv('css_data/sbic/sbic.csv')

In [3]:
sbic_test = []
for idx, lbl in enumerate(base_json['labels']):
    pred = base_json['predictions'][idx]
    pred = pred.split(", ")[0].replace("[", "").replace('"', "")
    consider = sbic[sbic["targetStereotype"]==lbl].copy()
    consider['Generated'] = pred
    consider['Model'] = "baseline"
    consider['Task'] = "sbic"
    if len(consider)>1:
        print("\tnon-unique", lbl)
    if len(consider)==1:
        sbic_test.append(consider)
    else:
        print("no match", lbl)
    #sbic_test.append(None)

	non-unique ["trivializes harm to victims"]
no match ["trivializes harm to victims"]
	non-unique ["trivializes harm to victims"]
no match ["trivializes harm to victims"]
	non-unique ["trivializes harm to victims"]
no match ["trivializes harm to victims"]
	non-unique ["trivializes harm to victims."]
no match ["trivializes harm to victims."]
	non-unique ["trivializes harm to victims."]
no match ["trivializes harm to victims."]
	non-unique ["are marginalized for a joke"]
no match ["are marginalized for a joke"]
	non-unique ["trivializes harm to victims"]
no match ["trivializes harm to victims"]
	non-unique ["trivializes harm to victims"]
no match ["trivializes harm to victims"]
	non-unique ["trivializes harm to victims"]
no match ["trivializes harm to victims"]
	non-unique ["trivializes harm to victims"]
no match ["trivializes harm to victims"]
	non-unique ["trivializes harm to victims"]
no match ["trivializes harm to victims"]
	non-unique ["trivializes harm to victims."]
no match ["trivi

In [4]:
hit_df = pd.concat(sbic_test)[['post', 'targetStereotype', 'targetMinority', 'Generated', 'Model', 'Task']].sample(frac=1, random_state=7).copy()
hit_df.to_csv('hit/input/sbic/sbic_baseline.csv', index=False)

In [5]:
for fn in glob("css_data/sbic/answer*"):
    model = "-".join(fn.split("-")[1:])
    print(model)
    df = pd.read_csv(fn, sep='\t', names=['idx', 'targetStereotype', 'Generated'])
    
    sbic_test = []
    for _, row in df.iterrows():
        pred = row["Generated"].replace("&", "")
        lbl = row["targetStereotype"]
        consider = sbic[sbic["targetStereotype"]==lbl].copy()
        consider['Generated'] = pred
        consider['Model'] = model
        consider['Task'] = "sbic"
        if len(consider)==1:
            sbic_test.append(consider)
        else:
            print("\tno match", lbl)
    hit_df = pd.concat(sbic_test)[['post', 'targetStereotype', 'targetMinority', 'Generated', 'Model', 'Task']].sample(frac=1, random_state=7).copy()
    hit_df.to_csv(f'hit/input/sbic/sbic_{model}.csv', index=False)

text-ada-001
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims."]
	no match ["trivializes harm to victims."]
	no match ["are marginalized for a joke"]
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims."]
text-babbage-001
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims."]
	no match ["trivializes harm to victims."]
	no match ["are marginalized for a joke"]
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims"]
	no match ["trivializes harm to victims"]

# MRF

In [1]:
import json, os, re
import pandas as pd
import numpy as np
from glob import glob

In [2]:
TASK = "css_data/mrf"
baseline = f"{TASK}/T5-finetune-mrf-explain.json"
with open(baseline, "r") as infile:
    base_json = json.load(infile)
mrf = pd.read_csv('css_data/mrf/mrf.csv')

In [3]:
mrf_test = []
for idx, lbl in enumerate(base_json['labels']):
    pred = base_json['predictions'][idx]
    pred = pred.split(", ")[0].replace("[", "").replace('"', "")
    consider = mrf[mrf["writer_intent"]==lbl].copy()
    consider['Generated'] = pred
    consider['Model'] = "baseline"
    consider['Task'] = "mrf"
    consider['misinfo'] = consider['gold_label']
    if len(consider)>1:
        print("\tnon-unique", lbl)
    if len(consider)==1:
        mrf_test.append(consider)
    else:
        print("no match", lbl)
    #sbic_test.append(None)

	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['covid-19 is not real']
no match ['covid-19 is not real']
	non-unique ['unknown intent']
no match ['unknown intent']
	non-unique ['unknown intent']
no match ['un

In [4]:
hit_df = pd.concat(mrf_test)[['headline', 'writer_intent', 'misinfo', 'Generated', 'Model', 'Task']].sample(frac=1, random_state=7).copy()
hit_df.to_csv('hit/input/mrf/mrf_baseline.csv', index=False)

In [5]:
for fn in glob("css_data/mrf/answer-explanation*"):
    model = "-".join(fn.split("-")[2:])
    print(model)
    with open(f"css_data/mrf/prompts.json-explanation-{model}", "r") as f:
        prompts = json.load(f)
        
    df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)

    mrf_test = []
    for _, row in df.iterrows():
        if type(row["Generated"])==str:
            pred = row["Generated"].replace("&", "")
            headline = ""
            if str(row["idx"]) in prompts:
                headline = prompts[str(row["idx"])].split('\n')[0].strip()
            
            lbl = row["writer_intent"]
            consider = mrf[mrf["headline"]==headline].copy()
            
            if (not len(consider)) or (consider["writer_intent"].iloc[0]!=lbl):
                consider = mrf[mrf["writer_intent"]==lbl].copy()
            
            consider['Generated'] = pred
            consider['Model'] = model
            consider['Task'] = "mrf"
            consider['misinfo'] = consider['gold_label']
            
            if len(consider)==1:
                if consider["writer_intent"].iloc[0]==lbl:
                    mrf_test.append(consider)
                else:
                    print("Mismatch", model, row["idx"])
                    break
            elif len(consider)>1:
                print("\tno match", headline, '\t', lbl)
            else:
                pass
    if len(mrf_test):
        hit_df = pd.concat(mrf_test)[['headline', 'writer_intent', 'misinfo', 'Generated', 'Model', 'Task']].sample(frac=1, random_state=7).copy()
        hit_df.to_csv(f'hit/input/mrf/mrf_{model}.csv', index=False)
    
    

flan-t5-base




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


chatgpt




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)
Skipping line 21: expected 3 fields, saw 4
Skipping line 22: expected 3 fields, saw 4
Skipping line 54: expected 3 fields, saw 4
Skipping line 63: expected 3 fields, saw 4
Skipping line 64: expected 3 fields, saw 4
Skipping line 66: expected 3 fields, saw 5
Skipping line 107: expected 3 fields, saw 4
Skipping line 110: expected 3 fields, saw 5
Skipping line 155: expected 3 fields, saw 4
Skipping line 168: expected 3 fields, saw 4
Skipping line 172: expected 3 fields, saw 4
Skipping line 188: expected 3 fields, saw 4
Skipping line 193: expected 3 fields, saw 4
Skipping line 202: expected 3 fields, saw 4
Skipping line 218: expected 3 fields, saw 4
Skipping line 219: expected 3 fields, saw 5
Skipping line 223: expected 3 fields, saw 4
Skipping line 226: expected 3 fields, saw 4
Skipping line 228: expected 3 fields, saw 4
Skipping line 232: expected 3 fields, saw 4
Skipping line 235: expe

text-ada-001
	no match The coronavirus vaccine will cause other more serious diseases to emerge, said Anthony Fauci. 	 ['unknown intent']
	no match Otters Show How Predators Can Blunt Climate Damage 	 ['unknown intent']
	no match New Evidence That the Ancient Climate Was Warmer than Today's  'Roman Warming was the warmest in the last 2,000 years' 	 ['unknown intent']
	no match Punishing Companies For CO2 Emissions Won't Affect Temps, Climate 	 ['unknown intent']




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


	no match Climate report warns of rising risk as U.N. pushes nations to take action 	 ['unknown intent']
	no match The end of total quarantine: the Cabinet of Ministers of Ukraine has just made a decision. 	 ['unknown intent']
	no match 'The most significant climate legislation ever': How stimulus bill tackles warming planet 	 ['unknown intent']
	no match No 10 and Treasury clash over spending on environmental agenda 	 ['unknown intent']
	no match The Drilldown: Opposition disappointed with government's climate legislation 	 ['unknown intent']
	no match Councils reporting thousands of 'climate-related' incidents, including flooding 	 ['unknown intent']
	no match UK to make climate risk reports mandatory for large companies 	 ['unknown intent']
	no match French schooner Tara sets sail on scientific mission to study climate change 	 ['unknown intent']
	no match How climate change could benefit Russia 	 ['covid-19 is not real']
	no match Monetary Expansion Yielding Diminishing Returns And



  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


flan-ul2




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


text-babbage-001




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


flan-t5-small




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


text-davinci-001




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


flan-t5-xl




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


flan-t5-large




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


text-davinci-003




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


flan-t5-xxl




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


text-curie-001




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


In [None]:
q = "'Buddha would be green': Dalai Lama calls for urgent climate action"
mrf[mrf['headline']==q]

In [7]:
for _, row in df.iterrows():
    pred = row["Generated"].replace("&", "")
    lbl = prompts[str(row["idx"])].split('\n')[0].strip()
    break

In [12]:
pred, lbl

('Some people are reporting delays in getting the flu vaccine in Texas',
 'Texas sees delays amid push for faster vaccine rollout')

# FLUTE

In [1]:
import json, os, re
import pandas as pd
import numpy as np
from glob import glob

In [2]:
def get_premise_hypothesis(txt):
    premises = re.findall(r"premise: (.*?)(?:\n|$|hypothesis)", txt)
    hypotheses = re.findall(r"hypothesis: (.*?)(?:\n|$|premise)", txt)
    premise, hypothesis = "", ""
    if len(premises):
        premise = premises[0]
    if len(hypotheses):
        hypothesis = hypotheses[0]
    return premise.strip(), hypothesis.strip()

In [3]:
TASK = "css_data/flute"
baseline = f"{TASK}/T5-finetune-flute_predict.json"
with open(baseline, "r") as infile:
    base_json = json.load(infile)
with open("css_data/flute/flute-explanation.json", "r") as f:
    flute = pd.DataFrame.from_dict(json.load(f))

In [4]:
premises = []
hypotheses = []
for c in flute.context.values:
    p, h = get_premise_hypothesis(c)
    premises.append(p)
    hypotheses.append(h)
flute["premise"] = premises
flute["hypothesis"] = hypotheses

In [5]:
flute_test = []
for idx, lbl in enumerate(base_json['labels']):
    pred = base_json['predictions'][idx]
    pred_label, pred_expl = re.split(r"[&]+", pred)
    lbl_label, lbl_expl = re.split(r"[&]+", lbl)
    
    consider = flute[flute["additional_labels"]==lbl_expl].copy()
    consider['Generated'] = pred_expl
    consider['Generated_Label'] = pred_label
    consider['Model'] = "baseline"
    consider['Task'] = "flute"
    if len(consider)>0:
        flute_test.append(consider.iloc[0:1])
    else:
        print("no match", lbl)
    #sbic_test.append(None)

In [6]:
hit_df = pd.concat(flute_test)[['premise', 'hypothesis', 'labels', 'additional_labels', 'Generated', 'Generated_Label', 'Model', 'Task']].sample(frac=1, random_state=7).copy()
hit_df.to_csv('hit/input/flute/flute_baseline.csv', index=False)

In [7]:
for fn in glob("css_data/flute/answer-explain*"):
    model = "-".join(fn.split("-")[2:])
    print(model)
    with open(f"css_data/flute/prompts.json-explanation-{model}", "r") as f:
        prompts = json.load(f)
        
    df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)

    flute_test = []
    for _, row in df.iterrows():
        if type(row["Generated"])==str:
            pred = row["Generated"].replace("&", "")
            if str(row["idx"]) in prompts:
                prompt = prompts[str(row["idx"])]
                p, h = get_premise_hypothesis(prompt)
                consider = flute[(flute["premise"]==p) & (flute["hypothesis"]==h)].copy() 

                consider['Generated'] = pred
                consider['Model'] = model
                consider['Task'] = "flute"

                if len(consider)>0:
                    flute_test.append(consider.iloc[0:1])
                else:
                    print("\tno match", p, '\t', h)
            else:
                print("Misaligned", model)
                break
    if len(flute_test):
        hit_df = pd.concat(flute_test)[['premise', 'hypothesis', 'labels', 'additional_labels', 'Generated', 'Model', 'Task']].sample(frac=1, random_state=7).copy()
        hit_df.to_csv(f'hit/input/flute/flute_{model}.csv', index=False)

flan-t5-xl




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


flan-t5-small




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


flan-t5-base




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


text-curie-001




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


text-davinci-003




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


chatgpt
flan-t5-large




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


text-ada-001




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


text-babbage-001




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


text-davinci-002




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


text-davinci-001




  df = pd.read_csv(fn, sep='\t', names=['idx', 'writer_intent', 'Generated'], error_bad_lines=False)


In [4]:
with open('css_data/flute/prompts.json-explanation-text-davinci-003', 'r') as f: 
    dv3 = json.load(f)
df = pd.read_csv("hit/input/flute/flute_text-davinci-003.csv")
#df["Gen_DV3"] = df["Generated"]
df["Generated"] = [None for x in df.Generated.values]
df["Model"] = "chatgpt"
rerun = pd.read_csv('css_data/flute/answer-explanation-chatgpt-rerun', sep='\t', names=["idx", "Generated"])
for _, row in rerun.iterrows():
    prompt = dv3[str(row["idx"])]
    p, h = get_premise_hypothesis(prompt)
    idx = df[(df["premise"]==p) & (df["hypothesis"]==h)]
    if len(idx):
        df.loc[idx.index[0]]["Generated"] = row["Generated"]
df[~df["Generated"].isna()].to_csv("hit/input/flute/flute_chatgpt.csv", index=False)

## Positive Reframing

In [6]:
import json, os, re
import pandas as pd
import numpy as np
from glob import glob

In [2]:
prf = pd.read_csv('css_data/positive_reframing/positive_reframing.csv')

In [3]:
prf

Unnamed: 0,original_text,reframed_text,strategy,original_with_label,split
0,Always stressing and thinking about loads of t...,"Loads of things on my mind, I need to make a l...","['growth', 'neutralizing']",Always stressing and thinking about loads of t...,test
1,The list of things I have to do today is forev...,Today I have a lot to do. Time for productivit...,"['growth', 'optimism']",The list of things I have to do today is forev...,test
2,If I don't have a mental breakdown before the ...,I'm going to look after my mental health over ...,"['growth', 'neutralizing', 'optimism']",If I don't have a mental breakdown before the ...,test
3,I just caught myself taking extra deep breaths...,Trying to keep breathing. Extra deep,['neutralizing'],I just caught myself taking extra deep breaths...,test
4,Worst night ever. Just one bad thing after ano...,I had a tough night but it's over and the holi...,['impermanence'],Worst night ever. Just one bad thing after ano...,test
...,...,...,...,...,...
8344,Anybody would think I'm 13 again and going thr...,People would think I am going through puberty ...,"['neutralizing', 'optimism']",Anybody would think I'm 13 again and going thr...,train
8345,How come it seems the spring semester is harde...,Even though spring semester seems harder than ...,"['neutralizing', 'optimism']",How come it seems the spring semester is harde...,train
8346,The snow and ice is bringing out the crazy in ...,The snow and ice calm me. I don't see any bad ...,['neutralizing'],The snow and ice is bringing out the crazy in ...,train
8347,The amount of stuff I need to sort out before ...,I can't wait to finish settling my affairs and...,['impermanence'],The amount of stuff I need to sort out before ...,train


In [4]:
for fn in glob("css_data/positive_reframing/answer-*"):
    model = "-".join(fn.split("-")[1:])
    print(model)
    with open(f"css_data/positive_reframing/prompts.json-{model}", "r") as f:
        prompts = json.load(f)
        
    df = pd.read_csv(fn, sep='\t', names=['idx', 'reframed_text', 'Generated'], error_bad_lines=False)

    reframing_test = []
    for _, row in df.iterrows():
        
        if type(row["Generated"])==str:
            pred = row["Generated"].replace("&", "")
            
            lbl = row["reframed_text"]
            consider = prf[prf["reframed_text"]==lbl].copy()
            
            consider['Generated'] = pred
            consider['Model'] = model
            consider['Task'] = "positive_reframing"
            
            if len(consider)>0:
                reframing_test.append(consider.iloc[0:1])
            else:
                print("\tno match", lbl)
        else:
            print("Misaligned", row)
    if len(reframing_test):
        hit_df = pd.concat(reframing_test)[['original_text', 'reframed_text', "strategy", 'Generated', 'Model', 'Task']].sample(frac=1, random_state=7).copy()
        hit_df.to_csv(f'hit/input/positive_reframing/positive_reframing_{model}.csv', index=False)

text-ada-001
Misaligned idx                                                             25
reframed_text    Only halfway through the work day and I've com...
Generated                                                      NaN
Name: 25, dtype: object
Misaligned idx                                                             27
reframed_text    Essential workers are finding it hard at the m...
Generated                                                      NaN
Name: 27, dtype: object
Misaligned idx                                                             49
reframed_text    I'm so thankful to be offered a promotion at m...
Generated                                                      NaN
Name: 49, dtype: object
Misaligned idx                                                             25
reframed_text    Only halfway through the work day and I've com...
Generated                                                      NaN
Name: 63, dtype: object
Misaligned idx                             



  df = pd.read_csv(fn, sep='\t', names=['idx', 'reframed_text', 'Generated'], error_bad_lines=False)


Misaligned idx                                                             25
reframed_text    Only halfway through the work day and I've com...
Generated                                                      NaN
Name: 159, dtype: object
Misaligned idx                                                             27
reframed_text    Essential workers are finding it hard at the m...
Generated                                                      NaN
Name: 160, dtype: object
Misaligned idx                                                             49
reframed_text    I'm so thankful to be offered a promotion at m...
Generated                                                      NaN
Name: 161, dtype: object
Misaligned idx                                                             75
reframed_text    Going to treat myself and have a relaxing time...
Generated                                                      NaN
Name: 162, dtype: object
Misaligned idx                                      



  df = pd.read_csv(fn, sep='\t', names=['idx', 'reframed_text', 'Generated'], error_bad_lines=False)


  df = pd.read_csv(fn, sep='\t', names=['idx', 'reframed_text', 'Generated'], error_bad_lines=False)


  df = pd.read_csv(fn, sep='\t', names=['idx', 'reframed_text', 'Generated'], error_bad_lines=False)


flan-t5-small




  df = pd.read_csv(fn, sep='\t', names=['idx', 'reframed_text', 'Generated'], error_bad_lines=False)


flan-t5-xl




  df = pd.read_csv(fn, sep='\t', names=['idx', 'reframed_text', 'Generated'], error_bad_lines=False)


flan-t5-xxl




  df = pd.read_csv(fn, sep='\t', names=['idx', 'reframed_text', 'Generated'], error_bad_lines=False)


text-curie-001




  df = pd.read_csv(fn, sep='\t', names=['idx', 'reframed_text', 'Generated'], error_bad_lines=False)


text-davinci-002




  df = pd.read_csv(fn, sep='\t', names=['idx', 'reframed_text', 'Generated'], error_bad_lines=False)


flan-ul2




  df = pd.read_csv(fn, sep='\t', names=['idx', 'reframed_text', 'Generated'], error_bad_lines=False)


flan-t5-base




  df = pd.read_csv(fn, sep='\t', names=['idx', 'reframed_text', 'Generated'], error_bad_lines=False)


chatgpt




  df = pd.read_csv(fn, sep='\t', names=['idx', 'reframed_text', 'Generated'], error_bad_lines=False)


text-davinci-001




  df = pd.read_csv(fn, sep='\t', names=['idx', 'reframed_text', 'Generated'], error_bad_lines=False)


## Combine

In [1]:
import json, os, re
import pandas as pd
import numpy as np
from glob import glob
from ast import literal_eval

In [2]:
# def build_comparison_rows(df, txt="post", txt2=None, gold="targetStereotype", secondary="targetMinority", 
#                           n=4, literal=True, random_state=7):
    
#     gold_df = {}
#     g = df.iloc[0][gold]
    
#     if literal:
#         g = literal_eval(g)
#         for i, t in enumerate(g):
#             gold_df[i] = {
#                 txt: df.iloc[0][txt],
#                 secondary: df.iloc[0][secondary],
#                 "Generated": t,
#                 "Model": "human",
#                 "Task": df.iloc[0]["Task"]
#             }
#             if txt2:
#                 gold_df[i][txt2] = df.iloc[0][txt2]
#     else:
#         gold_df = {0: {
#                 txt: df.iloc[0][txt],
#                 secondary: df.iloc[0][secondary],
#                 "Generated": g,
#                 "Model": "human",
#                 "Task": df.iloc[0]["Task"]   
#         }}
#         if txt2:
#             gold_df[0][txt2] = df.iloc[0][txt2]
#     gold_df = pd.DataFrame().from_dict(gold_df).T
#     comb = pd.concat([gold_df, df])
    
#     rand = comb.sample(frac=1, random_state=random_state)
#     rows = {}
#     for i in range(0, len(rand)-4, n):
#         row = {}
#         sub = rand.iloc[i:i+4]
#         j = 1
#         for _, r in sub.iterrows():
#             row[txt] = r[txt]
#             if txt2:
#                 row[txt2] = r[txt2]
#             row[secondary] = r[secondary]
#             row[f"Generated_{j}"] = r["Generated"]
#             row[f"Model_{j}"] = r["Model"]
#             row["Task"] = r["Task"]
#             j+=1
#         row[gold] = df.iloc[0][gold]
#         rows[i] = row
        
#     return pd.DataFrame().from_dict(rows).T

In [2]:
def build_comparison_rows(df, txt="post", txt2=None, gold="targetStereotype", secondary="targetMinority", 
                          n=4, literal=True, random_state=7,
                          MODELSET={"human", "baseline", "text-davinci-003", "chatgpt"}
                         ):
    
    gold_df = {}
    g = df.iloc[0][gold]
    
    if literal:
        g = literal_eval(g)
        for i, t in enumerate(g):
            gold_df[i] = {
                txt: df.iloc[0][txt],
                secondary: df.iloc[0][secondary],
                "Generated": t,
                "Model": "human",
                "Task": df.iloc[0]["Task"]
            }
            if txt2:
                gold_df[i][txt2] = df.iloc[0][txt2]
            break
    else:
        gold_df = {0: {
                txt: df.iloc[0][txt],
                secondary: df.iloc[0][secondary],
                "Generated": g,
                "Model": "human",
                "Task": df.iloc[0]["Task"]   
        }}
        if txt2:
            gold_df[0][txt2] = df.iloc[0][txt2]
    gold_df = pd.DataFrame().from_dict(gold_df).T
    comb = pd.concat([gold_df, df])
    
    #print(set(comb.Model.values))
    #comb_alt = comb[[model in {"baseline", "flan-ul2", "text-davinci-003", "chatgpt"} for model in comb.Model.values]]
    comb = comb[[model in MODELSET for model in comb.Model.values]]
    
    
    rand = comb.sample(frac=1)
    row = {}
    for j in range(0, n):
        try:
            r = rand.iloc[j]
        except:
            print(j, "out of position")
            #continue
            return pd.DataFrame()
        
        row[txt] = r[txt]
        if txt2:
            row[txt2] = r[txt2]
        row[secondary] = r[secondary]
        row[f"Generated_{j+1}"] = r["Generated"]
        row[f"Model_{j+1}"] = r["Model"]
        row["Task"] = r["Task"]
        row[gold] = df.iloc[0][gold]
    
    return pd.DataFrame().from_dict({0: row}).T

In [7]:
TASK = 'sbic'
txt="post"
gold="targetStereotype"
secondary="targetMinority"
n=4
literal=True
random_state=7

comb = []
for fn in glob(f"hit/input/{TASK}/*.csv"):
    comb.append(pd.read_csv(fn))
df = pd.concat(comb)

comparison_rows = []
for lbl in set(df[txt]):
    consider = df[df[txt]==lbl].copy()
    comparison_rows.append(build_comparison_rows(consider,
                                                 txt=txt,
                                                 gold=gold,
                                                 secondary=secondary,
                                                 n=n,
                                                 literal=literal,
                                                 random_state=random_state,
                                                 MODELSET={"human", "text-curie-001", "text-babbage-001", "text-ada-001"}
                                                ))
    
comparison_df = pd.concat(comparison_rows)
comparison_df.sample(n=66, random_state=random_state).to_csv(f"hit/input/comparison_balanced_{TASK}_ada_babbage_curie.csv", index=False)

In [8]:
comparison_df

Unnamed: 0,Generated_1,Generated_2,Generated_3,Generated_4,Model_1,Model_2,Model_3,Model_4,Task,post,targetMinority,targetStereotype
0,The stereotype that this sentence communicates...,The stereotype communicated by this sentence i...,Pizza is a type of food that is made from doug...,jewish are boring people,text-ada-001,text-curie-001,text-babbage-001,human,sbic,What's the difference between pizza and Jews? ...,"[""jewish folks""]","[""jewish are boring people"", ""jewish people ar..."
0,The group is going to die,The stereotype communicated by this sentence i...,indians are dirty,The train is heading towards the individual,text-babbage-001,text-curie-001,human,text-ada-001,sbic,Aan indian was taking a dump and died. Strange...,"[""India""]","[""indians are dirty"", ""indians have bad hygiene""]"
0,makes light of holocaust,The difference between a Boy Scout and a Jew i...,A Boy Scout comes back from backpacking,A Jew is someone who is from a certain religio...,human,text-ada-001,text-curie-001,text-babbage-001,sbic,What’s the difference between a Jew and a Boy ...,"[""jewish folks""]","[""makes light of holocaust"", ""refers to holoca..."
0,The stereotype communicated by this sentence i...,The sentence communicates a stereotype that Mu...,muslim folks are subhuman/objects or animals,Muslims are typically seen as being religious ...,text-curie-001,text-ada-001,human,text-babbage-001,sbic,How do you dissect a Muslim? Pretty sure the b...,"[""muslim folks""]","[""muslim folks are subhuman/objects or animals..."
0,are not equal,The stereotype that this sentence communicates...,This sentence communicates that the people who...,The stereotype communicated by this sentence i...,human,text-babbage-001,text-ada-001,text-curie-001,sbic,"I think its wrong to refer to a negro as ""boy""...","[""black folks""]","[""are not equal"", ""are not human"", ""blacks are..."
...,...,...,...,...,...,...,...,...,...,...,...,...
0,A black man left hanging is a lynching victim,The stereotype communicated by this sentence i...,lynching victim,allusion to lynching,text-ada-001,text-curie-001,text-babbage-001,human,sbic,What do you call a black man left hanging? a l...,"[""black folks"", ""murder victims""]","[""allusion to lynching"", ""black folks are infe..."
0,The stereotype communicated by this sentence i...,The stereotype that this sentence communicates...,The stereotype that this sentence communicates...,african folks don't deserve to eat,text-curie-001,text-babbage-001,text-ada-001,human,sbic,we should not help poor african children\nthey...,"[""African folks""]","[""african folks don't deserve to eat"", ""africa..."
0,"The stereotype communicated by ""nigger mortis""...",black people are criminals,The stereotype that black people die from natu...,Nigger mortis is the process of getting into a...,text-curie-001,human,text-babbage-001,text-ada-001,sbic,What happens when a black person dies? Nigger ...,"[""black folks""]","[""black people are criminals"", ""black people a..."
0,refers to women as bitches,The stereotype that this sentence communicates...,The stereotype communicated by this sentence i...,The stereotype that this sentence communicates...,human,text-ada-001,text-curie-001,text-babbage-001,sbic,Bitches really be mad when u can just stop spe...,"[""women""]","[""refers to women as bitches"", ""women are clin..."


In [11]:
TASK = 'mrf'
txt="headline"
gold="writer_intent"
secondary="misinfo"
n=4
literal=True
random_state=7

comb = []
for fn in glob(f"hit/input/{TASK}/*.csv"):
    comb.append(pd.read_csv(fn))
df = pd.concat(comb)

comparison_rows = []
for lbl in set(df[txt]):
    consider = df[df[txt]==lbl].copy()
    cr = build_comparison_rows(consider,
                                 txt=txt,
                                 gold=gold,
                                 secondary=secondary,
                                 n=n,
                                 literal=literal,
                                 random_state=random_state,
                                 MODELSET={"human", "baseline", "text-davinci-002", "text-davinci-001"}
                                )
    if len(cr):
        comparison_rows.append(cr)
    
comparison_df = pd.concat(comparison_rows)
comparison_df.sample(n=66, random_state=random_state).to_csv(f"hit/input/comparison_balanced_{TASK}_davinci_001_002.csv", index=False)

1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
3 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
3 out of position
1 out of position
1 out of position
1 out of position
3 out of position
1 out of position
1 out of position
1 out of position
1 out of position
3 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
3 out of position
1 out of p

In [12]:
comparison_df

Unnamed: 0,Generated_1,Generated_2,Generated_3,Generated_4,Model_1,Model_2,Model_3,Model_4,Task,headline,misinfo,writer_intent
0,'climate change poses the most profound long-t...,Climate change is a more pressing issue than t...,Some masks are better than others,ceo of uk's national health service says covid...,baseline,text-davinci-002,text-davinci-001,human,mrf,CEO Of UK's National Health Service: COVID-19 ...,misinfo,"[""ceo of uk's national health service says cov..."
0,The implied message of the headline is that th...,The UK minister's first visit to Russia since ...,'nassau and climate change on agenda in first ...,government officials are not on the same page,text-davinci-001,text-davinci-002,baseline,human,mrf,Navalny and climate on agenda in first Russia ...,real,['government officials are not on the same pag...
0,International shipping is harming the environment,Some masks are better than others for climate,consumerism is causing climate change,'international shipping is killing the climate',text-davinci-002,text-davinci-001,human,baseline,mrf,International shipping is killing the climate,real,['consumerism is causing climate change']
0,vaccine was deadly during trial,The vaccine is not safe,'vaccines are dangerous',Some masks are better than others,human,text-davinci-002,baseline,text-davinci-001,mrf,A doctor who was volunteering in clinical test...,misinfo,"['vaccine was deadly during trial', 'people sh..."
0,Some masks are better than others,The only way to end Australia's climate crimin...,only global tariffs will help australia on cli...,'the world is going to have to pay more',text-davinci-001,text-davinci-002,human,baseline,mrf,Global tariffs the only way to end Australia's...,real,['only global tariffs will help australia on c...
...,...,...,...,...,...,...,...,...,...,...,...,...
0,Covid-19 is serious and can lead to multiple h...,'he was hospitalized for covid-19.',people can get covid and repeatedly go into th...,Some masks are better than others,text-davinci-002,baseline,human,text-davinci-001,mrf,He Was Hospitalized for Covid-19. Then Hospita...,real,['people can get covid and repeatedly go into ...
0,The first winners of the $10 billion climate-c...,Some masks are better than others,a few environmental groups received large gran...,'the bezos administration is announcing the fi...,text-davinci-002,text-davinci-001,human,baseline,mrf,Bezos announces first winners of $10 billion c...,real,['a few environmental groups received large gr...
0,the uk will be shutting down again because of ...,'the government is preparing for a second lock...,"The UK is preparing for a second lockdown, imp...",Some masks are better than others,human,baseline,text-davinci-002,text-davinci-001,mrf,UK Coronavirus LIVE: England prepares for seco...,real,['the uk will be shutting down again because o...
0,The vaccine may have negative side effects for...,'covid vaccine is female sterilization',the covid-19 vaccine causes female sterilization,Some masks are better than others,text-davinci-002,baseline,human,text-davinci-001,mrf,Head of Pfizer research said that covid vaccin...,misinfo,['the covid-19 vaccine causes female steriliza...


In [15]:
TASK = 'flute'
txt="premise"
txt2="hypothesis"
gold="additional_labels"
secondary="labels"
n=4
literal=False
random_state=7

comb = []
for fn in glob(f"hit/input/{TASK}/*.csv"):
    if 'flan' in fn:
        continue
    comb.append(pd.read_csv(fn))
df = pd.concat(comb)

comparison_rows = []
phs = set([(row['premise'],row['hypothesis']) for _, row in df.iterrows()])
for p,h in phs:
    consider = df[(df['premise']==p) & (df['hypothesis']==h)].copy()
    cr = build_comparison_rows(consider,
                                 txt=txt,
                                 txt2=txt2,
                                 gold=gold,
                                 secondary=secondary,
                                 n=n,
                                 literal=literal,
                                 random_state=random_state,
                                MODELSET={"human", "text-curie-001", "text-babbage-001", "text-ada-001"}
                                )
    if len(cr):
        comparison_rows.append(cr)
    
comparison_df = pd.concat(comparison_rows)
comparison_df.sample(frac=1, random_state=random_state).to_csv(f"hit/input/comparison_balanced_{TASK}_ada_babbage_curie.csv", index=False)

1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position
1 out of position


In [14]:
comparison_df

Unnamed: 0,Generated_1,Generated_2,Generated_3,Generated_4,Model_1,Model_2,Model_3,Model_4,Task,additional_labels,hypothesis,labels,premise
0,A melting pat of butter is a slippery and slip...,The figurative language in the hypothesis is t...,The figurative language in the hypothesis is a...,A pat of butter melting would become lazy and ...,baseline,text-davinci-001,text-davinci-002,human,flute,A pat of butter melting would become lazy and ...,His voice was like a melting pat of butter.,Entailment,His voice was lazy and slippery
0,The figurative language in the hypothesis is a...,The figurative language in the hypothesis is t...,To throw something at someone is to be sarcast...,Monkeys foodfighting with bananas is hilarious...,text-davinci-002,text-davinci-001,baseline,human,flute,Monkeys foodfighting with bananas is hilarious...,The shade and sarcasm they throw at each other...,Entailment,The way they shade and sarcasm each other is h...
0,A rare stone in a museum is carefully guarded ...,A rare stone is a very rare object and so bein...,The figurative language in the hypothesis is t...,The hypothesis is a metaphor meaning that the ...,human,baseline,text-davinci-001,text-davinci-002,flute,A rare stone in a museum is carefully guarded ...,She is like a rare stone in a museum surrounde...,Contradiction,She is so easily accessible
0,A hundred million fireflies dancing around jus...,The figurative language in the hypothesis is t...,A hundred million fireflies dancing would be a...,The hypothesis is a metaphor for how beautiful...,baseline,text-davinci-001,human,text-davinci-002,flute,A hundred million fireflies dancing would be a...,Its like a hundred million fireflies dancing a...,Entailment,It is beautiful
0,"The hypothesis states that the person felt ""te...",The hypothesis is saying that the person felt ...,Losing a job is a very stressful experience an...,Losing a job is often a very traumatic experie...,text-davinci-002,text-davinci-001,baseline,human,flute,Losing a job is often a very traumatic experie...,I felt terrible when I found out that I was go...,Entailment,I felt pretty down when I found out that I was...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,The hypothesis is a metaphor for how life is c...,"An endless road is a long, long, and long road...","An endless road has no end, so it is not tempo...",The figurative language in the hypothesis is t...,text-davinci-002,baseline,human,text-davinci-001,flute,"An endless road has no end, so it is not tempo...",We're starting to discover life is like an end...,Contradiction,We're starting to realize that life is fleetin...
0,The hypothesis restates the premise using the ...,To make two people means to make them feel the...,"The idiom ""that makes two of us"" means ""the sa...",The hypothesis is saying that the premise and ...,text-davinci-002,baseline,human,text-davinci-001,flute,"The idiom ""that makes two of us"" means ""the sa...",that makes two of us.,Entailment,I feel exactly the same way.
0,Jobs for the boys refers to giving paid employ...,The hypothesis is saying that giving paid empl...,Jobs for the boys are not paid jobs and so the...,The hypothesis is a restatement of the premise...,human,text-davinci-001,baseline,text-davinci-002,flute,Jobs for the boys refers to giving paid employ...,But this isn't a case of jobs for the boys.,Entailment,But this isn't a case of giving paid employmen...
0,"To be rich means to be rich, but in this conte...",The hypothesis is saying that the person who s...,The hypothesis contradicts the premise because...,"Stealing anything, regardless of how small it ...",baseline,text-davinci-001,text-davinci-002,human,flute,"Stealing anything, regardless of how small it ...",I went to the store and stole a small bunch of...,Contradiction,I went to the store and took a small bunch of ...


In [4]:
TASK = 'positive_reframing'
txt="original_text"
gold="reframed_text"
secondary="strategy"
n=4
literal=False
random_state=7

comb = []
for fn in glob(f"hit/input/{TASK}/*.csv"):
    comb.append(pd.read_csv(fn))
df = pd.concat(comb)

comparison_rows = []
for lbl in set(df[txt]):
    consider = df[df[txt]==lbl].copy()
    cr = build_comparison_rows(consider,
                                 txt=txt,
                                 gold=gold,
                                 secondary=secondary,
                                 n=n,
                                 literal=literal,
                                 random_state=random_state,
                                 MODELSET={"human", "text-curie-001", "text-babbage-001", "text-ada-001"}
                                )
    
    if len(cr):
        comparison_rows.append(cr)
    
comparison_df = pd.concat(comparison_rows)
comparison_df.sample(frac=1, random_state=random_state).to_csv(f"hit/input/comparison_balanced_{TASK}.csv", index=False)

3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of position
3 out of p

In [6]:
TASK = 'flute'
pd.read_csv(f"hit/input/comparison_{TASK}.csv")

Unnamed: 0,Generated_1,Generated_2,Generated_3,Generated_4,Model_1,Model_2,Model_3,Model_4,Task,additional_labels,hypothesis,labels,premise
0,The hypothesis is that the less fortunate have...,The hypothesis is an entailment of the premise...,Airing one's opinion means to speak out about ...,The relationship between the hypothesis and th...,text-babbage-001,text-davinci-002,human,text-ada-001,flute,Airing one's opinion means to speak out about ...,She aired her opinions on welfare.,Entailment,She openly give her point of view on the less ...
1,The hypothesis is that the pain is coming from...,The hypothesis contradicts the premise because...,Experiencing back pain is never a good thing a...,The relationship between the premise and the h...,text-babbage-001,text-davinci-002,human,text-ada-001,flute,Experiencing back pain is never a good thing a...,I've been having this back pain for a while no...,Contradiction,My back has been having sharp random pains thr...
2,The hypothesis is that Mella orders while bobb...,The figurative language in the hypothesis is t...,"No sweat means not a problem or difficulty, bu...",The relationship between the premise and the h...,text-babbage-001,text-davinci-002,human,text-ada-001,flute,"No sweat means not a problem or difficulty, bu...","Mella orders, while bobbing her skull up and d...",Contradiction,"Mella orders, while bobbing her skull up and d..."
3,The hypothesis suggests that the smile was a s...,The figurative language in the hypothesis is t...,"An angel is a being of divine nature, and they...",The relationship between the premise and the h...,text-babbage-001,text-davinci-002,human,text-ada-001,flute,"An angel is a being of divine nature, and they...","I looked down at him, and he smiled at me like...",Contradiction,He smiled treacherously up at me.
4,The hypothesis suggests that the protagonist f...,The hypothesis states that the person feels te...,It is natural to feel bad when you accidentall...,The relationship between the premise and the h...,text-babbage-001,text-davinci-002,human,text-ada-001,flute,It is natural to feel bad when you accidentall...,I felt terrible when I accidentally bumped int...,Entailment,I accidentally bumped into someone at the mall...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
493,The hypothesis implies that the flowers stoppe...,The hypothesis describes the flowers as if the...,"To dance means to move nimbly or gracefully, w...",The flowers dance in the gentle breeze,text-babbage-001,text-davinci-002,human,text-ada-001,flute,"To dance means to move nimbly or gracefully, w...",The flowers danced in the gentle breeze.,Contradiction,The flowers stopped in the gentle breeze.
494,The hypothesis suggests that the gaze is like ...,The hypothesis describes the premise using fig...,A strong flame is usually associated with bein...,The hypothesis uses a figurative language to d...,text-babbage-001,text-davinci-002,human,text-ada-001,flute,A strong flame is usually associated with bein...,"His gaze was like a steady, strong flame .",Contradiction,His gaze was neither blazing nor compassionate.
495,The hypothesis suggests that the best friend w...,The hypothesis is hyperbolic and is not meant ...,The beauty of Tahiti is often mentioned and th...,"The hypothesis uses a metaphor when it says ""M...",text-babbage-001,text-davinci-002,human,text-ada-001,flute,The beauty of Tahiti is often mentioned and th...,Tahiti is the most beautiful place on Earth an...,Contradiction,My best friend is going to Tahiti with his fam...
496,The hypothesis is that people around the apart...,The hypothesis is a metaphor for how the peopl...,Cigarette butts are often unsightly and produc...,The relationship between the premise and the h...,text-babbage-001,text-davinci-002,human,text-ada-001,flute,Cigarette butts are often unsightly and produc...,It's not only beautiful but also cool how peop...,Contradiction,I'm so mad at people around my apartment just ...


In [7]:
TASK = 'sbic'
pd.read_csv(f"hit/input/comparison_{TASK}.csv")

Unnamed: 0,post,targetMinority,Generated_1,Model_1,Task,Generated_2,Model_2,Generated_3,Model_3,Generated_4,Model_4,targetStereotype
0,What was the hardest part of the holocaust? My...,"[""Holocaust victims"", ""jewish folks""]",a group of people,flan-t5-small,sbic,The stereotype communicated by this sentence i...,text-davinci-001,"I'm sorry, I cannot provide an explanation for...",chatgpt,Jews are often viewed as victims of the Holocaust,text-davinci-003,"[""jewish people are inferior"", ""jewish people ..."
1,What was the hardest part of the holocaust? My...,"[""Holocaust victims"", ""jewish folks""]",This sentence communicates the stereotype that...,text-davinci-002,sbic,trivializes harm to victims,human,jewish people are unwanted,human,jewish people are inferior,human,"[""jewish people are inferior"", ""jewish people ..."
2,What was the hardest part of the holocaust? My...,"[""Holocaust victims"", ""jewish folks""]",The sentence communicates that the Holocaust w...,text-curie-001,sbic,The hardest part of the holocaust was the geno...,text-babbage-001,The hardest part of the holocaust was the lack...,text-ada-001,[GROUP] are sexist.,flan-ul2,"[""jewish people are inferior"", ""jewish people ..."
3,What was the hardest part of the holocaust? My...,"[""Holocaust victims"", ""jewish folks""]",Nazi,flan-t5-large,sbic,a dick,flan-t5-base,are dick,baseline,Jews are a hateful people,flan-t5-xxl,"[""jewish people are inferior"", ""jewish people ..."
4,"A Cop Told A Suspect To Put His Hands Up, And ...","[""black folks""]","A cop told a suspect to put his hands up, and ...",flan-t5-small,sbic,This sentence communicates the stereotype that...,text-davinci-001,[GROUP] are a group of people,flan-t5-xl,"The sentence ""Cop told suspect to put his hand...",text-ada-001,"[""black folks are inferior"", ""black folks dese..."
...,...,...,...,...,...,...,...,...,...,...,...,...
1912,we should not help poor african children\nthey...,"[""African folks""]",The stereotype communicated by this sentence i...,text-curie-001,sbic,This sentence communicates the stereotype that...,text-davinci-003,[GROUP] are funny.,flan-ul2,poor,flan-t5-xxl,"[""african folks don't deserve to eat"", ""africa..."
1913,What do you call a woman who speaks their mind...,"[""women""]",shouldn't speak,human,sbic,women are c*nts,human,a woman who speaks her mind,flan-t5-large,This sentence communicates the stereotype that...,chatgpt,"[""end up dead"", ""shouldn't speak"", ""women are ..."
1914,What do you call a woman who speaks their mind...,"[""women""]",end up dead,human,sbic,[GROUP] are a group of people who are buried i...,flan-t5-xl,a woman who speaks their mind is a cunt,flan-t5-xxl,Women are often seen as being too outspoken or...,text-davinci-003,"[""end up dead"", ""shouldn't speak"", ""women are ..."
1915,What do you call a woman who speaks their mind...,"[""women""]",women are sex slaves,baseline,sbic,women,flan-t5-base,The stereotype that this sentence communicates...,text-davinci-001,The speaker is an individual who is passionate...,text-ada-001,"[""end up dead"", ""shouldn't speak"", ""women are ..."
