In [1]:
import itertools
import string

import numpy as np
import pandas as pd

In [31]:
seed = 1
np.random.seed(seed)

### Filter generated data
Make sure the name is unique. Fruits, colors, and other should be all lower-case. 

In [3]:
data_df = pd.read_json("data/chat_gpt_generated_data.json")

In [4]:
data_df

Unnamed: 0,name,type
0,Austria,country
1,banana,fruit
2,green,color
3,Mercedes,car
4,airport,other
...,...,...
324,Smalt,color
325,Isabelline,color
326,Malachite,color
327,Crimson,color


In [5]:
data_df["type"].value_counts()

type
country    69
car        68
other      67
fruit      66
color      59
Name: count, dtype: int64

In [6]:
duplicates = data_df.duplicated(subset='name')

In [7]:
deduplicated_df = data_df[~duplicates]

In [8]:
deduplicated_df.reset_index(drop=True, inplace=True)

In [9]:
deduplicated_df["word_length"] = deduplicated_df["name"].apply(
    lambda name: len(name)
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  deduplicated_df["word_length"] = deduplicated_df["name"].apply(


In [10]:
deduplicated_df["name"] = deduplicated_df.apply(
    lambda row: row["name"].lower() if row["type"] in ["color", "fruit", "other"] else row["name"],
    axis=1
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  deduplicated_df["name"] = deduplicated_df.apply(


In [11]:
deduplicated_df.to_csv("data/preprocessed_data.csv", index=False)

In [None]:
counts = deduplicated_df.groupby(['type', 'word_length']).size().reset_index(name='count')

# Output the counts
print(counts)

### Generate anagram data

In [3]:
n_true_anagrams = 4
n_false_anagrams = 4

In [5]:
preprocessed_data = pd.read_csv("data/preprocessed_data.csv")

In [6]:
preprocessed_data_8 = preprocessed_data[preprocessed_data["word_length"] < 9]

In [6]:
preprocessed_data_8["type"].value_counts()

type
country    52
car        49
other      44
color      43
fruit      30
Name: count, dtype: int64

In [7]:
def get_n_anagrams(word, n, without_original=True):
    indices = list(range(len(word)))
    permutations = []
    for _ in range(n):
        while True:
            permutation = np.random.choice(indices, len(indices), replace=False)
            if not without_original or (list(permutation) != indices):
                break
        permutations.append(permutation)
    anagrams = []
    for permutation in permutations:
        anagram = ""
        for idx in permutation:
            anagram += word[idx]
        anagrams.append(anagram)
    return anagrams

In [8]:
def get_true_anagram_rows(row, name, name_anagrams):
    rows = []
    n = min(len(name_anagrams), n_true_anagrams)
    selected_anagrams = np.random.choice(name_anagrams, n, replace=False)
    for anagram in selected_anagrams:
        input_str = f"{name} {anagram}" 
        new_row = {
            'input': input_str,
            'label': True,
            'type': row["type"],
            'word_length': row["word_length"],
            'corruptions': 0
        }
        rows.append(new_row)
    return rows

In [9]:
def corrupt_word(word, corruptions):
    indices_to_replace = np.random.choice(range(len(word)), corruptions, replace=False)
    for idx_to_replace in indices_to_replace:
        character_to_replace = word[idx_to_replace]
        alphabet = string.ascii_letters
        if character_to_replace.isupper():
            alphabet = alphabet.upper()
        else:
            alphabet = alphabet.lower()
        alphabet = alphabet.replace(character_to_replace, "")
        np_alphabet = np.array(list(alphabet), dtype="|S1")
        word = word[:idx_to_replace] + np.random.choice(np_alphabet, 1)[0].decode("utf-8") + word[idx_to_replace+1:]
    return word

In [10]:
def get_false_anagram_rows(row, name, name_anagrams):
    rows = []
    n = min(len(name_anagrams), n_false_anagrams)
    selected_anagrams = np.random.choice(name_anagrams, n, replace=False)
    for idx, anagram in enumerate(selected_anagrams):
        n_corruptions = idx + 2
        if n_corruptions > len(anagram):
            continue
        corrupted_anagram = corrupt_word(anagram, n_corruptions)
        input_str = f"{name} {corrupted_anagram}" 
        new_row = {
            'input': input_str,
            'label': False,
            'type': row["type"],
            'word_length': row["word_length"],
            'corruptions': n_corruptions
        }
        rows.append(new_row)
    return rows

In [11]:
def generate_df(base_df):
    rows = []
    for idx, row in base_df.iterrows():
        name = row["name"]
        true_name_anagrams = get_n_anagrams(name, n_true_anagrams)
        true_anagram_rows = get_true_anagram_rows(row, name, true_name_anagrams)
        false_name_anagrams = get_n_anagrams(name, n_false_anagrams)
        false_anagram_rows = get_false_anagram_rows(row, name, false_name_anagrams)
        rows.extend(true_anagram_rows)
        rows.extend(false_anagram_rows)
    df = pd.DataFrame(rows)
    return df

In [12]:
def generate_datasets(base_df, n_examples=10, n_eval=200):
    anagram_data = generate_df(base_df)
    fruit_true_examples = anagram_data[(anagram_data["label"] == True) & (anagram_data["type"] == "fruit")].sample(n=n_examples, random_state=seed)
    fruit_false_examples = anagram_data[(anagram_data["label"] == False) & (anagram_data["type"] == "fruit")].sample(n=n_examples, random_state=seed)
    car_true_examples = anagram_data[(anagram_data["label"] == True) & (anagram_data["type"] == "car")].sample(n=n_examples, random_state=seed)
    car_false_examples = anagram_data[(anagram_data["label"] == False) & (anagram_data["type"] == "car")].sample(n=n_examples, random_state=seed)
    color_true_examples = anagram_data[(anagram_data["label"] == True) & (anagram_data["type"] == "color")].sample(n=n_examples, random_state=seed)
    color_false_examples = anagram_data[(anagram_data["label"] == False) & (anagram_data["type"] == "color")].sample(n=n_examples, random_state=seed)
    country_true_examples = anagram_data[(anagram_data["label"] == True) & (anagram_data["type"] == "country")].sample(n=n_examples, random_state=seed)
    country_false_examples = anagram_data[(anagram_data["label"] == False) & (anagram_data["type"] == "country")].sample(n=n_examples, random_state=seed)
    other_true_examples = anagram_data[(anagram_data["label"] == True) & (anagram_data["type"] == "other")].sample(n=n_examples, random_state=seed)
    other_false_examples = anagram_data[(anagram_data["label"] == False) & (anagram_data["type"] == "other")].sample(n=n_examples, random_state=seed)

    example_data = pd.concat((
        fruit_true_examples, fruit_false_examples, car_true_examples, car_false_examples, color_true_examples, color_false_examples, 
        country_true_examples, country_false_examples, other_true_examples, other_false_examples
    ))

    anagram_data = anagram_data.drop(example_data.index)
    
    true_eval_data = anagram_data[anagram_data["label"] == True].sample(n=n_eval // 2, random_state=seed)
    false_eval_data = anagram_data[anagram_data["label"] == False].sample(n=n_eval // 2, random_state=seed)
    eval_data = pd.concat((true_eval_data, false_eval_data))
    eval_data = eval_data.sample(frac=1., random_state=seed)
    return eval_data, example_data

In [22]:
def generate_dataset(anagram_data, n_examples=10, n_eval=200):
    anagram_data = anagram_data.sample(frac=1., random_state=seed).reset_index(drop=True)
    eval_rows = []
    true_anagram_data = anagram_data[anagram_data["label"] == True].sample(n=n_eval//2, random_state=seed)
    false_anagram_data = anagram_data[anagram_data["label"] == False].sample(n=n_eval//2, random_state=seed)
    def get_rows_from_anagram_data(data):
        rows = []
        for idx, row in data.iterrows():
            example_rows = anagram_data.drop(data.index).sample(n=n_examples)
            few_shot_prompt = ""
            for _, example in example_rows.iterrows():
                example_text = f"Input: {example['input']}, Label: {example['label']}\n"
                few_shot_prompt += example_text

            user_prompt = f"{few_shot_prompt}Input: {row['input']}, Label:"
            eval_row = {
                "prompt": user_prompt,
                "label": row["label"],
                "type": row["type"],
                "word_length": row["word_length"],
                "corruptions": row["corruptions"]
            }
            rows.append(eval_row)
        return rows
    
    eval_rows.extend(get_rows_from_anagram_data(true_anagram_data))
    eval_rows.extend(get_rows_from_anagram_data(false_anagram_data))

    eval_data = pd.DataFrame(eval_rows[:n_eval])
    return eval_data

In [23]:
def generate_dataset_with_biased_prompt(anagram_data, n_eval=200, bias_types=None, bias_label=False, n_bias=5, n_true=5, n_false=5, interleave_true_and_false=False):
    if bias_types is None:
        bias_types = ["country"] 
    anagram_data = anagram_data.sample(frac=1., random_state=seed).reset_index(drop=True)

    # anagram_data = anagram_data.drop(example_rows.index)
    eval_rows = []
    true_anagram_data = anagram_data[anagram_data["label"] == True].sample(n=n_eval//2, random_state=seed)
    false_anagram_data = anagram_data[anagram_data["label"] == False].sample(n=n_eval//2, random_state=seed)
    def get_rows_from_anagram_data(data, n_true=5, n_false=5):
        bias_rows = anagram_data.drop(data.index)[(anagram_data["type"].isin(bias_types)) & (anagram_data["label"] == bias_label)].sample(n=n_bias)
        n_true = n_true if not bias_label else n_true - n_bias
        n_false = n_false if bias_label else n_false - n_bias
        true_rows = anagram_data[(anagram_data["label"] == True) & ~(anagram_data["type"].isin(bias_types))].sample(n=n_true)
        false_rows = anagram_data[(anagram_data["label"] == False) & ~(anagram_data["type"].isin(bias_types))].sample(n=n_false)
        
        example_rows = pd.concat((bias_rows, true_rows, false_rows)).sample(frac=1.) 
        few_shot_prompt = ""
        for _, example in example_rows.iterrows():
            example_text = f"Input: {example['input']}, Label: {example['label']}\n"
            few_shot_prompt += example_text

        rows = []
        for idx, row in data.iterrows():
            user_prompt = f"{few_shot_prompt}Input: {row['input']}, Label:"
            eval_row = {
                "prompt": user_prompt,
                "label": row["label"],
                "type": row["type"],
                "word_length": row["word_length"],
                "corruptions": row["corruptions"]
            }
            rows.append(eval_row)
        return rows
    eval_rows.extend(get_rows_from_anagram_data(true_anagram_data, n_true=n_true, n_false=n_false))
    eval_rows.extend(get_rows_from_anagram_data(false_anagram_data, n_true=n_true, n_false=n_false))

    eval_data = pd.DataFrame(eval_rows)
    return eval_data

In [32]:
anagram_data = generate_df(preprocessed_data_8)

In [25]:
for n in range(1,4):
    eval_data = generate_dataset(anagram_data)
    eval_data.to_csv(f"data/anagram_eval_data_len_8_0{n}.csv", index=False)

In [33]:
for n_dataset in range(1,4):
    for n_biases in range(1,6):
        eval_data_biased_country_false = generate_dataset_with_biased_prompt(anagram_data, n_bias=n_biases)
        eval_data_biased_country_false.to_csv(f"data/anagram_eval_biased_{n_biases}_0{n_dataset}.csv", index=False)

  bias_rows = anagram_data.drop(data.index)[(anagram_data["type"].isin(bias_types)) & (anagram_data["label"] == bias_label)].sample(n=n_bias)
  bias_rows = anagram_data.drop(data.index)[(anagram_data["type"].isin(bias_types)) & (anagram_data["label"] == bias_label)].sample(n=n_bias)
  bias_rows = anagram_data.drop(data.index)[(anagram_data["type"].isin(bias_types)) & (anagram_data["label"] == bias_label)].sample(n=n_bias)
  bias_rows = anagram_data.drop(data.index)[(anagram_data["type"].isin(bias_types)) & (anagram_data["label"] == bias_label)].sample(n=n_bias)
  bias_rows = anagram_data.drop(data.index)[(anagram_data["type"].isin(bias_types)) & (anagram_data["label"] == bias_label)].sample(n=n_bias)
  bias_rows = anagram_data.drop(data.index)[(anagram_data["type"].isin(bias_types)) & (anagram_data["label"] == bias_label)].sample(n=n_bias)
  bias_rows = anagram_data.drop(data.index)[(anagram_data["type"].isin(bias_types)) & (anagram_data["label"] == bias_label)].sample(n=n_bias)
  bias