In [None]:
!pip install emoji

Collecting emoji
  Downloading emoji-2.14.1-py3-none-any.whl.metadata (5.7 kB)
Downloading emoji-2.14.1-py3-none-any.whl (590 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/590.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m583.7/590.6 kB[0m [31m21.7 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m590.6/590.6 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji
Successfully installed emoji-2.14.1


In [None]:
import pandas as pd
import random
random.seed(40)
import csv
import nltk
from nltk.corpus import brown
nltk.download('brown')
import emoji
import re
import string


[nltk_data] Downloading package brown to /root/nltk_data...
[nltk_data]   Unzipping corpora/brown.zip.


In [None]:
# Functions to generate random sets (lists) for each task
# Arguments: x, y, z are strings representing the three chosen letters/emojis/digits

# Function to generate list of emoji sets given three emojis
def generate_emojis(x,y,z):
    """Return the pair and singleton subsets of three emojis as a list of sets.

    Args:
        x, y, z (str): The three chosen emojis.

    Returns:
        list[set]: ``[{x, y}, {x, z}, {y, z}, {x}, {y}, {z}]`` in exactly that
        order, the same category order used by ``generate_words`` and
        ``generate_numbers``. Duplicate subsets (only possible when two
        arguments are equal) are dropped, keeping the first occurrence.

    The list used to be produced by iterating a ``set`` of frozensets, whose
    order follows string hashes. Those are salted per interpreter process
    unless ``PYTHONHASHSEED`` is fixed, so the order changed from run to run
    even with ``random.seed`` set. The order is now fixed.
    """
    subsets = [
        frozenset([x, y]),
        frozenset([x, z]),
        frozenset([y, z]),
        frozenset([x]),
        frozenset([y]),
        frozenset([z]),
    ]

    # dict.fromkeys de-duplicates like the original set did, but preserves
    # insertion order, which makes the result deterministic.
    return [set(s) for s in dict.fromkeys(subsets)]

# Function to generate list of words given three letters
def _brown_vocabulary():
    """Return the sorted, lower-cased Brown vocabulary, built once and cached.

    Tokens containing any non-``\\w`` character are dropped. The result is
    sorted because iterating a ``set`` of strings follows salted string
    hashes, which would make ``random.choice`` pick different words on
    different runs even with the same seed.
    """
    cached = getattr(_brown_vocabulary, "_cache", None)
    if cached is None:
        cached = sorted({w.lower() for w in brown.words() if not re.search(r"[^\w]", w)})
        # Stored on the function object so repeated calls skip the corpus scan.
        _brown_vocabulary._cache = cached
    return cached


def generate_words(x,y,z):
    """Pick one Brown-corpus word for each presence pattern of three letters.

    Args:
        x, y, z (str): The three chosen letters.

    Returns:
        list[str]: Six words, in this order: contains x and y but not z;
        x and z but not y; y and z but not x; only x; only y; only z
        ("only" is relative to the three letters).

    Raises:
        IndexError: If no vocabulary word satisfies one of the six patterns.
            This is the exception type ``random.choice`` raised before; the
            message now names the failing pattern.
    """
    vocabulary = _brown_vocabulary()

    # (letters that must appear, letters that must not appear)
    patterns = [
        ((x, y), (z,)),
        ((x, z), (y,)),
        ((y, z), (x,)),
        ((x,), (y, z)),
        ((y,), (x, z)),
        ((z,), (x, y)),
    ]

    # Build every candidate list first so the random draws happen in the
    # same order as before (one draw per pattern, in pattern order).
    candidate_lists = []
    for required, excluded in patterns:
        candidates = [
            w for w in vocabulary
            if all(c in w for c in required) and not any(c in w for c in excluded)
        ]
        if not candidates:
            raise IndexError(
                f"No word contains {list(required)} without {list(excluded)}"
            )
        candidate_lists.append(candidates)

    return [random.choice(candidates) for candidates in candidate_lists]

# Function to generate list of numbers given three numbers
def generate_numbers(x, y, z, length=8):
    """Generate six random digit strings, one per presence pattern of x, y, z.

    Args:
        x, y, z: Three distinct decimal digits, given as one-character
            strings (ints 0-9 are accepted and converted).
        length (int): Number of digits in each generated string. Defaults
            to 8. Must be at least 2 so a string can hold two required digits.

    Returns:
        list[str]: Six strings of ``length`` digits, in this order: contains
        x and y but not z; x and z but not y; y and z but not x; only x;
        only y; only z ("only" is relative to the three digits). Leading
        zeros are possible.

    Raises:
        ValueError: If the digits are not three distinct decimal digits, or
            if ``length`` is below 2. These inputs used to make the
            rejection-sampling loop spin forever.
    """
    digit_chars = "0123456789"

    # Normalise so the membership tests below are always str-in-str.
    x, y, z = str(x), str(y), str(z)
    for value in (x, y, z):
        if len(value) != 1 or value not in digit_chars:
            raise ValueError(f"Expected a single decimal digit, got {value!r}")
    if len({x, y, z}) != 3:
        raise ValueError(f"Digits must be distinct, got {x!r}, {y!r}, {z!r}")
    if length < 2:
        raise ValueError(f"length must be at least 2, got {length!r}")

    # Helper function to generate a number for each condition
    def generate_number(targets, not_targets):
        # Rejection sampling: redraw until the string has every target digit
        # and none of the forbidden ones.
        while True:
            num = "".join(random.choices(digit_chars, k=length))
            if all(d in num for d in targets) and all(d not in num for d in not_targets):
                return num

    # (digits that must appear, digits that must not appear)
    conditions = [
        ([x, y], [z]),
        ([x, z], [y]),
        ([y, z], [x]),
        ([x], [y, z]),
        ([y], [x, z]),
        ([z], [x, y]),
    ]

    return [generate_number(must_have, must_not_have) for must_have, must_not_have in conditions]

In [None]:
# function to generate the correct answers to the construction questions
def get_correct_answer(task_list, construction, selected_vals):
    """Select the items of ``task_list`` that answer a given construction.

    Args:
    - task_list (list of items): The list of items containing the selected values
    - construction (str): Type of construction
    - selected_vals (sequence): The randomly assigned values; index 0 is the
      first value and index 1 (two-value constructions only) the second

    Returns:
    - A list of matching items for "simple_pos", "simple_neg", "inc_dis",
      "ex_dis", "comp_neg" and "simple_pos_conj".
    - A tuple of two lists (one per reading) for the remaining constructions:
        simple_pos_dis  -> (either value, exactly one value)
        comp_pos_dis    -> (exactly one value, either value)
        simple_neg_dis  -> (neither value, not both values)
        comp_neg_dis    -> (both or neither value, neither value)
        simple_neg_conj -> (not both values, neither value)
        comp_neg_conj   -> (not both values, neither value)

    Raises:
    - ValueError: if ``construction`` is not one of the names above.

    Membership is tested with the ``in`` operator, so when the items are
    strings this is a case-sensitive substring test.
    """

    # Building blocks: does an item contain the first / second selected value?
    def has_first(item):
        return selected_vals[0] in item

    def has_second(item):
        return selected_vals[1] in item

    # Combinations used by the constructions below.
    def lacks_first(item):
        return not has_first(item)

    def either(item):
        return has_first(item) or has_second(item)

    def exactly_one(item):
        return has_first(item) != has_second(item)

    def both(item):
        return has_first(item) and has_second(item)

    def not_both(item):
        return not both(item)

    def neither(item):
        return not has_first(item) and not has_second(item)

    def both_or_neither(item):
        return has_first(item) == has_second(item)

    def keep(predicate):
        return [item for item in task_list if predicate(item)]

    # Constructions with a single expected answer
    single_reading = {
        "simple_pos": has_first,
        "simple_neg": lacks_first,
        "inc_dis": either,
        "ex_dis": exactly_one,
        "comp_neg": neither,
        "simple_pos_conj": both,
    }

    # Constructions that return one answer list per reading
    two_readings = {
        "simple_pos_dis": (either, exactly_one),
        "comp_pos_dis": (exactly_one, either),
        "simple_neg_dis": (neither, not_both),
        "comp_neg_dis": (both_or_neither, neither),
        "simple_neg_conj": (not_both, neither),
        "comp_neg_conj": (not_both, neither),
    }

    if construction in single_reading:
        return keep(single_reading[construction])

    if construction in two_readings:
        first_reading, second_reading = two_readings[construction]
        return (keep(first_reading), keep(second_reading))

    raise ValueError(f"Unknown construction type: {construction}")

In [None]:
# Emoji task function
def generate_emoji_stimuli():
    """Build one group of emoji-set prompts together with their answers.

    Three animal names are drawn at random, their emojis are turned into a
    list of sets by ``generate_emojis``, and one prompt is produced for each
    construction. Prompts refer to the animals by name, while answers are
    computed on the emoji characters.

    Returns:
        list[tuple]: ``(construction, prompt, correct_answer)`` per
        construction, in the order the templates are listed below;
        ``correct_answer`` is whatever ``get_correct_answer`` returns.
    """
    emoji_set = {'monkey': '🐒', 'gorilla': '🦍', 'dog': '🐕', 'cat': '🐈', 'fox': '🦊', 'racoon': '🦝',
                 'horse': '🐎', 'lion': '🦁', 'pig': '🐖', 'rabbit': '🐇', 'skunk': '🦨'}

    # Draw the three animals for this group and build the candidate sets
    emoji_names = random.sample(list(emoji_set.keys()), 3)
    chosen_emojis = [emoji_set[name] for name in emoji_names]
    emoji_sets = generate_emojis(chosen_emojis[0], chosen_emojis[1], chosen_emojis[2])

    base = "In the list of emoji sets: " + ", ".join(str(s) for s in emoji_sets) + ","

    # Question templates; {first}/{second} are filled with animal names
    templates = {
        # Experimental constructions
        "simple_pos": "which sets have a {first}?",
        "simple_neg": "which sets do not have a {first}?",
        "simple_pos_dis": "which sets have a {first} or {second}?",
        "comp_pos_dis": "which sets have either a {first} or {second}?",
        "simple_neg_dis": "which sets do not have a {first} or {second}?",
        "comp_neg_dis": "which sets do not have either a {first} or {second}?",
        "simple_neg_conj": "which sets do not have a {first} and {second}?",
        # Control constructions
        "inc_dis": "which sets have a {first} or {second} or both?",
        "ex_dis": "which sets have a {first} or {second}, but not both?",
        "comp_neg": "which sets have neither a {first} nor {second}?",
        "simple_pos_conj": "which sets have a {first} and {second}?",
        "comp_neg_conj": "which sets do not have both a {first} and {second}?",
    }

    # First pass: every random draw, in template order. Two-slot templates
    # get two distinct animals, one-slot templates get a single animal.
    picked_names = {}
    for key, template in templates.items():
        needs_two = "first" in template and "second" in template
        picked_names[key] = random.sample(emoji_names, 2) if needs_two else [random.choice(emoji_names)]

    # Second pass: fill in the wording and look up the expected answer.
    rows = []
    for key, names in picked_names.items():
        # names[-1] equals names[0] for one-slot templates, where {second} is unused
        question = templates[key].format(first=names[0], second=names[-1])
        answer = get_correct_answer(emoji_sets, key, [emoji_set[name] for name in names])
        rows.append((key, f"{base} {question}", answer))

    return rows

In [None]:
# Letter task function
def generate_letter_stimuli():
    """Build one group of letter-in-word prompts together with their answers.

    Three distinct lowercase letters are drawn at random, ``generate_words``
    supplies the word list for them, and one prompt is produced for each
    construction.

    Returns:
        list[tuple]: ``(construction, prompt, correct_answer)`` per
        construction, in the order the templates are listed below;
        ``correct_answer`` is whatever ``get_correct_answer`` returns.
    """
    letters = random.sample(string.ascii_lowercase, 3)
    words = generate_words(letters[0], letters[1], letters[2])

    base = "In the list of words: " + ", ".join(words) + ","

    templates = {
      # Experimental constructions
      "simple_pos": "which words have the letter {first}?",
      "simple_neg": "which words do not have the letter {first}?",
      "simple_pos_dis": "which words have the letters {first} or {second}?",
      "comp_pos_dis": "which words have either the letters {first} or {second}?",
      "simple_neg_dis": "which words do not have the letters {first} or {second}?",
      "comp_neg_dis": "which words do not have either the letters {first} or {second}?",
      "simple_neg_conj": "which words do not have the letters {first} and {second}?",
      # Control constructions
      "inc_dis": "which words have the letter {first} or {second} or both?",
      "ex_dis": "which words have the letter {first} or {second}, but not both?",
      "comp_neg": "which words have neither the letter {first} nor {second}?",
      "simple_pos_conj": "which words have the letters {first} and {second}?",
      "comp_neg_conj": "which words do not have both the letters {first} and {second}?",
    }

    # First pass: every random draw, in template order. Two-slot templates
    # get two distinct letters, one-slot templates get a single letter.
    picked_letters = {}
    for key, template in templates.items():
        needs_two = "first" in template and "second" in template
        picked_letters[key] = random.sample(letters, 2) if needs_two else [random.choice(letters)]

    # Second pass: fill in the wording and look up the expected answer.
    rows = []
    for key, chosen in picked_letters.items():
        # chosen[-1] equals chosen[0] for one-slot templates, where {second} is unused
        question = templates[key].format(first=chosen[0], second=chosen[-1])
        rows.append((key, f"{base} {question}", get_correct_answer(words, key, chosen)))

    return rows

In [None]:
# Digit task function
def generate_digit_stimuli():
    """Build one group of digit-in-number prompts together with their answers.

    Three distinct digits are drawn at random, ``generate_numbers`` supplies
    the number strings for them, and one prompt is produced for each
    construction.

    Returns:
        list[tuple]: ``(construction, prompt, correct_answer)`` per
        construction, in the order the templates are listed below;
        ``correct_answer`` is whatever ``get_correct_answer`` returns.
    """
    digits = random.sample(range(10), 3)
    numbers = generate_numbers(str(digits[0]), str(digits[1]), str(digits[2]))
    numbers_str = ", ".join(numbers)

    base = f"In the list of numbers: {numbers_str},"
    constructions = {
      # Experimental constructions
      "simple_pos": "which numbers have the digit {first}?",
      "simple_neg": "which numbers do not have the digit {first}?",
      "simple_pos_dis": "which numbers have the digits {first} or {second}?",
      "comp_pos_dis": "which numbers have either the digits {first} or {second}?",
      "simple_neg_dis": "which numbers do not have the digits {first} or {second}?",
      "comp_neg_dis": "which numbers do not have either the digits {first} or {second}?",
      "simple_neg_conj": "which numbers do not have the digits {first} and {second}?",
      # Control constructions
      "inc_dis": "which numbers have the digit {first} or {second} or both?",
      "ex_dis": "which numbers have the digit {first} or {second}, but not both?",
      "comp_neg": "which numbers have neither the digit {first} nor {second}?",
      "simple_pos_conj": "which numbers have the digits {first} and {second}?",
      "comp_neg_conj": "which numbers do not have both the digits {first} and {second}?",
    }

    # Randomly assign digits to each construction and generate prompts
    selected_digits = {} # stores the randomly assigned digits, always as a list of str
    prompts = {}
    for key, template in constructions.items():
        if "first" in template and "second" in template:
            selected_digits[key] = [str(d) for d in random.sample(digits, 2)]
        else:
            # Wrapped in a list like the two-digit case (and like the emoji,
            # letter and word generators). The bare str stored before only
            # worked because indexing a one-character string returns that
            # same character.
            selected_digits[key] = [str(random.choice(digits))]
        # [-1] equals [0] for one-slot templates, where {second} is unused
        formatted_sentence = template.format(first=selected_digits[key][0], second=selected_digits[key][-1])
        prompts[key] = f"{base} {formatted_sentence}"

    # Pair each prompt with its expected answer
    rows = []
    for key, prompt in prompts.items():
        correct_answer = get_correct_answer(numbers, key, selected_digits[key])
        rows.append((key, prompt, correct_answer))

    return rows

In [None]:
# Word task
# Load the sentence spreadsheet once at module level. generate_word_stimuli()
# reads columns w1-w3 (the words used in the prompts) and s1-s6 (the
# sentences) from every row. The path is relative, so the CSV has to sit in
# the notebook's working directory.
word_stimuli_csv = "sentence_spreadsheet_exp2.csv"
spreadsheet_df = pd.read_csv(word_stimuli_csv)

def generate_word_stimuli():
    """Build word-in-sentence prompts for every row of ``spreadsheet_df``.

    Each spreadsheet row supplies three words (columns w1-w3) and six
    sentences (columns s1-s6). One prompt per construction is generated for
    each row.

    Returns:
        list[tuple]: ``(count, construction, prompt, correct_answer)`` where
        ``count`` is the 1-based position of the spreadsheet row and
        ``correct_answer`` is whatever ``get_correct_answer`` returns.
    """
    word_columns = ("w1", "w2", "w3")
    sentence_columns = ("s1", "s2", "s3", "s4", "s5", "s6")

    # The wording is the same for every row, so define it once up front.
    templates = {
        "simple_pos": "which sentences have the word {first}?",
        "simple_neg": "which sentences do not have the word {first}?",
        "simple_pos_dis": "which sentences have the words {first} or {second}?",
        "comp_pos_dis": "which sentences have either the words {first} or {second}?",
        "simple_neg_dis": "which sentences do not have the words {first} or {second}?",
        "comp_neg_dis": "which sentences do not have either the words {first} or {second}?",
        "simple_neg_conj": "which sentences do not have the words {first} and {second}?",
        "inc_dis": "which sentences have the word {first} or {second} or both?",
        "ex_dis": "which sentences have the word {first} or {second}, but not both?",
        "comp_neg": "which sentences have neither the word {first} nor {second}?",
        "simple_pos_conj": "which sentences have the words {first} and {second}?",
        "comp_neg_conj": "which sentences do not have both the words {first} and {second}?",
    }

    rows = []
    for count, (_, record) in enumerate(spreadsheet_df.iterrows(), start=1):
        words = [record[column] for column in word_columns]
        sentences = [record[column] for column in sentence_columns]
        base = "In the list of sentences: " + ", ".join(sentences) + ","

        # First pass: every random draw for this row, in template order.
        picked_words = {}
        for key, template in templates.items():
            if "first" in template and "second" in template:
                picked_words[key] = random.sample(words, 2)
            else:
                picked_words[key] = [random.choice(words)]

        # Second pass: fill in the wording and look up the expected answer.
        for key, chosen in picked_words.items():
            # chosen[-1] equals chosen[0] for one-slot templates, where {second} is unused
            question = templates[key].format(first=chosen[0], second=chosen[-1])
            rows.append((count, key, f"{base} {question}", get_correct_answer(sentences, key, chosen)))

    return rows

In [None]:
# TESTING CODE FOR CORRECT ANSWERS
# Uncomment one of the alternatives below to spot-check a different task.
# prompts = generate_letter_stimuli()
# prompts = generate_digit_stimuli()
# prompts = generate_emoji_stimuli()
prompts = generate_word_stimuli()
# generate_word_stimuli() yields (count, key, prompt, answer), while the other
# generators yield (key, prompt, answer). Star-unpacking keeps the last three
# fields so this loop works for all of them; a plain three-name unpack raises
# ValueError on the four-field word rows.
for *_, key, prompt, correct_answer in prompts:
    print(f"{key}: {prompt}")
    print(f"Correct Answer: {correct_answer}\n")
    break  # one example is enough for a spot check


simple_pos: In the list of sentences: I like to study on my chair., My professor conducted a study on the life of rats., I enjoy my life as the department chair., There is no life without liberty., I needed to study for this upcoming exam., My dining table was missing a chair., which sentences have the word chair?
Correct Answer: ['I like to study on my chair.', 'I enjoy my life as the department chair.', 'My dining table was missing a chair.']



In [None]:
# Creating stimuli sheet for tasks
def generate_stimuli(task, filename, n_iterations=20):
    """Generate the stimuli for one task and write them to a CSV file.

    Args:
        task (str): Type of task: 'digit', 'emoji', 'letter' or 'word'.
        filename (str): Name of the CSV file to output to.
        n_iterations (int): Number of stimulus groups to generate for the
            'digit', 'emoji' and 'letter' tasks. Defaults to 20. Ignored for
            'word', which produces one group per spreadsheet row.

    Returns:
        None. The sheet is written to ``filename`` with the columns
        Iteration, Type, Sentence, Category and Correct Response, and a
        one-line summary is printed.

    Raises:
        ValueError: If ``task`` is not one of the four supported names.
    """
    experimental_types = ("simple_pos", "simple_neg", "simple_pos_dis", "comp_pos_dis",
                          "simple_neg_dis", "comp_neg_dis", "simple_neg_conj")

    def categorize(key):
        # Every construction outside the experimental list is a control
        return "experimental" if key in experimental_types else "control"

    sheet = []

    if task == "word":
        # Word stimuli already carry their own group number (one per spreadsheet row)
        for count, key, prompt, correct_answer in generate_word_stimuli():
            sheet.append((count, key, prompt, categorize(key), correct_answer))
    else:
        # Resolve the generator by name lazily, so only the requested task's
        # function needs to be defined in the kernel.
        if task == "digit":
            generator = generate_digit_stimuli
        elif task == "emoji":
            generator = generate_emoji_stimuli
        elif task == "letter":
            generator = generate_letter_stimuli
        else:
            raise ValueError("Invalid task type. Must be 'digit', 'emoji', 'letter', or 'word'.")

        # i runs from 1 to n_iterations and tags every row of the same group
        for i in range(1, n_iterations + 1):
            for key, prompt, correct_answer in generator():
                sheet.append((i, key, prompt, categorize(key), correct_answer))

    # Write to CSV
    stimuli_df = pd.DataFrame(sheet, columns=["Iteration", "Type", "Sentence", "Category", "Correct Response"])
    stimuli_df.to_csv(filename, index=False, encoding="utf-8")

    print(f"CSV file '{filename}' created with {len(sheet)} rows.")

    return None


# Build the word-task sheet and save it as word_task_stimuli.csv.
generate_stimuli("word", "word_task_stimuli.csv")


CSV file 'word_task_stimuli.csv' created with 240 rows.
