In [None]:
import spacy
import pandas as pd
import random
import csv

# Load spaCy's small English pipeline once; the functions defined in the cells
# below call this shared `nlp` object to parse sentences.
# NOTE(review): presumably the model has to be installed beforehand
# (`python -m spacy download en_core_web_sm`) — confirm for your environment.
nlp = spacy.load("en_core_web_sm")



In [None]:
# Function to insert a modal (e.g. "might") into two sentences and join them with "and"
def modals(P, Q, P_type_bool, Q_type_bool, connective):
    """Rewrite ``P`` and ``Q`` around a modal and join the results with "and".

    Both sentences are parsed with the module-level spaCy pipeline ``nlp``.
    Each parse is split into a subject phrase and a complement; the first
    auxiliary found (a form of have/do/be) or, failing that, the ROOT verb is
    replaced by ``"<connective> [not] <base form>"``.

    Args:
        P: First sentence.
        Q: Second sentence.
        P_type_bool: True when ``P`` is the negated variant, so "not" is placed
            after the modal.
        Q_type_bool: Same flag for ``Q``.
        connective: The modal to insert, e.g. "might".

    Returns:
        ``"<subject_p> <complement_p> and <subject_q> <complement_q>"``. Tokens
        are joined with single spaces (punctuation included) and keep their
        original casing.

    Notes:
        * Every token whose dependency label contains "subj" contributes to the
          subject phrase, including subjects of embedded clauses.
        * Explicit negation tokens ("not", "n't") are always dropped; negation
          in the output is driven solely by the ``*_type_bool`` flags.
        * Other modal auxiliaries ("can", "will", ...) are not recognised.
    """
    # Base (infinitive) form of every auxiliary the modal can absorb.
    aux_base_form = {
        "have": "have",
        "has": "have",
        "had": "have",
        "do": "do",
        "does": "do",
        "did": "do",
        "am": "be",
        "is": "be",
        "are": "be",
        "was": "be",
        "were": "be",
    }
    aux_verbs_do = {"do", "does", "did"}
    subject_modifier_deps = {"det", "poss", "amod", "compound"}
    # spaCy splits contractions ("doesn't" -> "does" + "n't"), so the clitic
    # forms must be skipped just like a free-standing "not".
    negation_tokens = {"not", "n't", "n\u2019t"}

    def process_doc(doc, is_negated):
        """Return ``(subject, complement)`` strings for one parsed sentence."""
        modal_prefix = f"{connective} not" if is_negated else connective

        # Identify subject modifiers by token index up front. Removing them from
        # the complement by *text* afterwards could delete the wrong token (an
        # earlier identical word) or raise ValueError when the modifier had not
        # been appended yet.
        modifier_ids = {
            child.i
            for token in doc
            if "subj" in token.dep_
            for child in token.children
            if child.dep_ in subject_modifier_deps
        }

        subject, complement = [], []
        modal_inserted = False

        for token in doc:
            if token.i in modifier_ids:
                continue  # emitted together with its subject head below

            lowered = token.text.lower()

            if "subj" in token.dep_:
                # Subject head plus its modifiers, in sentence order.
                parts = [token] + [
                    child
                    for child in token.children
                    if child.dep_ in subject_modifier_deps
                ]
                parts.sort(key=lambda t: t.i)
                subject.append(" ".join(t.text for t in parts))
            elif token.dep_ == "expl":  # Existential "There"
                subject.append(token.text)
            elif lowered in aux_base_form and not modal_inserted:
                if token.dep_ != "ROOT" and lowered in aux_verbs_do:
                    # Do-support: the dummy auxiliary disappears once a modal is
                    # present ("does not like" -> "might not like").
                    complement.append(modal_prefix)
                else:
                    complement.append(f"{modal_prefix} {aux_base_form[lowered]}")
                modal_inserted = True
            elif token.dep_ == "ROOT" and not modal_inserted:
                # No auxiliary seen: the main verb itself follows the modal.
                complement.append(f"{modal_prefix} {token.lemma_}")
                modal_inserted = True
            elif lowered in negation_tokens:
                continue  # negation is re-created via modal_prefix
            else:
                complement.append(token.text)

        return " ".join(subject), " ".join(complement)

    subject_p, complement_p = process_doc(nlp(P), P_type_bool)
    subject_q, complement_q = process_doc(nlp(Q), Q_type_bool)

    # Return the formatted output
    return f"{subject_p} {complement_p} and {subject_q} {complement_q}"

In [None]:
# Function to join two sentences with a connective and normalise capitalisation
def create_sentence(P, Q, connective, P_type_bool, Q_type_bool):
    """Combine ``P`` and ``Q`` into one stimulus sentence.

    How the sentences are joined depends on which module-level list contains
    ``connective``:

    * ``connectives1`` -> ``"P <connective> Q"``
    * ``connectives2`` -> ``"<connective> P, <connective> Q"``
    * ``connectives3`` -> delegated to ``modals`` (modal inserted into each sentence)
    * anything else    -> ``"Either P, or Q"``

    The joined text is then re-tokenised with ``nlp`` and re-cased: the first
    word is capitalised, proper nouns and the pronoun "I" keep their casing,
    everything else is lowercased. Punctuation is attached to the preceding
    word, and periods that are not the final token are dropped.

    Args:
        P: First sentence.
        Q: Second sentence.
        connective: The connective to use.
        P_type_bool: Negation flag for ``P``; only forwarded to ``modals``.
        Q_type_bool: Negation flag for ``Q``; only forwarded to ``modals``.

    Returns:
        The normalised sentence, or an empty string when the joined text
        yields no tokens.
    """
    # Joining sentences
    if connective in connectives1:
        stimuli = f"{P} {connective} {Q}"
    elif connective in connectives2:
        stimuli = f"{connective} {P}, {connective} {Q}"
    elif connective in connectives3:  # Calls the modal function to insert modal
        stimuli = modals(P, Q, P_type_bool, Q_type_bool, connective)
    else:
        stimuli = f"Either {P}, or {Q}"

    doc = nlp(stimuli)
    if len(doc) == 0:
        return ""  # nothing to capitalise; avoids IndexError on doc[0]

    last_index = len(doc) - 1
    words = []
    # True when the next word must start a new space-separated piece; False when
    # the tokenizer split it off the previous token without whitespace
    # ("does" + "n't", "John" + "'s"), in which case it is glued back on.
    break_before_next = True

    for token in doc:
        text = token.text

        if token.is_space:
            # Stray whitespace tokens (e.g. double spaces) only mark a word break.
            break_before_next = True
            continue

        if token.is_punct and words:
            if text == "." and token.i != last_index:
                # Sentence-internal period: drop it but keep the word break.
                break_before_next = True
                continue
            words[-1] += text
            break_before_next = bool(token.whitespace_)
            continue

        keeps_case = token.pos_ == "PROPN" or text == "I"
        if not words:
            # First word: capitalise without flattening a proper noun
            # (str.capitalize() would turn "NASA" into "Nasa").
            piece = text[:1].upper() + text[1:] if keeps_case else text.capitalize()
        elif keeps_case:
            piece = text
        else:
            piece = text.lower()

        if words and not break_before_next:
            words[-1] += piece
        else:
            words.append(piece)
        break_before_next = bool(token.whitespace_)

    # Join the words into a new sentence
    return " ".join(words)

In [None]:
# Connectives, grouped by how they are spliced into a stimulus.
connectives1 = [  # placed between the two sentences
    "and",
    "if",
    "so",
    "therefore",
    "but",
    "when",
    "although",
    "or",
]
connectives2 = [  # repeated in front of each sentence
    "maybe",
    "perhaps",
]
connectives3 = [  # modals inserted inside each sentence
    "might",
]
# Every connective, in group order.
connectives = [*connectives1, *connectives2, *connectives3]

In [None]:
# Read base statements from the CSV and append generated control stimuli to the same file
csv_path = "exp1_prompts.csv" # replace with correct filename
fields = ["P", "Not P"]
row_order = 1
n_control_stimuli = 100  # Change for the number of control stimuli needed
random_seed = None  # set to an int to make the generated stimuli reproducible

# Only rows with both variants present can be sampled. pandas parses "N/A" as
# missing by default, so rows appended by an earlier run of this cell may show
# up as NaN here (depends on the file's column order — confirm); without the
# filter they would yield "nan" sentences or crash the parser.
stimuli_df = pd.read_csv(csv_path, usecols=fields).dropna(subset=fields)

if len(stimuli_df) < 2:
    raise ValueError(
        f"{csv_path} needs at least two rows with both {fields[0]!r} and "
        f"{fields[1]!r} filled in to pair distinct statements"
    )

rng = random.Random(random_seed)

# Append to csv_path itself (previously a hard-coded name that ignored csv_path);
# UTF-8 matches the default encoding pandas used to read the file.
with open(csv_path, 'a', newline='', encoding='utf-8') as csvFile:
    writer = csv.writer(csvFile)
    for _ in range(n_control_stimuli):
        # Two distinct row positions; sample() cannot loop forever the way the
        # retry-until-different approach did on a one-row file.
        P_row, Q_row = rng.sample(range(len(stimuli_df)), 2)

        P_type = rng.choice(fields)
        Q_type = rng.choice(fields)
        P = stimuli_df.iloc[P_row][P_type]
        Q = stimuli_df.iloc[Q_row][Q_type]
        # Checks to see if sentence is negated
        P_type_bool = P_type == "Not P"
        Q_type_bool = Q_type == "Not P"

        connective = rng.choice(connectives)
        sentence = create_sentence(P, Q, connective, P_type_bool, Q_type_bool)
        writer.writerow([sentence, "control", "N/A", "N/A", "0", connective])
# The with-block closes the file; no explicit close() is needed.