In [10]:
import spacy
import pandas as pd

# Load spaCy's small English pipeline once at module level; negate_statement,
# modals and create_sentence below all parse text through this shared `nlp` object.
nlp = spacy.load("en_core_web_sm")

In [11]:
# Function to get the negation of a sentence
def negate_statement(sentence):
    """Return a negated version of a single declarative sentence.

    The sentence is parsed with the module-level spaCy pipeline ``nlp``.
    Exactly one negation is inserted, at the first token (in sentence order)
    that matches one of the rules below; every other token is copied through
    unchanged. Tokens are re-joined with single spaces and a period is
    appended.

    Rules, tried in this order for each token until one has fired:
      1. frequency adverb (always/often/sometimes, tagged RB)
         -> "do not <adverb>"
      2. "has": as ROOT -> "does not have"; otherwise -> "has not"
      3. am/is/are/was/were -> "<word> not"
      4. ROOT token with pos VERB: VBD -> "did not <lemma>",
         VBZ -> "does not <lemma>", VBP/VB -> "do not <lemma>",
         any other tag -> "not <word>"
      5. any token whose dependency is aux/auxpass -> "<word> not"

    Args:
        sentence: The statement to negate (str).

    Returns:
        The negated statement as a str ending in ".".
    """
    frequency_adverbs = {"always", "often", "sometimes"}
    # "were" is included so that it is negated like the other forms of "be"
    # even when it is the main verb rather than an auxiliary.
    be_forms = {"am", "is", "are", "was", "were"}
    # Do-support prefix to use for each fine-grained tag of the root verb.
    do_support = {
        "VBD": "did not",   # past tense
        "VBZ": "does not",  # present tense singular
        "VBP": "do not",    # present tense plural
        "VB": "do not",     # base form
    }

    # NOTE(review): this strips every "." in the input, not only the final one.
    sentence = sentence.replace(".", "")  # Remove the period for processing
    doc = nlp(sentence)
    negated_tokens = []
    negation_inserted = False  # To prevent multiple negations

    for token in doc:
        # Once the single negation is in place, copy the remaining tokens.
        if negation_inserted:
            negated_tokens.append(token.text)
            continue

        word = token.text.lower()

        if token.tag_ == "RB" and word in frequency_adverbs:
            # NOTE(review): always emits "do not", whatever the subject or
            # tense of the verb that follows - confirm this suits the stimuli.
            negated_tokens.append("do not")
            negated_tokens.append(token.text)
        elif word == "has":
            # Main-verb "has" needs do-support; auxiliary "has" takes "not".
            negated_tokens.append("does not have" if token.dep_ == "ROOT" else "has not")
        elif word in be_forms:
            negated_tokens.append(f"{token.text} not")
        elif token.pos_ == "VERB" and token.dep_ == "ROOT":
            prefix = do_support.get(token.tag_)
            if prefix is None:
                # Tag without a do-support form (e.g. a participle): keep the
                # verb and put "not" in front of it instead of dropping it.
                negated_tokens.append(f"not {token.text}")
            else:
                negated_tokens.append(f"{prefix} {token.lemma_}")
        elif token.dep_ in {"aux", "auxpass"}:
            # Remaining auxiliaries (anything not matched by the rules above).
            negated_tokens.append(f"{token.text} not")
        else:
            # No rule matched: keep the token and keep looking.
            negated_tokens.append(token.text)
            continue

        negation_inserted = True

    # Join the tokens into the negated sentence
    negated_sentence = " ".join(negated_tokens)
    return f"{negated_sentence}."



In [12]:
# Function to see if connective is a contradiction connective
def is_contradiction(connective):
    """Return 1 if ``connective`` is one of the contradiction connectives, else 0.

    The comparison is an exact, case-sensitive membership test against the
    fixed collection below.
    """
    contradiction_connectives = ("and", "if", "so", "therefore", "but", "when", "although")
    return int(connective in contradiction_connectives)

In [13]:
# Function to insert modals into given sentences
def modals(sentence, negation, connective):
    """Insert a modal into a sentence and into its negation, then join them.

    Both strings are parsed with the module-level spaCy pipeline ``nlp``.
    Each parse is split into a subject part and a complement part, and the
    first have/do/be form (or, failing that, the ROOT token) in the
    complement is replaced by the modal followed by a base form. Explicit
    "not" tokens are dropped from the complement because the negated modal
    already carries the negation.

    Args:
        sentence: The positive statement (str).
        negation: The negated statement (str).
        connective: The modal to insert, e.g. "might" (str).

    Returns:
        A str of the form
        "<subject> <complement> and <subject> <complement>", where the first
        half comes from ``sentence`` and the second from ``negation``. The
        joining word is always "and".
    """
    # Surface forms of have/do/be mapped to the base form that follows the modal.
    aux_base_forms = {
        "have": "have",
        "has": "have",
        "do": "do",
        "does": "do",
        "did": "do",
        "am": "be",
        "is": "be",
        "are": "be",
        "was": "be",
        "were": "be",
    }
    aux_verbs_do = {"do", "does", "did"}
    # Dependents of a subject that are moved into the subject phrase with it.
    subject_modifier_deps = {"det", "poss", "amod", "compound"}

    def process_doc(doc, is_negated):
        """Split ``doc`` into (subject text, complement text with modal)."""
        # Pass 1: locate every subject head together with its modifiers.
        # Modifiers are tracked by token index, not by text, so that the
        # exact tokens are left out of the complement even when the same
        # word also occurs elsewhere or the modifier follows the subject.
        subject_phrases = {}  # subject head index -> full subject phrase
        modifier_ids = set()  # indices of tokens absorbed into a subject
        for token in doc:
            if "subj" not in token.dep_:
                continue
            modifiers = [
                child for child in token.children
                if child.dep_ in subject_modifier_deps
            ]
            # Restore sentence order of the head and its modifiers.
            parts = sorted([token] + modifiers, key=lambda t: t.idx)
            subject_phrases[token.i] = " ".join(t.text for t in parts)
            modifier_ids.update(child.i for child in modifiers)

        # Pass 2: build the subject and complement, inserting one modal.
        subject, complement = [], []
        modal_inserted = False
        modal_prefix = f"{connective} not" if is_negated else connective

        for token in doc:
            if token.i in modifier_ids:
                continue  # already emitted as part of a subject phrase

            word = token.text.lower()

            if token.i in subject_phrases:
                subject.append(subject_phrases[token.i])
            elif token.dep_ == "expl":  # Existential "There"
                subject.append(token.text)
            elif word in aux_base_forms and not modal_inserted:
                if is_negated and token.dep_ != "ROOT" and word in aux_verbs_do:
                    # Do-support in a negation ("does not eat"): the modal
                    # replaces it entirely, giving "<modal> not eat".
                    complement.append(f"{connective} not")
                else:
                    complement.append(f"{modal_prefix} {aux_base_forms[word]}")
                modal_inserted = True
            elif token.dep_ == "ROOT" and not modal_inserted:
                # No auxiliary seen before the root: put the modal in front
                # of the root's lemma.
                complement.append(f"{modal_prefix} {token.lemma_}")
                modal_inserted = True
            elif word == "not":
                continue  # Skip explicit "not" as it's already handled
            else:
                complement.append(token.text)

        return " ".join(subject), " ".join(complement)

    # Process positive and negated sentences
    subject_s, complement_s = process_doc(nlp(sentence), is_negated=False)
    subject_n, complement_n = process_doc(nlp(negation), is_negated=True)

    # Return the formatted output
    return f"{subject_s} {complement_s} and {subject_n} {complement_n}"

In [14]:
# Function to join a sentence with its negation and normalize its casing
def create_sentence(sentence, negation, connective):
    """Combine a sentence and its negation into one stimulus string.

    How the two are joined depends on which module-level list contains
    ``connective``:
      * connectives1: "<sentence> <connective> <negation>"
      * connectives2: "<connective> <sentence>, <connective> <negation>"
      * connectives3: delegated to ``modals(sentence, negation, connective)``
      * anything else: "Either <sentence>, or <negation>"

    The joined text is then re-tokenized with the module-level ``nlp``
    pipeline and normalized: the first word is capitalized, proper nouns and
    "I" keep their casing, every other word is lowercased, punctuation is
    attached to the preceding word, and a "." is kept only when it is the
    very last token.

    Args:
        sentence: The positive statement (str).
        negation: The negated statement (str).
        connective: The connective used to join them (str).

    Returns:
        The normalized stimulus sentence as a str.
    """
    # Joining sentences
    if connective in connectives1:
        stimuli = f"{sentence} {connective} {negation}"
    elif connective in connectives2:
        stimuli = f"{connective} {sentence}, {connective} {negation}"
    elif connective in connectives3:  # Calls the modal function to insert modal
        stimuli = modals(sentence, negation, connective)
    else:
        stimuli = f"Either {sentence}, or {negation}"

    doc = nlp(stimuli)

    # First word gets capitalized. str.capitalize() also lowercases the rest
    # of the word ("NASA" -> "Nasa"), so for proper nouns and "I" only the
    # first character is upper-cased, matching how they are treated below.
    first = doc[0]
    if first.pos_ in {'PROPN'} or first.text == "I":
        first_word = first.text[:1].upper() + first.text[1:]
    else:
        first_word = first.text.capitalize()

    # Create a list to hold the transformed sentence
    new_sentence = [first_word]
    last_index = len(doc) - 1

    # Iterate through the tokens starting from the second word
    for token in doc[1:]:
        if token.pos_ in {'PROPN'} or token.text == "I":
            # Proper nouns and the pronoun "I" keep their original casing
            new_sentence.append(token.text)
        elif token.is_punct:
            # Glue punctuation onto the previous word; drop any "." that is
            # not the final token (the period that ended the first clause).
            if token.text != "." or token.i == last_index:
                new_sentence[-1] += token.text
        else:
            new_sentence.append(token.text.lower())

    # Join the words into a new sentence
    return " ".join(new_sentence)

In [15]:
# Easy-insert connectives: placed between the sentence and its negation
connectives1 = [
    "and",
    "if",
    "so",
    "therefore",
    "but",
    "when",
    "although",
    "or",
]
# Connectives placed before both the sentence and its negation
connectives2 = [
    "maybe",
    "perhaps",
]
# Modals inserted within the sentences themselves
connectives3 = [
    "might",
]
# All connectives, in the order above
connectives = [*connectives1, *connectives2, *connectives3]

In [16]:
# Reading statements from CSV and outputting to CSV
csv_path = "exp1_propositions.csv" # replace with correct filename
fields = ["Stimuli"]
row_order = 1

stimuli_df = pd.read_csv(csv_path, usecols=fields)

# Column order of the output file; given explicitly so the header is still
# written when there are no statements to process.
output_columns = ["Sentence", "Type", "P", "Not P", "Contradiction", "Connective"]

csv_output = []

# Empty cells are read as NaN, which negate_statement cannot process, so
# rows without a statement are skipped.
for statement in stimuli_df["Stimuli"].dropna():
    p = statement
    # The negation does not depend on the connective: compute it once per
    # statement instead of once per (statement, connective) pair.
    not_p = negate_statement(statement)
    for connective in connectives:
        sentence = create_sentence(p, not_p, connective)
        contradiction = is_contradiction(connective)
        csv_output.append({
            "Sentence": sentence,
            "Type": "experimental",
            "P": p,
            "Not P": not_p,
            "Contradiction": contradiction,
            "Connective": connective,
        })

# Build the frame once from the list of records, then write it out.
output_df = pd.DataFrame(csv_output, columns=output_columns)
output_df.to_csv("exp1_prompts.csv", index=False)