In [19]:
!pip install transformers
!pip install datasets



In [43]:
import os
import random

import pandas as pd
import torch
from datasets import Dataset, DatasetDict
from transformers import pipeline

# Checkpoint used for the naturalness ratings; swap the comment to try the 8B model.
model_id = "meta-llama/Llama-3.2-1B-Instruct"
# model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

# SECURITY: never hardcode the Hugging Face access token in the notebook.
# Export it before starting Jupyter (e.g. `export HF_TOKEN=...`) instead.
# Any token that was previously committed in this cell must be treated as
# compromised and revoked in the Hugging Face account settings.
# When the variable is unset this is None, in which case the library is
# expected to fall back to credentials cached by `huggingface-cli login`.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Build the chat/text-generation pipeline in bfloat16, letting the library
# decide device placement.
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    token=HF_TOKEN,
)

In [41]:
# Build one prompt record per CSV row, then shuffle the presentation order.
#
# Each record holds the prompt text (instructions + proposition + an optional
# continuation), the row's metadata, the label of the continuation that was
# sampled ("none" when there is none), and the 1-based position of the row in
# the CSV so the original order can be recovered after shuffling.

# CSV path (change accordingly)
csv_path = "lungu_stimuli_testing.csv"
fields = ['Instructions', 'Condition', 'Study', 'Type', 'Proposition', 'NAND', 'NOR', 'Positive AND']  # Columns used in CSV

# Seed for the continuation sampling and the final shuffle, so that
# Restart & Run All reproduces the same stimuli. Set to None to draw a fresh
# randomisation on every run.
RANDOM_SEED = 42
prompt_rng = random.Random(RANDOM_SEED)

prompts = []
row_order = 1

# Reading CSV to dataframe
prompts_df = pd.read_csv(csv_path, usecols=fields)
for _, row in prompts_df.iterrows():
    # Reset per row. Previously `chosen_column` was only assigned for
    # non-Control rows, so a Control row either raised NameError (when it was
    # the first row) or inherited the previous row's continuation label.
    chosen_column, content = "none", ""

    if row['Condition'] != "Control":
        # Continuations that are actually filled in for this row
        # (empty CSV cells are read as NaN).
        choices = {"NAND": row["NAND"], "NOR": row["NOR"], "Contradictory": row["Positive AND"]}
        available_choices = [(label, value) for label, value in choices.items() if not pd.isna(value)]

        if available_choices:
            # Randomly choose one from available choices
            chosen_column, content = prompt_rng.choice(available_choices)
        # Otherwise: a non-Control row without any continuation keeps "none".

    # Drop missing (NaN) and empty parts before joining; NaN would otherwise
    # make str.join raise TypeError because it is a float, not a string.
    text_parts = [row['Instructions'], row['Proposition'], content]
    text = ' '.join(str(part) for part in text_parts if not pd.isna(part) and str(part))

    prompts.append({
        "text": text,
        "condition": row['Condition'],
        "study": row['Study'],
        "type": row['Type'],
        "choice": chosen_column,
        "order": row_order,
    })
    row_order += 1

prompt_rng.shuffle(prompts)

print(f"Found {len(prompts)} prompts in the given CSV file.")

Found 16 prompts in the given CSV file.


In [49]:
# Query the model once per prompt and collect the raw replies in `outputs`,
# in the same order as `prompts`.

# System message sent with every request.
SYSTEM_PROMPT = "Respond with a single value. For example, only respond with '6' if you believe it is 6 on the scale. Do not give any explanations. Interpret both sentences together, not separately."

outputs = []

dataset = Dataset.from_pandas(pd.DataFrame(prompts))

# Calling Llama and appending responses to outputs list
for record in dataset:
    user_text = record["text"]
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_text},
    ]
    # NOTE(review): pad_token_id=128001 is a hard-coded token id, presumably
    # tied to the Llama 3 tokenizer -- confirm before switching model family.
    generation = pipe(messages, max_new_tokens=256, pad_token_id=128001)
    # The pipeline returns the conversation; the last message's content is
    # taken as the model's reply.
    conversation = generation[0]["generated_text"]
    response = conversation[-1]["content"]
    outputs.append(response)
    print(user_text)
    print(response)

On a scale of 1 to 7, 1 being very unnatural and 7 being very natural, rate how natural the following sentence(s) is: John saw two performances by this artist. He thinks he is the best opera singer he ever listened to.
6
On a scale of 1 to 7, 1 being very unnatural and 7 being very natural, rate how natural the following sentence(s) is: John doubts that Sue played football or tennis. He thinks she never did any ball sports.
1
On a scale of 1 to 7, 1 being very unnatural and 7 being very natural, rate how natural the following sentence(s) is: My grandmother made these cookies without rum or cinnamon. Instead of those ingredients she added homemade brandy.
1
On a scale of 1 to 7, 1 being very unnatural and 7 being very natural, rate how natural the following sentence(s) is: Kate used to live without coffee or chocolate. I don't remember which.
3
On a scale of 1 to 7, 1 being very unnatural and 7 being very natural, rate how natural the following sentence(s) is: Mary doubts that Paul read

In [27]:
# Assemble the results table: one row per prompt, in presentation order,
# with the model's reply in the last column, and write it to CSV.

# Output column header -> key in each prompt record.
column_sources = {
    "Prompt": "text",
    "Continuation": "choice",
    "Condition": "condition",
    "Study": "study",
    "Type": "type",
    "Original order": "order",
}

table = {
    header: [record[key] for record in prompts]
    for header, key in column_sources.items()
}
table["Response"] = outputs  # must have one entry per prompt
df = pd.DataFrame(table)

# Save the DataFrame to a CSV file
output_csv_path = "output_responses_llama_test.csv"
df.to_csv(output_csv_path, index=False)

print(f"Responses saved to {output_csv_path}")

Responses saved to output_responses_llama_test.csv
