In [1]:
import pandas as pd
import random
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline

# Hugging Face Hub identifier of the checkpoint to load.
model_name = "huggingface/llama-7b"

# Load the tokenizer and the causal-LM weights. Every later cell needs both
# objects, so a failure here is reported and then re-raised; swallowing it
# would leave `tokenizer` / `model` undefined and the notebook would only
# fail further down with an unrelated-looking NameError.
try:
    tokenizer = LlamaTokenizer.from_pretrained(model_name)
    model = LlamaForCausalLM.from_pretrained(model_name)
except Exception as e:
    print(f"An error occurred: {e}")
    raise
else:
    print("model and tokenizer loaded successfully.")

# Convenience text-generation pipeline wrapping the objects loaded above.
# NOTE(review): `torch_dtype` and `device_map` are documented as shortcuts for
# model-loading kwargs; because `model` is already instantiated here they
# presumably do not change its dtype or device placement - confirm, and pass
# them to `from_pretrained` instead if half precision / GPU placement is wanted.
text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


ModuleNotFoundError: No module named 'transformers'

In [None]:
# CSV path (change accordingly)
csv_path = "lungu_scale_updated.csv"
prompts = []
fields = ['Instructions', 'Condition', 'Study', 'Type', 'Proposition', 'NAND', 'NOR', 'Positive AND']  # Columns used in CSV
row_order = 1  # 1-based position of the row in the CSV, stored so the shuffle below can be undone

# NOTE(review): random.choice / random.shuffle below draw from the unseeded
# global RNG, so each run selects different continuations and a different
# order. Call random.seed(<int>) before this point if runs must be repeatable.

# Reading CSV to dataframe
prompts_df = pd.read_csv(csv_path, usecols=fields)
for _, row in prompts_df.iterrows():
    # Reset on every row. Previously these were only assigned in the
    # non-"Control" branch, so a "Control" row either raised NameError (when it
    # came first) or silently reported the previous row's continuation.
    chosen_column, content = '', ''

    if row['Condition'] != "Control":
        # Output label -> continuation text; the "Positive AND" column is
        # recorded under the label "Contradictory".
        choices = {"NAND": row["NAND"], "NOR": row["NOR"], "Contradictory": row["Positive AND"]}
        available_choices = [(label, text) for label, text in choices.items() if not pd.isna(text)]

        if available_choices:
            # Randomly choose one of the continuations present in this row
            chosen_column, content = random.choice(available_choices)
        # else: non-"Control" row with no continuation at all; keep the empty defaults

    text_parts = [row['Instructions'], row['Proposition'], str(content)]

    prompts.append({
        # Filter out empty strings before joining (drops the empty continuation)
        "text": ' '.join(filter(None, text_parts)),
        "condition": row['Condition'],
        "study": row['Study'],
        "type": row['Type'],
        "choice": chosen_column if chosen_column else "none",
        "order": row_order,
    })
    row_order += 1

# Randomise presentation order; "order" keeps each prompt's original CSV position.
random.shuffle(prompts)

print(f"Found {len(prompts)} prompts in the given CSV file.")

In [None]:
# Model replies, filled by the generation loop (one entry per prompt, same order as `prompts`).
outputs = []

# Function to generate responses using the chosen model
def generate_response(prompt_text, max_new_tokens=50, include_prompt=False):
    """Generate the model's reply to a single prompt.

    The fixed system instruction and ``prompt_text`` are combined into one
    prompt, tokenized with the module-level ``tokenizer`` and completed by the
    module-level ``model``.

    Args:
        prompt_text: The user-facing prompt text.
        max_new_tokens: Upper bound on the number of *generated* tokens. The
            prompt length is not counted against it (the previous
            ``max_length=50`` was, and the system instruction alone is longer
            than that budget).
        include_prompt: When True, return the decoded prompt followed by the
            continuation, as the earlier version of this function did. When
            False (default), return only the newly generated text.

    Returns:
        The decoded text with special tokens removed. With
        ``include_prompt=False`` surrounding whitespace is stripped.
    """
    system_message = "If prompted to respond with a number on a scale, only respond with the whole number. For example, only respond with '6' if you believe it is 6 on the scale. Do not give any explanations. Interpret both sentences together, not separately."

    # Combine system and user messages into a single string
    combined_prompt = f"{system_message}\nUser: {prompt_text}\n"

    encoded = tokenizer(combined_prompt, return_tensors="pt")
    # Inputs must be on the same device as the model's weights.
    input_ids = encoded["input_ids"].to(model.device)
    attention_mask = encoded["attention_mask"].to(model.device)

    generated = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
    )

    sequence = generated[0]
    if include_prompt:
        return tokenizer.decode(sequence, skip_special_tokens=True)

    # The returned sequence starts with the prompt tokens; keep only what follows them.
    prompt_length = input_ids.shape[1]
    return tokenizer.decode(sequence[prompt_length:], skip_special_tokens=True).strip()

# Run every prepared prompt through the model, appending each reply to
# `outputs` in the same order as `prompts`.
outputs.extend(generate_response(item["text"]) for item in prompts)

print("messages generated")


In [None]:
# Output column name -> key of the per-prompt dict that supplies its values.
column_sources = {
    "Prompt": "text",
    "Continuation": "choice",
    "Condition": "condition",
    "Study": "study",
    "Type": "type",
    "Original order": "order",
}

# One list per column, in the current (shuffled) prompt order; the model
# replies are added as the last column.
table = {
    column: [entry[key] for entry in prompts]
    for column, key in column_sources.items()
}
table["Response"] = outputs
df = pd.DataFrame(table)

# Write the table to disk without the index column
output_csv_path = "output_responses_llama.csv"
df.to_csv(output_csv_path, index=False)

print(f"Responses saved to {output_csv_path}")