#### Imports

In [16]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
import pandas as pd
import csv
import sys
import io


#### Defining the neccessary data and scales

In [1]:
likert_scale = [
    "1 - Strongly Disagree",
    "2 - Disagree",
    "3 - Neutral",
    "4 - Agree",
    "5 - Strongly Agree"
]
likert_scale_reversed = [
   "5 - Strongly Agree",
   "4 - Agree",
   "3 - Neutral",
   "2 - Disagree",
   "1 - Strongly Disagree"
]
Ques44 = ["Is talkative",
    "Tends to find fault with others",
    "Does a thorough job",
    "Is depressed, blue",
    "Is original, comes up with new ideas",
    "Is reserved",
    "Is helpful and unselfish with others",
    "Can be somewhat careless",
    "Is relaxed, handles stress well",
    "Is curious about many different things",
    "Is full of energy",
    "Starts quarrels with others",
    "Is a reliable worker",
    "Can be tense",
    "Is ingenious, a deep thinker",
    "Generates a lot of enthusiasm",
    "Has a forgiving nature",
    "Tends to be disorganized",
    "Worries a lot",
    "Has an active imagination",
    "Tends to be quiet",
    "Is generally trusting",
    "Tends to be lazy",
    "Is emotionally stable, not easily upset",
    "Is inventive",
    "Has an assertive personality",
    "Can be cold and aloof",
    "Perseveres until the task is finished",
    "Can be moody",
    "Values artistic, aesthetic experiences",
    "Is sometimes shy, inhibited",
    "Is considerate and kind to almost everyone",
    "Does things efficiently",
    "Remains calm in tense situations",
    "Prefers work that is routine",
    "Is outgoing, sociable",
    "Is sometimes rude to others",
    "Makes plans and follows through with them",
    "Gets nervous easily",
    "Likes to reflect, play with ideas",
    "Has few artistic interests",
    "Likes to cooperate with others",
    "Is easily distracted",
    "Is sophisticated in art, music, or Literature"]
valid_set = set(['1', '2', '3', '4', '5'])

#### Initialising model weights and tokenizer

In [3]:
model = "tiiuae/falcon-7b-instruct"

tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)



Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.48s/it]


#### Defining 6 prompt functions

In [4]:
def promptfunc1(persona,text,scale):

    prompt = f" Respond only with the number you chose and nothing else. You are {persona}. Rate the following statement: I see myself as someone who {text}. Choose one option from: {', '.join(scale)}."
    print(prompt)

    sequences = pipeline(
    prompt,
    max_new_tokens=20,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    )

    return sequences

In [5]:
def promptfunc2(persona,text,scale):

    prompt = f" Respond only with the number you chose and nothing else. You are {persona}. Choose one option from: {', '.join(scale)} to rate the following statement: I see myself as someone who {text}."
    print(prompt)

    sequences = pipeline(
    prompt,
    max_new_tokens=20,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    )

    return sequences

In [6]:
def promptfunc3(persona,text,scale):

    prompt = f" Respond only with the number you chose and nothing else. Choose one option from: {', '.join(scale)} to rate the following statement: I see myself as someone who {text}. You are {persona}."
    print(prompt)

    sequences = pipeline(
    prompt,
    max_new_tokens=20,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    )


    return sequences


In [7]:
def promptfunc4(persona,text,scale):

    prompt = f" Respond only with the number you chose and nothing else. Choose one option from: {', '.join(scale)}. You are {persona}. Rate the following statement: I see myself as someone who {text}."
    print(prompt)

    sequences = pipeline(
    prompt,
    max_new_tokens=20,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    )


    return sequences


In [8]:
def promptfunc5(persona,text,scale):

    prompt = f" Respond only with the number you chose and nothing else. Rate the following statement: I see myself as someone who {text}. Choose one option from: {', '.join(scale)}. You are {persona}."
    print(prompt)

    sequences = pipeline(
    prompt,
    max_new_tokens=20,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    )

    return sequences


In [9]:
def promptfunc6(persona,text,scale):
    prompt = f" Respond only with the number you chose and nothing else. Rate the following statement: I see myself as someone who {text}. You are {persona}. Choose one option from: {', '.join(scale)}."
    print(prompt)

    sequences = pipeline(
    prompt,
    max_new_tokens=20,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    )


    return sequences


#### Defining Result files

In [27]:
with open('results_good.csv', 'w', newline='') as csvfile:
    fieldnames = ['persona', 'text','Itemnum', 'answer']
    writer = csv.writer(csvfile)
    writer.writerow(fieldnames)

In [11]:
with open('results_bad.csv', 'w', newline='') as csvfile:
    fieldnames = ['persona', 'text','Itemnum', 'answer']
    writer = csv.writer(csvfile)
    writer.writerow(fieldnames)

In [12]:
with open('results_neutral.csv', 'w', newline='') as csvfile:
    fieldnames = ['persona', 'text','Itemnum', 'answer']
    writer = csv.writer(csvfile)
    writer.writerow(fieldnames)

#### Running for good list 

In [None]:
# Create a StringIO object
string_io = io.StringIO()
# Redirect stdout to the StringIO object
sys.stdout = string_io


6
5
4
3
2
1
 Respond only with the number you chose and nothing else. You are Barack Obama. Rate the following statement: I see myself as someone who Is talkative. Choose one option from: 1 - Strongly Disagree, 2 - Disagree, 3 - Neutral, 4 - Agree, 5 - Strongly Agree.
Attempt 1: The int value of result is 5
The num val is digit and valid set: True, type: <class 'str'>
It is a valid result
Appended valid result to CSV
 Respond only with the number you chose and nothing else. You are Barack Obama. Rate the following statement: I see myself as someone who Tends to find fault with others. Choose one option from: 1 - Strongly Disagree, 2 - Disagree, 3 - Neutral, 4 - Agree, 5 - Strongly Agree.
Attempt 1: The int value of result is 4
The num val is digit and valid set: True, type: <class 'str'>
It is a valid result
Appended valid result to CSV
 Respond only with the number you chose and nothing else. You are Barack Obama. Rate the following statement: I see myself as someone who Does a thorou

In [28]:
prompt_list=[promptfunc1, promptfunc2, promptfunc3, promptfunc4, promptfunc5, promptfunc6]

df1=pd.read_csv('goodList.csv')
df1=df1.head(5)
#Likert scale 1-5
counter = 0
multiattempt=5
max_retries = 10
for loopfive in range(multiattempt): 
    for promptfunc in prompt_list:
        for persona in df1['Name']:
            itemcount = 1
            for text in Ques44:
                counter += 1
                retry_count = 0
                valid_result = False

                while not valid_result and retry_count < max_retries:
                    result_temp = promptfunc(persona, text, likert_scale)
                    generated_text = result_temp[0]['generated_text'].strip()
                    num_val = generated_text[-1]
                    print(f"Attempt {retry_count + 1}: The int value of result is {num_val}")
                    print(f"The num val is digit and valid set: {num_val.isdigit()}, type: {type(num_val)}")

                    if num_val in valid_set:
                        print("It is a valid result")
                        with open('results_good.csv', 'a', newline='') as csvfile:
                            writer = csv.writer(csvfile)
                            writer.writerow([persona, text, itemcount, num_val])
                        print("Appended valid result to CSV")
                        itemcount += 1
                        valid_result = True
                    else:
                        print("Invalid result, retrying...")
                        retry_count += 1

                if not valid_result:
                    print(f"Failed to get a valid result after {max_retries} attempts")

# Reset stdout to its original value
sys.stdout = sys.__stdout__

# Get the captured output
output = string_io.getvalue()

# Write to a file
with open('outputlog.txt', 'a') as f:
    f.write(output)

print("Output saved to outputlog.txt")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.
Setting `pad_token_id` to `eos_tok