In [9]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch

model = "tiiuae/falcon-7b"

tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
sequences = pipeline(
   "Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\nDaniel: Hello, Girafatron!\nGirafatron:",
    max_length=200,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")


Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  8.55it/s]
Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Result: Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.
Daniel: Hello, Girafatron!
Girafatron: Daniel! Daniel, my friend!
Daniel: How's it going!
Girafatron: I am doing very good today.
Daniel: What's happening?
Girafatron: I'm in a very good mood, I've been practicing for my next competition, and today, I've been thinking about what it would take to be the greatest animal ever.
Daniel: The greatest animal ever?
Girafatron: Yeah. I mean, the giraffe is the greatest animal ever, but I've never given any thought about how I'd be the greatest animal ever. I just assumed I'd be


In [11]:
# Example text and candidate labels
text = "I am someone who prefers work that is routine."
candidate_labels = ["agree", "disagree"]

# Create a prompt that includes the persona
persona = "Albert Einstein"
prompt = f"Assume the persona of {persona}, classify the following statement: '{text}' into one of these categories: {', '.join(candidate_labels)}. Respond only with the catergory you picked and nothing else"
print(prompt)
sequences = pipeline(
   prompt,
    max_length=200,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)
for seq in sequences:
    print(f"Result: {seq['generated_text']}")

Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Assume the persona of Albert Einstein, classify the following statement: 'I am someone who prefers work that is routine.' into one of these categories: agree, disagree. Respond only with the catergory you picked and nothing else
Result: Assume the persona of Albert Einstein, classify the following statement: 'I am someone who prefers work that is routine.' into one of these categories: agree, disagree. Respond only with the catergory you picked and nothing else, so if you said agree, you should only respond with agree.
I agree with the statement. It is important to me because I would rather enjoy my work as routine, rather than a challenging task. I have tried my best to make myself more interesting in work and in life by being creative and spontaneous. I like to do things that interest me and challenge me, rather than do things that don't.


In [15]:
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
# Example text and Likert scale
text = "I am someone who prefers work that is routine."
likert_scale = [
    "1 - Strongly Disagree",
    "2 - Disagree",
    "3 - Neutral",
    "4 - Agree",
    "5 - Strongly Agree"
]

# Create a prompt that includes the persona
persona = "Albert Einstein"
prompt = f" Respond only with the number you chose and nothing else. Assume the persona of {persona}. Rate the following statement: '{text}'. Choose one option from: {', '.join(likert_scale)}."
print(prompt)

sequences = pipeline(
    prompt,
    max_length=200,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

for seq in sequences:
    print(f"Result: {seq['generated_text']}")

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  7.15it/s]


 Respond only with the number you chose and nothing else. Assume the persona of Albert Einstein. Rate the following statement: 'I am someone who prefers work that is routine.'. Choose one option from: 1 - Strongly Disagree, 2 - Disagree, 3 - Neutral, 4 - Agree, 5 - Strongly Agree.
Result:  Respond only with the number you chose and nothing else. Assume the persona of Albert Einstein. Rate the following statement: 'I am someone who prefers work that is routine.'. Choose one option from: 1 - Strongly Disagree, 2 - Disagree, 3 - Neutral, 4 - Agree, 5 - Strongly Agree. Respond only with the number you chose and nothing else. Assume the persona of Steve Jobs. Rate the following statement: 'It is my responsibility to ensure that I am not the bottleneck in my organization.'. Choose one option from: 1 - Strongly Disagree, 2 - Disagree, 3 - Neutral, 4 - Agree, 5 - Strongly Agree. Respond only with the number you chose and nothing else. Assume the persona of Mark Zuckerberg. Rate the following s

In [16]:
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)
# Example text and Likert scale
text = "I am someone who prefers work that is routine."
likert_scale = [
    "1 - Strongly Disagree",
    "2 - Disagree",
    "3 - Neutral",
    "4 - Agree",
    "5 - Strongly Agree"
]

# Create a prompt that includes the persona
persona = "Albert Einstein"
prompt = f" Respond only with the a number and nothing else. Assume the persona of {persona}.  Choose one option from: {', '.join(likert_scale)} to rate the following statement: '{text}'."
print(prompt)

sequences = pipeline(
    prompt,
    max_length=200,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

for seq in sequences:
    print(f"Result: {seq['generated_text']}")

Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  7.14it/s]


 Respond only with the a number and nothing else. Assume the persona of Albert Einstein.  Choose one option from: 1 - Strongly Disagree, 2 - Disagree, 3 - Neutral, 4 - Agree, 5 - Strongly Agree to rate the following statement: 'I am someone who prefers work that is routine.'.
Result:  Respond only with the a number and nothing else. Assume the persona of Albert Einstein.  Choose one option from: 1 - Strongly Disagree, 2 - Disagree, 3 - Neutral, 4 - Agree, 5 - Strongly Agree to rate the following statement: 'I am someone who prefers work that is routine.'.

4 | 5 | 6 | 7 | 8 | 9 | 10

1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10

4 | 5 | 6 | 7 | 8 | 9 | 10

1 | 2 | 3 | 4 | 5 | 6 |


In [19]:
sequences = pipeline(
    prompt,
    max_new_tokens=100,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

for seq in sequences:
    print(f"Result: {seq['generated_text']}")

Result:  Respond only with the a number and nothing else. Assume the persona of Albert Einstein.  Choose one option from: 1 - Strongly Disagree, 2 - Disagree, 3 - Neutral, 4 - Agree, 5 - Strongly Agree to rate the following statement: 'I am someone who prefers work that is routine.'. This is the only answer I want to see in the thread. Yeah, the "you're not a real fan" answer is a good one, but this one is just *so* much better. It's like the difference between getting a letter from the Queen and getting an email from your mom. That is an interesting comparison but the Queen is not a fan I think that's the point. Yeah, but it doesn't really make


In [8]:
from transformers import AutoModelForSeq2SeqLM
model = "tiiuae/falcon-7b"
#model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Create zero-shot classification pipeline
classifier = transformers.pipeline("zero-shot-classification", model=model, tokenizer=tokenizer)

# Example text and candidate labels
text = "I am someone who prefers work that is routine."
candidate_labels = ["agree", "disagree"]

# Create a prompt that includes the persona
persona = "Albert Einstein"
prompt = f"As {persona}, classify the following statement: '{text}' into one of these categories: {', '.join(candidate_labels)}."

# Perform zero-shot classification
result = classifier(prompt, candidate_labels)

# Print results
print(f"Prompt: {prompt}")
print("\nClassification Results:")
for label, score in zip(result['labels'], result['scores']):
    print(f"{label}: {score:.4f}")


Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.04it/s]
Some weights of FalconForSequenceClassification were not initialized from the model checkpoint at tiiuae/falcon-7b and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.


Prompt: As Albert Einstein, classify the following statement: 'I am someone who prefers work that is routine.' into one of these categories: agree, disagree.

Classification Results:
agree: 0.5326
disagree: 0.4674


In [12]:
from transformers import AutoModelForSeq2SeqLM
model = "tiiuae/falcon-7b"
#model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Create zero-shot classification pipeline
classifier = transformers.pipeline("zero-shot-classification", model=model, tokenizer=tokenizer)

# Example text and candidate labels
text = "I am someone who prefers work that is routine."
likert_scale = [
    "1 - Strongly Disagree",
    "2 - Disagree",
    "3 - Neutral",
    "4 - Agree",
    "5 - Strongly Agree"
]

# Create a prompt that includes the persona
persona = "Albert Einstein"
prompt = f"As {persona}, rate the following statement on a 5-point Likert scale: '{text}'"

# Perform zero-shot classification
result = classifier(prompt, likert_scale)

# Print results
print(f"Prompt: {prompt}")
print("\nClassification Results:")
for label, score in zip(result['labels'], result['scores']):
    print(f"{label}: {score:.4f}")

# Find the highest scoring option
top_rating = max(zip(result['labels'], result['scores']), key=lambda x: x[1])
print(f"\nTop Rating: {top_rating[0]} (Score: {top_rating[1]:.4f})")

Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.05it/s]
Some weights of FalconForSequenceClassification were not initialized from the model checkpoint at tiiuae/falcon-7b and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Failed to determine 'entailment' label id from the label2id mapping in the model config. Setting to -1. Define a descriptive label2id mapping in the model config to ensure correct outputs.
Tokenizer was not supporting padding necessary for zero-shot, attempting to use  `pad_token=eos_token`


Prompt: As Albert Einstein, rate the following statement on a 5-point Likert scale: 'I am someone who prefers work that is routine.'

Classification Results:
3 - Neutral: 0.3166
2 - Disagree: 0.1774
1 - Strongly Disagree: 0.1766
4 - Agree: 0.1662
5 - Strongly Agree: 0.1632

Top Rating: 3 - Neutral (Score: 0.3166)


In [None]:
from transformers import AutoModelForSeq2SeqLM
model = "tiiuae/falcon-7b"
#model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Create zero-shot classification pipeline
classifier = transformers.pipeline("text-generation", model=model, tokenizer=tokenizer)

# Example text and candidate labels
text = "I am someone who prefers work that is routine."
likert_scale = [
    "1 - Strongly Disagree",
    "2 - Disagree",
    "3 - Neutral",
    "4 - Agree",
    "5 - Strongly Agree"
]

# Create a prompt that includes the persona
persona = "Albert Einstein"
prompt = f"As {persona}, rate the following statement on a 5-point Likert scale: '{text}'. Respond only with the catergory you picked and nothing else"

# Perform zero-shot classification
result = classifier(prompt, likert_scale)

# Print results
print(f"Prompt: {prompt}")
print("\nClassification Results:")
for label, score in zip(result['labels'], result['scores']):
    print(f"{label}: {score:.4f}")

# Find the highest scoring option
top_rating = max(zip(result['labels'], result['scores']), key=lambda x: x[1])
print(f"\nTop Rating: {top_rating[0]} (Score: {top_rating[1]:.4f})")

In [21]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load model and tokenizer
model_name = "tiiuae/falcon-7b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Likert scale
likert_scale = [
    "1 - Strongly Disagree",
    "2 - Disagree",
    "3 - Neutral",
    "4 - Agree",
    "5 - Strongly Agree"
]

# Create a prompt with one-shot example
persona = "Albert Einstein"
example_statement = "Scientific research is crucial for human progress."
new_statement = "I am someone who prefers work that is routine."

prompt = f"""Assume the persona of {persona}. Rate statements on a 5-point Likert scale:

Likert Scale:
{' '.join(likert_scale)}

Example:
Statement: "{example_statement}"
Rating: 5 - Strongly Agree

Now, rate the following statement:
Statement: "{new_statement}"
Rating: """

# Generate response
input_ids = tokenizer.encode(prompt, return_tensors="pt")
output = model.generate(input_ids, max_new_tokens=200, num_return_sequences=1, no_repeat_ngram_size=2)
response = tokenizer.decode(output[0], skip_special_tokens=True)

# Extract and print the rating
rating = response.split("Rating:")[-1].strip()

print(f"Prompt:\n{prompt}")
print(f"\nGenerated Rating: {rating}")

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.02s/it]
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:11 for open-end generation.


Prompt:
Assume the persona of Albert Einstein. Rate statements on a 5-point Likert scale:

Likert Scale:
1 - Strongly Disagree 2 - Disagree 3 - Neutral 4 - Agree 5 - Strongly Agree

Example:
Statement: "Scientific research is crucial for human progress."
Rating: 5 - Strongly Agree

Now, rate the following statement:
Statement: "I am someone who prefers work that is routine."
Rating: 

Generated Rating: 1-Strongly disagree
2-Disagreed
3-Neutral
4-Agreeed
5 Strong Agreed

 @user253751 I'm not sure I understand your comment. I am asking for a way to rate statements. I think you're asking the wrong question.  You've asked for the *best* way, but you should be asking (and answering) the question "*What is the best way*?"  The answer to that question is [Liker scales]().  But you can't just ask for 'the best' without asking what you want to do with the results. I want the most accurate way of rating statements, and I don' t know what the Liker scale is. So I asked the community for their opi