# **Prompting for NLI & Multiple-choice QA**


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import pandas as pd
import numpy as np

In [None]:
# Select the compute device once: prefer CUDA, then Apple-silicon MPS, else CPU.
# The conditional expression short-circuits in the same order as an if/elif chain.
device = torch.device(
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Device: {device}")

Device: cpu


In [None]:
# Half precision is only a safe default on an accelerator: on CPU many PyTorch
# builds lack (or have very slow) float16 kernels, so fall back to float32 there.
model_dtype = torch.float32 if device.type == "cpu" else torch.float16


def load_causal_lm(checkpoint, device, dtype):
    """Load a tokenizer/causal-LM pair from the Hugging Face Hub.

    Args:
        checkpoint: Hub model identifier, e.g. "EleutherAI/Pythia-410m".
        device: torch.device the model weights are moved to.
        dtype: torch dtype the weights are loaded in.

    Returns:
        A `(tokenizer, model)` tuple; the model already lives on `device`.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    loaded_model = AutoModelForCausalLM.from_pretrained(
        checkpoint,
        torch_dtype=dtype,
    ).to(device)
    return loaded_tokenizer, loaded_model


#Pythia-410m
tokenizer, model = load_causal_lm("EleutherAI/Pythia-410m", device, model_dtype)

#Pythia-1.4b
tokenizer2, model2 = load_causal_lm("EleutherAI/Pythia-1.4b", device, model_dtype)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/396 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/911M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/396 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.93G [00:00<?, ?B/s]

In [None]:
from transformers import pipeline
# few shot prompting

# Few-shot demonstrations, one per NLI label. Each is rendered in exactly the
# layout used for the query (Premise / Hypothesis / Label on separate lines);
# a demonstration format that differs from the query format makes the model
# continue with a new "Input:"/"Premise:" line instead of a label.
# The "neutral" demonstration uses a hypothesis that is neither implied nor
# ruled out by its premise (the earlier hypothesis was in fact entailed).
few_shot_examples = [
    ("A soccer game with multiple males playing.", "Some men are playing a sport.", "entailment"),
    ("A black race car starts up in front of a crowd of people.", "A man is driving a toy car.", "contradiction"),
    ("A group of men are playing a sport.", "The men are playing in a championship final.", "neutral"),
]

few_shot_prompt = "".join(
    f"Premise: {demo_premise}\nHypothesis: {demo_hypothesis}\nLabel: {demo_label}\n\n"
    for demo_premise, demo_hypothesis, demo_label in few_shot_examples
)

# Held-out evaluation pairs: (premise, hypothesis, gold label).
test_cases = [
    ("A person on a horse jumps over a broken down airplane.", "A person is training his horse for a competition.", "neutral"),
    ("A person on a horse jumps over a broken down airplane.", "A person is outdoors, on a horse.", "entailment"),
    ("Children smiling and waving at camera.", "There are children present.", "entailment"),
    ("A boy is jumping on skateboard in the middle of a red bridge.", "The boy skates down the sidewalk.", "contradiction"),
    ("An older man sits with his orange juice at a small table in a coffee shop while employees in bright colored shirts smile in the background.", "An older man drinks his juice as he waits for his daughter to get off work.", "neutral"),
    ("High fashion ladies wait outside a tram beside a crowd of people in the city.", "The women do not care what clothes they wear.", "contradiction")
]


# Few-shot NLI prediction helper
def get_prediction(premise, hypothesis,model, tokenizer):
    """Predict an NLI label for one premise/hypothesis pair via few-shot prompting.

    The query is appended to the notebook-level `few_shot_prompt` and the first
    word the model generates after the query's "Label:" is returned.

    Args:
        premise: Premise sentence.
        hypothesis: Hypothesis sentence.
        model: Causal language model handed to the text-generation pipeline.
        tokenizer: Tokenizer matching `model`.

    Returns:
        The first generated word with surrounding punctuation removed, or an
        empty string when the model generates nothing but whitespace.
    """
    # No trailing space after "Label:": the demonstrations continue with a
    # space-prefixed label, and a dangling space would change the tokenization.
    input_text = f"Premise: {premise}\nHypothesis: {hypothesis}\nLabel:"
    full_prompt = few_shot_prompt + input_text

    # Define the pipeline with the specific model and tokenizer
    text_generation_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    # Generate only a handful of new tokens (a label is one word), decode
    # greedily so repeated runs give the same accuracy, and ask for the
    # continuation alone so later hallucinated "Label:" lines cannot be
    # mistaken for the answer.
    generated_text = text_generation_pipeline(
        full_prompt,
        max_new_tokens=8,
        do_sample=False,
        return_full_text=False,
        pad_token_id=getattr(tokenizer, "eos_token_id", None),
    )

    continuation = generated_text[0]['generated_text']
    # Defensive: drop the prompt if the pipeline echoed it back anyway.
    if continuation.startswith(full_prompt):
        continuation = continuation[len(full_prompt):]

    # An empty continuation used to raise IndexError; report "no prediction".
    words = continuation.split()
    if not words:
        return ""
    return words[0].strip(".,;:!?\"'")

def evaluate_nli(model, tokenizer, cases):
    """Run `get_prediction` over labelled NLI cases and return the accuracy.

    Each case is printed together with the model's prediction. Labels are
    compared case-insensitively and ignoring surrounding whitespace.

    Args:
        model: Causal language model passed through to `get_prediction`.
        tokenizer: Tokenizer matching `model`.
        cases: Sequence of `(premise, hypothesis, gold_label)` tuples.

    Returns:
        Fraction of cases predicted correctly, or 0.0 when `cases` is empty.
    """
    correct = 0
    for premise, hypothesis, gold_label in cases:
        prediction = get_prediction(premise, hypothesis, model, tokenizer)
        print(f"Premise: {premise}\nHypothesis: {hypothesis}\nPrediction: {prediction}\nGold Label: {gold_label}\n")
        if prediction.strip().lower() == gold_label.strip().lower():
            correct += 1
    # Guard the division so an empty evaluation set does not raise.
    return correct / len(cases) if cases else 0.0


# Evaluate both models on the same test cases
accuracy = evaluate_nli(model, tokenizer, test_cases)
print(f"Accuracy:Pythia-410m  {accuracy * 100:.2f}%")

accuracy = evaluate_nli(model2, tokenizer2, test_cases)
print(f"Accuracy:Pythia-1.4b {accuracy * 100:.2f}%")

Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Premise: A person on a horse jumps over a broken down airplane.
Hypothesis: A person is training his horse for a competition.
Prediction: Input:
Gold Label: neutral



Setting `pad_token_id` to `eos_token_id`:0 for open-end generation.


Premise: A person on a horse jumps over a broken down airplane.
Hypothesis: A person is outdoors, on a horse.
Prediction: Premise:
Gold Label: entailment



IndexError: list index out of range

In [None]:
# Multiple-choice QA items: (question, answer options, correct answer).
examples = [
    (
        "The only baggage the woman checked was a drawstring bag, where was she heading with it?",
        ["garbage can", "military", "jewelry store", "safe", "airport"],
        "airport",
    ),
    (
        "To prevent any glare during the big football game he made sure to clean the dust of his what?",
        [
            "television",
            "attic",
            "corner",
            "they cannot clean corner and library during football match they cannot need that",
            "ground",
        ],
        "television",
    ),
    (
        "The president is the leader of what institution?",
        ["walmart", "white house", "country", "corporation", "government"],
        "country",
    ),
    (
        "What kind of driving leads to accidents?",
        ["stressful", "dangerous", "fun", "illegal", "deadly"],
        "dangerous",
    ),
    (
        "Can you name a good reason for attending school?",
        ["get smart", "boredom", "colds and flu", "taking tests", "spend time"],
        "get smart",
    ),
    (
        "Stanley had a dream that was very vivid and scary. He had trouble telling it from what?",
        ["imagination", "reality", "dreamworker", "nightmare", "awake"],
        "reality",
    ),
]

generator = pipeline("text-generation", model=model, tokenizer=tokenizer,max_length=100)


def answer_log_prob(context, answer, model, tokenizer):
    """Return log P(answer | context) under `model`, summed over the answer tokens.

    Context and answer are tokenized separately and concatenated so the
    boundary between them is known exactly.

    Args:
        context: Text the answer is conditioned on.
        answer: Candidate answer; scored as the continuation " " + answer.
        model: Causal language model exposing `.logits` and `.device`.
        tokenizer: Tokenizer matching `model`.

    Returns:
        The summed token log-probability as a float, or -inf when either the
        context or the answer tokenizes to nothing.
    """
    context_ids = tokenizer(context, add_special_tokens=False)["input_ids"]
    answer_ids = tokenizer(" " + answer, add_special_tokens=False)["input_ids"]
    if not context_ids or not answer_ids:
        return float("-inf")

    input_ids = torch.tensor([context_ids + answer_ids], device=model.device)
    with torch.no_grad():
        logits = model(input_ids).logits

    # Position i predicts token i + 1; upcast so half-precision logits do not
    # lose accuracy inside the softmax.
    log_probs = torch.log_softmax(logits[0, :-1].float(), dim=-1)
    targets = input_ids[0, 1:]
    token_log_probs = log_probs.gather(1, targets.unsqueeze(1)).squeeze(1)
    # The trailing len(answer_ids) positions are exactly the answer tokens.
    return token_log_probs[-len(answer_ids):].sum().item()


num_correct = 0

# Iterate through each question and answer
for question, answers, correct_answer in examples:
    # Generate a knowledge statement from the QUESTION only. Conditioning on the
    # gold answer would leak the label into the context used for scoring.
    # A dedicated name is used so the NLI `few_shot_prompt` is not overwritten.
    knowledge_prompt = f"{question} We know that"
    knowledge_statements = generator(
        knowledge_prompt,
        max_length=100,
        num_return_sequences=1,
        do_sample=False,           # deterministic, reproducible output
        return_full_text=False,    # continuation only, without the prompt
        pad_token_id=tokenizer.eos_token_id,
    )

    # Keep the first generated line as the knowledge statement.
    knowledge = knowledge_statements[0]['generated_text'].strip().split("\n")[0]
    print("Generated knowledge: ", knowledge)

    # Score each answer by its real log-probability given knowledge + question
    context_prompt = f"{knowledge} {question}".strip()
    answer_log_probs = []

    for a in answers:
        log_p = answer_log_prob(context_prompt, a, model, tokenizer)
        answer_log_probs.append(log_p)
        print(f"Answer: {a}, Log Probability: {log_p}")

    # Retrieve the answer option with the highest score
    max_prob_idx = int(np.argmax(answer_log_probs))
    selected_answer = answers[max_prob_idx]
    num_correct += int(selected_answer == correct_answer)
    print(f"Question: {question}")
    print(f"Selected Answer: {selected_answer}, Correct Answer: {correct_answer}\n")

if examples:
    print(f"Accuracy: {num_correct / len(examples) * 100:.2f}%")