In [None]:
!pip install transformers
!pip install torch
!pip install datasets
!pip install transformers accelerate bitsandbytes
!pip install nltk
!pip install tqdm

import json
import os
import random
import re
import statistics
from collections import defaultdict, Counter
from getpass import getpass

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import transformers
from datasets import Dataset, DatasetDict, load_dataset
from huggingface_hub import login
from nltk.tokenize import word_tokenize, sent_tokenize
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, BertTokenizer, BertModel, AutoTokenizer, AutoModel

In [None]:
# Hugging Face credentials. The token is read from the HF_TOKEN environment
# variable, falling back to an interactive prompt, so that no secret is ever
# stored in (or committed with) the notebook.
HF_L = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token=HF_L)

# Name of the model this run loads and queries.
# Supported values: 'llama', 'falcon', 'gemma', 'mistral'.
model = 'mistral'

# Only the selected model (and its tokenizer) is loaded; the globals for the
# other models stay undefined in this kernel.
if model == 'llama':
    llama_model_name = "meta-llama/Llama-2-7b-chat-hf"
    # `token=` is the current spelling of the deprecated `use_auth_token=`.
    llama_tokenizer = AutoTokenizer.from_pretrained(llama_model_name, token=HF_L)
    llama_model = AutoModelForCausalLM.from_pretrained(
        llama_model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        token=HF_L
    )
elif model == 'falcon':
    falcon_model_name = "tiiuae/falcon-7b-instruct"
    falcon_tokenizer = AutoTokenizer.from_pretrained(falcon_model_name)
    falcon_model = AutoModelForCausalLM.from_pretrained(
        falcon_model_name,
        torch_dtype=torch.bfloat16,
        device_map="auto"
    )
elif model == 'gemma':
    gemma_tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b-it")
    # Weights are pulled from the repository's "float16" revision.
    gemma_model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-7b-it",
        device_map="auto",
        revision="float16")
elif model == 'mistral':
    device = "cuda"
    # Loaded without an explicit dtype or device_map; request_mistral moves the
    # model to the selected device itself.
    mistral_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
    mistral_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
else:
    # Fail here rather than later with a NameError on a model that was never loaded.
    raise ValueError(
        f"Unsupported model {model!r}; expected one of 'llama', 'falcon', 'gemma', 'mistral'"
    )

In [3]:
def request_llama(prompt, max_new_tokens=500, temperature=1, top_k=50, top_p=0.9, seed=42):
    """Generate a completion for ``prompt`` with the loaded Llama model.

    Uses the module-level ``llama_tokenizer`` and ``llama_model``.

    Args:
        prompt: Text sent to the model as-is (no chat template is applied).
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_k: Top-k sampling cutoff.
        top_p: Nucleus-sampling cutoff.
        seed: Value passed to ``torch.manual_seed`` before generating.

    Returns:
        The decoded text of the newly generated tokens only, with special
        tokens removed and surrounding whitespace stripped.
    """
    torch.manual_seed(seed)
    # Follow the model's device instead of assuming "cuda".
    inputs = llama_tokenizer(prompt, return_tensors="pt").to(llama_model.device)
    prompt_length = inputs["input_ids"].shape[1]
    outputs = llama_model.generate(
        **inputs,  # input_ids plus attention_mask
        max_new_tokens=max_new_tokens,
        # Request sampling explicitly (as the sibling helpers do) so temperature /
        # top_k / top_p do not depend on the checkpoint's default generation config.
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        pad_token_id=llama_tokenizer.eos_token_id,
    )
    # Drop the prompt at token level: slicing the decoded string by len(prompt)
    # is wrong whenever decoding does not reproduce the prompt character-for-character.
    generated_tokens = outputs[0][prompt_length:]
    return llama_tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

def request_falcon(prompt, max_new_tokens=500, temperature=1, top_k=50, top_p=0.9, seed=42):
    """Sample a completion for ``prompt`` from the loaded Falcon model.

    Relies on the module-level ``falcon_tokenizer`` and ``falcon_model``.

    Args:
        prompt: Text sent to the model as-is.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_k: Top-k sampling cutoff.
        top_p: Nucleus-sampling cutoff.
        seed: Value passed to ``torch.manual_seed`` before generating.

    Returns:
        The decoded text of the newly generated tokens only, with special
        tokens removed and surrounding whitespace stripped.
    """
    torch.manual_seed(seed)

    encoded = falcon_tokenizer(prompt, return_tensors="pt").to(falcon_model.device)
    prompt_token_count = encoded["input_ids"].shape[1]

    # The end-of-sequence id doubles as the padding id.
    sampling_options = dict(
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        eos_token_id=falcon_tokenizer.eos_token_id,
        pad_token_id=falcon_tokenizer.eos_token_id,
    )
    sequences = falcon_model.generate(**encoded, **sampling_options)

    # Keep only what the model appended after the prompt tokens.
    completion_ids = sequences[0][prompt_token_count:]
    completion_text = falcon_tokenizer.decode(completion_ids, skip_special_tokens=True)
    return completion_text.strip()

def request_gemma(prompt, max_tokens=500, temperature=1.0, top_k=50, top_p=0.9, seed=42):
    """Sample a completion for ``prompt`` from the loaded Gemma model.

    Relies on the module-level ``gemma_tokenizer`` and ``gemma_model``. The
    inputs are sent to whatever device the model already lives on; the model
    itself is not moved.

    Args:
        prompt: Text sent to the model as-is (no chat template is applied).
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_k: Top-k sampling cutoff.
        top_p: Nucleus-sampling cutoff.
        seed: Seed applied to torch (and to all CUDA devices when available).

    Returns:
        The decoded text of the newly generated tokens only, with special
        tokens removed and surrounding whitespace stripped.
    """
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # Send the inputs to the model's device rather than calling
    # `gemma_model.to(...)` on every request.
    inputs = gemma_tokenizer(prompt, return_tensors="pt").to(gemma_model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = gemma_model.generate(
            **inputs,  # input_ids plus attention_mask
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            do_sample=True
        )

    # Drop the prompt at token level: slicing the decoded string by len(prompt)
    # is wrong whenever decoding does not reproduce the prompt character-for-character.
    generated_tokens = outputs[0][prompt_length:]
    return gemma_tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

def request_mistral(prompt, max_tokens=500, temperature=1.0, top_k=50, top_p=0.9, seed=42):
    """Sample a completion for ``prompt`` from the loaded Mistral model.

    Relies on the module-level ``mistral_tokenizer`` and ``mistral_model``.
    The prompt is wrapped as a single user message and rendered through the
    tokenizer's chat template.

    Args:
        prompt: Content of the user message.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_k: Top-k sampling cutoff.
        top_p: Nucleus-sampling cutoff.
        seed: Seed applied to torch (and to all CUDA devices when available).

    Returns:
        The decoded text of the newly generated tokens only, with special
        tokens removed and surrounding whitespace stripped.
    """
    messages = [
        {"role": "user", "content": prompt}
    ]
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    mistral_model.to(device)

    # return_dict=True yields input_ids *and* attention_mask, so generate() does
    # not have to guess the mask.
    model_inputs = mistral_tokenizer.apply_chat_template(
        messages, return_tensors="pt", return_dict=True
    ).to(device)
    prompt_length = model_inputs["input_ids"].shape[1]

    with torch.no_grad():
        generated_ids = mistral_model.generate(
            **model_inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            do_sample=True,
            pad_token_id=mistral_tokenizer.eos_token_id,
        )

    # Drop the prompt at token level. Stripping "[INST]" / "[/INST]" from the
    # decoded text and slicing by len(prompt) also removed those markers from the
    # answer itself and broke whenever the template altered the prompt text.
    completion_ids = generated_ids[0][prompt_length:]
    return mistral_tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

In [4]:
# Prompt templates, filled positionally with str.format.

# Answer generation: {0} = question, {1} = snippet.
# Used in the generation loop to answer the regenerated question from the snippet.
ANSWER_GENERATION_PROMPT = '''Generate an answer to the below question based on the provided snippet.

question: "{0}"
snippet: "{1}"
'''

# Question generation: {0} = snippet, {1} = answer.
# Default template for reconstructing the question that leads to a given answer.
QUESTION_GENERATION_PROMPT = '''Your task is to generate a clear and concise question based on the provided snippet and answer. Ensure that the generated question directly corresponds to the snippet's content and leads to the given answer.

Here is the input:
Snippet: "{0}"
Answer: "{1}"

Generate the most appropriate question:'''

# Question generation variant used when the selected model is 'gemma':
# {0} = snippet, {1} = answer. It explicitly asks for the question only.
QUESTION_GENERATION_PROMPT_GEMMA = '''Generate the question which was asked regarding the below snippet and provided answer. Ensure that the generated question directly corresponds to the snippet's content and leads to the given answer.

snippet: "{0}"
answer: "{1}"

The output should contain only the question (don't output the answer to the question).
'''

In [None]:
# Round-trip generation with the selected model. For every record:
#   1. regenerate a question from (snippet, answerGenerated);
#   2. answer that regenerated question from the snippet.
# Both outputs are stored in per-model columns and the frame is pushed back to the Hub.
dataset = load_dataset("Ramitha/unique-records-snippet-combination")
df = pd.DataFrame(dataset['rawcases'])

# One dispatch table instead of two copies of the same if/elif chain.
request_functions = {
    'llama': request_llama,
    'falcon': request_falcon,
    'gemma': request_gemma,
    'mistral': request_mistral,
}
if model not in request_functions:
    # Previously an unknown name only surfaced as a NameError on `response`.
    raise ValueError(
        f"Unsupported model {model!r}; expected one of {sorted(request_functions)}"
    )
request_fn = request_functions[model]

# Gemma gets its own question template; every other model shares the default one.
question_template = (
    QUESTION_GENERATION_PROMPT_GEMMA if model == 'gemma' else QUESTION_GENERATION_PROMPT
)

# static temperature for judges
temp = 0.7

question_column = 'question_answerGenerated_' + model
reverse_answer_column = 'reverse_answer_answerGenerated_' + model

for index, row in tqdm(df.iterrows(), total=len(df)):
    # Skip records whose `model` column equals the selected model
    # (presumably so a model never judges its own output).
    if row['model'] == model:
        continue

    answer = row['answerGenerated']
    snippet = row['snippet']

    # Step 1: reconstruct the question from the snippet and the generated answer.
    prompt = question_template.format(snippet, answer)
    response = request_fn(prompt, temperature=temp)
    df.at[index, question_column] = response

    # Step 2: answer the reconstructed question using the same snippet.
    prompt = ANSWER_GENERATION_PROMPT.format(response, snippet)
    response_ = request_fn(prompt, temperature=temp)
    df.at[index, reverse_answer_column] = response_

# Publish the augmented frame; the pushed DatasetDict contains only the 'rawcases' split.
hf_dataset = DatasetDict({
    'rawcases': Dataset.from_pandas(df)
})
hf_dataset.push_to_hub("Ramitha/unique-records-snippet-combination")