In [None]:
!pip install transformers
!pip install torch
!pip install datasets
!pip install transformers accelerate bitsandbytes
!pip install nltk
!pip install tqdm

import json
import os
import random
import re
import statistics
from collections import defaultdict, Counter

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import transformers
from datasets import Dataset, DatasetDict, load_dataset
from huggingface_hub import login
from nltk.tokenize import word_tokenize, sent_tokenize
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, BertTokenizer, BertModel, AutoTokenizer, AutoModel

In [None]:
# Read the Hugging Face access token from the environment rather than
# hardcoding it in the notebook. When HF_TOKEN is unset, `login` receives
# None and falls back to its interactive prompt.
HF_L = os.environ.get("HF_TOKEN")
login(token=HF_L)

# Model family evaluated by this run; the sweep cells further down dispatch
# on this value to pick the matching request_* helper.
model = 'llama'

# Load only the selected model. Exactly one branch runs; an unrecognised
# name fails here instead of surfacing later as an undefined variable.
if model == 'llama':
    llama_model_name = "meta-llama/Llama-2-7b-chat-hf"
    # `token=` replaces the deprecated `use_auth_token=` keyword.
    llama_tokenizer = AutoTokenizer.from_pretrained(llama_model_name, token=HF_L)
    llama_model = AutoModelForCausalLM.from_pretrained(
        llama_model_name,
        torch_dtype=torch.float16,
        device_map="auto",
        token=HF_L
    )
elif model == 'falcon':
    falcon_model = "tiiuae/falcon-7b-instruct"
    falcon_tokenizer = AutoTokenizer.from_pretrained(falcon_model)
    falcon_pipeline = transformers.pipeline(
        "text-generation",
        model=falcon_model,
        tokenizer=falcon_tokenizer,
        torch_dtype=torch.bfloat16,
        trust_remote_code=True,
        device_map="auto",
    )
elif model == 'gemma':
    gemma_tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b-it")
    gemma_model = AutoModelForCausalLM.from_pretrained(
        "google/gemma-7b-it",
        device_map="auto",
        revision="float16")
elif model == 'mistral':
    device = "cuda"
    # No device_map here: request_mistral moves the model to the device itself.
    mistral_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
    mistral_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
else:
    raise ValueError(
        f"Unknown model {model!r}; expected one of 'llama', 'falcon', 'gemma', 'mistral'."
    )

In [None]:
def request_llama(prompt, max_new_tokens=500, temperature=1, top_k=50, top_p=0.9, seed=42):
    """Generate a sampled completion for ``prompt`` with the loaded Llama model.

    Args:
        prompt: Text fed to the model as-is.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_k: Top-k sampling cutoff.
        top_p: Nucleus sampling cutoff.
        seed: Seed passed to ``torch.manual_seed`` before generating.

    Returns:
        The newly generated text only (prompt excluded), whitespace-stripped.
    """
    torch.manual_seed(seed)
    # Follow the model's own placement (it is loaded with device_map="auto")
    # instead of assuming a literal "cuda" device.
    inputs = llama_tokenizer(prompt, return_tensors="pt").to(llama_model.device)
    with torch.no_grad():
        outputs = llama_model.generate(
            **inputs,  # input_ids together with the attention mask
            max_new_tokens=max_new_tokens,
            # Request sampling explicitly, as the sibling request_* helpers do,
            # so temperature/top_k/top_p never depend on checkpoint defaults.
            do_sample=True,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p
        )
    # Drop the prompt by token count; slicing the decoded string by
    # len(prompt) breaks whenever decoding does not reproduce the prompt exactly.
    prompt_len = inputs["input_ids"].shape[1]
    response = llama_tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True).strip()
    return response

def request_falcon(prompt, max_new_tokens=1000, temperature=1, top_k=50, top_p=0.9, seed=42):
    """Generate a sampled completion for ``prompt`` with the Falcon pipeline.

    Args:
        prompt: Text fed to the pipeline as-is.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_k: Top-k sampling cutoff.
        top_p: Nucleus sampling cutoff.
        seed: Seed passed to ``torch.manual_seed`` before generating.

    Returns:
        The newly generated text only (prompt excluded), whitespace-stripped.
    """
    torch.manual_seed(seed)
    sequences = falcon_pipeline(
        prompt,
        # Previously forwarded as `max_length`, which also counts the prompt
        # tokens and so left long prompts with little or no room to generate.
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        num_return_sequences=1,
        eos_token_id=falcon_tokenizer.eos_token_id,
        # Let the pipeline strip the prompt instead of slicing by len(prompt).
        return_full_text=False,
    )
    generated_text = sequences[0]["generated_text"].strip()
    return generated_text

def request_gemma(prompt, max_tokens=500, temperature=1.0, top_k=50, top_p=0.9, seed=42):
    """Generate a sampled completion for ``prompt`` with the loaded Gemma model.

    Args:
        prompt: Text fed to the model as-is.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_k: Top-k sampling cutoff.
        top_p: Nucleus sampling cutoff.
        seed: Seed applied to the torch CPU and CUDA generators.

    Returns:
        The newly generated text only (prompt excluded), whitespace-stripped.
    """
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    # The model is loaded with device_map="auto", so it is already placed;
    # calling .to() on it each request is unnecessary and can fail for
    # dispatched models. Send the inputs to wherever the model lives instead.
    inputs = gemma_tokenizer(prompt, return_tensors="pt").to(gemma_model.device)

    with torch.no_grad():
        outputs = gemma_model.generate(
            **inputs,  # input_ids together with the attention mask
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            do_sample=True
        )
    # Drop the prompt by token count rather than by character length of the
    # decoded text, which is only correct if decoding round-trips exactly.
    prompt_len = inputs["input_ids"].shape[1]
    generated_text = gemma_tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    return generated_text.strip()

def request_mistral(prompt, max_tokens=500, temperature=1.0, top_k=50, top_p=0.9, seed=42):
    """Generate a sampled completion for ``prompt`` with the loaded Mistral model.

    The prompt is wrapped as a single user message and rendered through the
    tokenizer's chat template before generation.

    Args:
        prompt: Content of the user message.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_k: Top-k sampling cutoff.
        top_p: Nucleus sampling cutoff.
        seed: Seed applied to the torch CPU and CUDA generators.

    Returns:
        The newly generated text only (prompt excluded), whitespace-stripped.
    """
    messages = [
        {"role": "user", "content": prompt}
    ]
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # The model is loaded without a device_map, so it must be moved here;
    # the call is a no-op once the model already sits on `device`.
    mistral_model.to(device)
    encodeds = mistral_tokenizer.apply_chat_template(messages, return_tensors="pt")
    model_inputs = encodeds.to(device)
    with torch.no_grad():
        generated_ids = mistral_model.generate(
            model_inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
            do_sample=True
        )
    # Keep only the tokens produced after the templated prompt. The previous
    # approach removed "[INST] " / "[/INST] " from the decoded text and sliced
    # by len(prompt); that left "[/INST]" in the result for an empty
    # generation and misaligned whenever the prompt contained those markers.
    prompt_len = model_inputs.shape[1]
    decoded = mistral_tokenizer.batch_decode(generated_ids[:, prompt_len:], skip_special_tokens=True)
    return decoded[0].strip()

In [None]:
# Prompt template for answer generation, filled with str.format:
# slot {0} takes the question text and slot {1} the supporting snippet.
ANSWER_GENERATION_PROMPT = (
    'Generate an answer to the below question based on the provided snippet.\n'
    '\n'
    'question: "{0}"\n'
    'snippet: "{1}"\n'
)

In [None]:
# Temperature sweep over the ALQA question set: every row is answered once
# per temperature value and the results are pushed to the Hub.
dataset = load_dataset("Ramitha/alqa-master-40")
df = pd.DataFrame(dataset['rawcases'])
df.info()

# 20 evenly spaced temperatures in (0, 3.0]; index 0 (temperature 0.0) is
# skipped by starting the range at 1.
num_points = 21
step = 3.0 / (num_points - 1)
temperature_values = [round(i * step, 2) for i in range(1, num_points)]
print(temperature_values)

# Resolve the generation helper once, and fail fast on an unknown model name
# instead of hitting an undefined `response` inside the loop.
request_fns = {
    'llama': request_llama,
    'falcon': request_falcon,
    'gemma': request_gemma,
    'mistral': request_mistral,
}
if model not in request_fns:
    raise ValueError(f"Unknown model {model!r}; expected one of {sorted(request_fns)}.")
generate_answer = request_fns[model]

# Collect plain dict records and build the frame once at the end; growing a
# DataFrame with pd.concat on every iteration copies all prior rows each time.
records = []
for temp in tqdm(temperature_values):
    for _, row in df.iterrows():
        question = row['question']
        answer = row['answer']
        snippet = row['snippet']
        prompt = ANSWER_GENERATION_PROMPT.format(question, snippet)
        response = generate_answer(prompt, temperature=temp)

        records.append({
            'question': question,
            'answer': answer,
            'snippet': snippet,
            'answerGenerated': response,
            'temperature': temp,
            'model': model
        })

cumulative_df = pd.DataFrame(records)

hf_dataset = DatasetDict({
    'rawcases': Dataset.from_pandas(cumulative_df)
})
hf_dataset.push_to_hub("Ramitha/alqa-results-40-" + model)


In [None]:
# Display the ALQA sweep results assembled in the previous cell.
cumulative_df

In [None]:
# Temperature sweep over the SL question set: every row is answered once
# per temperature value and the results are pushed to the Hub.
dataset = load_dataset("Ramitha/sl-master-40")
df = pd.DataFrame(dataset['rawcases'])
df.info()

# 20 evenly spaced temperatures in (0, 3.0]; index 0 (temperature 0.0) is
# skipped by starting the range at 1.
num_points = 21
step = 3.0 / (num_points - 1)
temperature_values = [round(i * step, 2) for i in range(1, num_points)]
print(temperature_values)

# Resolve the generation helper once, and fail fast on an unknown model name
# instead of hitting an undefined `response` inside the loop.
request_fns = {
    'llama': request_llama,
    'falcon': request_falcon,
    'gemma': request_gemma,
    'mistral': request_mistral,
}
if model not in request_fns:
    raise ValueError(f"Unknown model {model!r}; expected one of {sorted(request_fns)}.")
generate_answer = request_fns[model]

# Collect plain dict records and build the frame once at the end; growing a
# DataFrame with pd.concat on every iteration copies all prior rows each time.
records = []
for temp in tqdm(temperature_values):
    for _, row in df.iterrows():
        question = row['question']
        answer = row['answer']
        snippet = row['snippet']
        prompt = ANSWER_GENERATION_PROMPT.format(question, snippet)
        response = generate_answer(prompt, temperature=temp)

        records.append({
            'question': question,
            'answer': answer,
            'snippet': snippet,
            'answerGenerated': response,
            'temperature': temp,
            'model': model
        })

cumulative_df = pd.DataFrame(records)

hf_dataset = DatasetDict({
    'rawcases': Dataset.from_pandas(cumulative_df)
})
hf_dataset.push_to_hub("Ramitha/sl-results-40-" + model)

In [None]:
# Display the SL sweep results assembled in the previous cell.
cumulative_df