In [1]:
import json
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Load the JSON file.
# JSON interchange text is UTF-8, so decode it explicitly instead of relying
# on the platform's locale-dependent default encoding.
file_path = "/content/extracted_8B5_golden.json"
with open(file_path, "r", encoding="utf-8") as file:
    # `data` is iterated entry by entry further down; each entry is read via
    # its 'body', 'snippets' and 'exact_answer' keys.
    data = json.load(file)

# Initialize the model and tokenizer.
# Weights are loaded in float16 and device_map="auto" leaves device placement
# to the library; inputs are later moved to `model.device` before generation.
model_id = "nvidia/Llama3-ChatQA-1.5-8B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")

def get_formatted_input(messages, context):
    """Build the plain-text prompt for one conversation.

    The prompt is laid out as: system preamble, blank line, ``context``,
    blank line, the conversation turns (each prefixed with ``"User: "`` or
    ``"Assistant: "`` and separated by blank lines), and a trailing
    ``"Assistant:"`` cue for the model to continue from.

    The answer-format instruction is prepended to the first user turn only.

    Args:
        messages: Sequence of dicts with ``"role"`` and ``"content"`` keys.
            Any role other than ``"user"`` is rendered as an assistant turn.
            The dicts are NOT modified, so the same list can be formatted
            repeatedly without the instruction being stacked.
        context: Text placed between the system preamble and the
            conversation.

    Returns:
        The fully formatted prompt string.
    """
    system = "System: This is a chat between a user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions based on the context. The assistant should also indicate when the answer cannot be found in the context."
    instruction = "Answer this question by returning only a JSON string array of entity names, numbers, or similar short expressions that are an answer to the question, ordered by decreasing confidence. The array should contain at max 5 elements but can contain less. If you don't know any answer return an empty list. Return only this list, it must not contain phrases and **must be valid JSON**."

    turns = []
    instruction_applied = False
    for item in messages:
        content = item["content"]
        if item["role"] == "user":
            # Only the first user turn carries the instruction. Work on a
            # local copy of the text so the caller's message is left intact.
            if not instruction_applied:
                content = instruction + " " + content
                instruction_applied = True
            turns.append("User: " + content)
        else:
            turns.append("Assistant: " + content)

    conversation = "\n\n".join(turns) + "\n\nAssistant:"
    return system + "\n\n" + context + "\n\n" + conversation

# Collect one record per dataset entry: the question, the reference answer
# and the model's raw generated text.
generated_answers_list = []

# Set pad_token_id to eos_token_id to avoid the generation warning.
# The value does not change between entries, so compute it once.
pad_token_id = tokenizer.eos_token_id

# Iterate through the dataset
for entry in data:
    question = entry['body']
    # All snippets of the entry are joined into a single context passage.
    context = "\n".join(entry['snippets'])
    exact_answer = entry['exact_answer']

    # Each entry is a fresh single-turn conversation.
    messages = [
        {"role": "user", "content": question}
    ]

    formatted_input = get_formatted_input(messages, context)
    tokenized_prompt = tokenizer(tokenizer.bos_token + formatted_input, return_tensors="pt").to(model.device)

    # Greedy decoding for deterministic output. `temperature` is deliberately
    # not passed: it only applies when sampling, and combining it with
    # do_sample=False has no effect other than a configuration warning.
    outputs = model.generate(
        input_ids=tokenized_prompt.input_ids,
        attention_mask=tokenized_prompt.attention_mask,
        max_new_tokens=128,
        pad_token_id=pad_token_id,
        do_sample=False,
    )

    # The returned sequence starts with the prompt tokens; keep only the
    # newly generated continuation.
    prompt_length = tokenized_prompt.input_ids.shape[-1]
    response = outputs[0][prompt_length:]
    generated_answer = tokenizer.decode(response, skip_special_tokens=True).strip()

    # Store the generated answer in the list
    generated_answers_list.append({
        "question": question,
        "exact_answer": exact_answer,
        "generated_answer": generated_answer
    })

    print(f"Exact Answer: {exact_answer}")
    # print(f"Generated Answer: {generated_answer}")

# Save all generated answers to a JSON file
output_file_path = "/content/generated_8B5.json"
with open(output_file_path, "w") as outfile:
    json.dump(generated_answers_list, outfile, indent=4)

print(f"All generated answers have been saved to {output_file_path}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



Exact Answer: ['programmed death-1']




Exact Answer: ['FECH gene, ferrochelatase']
Exact Answer: ["3' flanking region"]
Exact Answer: ['SNP2TFBS']
Exact Answer: ['Phosphonormalizer']
Exact Answer: ['Coronavirus']
Exact Answer: ['lack of H1']
Exact Answer: ['apoptosis']
Exact Answer: ['CD19']
Exact Answer: ['Stata']
Exact Answer: ['>200 nucleotides']
Exact Answer: ['prominent small vessels']
Exact Answer: ['Chromosome 15']
Exact Answer: ['This protein functions as an Na-K-Cl cotransporter.']
Exact Answer: ['transdermal drug-coated microneedle patch system']
Exact Answer: ['Osteoporosis']
Exact Answer: ["Classic Bartter's syndrome has been demonstrated to result from defective chloride transport across the basolateral membrane in the distal nephron due to mutations in the chloride channel gene CLCNKB."]
Exact Answer: ['Metabolite annotation by database searching in mass spectrometry-based metabolomics']
Exact Answer: ['senktide']
Exact Answer: ['angiotensin II receptor']
Exact Answer: ['immune cells']
Exact Answer: ['DeepCpG'