In [None]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
!pip3 install tokenizers -U
!pip3 install transformers -U


In [None]:
import torch  
from transformers import AutoTokenizer, AutoModelForCausalLM  
import json

In [None]:
# Hugging Face Hub identifier of the checkpoint used for both the tokenizer
# and the causal language model.
model_id = "meta-llama/Llama-2-7b-hf"

# Tokenizer matching the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Model weights are requested in full precision (float32).
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32)


## Generate completions and logits for multiple prompts

In [None]:
# Destination file in JSON Lines format: one JSON record per prompt, one per line.
output_path = "golden_data_llama2-7b.jsonl"

# Prompts to complete.
prompt_texts = ["I love to", "Today is a", "What is the"]

# Number of tokens to generate beyond each prompt.
MAX_NEW_TOKENS = 10

# Open the file ONCE, outside the loop. Re-opening it with mode 'w' on every
# iteration would truncate it each time and keep only the last prompt's record.
# Mode 'w' here also makes the cell idempotent: re-running regenerates the file.
with open(output_path, 'w', encoding='utf-8') as out_file:
    for prompt_text in prompt_texts:
        # Encode the prompt text as a (1, prompt_len) tensor of token ids.
        input_ids = tokenizer.encode(prompt_text, return_tensors='pt')

        with torch.no_grad():
            # Greedy decoding. `do_sample=False` is passed explicitly so the
            # result is deterministic regardless of any sampling defaults in
            # the checkpoint's generation config.
            output = model.generate(
                input_ids,
                max_new_tokens=MAX_NEW_TOKENS,
                do_sample=False,
            )

            # Forward pass over prompt + completion to obtain the logits at
            # every position of the generated sequence.
            outputs = model(output)

        # Token strings for the full sequence (prompt + completion).
        generated_tokens = tokenizer.convert_ids_to_tokens(output[0])
        print(generated_tokens)

        # Logits as a float32 numpy array on the CPU.
        logits = outputs.logits.cpu().numpy().astype('float32')

        # Record for this prompt.
        data_to_save = {
            "prompt": prompt_text,
            "completion": tokenizer.decode(output[0]),
            "tokens": generated_tokens,
            "logits": logits.tolist(),  # nested lists so the array is JSON-serializable
        }

        # One JSON object per line (JSON Lines).
        out_file.write(json.dumps(data_to_save) + "\n")

print(f"Data saved to {output_path}")


 
