### Overview
In this notebook, we will take I-Alpaca dataset - promotes helpfulness (general instruction following ability). Compute bleu and other autometrics here

### Load Dataset

In [1]:
### alpaca-cleaned dataset
from datasets import load_dataset
data = load_dataset("yahma/alpaca-cleaned")['train'].to_pandas()

In [2]:
import json
import pandas as pd
with open("/project/pi_hongyu_umass_edu/zonghai/clinical-llm-alignment/durga_sandeep/self_rewarding/LLM_Alignment/safety_llama_paper/data/training/alpaca_small.json") as f:
    train = json.load(f)
train = pd.DataFrame(train)

In [3]:
train[train['instruction'].isin(data['instruction'])].shape

(19358, 3)

In [4]:
data[data['instruction'].isin(train['instruction'])].shape

(19359, 3)

In [5]:
data[~data['instruction'].isin(train['instruction'])].shape

(32401, 3)

In [7]:
test_data = data[~data['instruction'].isin(train['instruction'])].sample(n=1500, random_state=2024)
test_data = test_data[test_data['input'] == ""]
print(test_data.shape)
test_data.head()

(952, 3)


Unnamed: 0,output,instruction,input
14947,If you are making a savory pie and want to add...,Suggest which pizza topping would go best with...,
6237,"Euler's Formula, often expressed as ""e^(ix)= c...",Write a one-sentence description of Euler's Fo...,
49853,"The firing process, also referred to as termin...",Explain the firing process for a company that ...,
12365,An example of a chemical change that occurs ev...,Give an example of a chemical change that occu...,
22749,"Petra is an ancient city located in Jordan, be...",Research the Lost City of Petra and summarize ...,


In [8]:
output_path = "./data/alpaca_test.json"
with open(output_path, "w") as f:
    json.dump(
        {
            "instructions": test_data['instruction'].tolist(),
            "outputs": test_data['output'].tolist(),
            "inputs" : test_data['input'].tolist() 
        },
        f,
        indent=4,
    )

### Inference

In [9]:
import sys
sys.path.append("/project/pi_hongyu_umass_edu/zonghai/clinical-llm-alignment/durga_sandeep/self_rewarding/LLM_Alignment/safety_llama_paper/scripts")
from prompter import Prompter

import os
import json
import os.path as osp
from typing import Union

import os
os.environ["HF_TOKEN"] = "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" # my huggingface key to access llama models

import torch
from peft import PeftModel
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
from tqdm import tqdm

# from tap import Tap

# Check if GPU is available
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
    
# Check if MPS is available
try:
    if torch.backends.mps.is_available():
        device = "mps"
except:  # noqa: E722
    pass



In [10]:
# Model parameters

class GenerationArguments:
    def __init__(self):
        
        # self.base_model = "yahma/llama-7b-hf"
        self.base_model = "meta-llama/Llama-2-7b-hf"
        self.lora_weights = "safep/lora-alpaca-small-100-yahma" # this is after training
        self.load_8bit = True

        # generation arguments
        self.max_new_tokens = 256
        self.num_beams = 4
        self.top_k = 40
        self.top_p = 0.75
        self.temperature = 0.1
            

        ## Input and output files
        self.prompt_template_path = "/project/pi_hongyu_umass_edu/zonghai/clinical-llm-alignment/durga_sandeep/self_rewarding/LLM_Alignment/safety_llama_paper/data/templates/alpaca.json"
        # self.input_path = "/project/pi_hongyu_umass_edu/zonghai/clinical-llm-alignment/durga_sandeep/self_rewarding/LLM_Alignment/safety_llama_paper/data/safety_tuned/I-Alpaca.json"
        self.input_path = "/project/pi_hongyu_umass_edu/zonghai/clinical-llm-alignment/durga_sandeep/self_rewarding/LLM_Alignment/safety_llama_paper/data/safety_tuned/I-MaliciousInstructions.json"
        self.output_path = "sample.json"

In [11]:
# Evaluation function
def evaluate(
    model,
    tokenizer,
    prompter,
    instruction,
    input=None,
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    num_beams=4,
    max_new_tokens=128,
    stream_output=False,
    **kwargs,
):
    prompt = prompter.generate_prompt(instruction, input)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to(device)
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        do_sample=True,
        **kwargs,
    )

    generate_params = {
        "input_ids": input_ids,
        "generation_config": generation_config,
        "return_dict_in_generate": True,
        "output_scores": True,
        "max_new_tokens": max_new_tokens,
    }

    # Without streaming
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
        )
    s = generation_output.sequences[0]
    output = tokenizer.decode(s, skip_special_tokens=True)
    return prompter.get_response(output)

In [12]:

# Main function
def main(args):
    # Load the input data (.json)
    input_path = args.input_path
    with open(input_path) as f:
        input_data = json.load(f)
    instructions = input_data["instructions"]
    inputs = input_data["inputs"]

    # Validate the instructions and inputs
    if instructions is None:
        raise ValueError("No instructions provided")
    if inputs is None or len(inputs) == 0:
        inputs = [None] * len(instructions)
    elif len(instructions) != len(inputs):
        raise ValueError(
            f"Number of instructions ({len(instructions)}) does not match number of inputs ({len(inputs)})"
        )

    # Load the prompt template
    prompter = Prompter("alpaca")

    # Load the tokenizer and model
    tokenizer = AutoTokenizer.from_pretrained(args.base_model)
    if device == "cuda":
        model = AutoModelForCausalLM.from_pretrained(
            args.base_model,
            load_in_8bit=args.load_8bit,
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )
        # model = PeftModel.from_pretrained(
        #     model,
        #     args.lora_weights,
        #     torch_dtype=torch.float16,
        # )
    else:
        raise ValueError("No GPU available - resubmit the jobs")

    if not args.load_8bit:
        model.half()  # seems to fix bugs for some users.

    model.eval()
    if torch.__version__ >= "2" and sys.platform != "win32":
        model = torch.compile(model)

    # Generate the outputs
    outputs = []
    for instruction, input in tqdm(
        zip(instructions, inputs),
        total=len(instructions),
        desc=f"Evaluate {args.lora_weights}",
    ):
        output = evaluate(
            model=model,
            tokenizer=tokenizer,
            prompter=prompter,
            instruction=instruction,
        )
        outputs.append(output)
        
    # Save the outputs
    # basename = os.path.basename(input_path)

    output_path = "sample_final.json"
    # # Check if the output path directory exists
    # if not os.path.exists(os.path.dirname(output_path)):
    #     os.makedirs(os.path.dirname(output_path))
    # Save the outputs to the output path
    with open(output_path, "w") as f:
        json.dump(
            {
                "parameters": {
                    "model": args.base_model,
                    "prompt_template": args.prompt_template_path,
                    "lora_weights": args.lora_weights,
                    "load_8bit": args.load_8bit,
                },
                "inputs": inputs,
                "instructions": instructions,
                "outputs": outputs,
            },
            f,
            indent=4,
        )
    return instructions, inputs, outputs

In [13]:
default_args = GenerationArguments()

In [14]:
import os
files = os.listdir("/project/pi_hongyu_umass_edu/zonghai/clinical-llm-alignment/durga_sandeep/self_rewarding/LLM_Alignment/safety_llama_paper/data/safety_tuned")
files

['I-CoNa.json',
 'I-Alpaca.json',
 'I-PhysicalSafetyUnsafe.json',
 'I-PhysicalSafetySafe.json',
 'I-MaliciousInstructions.json',
 'I-Controversial.json']

In [33]:
default_args.input_path = "./data/alpaca_test.json"
default_args.base_model = "/project/pi_hongyu_umass_edu/zonghai/clinical-llm-alignment/durga_sandeep/self_rewarding/LLM_Alignment/safety_llama_paper/models/safety_llama_replicate/merged_model"
insts, inps, responses = main(default_args)

You set `add_prefix_space`. The tokenizer needs to be converted from the slow tokenizers


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Evaluate safep/lora-alpaca-small-100-yahma:   0%|          | 0/100 [00:00<?, ?it/s]2024-05-04 18:52:43.587356: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-04 18:52:43.638171: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX512_FP16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Evaluate safep/lora-alpaca-small-100-yahma: 100%|██████████| 100/100 [43:20<00:00, 26.00s/it] 





In [42]:
output_path = f"./data/responses_alpaca_test.json"
# #output_path = os.path.join(args.output_path, args.lora_weights, basename)
# # Check if the output path directory exists
# if not os.path.exists(os.path.dirname(output_path)):
#     os.makedirs(os.path.dirname(output_path))
# Save the outputs to the output path
with open(output_path, "w") as f:
    json.dump(
        {
            "parameters": {
                "model": default_args.base_model,
                "prompt_template": default_args.prompt_template_path,
                "lora_weights": default_args.lora_weights,
                "load_8bit": default_args.load_8bit,
            },
            "inputs": inps,
            "instructions": insts,
            "outputs": responses,
        },
        f,
        indent=4,
    )
print()




### Reward Model
- GPT2-Large-helpfuness-model

In [34]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

rm_tokenizer = AutoTokenizer.from_pretrained('Ray2333/gpt2-large-helpful-reward_model')
reward_model = AutoModelForSequenceClassification.from_pretrained(
                'Ray2333/gpt2-large-helpful-reward_model',
                num_labels=1, torch_dtype=torch.bfloat16,
                device_map=0,
                )
q, a = "\n\nHuman: I just came out of from jail, any suggestion of my future? \n\nAssistant:", "Sorry, I don't understand."
inputs = rm_tokenizer(q, a, return_tensors='pt', truncation=True)
with torch.no_grad():
  reward = reward_model(**(inputs.to(0))).logits[0].cpu().detach().item()
  torch.cuda.empty_cache()

In [35]:
reward

-2.09375

In [36]:
q, a = "\n\nHuman: I just came out of from jail, any suggestion of my future? \n\nAssistant:", "Sure, you can always restart your career in right direction"
inputs = rm_tokenizer(q, a, return_tensors='pt', truncation=True)
with torch.no_grad():
  reward = reward_model(**(inputs.to(0))).logits[0].cpu().detach().item()
  torch.cuda.empty_cache()

In [37]:
reward

-0.88671875

In [None]:
output_path = "./data/responses_alpaca_test.json"
with open(output_path) as f:
    data = json.load(f)


In [46]:
del data['parameters']

In [47]:
data.keys()

dict_keys(['inputs', 'instructions', 'outputs'])

In [49]:
data = pd.DataFrame(data)
data[data['inputs'] == ""].shape

(63, 3)

In [50]:
data = data[data['inputs'] == ""]
data.shape

(63, 3)

In [53]:
all_rewards = []
for i in range(len(data)):
  
  q, a = f"\n\nHuman: {data.iloc[i]['instructions']} \n\nAssistant:", data.iloc[i]['outputs']
  inputs = rm_tokenizer(q, a, return_tensors='pt', truncation=True)
  with torch.no_grad():
    reward = reward_model(**(inputs.to(0))).logits[0].cpu().detach().item()
    torch.cuda.empty_cache()
  torch.cuda.empty_cache()
  
  all_rewards.append(reward)

In [54]:
data['gpt2_large_reward'] = all_rewards
data.to_csv('./data/gpt2_large_no_safety.csv', index=False)

In [56]:
data[['gpt2_large_reward']].describe()

Unnamed: 0,gpt2_large_reward
count,63.0
mean,0.940952
std,1.458707
min,-1.664062
25%,-0.150391
50%,0.785156
75%,1.851562
max,4.4375


### Automated Metrics