## Reward score at each step

In [3]:
import json
import numpy as np

import os

def save_json(data, file_path):
    """Write ``data`` to ``file_path`` as pretty-printed UTF-8 JSON.

    Missing parent directories are created first. When ``file_path`` is a bare
    file name its directory component is the empty string, and
    ``os.makedirs('')`` raises ``FileNotFoundError``; directory creation is
    therefore skipped in that case and the file lands in the current working
    directory.

    Args:
        data: Any JSON-serializable object.
        file_path: Destination path of the JSON file.
    """
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # ensure_ascii=False keeps non-ASCII characters readable in the file,
    # which is why the file is opened explicitly as UTF-8.
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)


def load_records(record_path):
    """Read the JSON file at ``record_path`` and return its parsed content.

    The file is decoded as UTF-8 explicitly rather than with the platform
    default encoding, so files containing non-ASCII text load the same way on
    every system (``save_json`` in this notebook also writes UTF-8).

    Args:
        record_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    print(f'loading records for file:{record_path}')
    with open(record_path, 'r', encoding='utf-8') as f:
        records = json.load(f)
    return records

def deserialize(records,step):
    """Split evaluation records into policy rewards and reference rewards.

    Each record is indexed by the keys ``reward``, ``ref_reward``, ``query``,
    ``response`` and ``ref_response``; a missing key raises ``KeyError``.
    Instruction/output pairs are assembled for both models, but the export
    that would consume them is currently commented out, so they are discarded.

    Args:
        records: Iterable of per-sample mappings.
        step: Step number; only referenced by the disabled export below.

    Returns:
        Tuple ``(reward, ref_reward)`` of two lists in record order.
    """
    policy_scores, baseline_scores = [], []
    policy_pairs, baseline_pairs = [], []
    for entry in records:
        policy_scores.append(entry["reward"])
        baseline_scores.append(entry["ref_reward"])

        prompt = entry["query"]
        policy_pairs.append({'instruction': prompt, 'output': entry["response"]})
        baseline_pairs.append({'instruction': prompt, 'output': entry["ref_response"]})

    # Disabled export of the pairs collected above.
    # NOTE(review): the second call passes model_output to the reference file;
    # it presumably should pass reference_output -- confirm before re-enabling.
    # save_json(model_output,f'{root}/clean_data/output_{step}.json')
    # save_json(model_output,f'{root}/clean_data/reference_{step}.json')

    return policy_scores, baseline_scores

# Evaluation-output directory of the RL run to inspect. Keep exactly one `root`
# assignment active; the commented lines are alternative runs.
# root = "output/rl/LLama-2-7b-oasst-baseline_rl_bs16_kl_001_clip_04_512_max_token_with_pad_eos_lr_141e5/eval_output"
# root = "output/rl/LLama-2-7b-oasst-baseline_rl_bs16_kl_015_clip_035_512_max_token_with_pad_eos_lr_741e5/eval_output"
# root = "output/rl/LLama-2-7b-oasst-baseline_rl_bs16_kl_001_clip_04_512_max_token_with_pad_eos_lr_141e5_10p_data_rank/eval_output"
root = "output/rl/LLama-2-7b-oasst-baseline_rl_bs16_kl_002_clip_04_512_max_token_with_pad_eos_lr_141e5/eval_output"
# root = "output/rl/LLama-2-7b-oasst-baseline_rl_abs_quality_rw_075_bs16_kl_002_clip_04_512_max_token_with_pad_eos_lr_141e5/eval_output"
# root = "output/rl/LLama-2-7b-oasst-baseline_rl_abs_quality_rw_075_bs16_kl_002_clip_04_512_max_token_with_pad_eos_lr_141e5_r32_alpha_64/eval_output"
# root = "output/rl/LLama-2-7b-oasst-baseline_rl_abs_quality_rw_075_bs16_kl_002_clip_04_512_max_token_with_pad_eos_lr_141e5_logits/eval_output"
# root = "output/rl/LLama-2-7b-oasst-baseline_rl_abs_quality_rw_075_bs16_kl_002_clip_04_512_max_token_with_pad_eos_lr_141e5_logits_adjusted/eval_output"

# Print mean and standard deviation of the active-model and reference-model
# rewards for every step whose record file can be processed.
for step in [75,150,225,300,375,450,525]:
    try:
        record_path = f"{root}/responses_rewards_step_{step}.json"
        records = load_records(record_path)
        reward,ref_reward = deserialize(records,step)
        print(f'reward for step {step} from active model is :{np.mean(reward),np.std(reward)} and ref model is: {np.mean(ref_reward),np.std(ref_reward)}')
        print('***'*10)
    except Exception as e:
        # Still best effort (a failing step must not abort the remaining
        # ones), but say what was skipped and why instead of hiding it.
        print(f'skipping step {step}: {e!r}')
        print('***'*10)

loading records for file:output/rl/LLama-2-7b-oasst-baseline_rl_bs16_kl_002_clip_04_512_max_token_with_pad_eos_lr_141e5/eval_output/responses_rewards_step_75.json
reward for step 75 from active model is :(0.8056396484375, 0.8193853493887262) and ref model is: (0.47811279296875, 1.1169018467594098)
******************************
loading records for file:output/rl/LLama-2-7b-oasst-baseline_rl_bs16_kl_002_clip_04_512_max_token_with_pad_eos_lr_141e5/eval_output/responses_rewards_step_150.json
reward for step 150 from active model is :(0.810809326171875, 0.6753196021763054) and ref model is: (1.1474609375, 1.095375901589811)
******************************
loading records for file:output/rl/LLama-2-7b-oasst-baseline_rl_bs16_kl_002_clip_04_512_max_token_with_pad_eos_lr_141e5/eval_output/responses_rewards_step_225.json
reward for step 225 from active model is :(1.18369140625, 0.7941681548908883) and ref model is: (0.486181640625, 1.570982226373815)
******************************
loading record

In [24]:
# alpaca_eval --model_outputs 'alpaca_eval/clean_data/output_150.json' --reference_outputs 'alpaca_eval/clean_data/output_300.json' --annotators_config 'alpaca_eval_gpt4_turbo_fn'

## Prepare data for alpaca_eval: RLHF vs. SFT

In [23]:

import json
import numpy as np
from glob import glob
import os


def save_json(data, file_path):
    """Write ``data`` to ``file_path`` as pretty-printed UTF-8 JSON.

    Missing parent directories are created first. When ``file_path`` is a bare
    file name its directory component is the empty string, and
    ``os.makedirs('')`` raises ``FileNotFoundError``; directory creation is
    therefore skipped in that case and the file lands in the current working
    directory.

    Args:
        data: Any JSON-serializable object.
        file_path: Destination path of the JSON file.
    """
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # ensure_ascii=False keeps non-ASCII characters readable in the file,
    # which is why the file is opened explicitly as UTF-8.
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)


def load_records(record_path):
    """Read the JSON file at ``record_path`` and return its parsed content.

    The file is decoded as UTF-8 explicitly rather than with the platform
    default encoding, so files containing non-ASCII text load the same way on
    every system (``save_json`` in this notebook also writes UTF-8).

    Args:
        record_path: Path of the JSON file to read.

    Returns:
        The deserialized JSON value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    print(f'loading records for a file at path: {record_path}')
    with open(record_path, 'r', encoding='utf-8') as f:
        records = json.load(f)
    return records


def deserialize(records,suffix):
    """Gather rewards and response lengths, and export instruction/output pairs.

    For every record the instruction is cut out of the chat-formatted
    ``query``: the text after the last user-turn opening tag and before the
    first end-of-turn/assistant-opening tag. The resulting
    ``{'instruction', 'output'}`` pairs are written with ``save_json`` to
    ``{save_dir}/alpaca_{suffix}``, where ``save_dir`` is a module-level
    variable that must be defined before this function is called.

    Args:
        records: Iterable of mappings with ``reward``, ``response`` and
            ``query`` keys.
        suffix: File-name suffix of the exported JSON file.

    Returns:
        Tuple ``(reward, res_len)``: the rewards and the number of
        whitespace-separated tokens of each response, in record order.
    """
    user_tag = "<|im_start|>user\n"
    assistant_tag = "<|im_end|>\n<|im_start|>assistant\n"

    scores, word_counts, exported = [], [], []
    for entry in records:
        scores.append(entry["reward"])
        word_counts.append(len(entry["response"].split()))
        # rpartition(...)[2] is the text after the LAST user tag (the whole
        # query when the tag is absent); partition(...)[0] is the text before
        # the FIRST assistant tag (everything when the tag is absent).
        after_user_tag = entry["query"].rpartition(user_tag)[2]
        prompt = after_user_tag.partition(assistant_tag)[0]
        exported.append({'instruction': prompt, 'output': entry["response"]})

    save_json(exported, f'{save_dir}/alpaca_{suffix}')
    return scores, word_counts


# Evaluation-output directory of the RL run to export. Keep exactly one `root`
# assignment active; the commented lines are alternative runs.
# root = "output/rl/LLama-2-7b-oasst-baseline_rl_bs16_kl_001_clip_04_512_max_token_with_pad_eos_lr_141e5/eval_output"
# root = "output/rl/LLama-2-7b-oasst-baseline_rl_bs16_kl_002_clip_04_512_max_token_with_pad_eos_lr_141e5/eval_output"
root = "output/rl/LLama-2-7b-oasst-baseline_rl_abs_quality_rw_075_bs16_kl_002_clip_04_512_max_token_with_pad_eos_lr_141e5_logits_adjusted/eval_output"
# The third path component of `root` is the run name; the exported files for
# this run are written below alpaca_eval/<run name>.
save_dir = f"alpaca_eval/{root.split('/')[2]}"

# glob returns matches in arbitrary order; sort them so the files are always
# processed (and their statistics printed) in the same order.
rlhf_output = sorted(glob(root+'/*_rlhf_output.json'))
sft_output = sorted(glob(root+'/*_sft_output.json'))
predicted_files = {"rlhf":rlhf_output,
         "sft":sft_output}
# The loop variable is deliberately not called `type`, which would shadow the
# builtin for every cell executed afterwards.
for model_kind, paths in predicted_files.items():
    for record_path in paths:
        # basename also works when the OS joins the file name with a
        # separator other than '/'.
        fn = os.path.basename(record_path)
        try:
            records = load_records(record_path)
            reward,res_len = deserialize(records,fn)
            print(f'Mean of {len(reward)} reward for responses in {fn} is {np.mean(reward)}, median is {np.median(reward)} and standard deviation is {np.std(reward)}')
            print(f'Mean of {len(res_len)} response is {np.mean(res_len)}, median is {np.median(res_len)} and standard deviation is {np.std(res_len)}')
            print('==='*10)
        except Exception as e:
            # Best effort: report the failure and continue with the next file.
            print(e)
            print('==='*10)
        

loading records for a file at path: output/rl/LLama-2-7b-oasst-baseline_rl_abs_quality_rw_075_bs16_kl_002_clip_04_512_max_token_with_pad_eos_lr_141e5_logits_adjusted/eval_output/tuning_p_01_run_1_rlhf_output.json
Mean of 50 reward for responses in tuning_p_01_run_1_rlhf_output.json is 0.71272705078125, median is 0.779296875 and standard deviation is 0.6236641596637422
Mean of 50 response is 166.68, median is 170.5 and standard deviation is 68.18693129918665
loading records for a file at path: output/rl/LLama-2-7b-oasst-baseline_rl_abs_quality_rw_075_bs16_kl_002_clip_04_512_max_token_with_pad_eos_lr_141e5_logits_adjusted/eval_output/tuning_p_09_t_08_rlhf_output.json
Mean of 50 reward for responses in tuning_p_09_t_08_rlhf_output.json is 0.8115625, median is 0.833984375 and standard deviation is 0.6419359728297602
Mean of 50 response is 175.96, median is 191.0 and standard deviation is 78.35788664837764
loading records for a file at path: output/rl/LLama-2-7b-oasst-baseline_rl_abs_qualit

In [None]:
# alpaca_eval --model_outputs 'alpaca_eval/LLama-2-7b-oasst-baseline_rl_abs_quality_rw_075_bs16_kl_002_clip_04_512_max_token_with_pad_eos_lr_141e5_logits/alpaca_tuning_p_01_run_1_rlhf_output.json' --reference_outputs 'alpaca_eval/LLama-2-7b-oasst-baseline_rl_abs_quality_rw_075_bs16_kl_002_clip_04_512_max_token_with_pad_eos_lr_141e5_logits_adjusted/alpaca_tuning_p_01_run_1_rlhf_output.json' --annotators_config 'alpaca_eval_gpt4_turbo_fn'

## Evaluation using direct API

In [49]:
# Example inputs for the pairwise judge prompt built further down in this cell.
# NOTE: `instruction`, `output_1` and `output_2` are each assigned twice; the
# second example below overwrites the first one, so only the second example
# ends up in the prompt.
instruction = "I like to host guests at my home from time to time, and I am gathering  recipes of different dishes and drinks to keep things interesting. I am interested in trying some Danish dishes. Can you give me a recipe for Flæskesteg?"

# The `#rlhf` marker presumably labels the response produced by the RLHF model
# (TODO confirm); it sits on output_1 here but on output_2 in the second example.
#rlhf
output_1 = """Certainly! Flæskesteg is a traditional Danish roast pork dish that is typically served with potatoes and red cabbage. Here's a recipe for you to try:\n\nIngredients:\n\n    1.5 kg (3.3 lbs) boneless pork shoulder, trimmed of fat\n    2 tbsp oil\n    1 large onion, chopped\n    1 carrot, chopped\n    1 celery stalk, chopped\n    1 tsp ground allspice\n    1 tsp ground black pepper\n    1 tsp salt\n    3 cloves garlic, minced\n    2 bay leaves\n    1 tbsp vinegar\n    2 tbsp beer\n    1 tbsp molasses\n    1 tbsp Worcestershire sauce\n    Potatoes, peeled and cut into wedges\n    Red cabbage, shredded\n\nInstructions:\n\n    Heat the oil in a large Dutch oven or roasting pan over medium heat. Add the onion, carrot, celery, and allspice and cook until softened, about 5 minutes.\n\n    Add the pork to the pan and cook, breaking it up with a spoon, until browned, about 10 minutes.\n\n    Add the garlic, bay leaves, vinegar, beer, molasses, Worcestershire sauce, and salt and pepper to taste. Bring to a boil, then reduce the heat to low and simmer, covered, for 1 hour.\n\n    Transfer the pork to a plate and set aside to rest for 10 minutes.\n\n    Add the potatoes to the pan and cook, stirring occasionally, until tender, about 20 minutes.\n\n    Add the red cabbage to the pan and cook, stirring occasionally, until heated through, about 5 minutes.\n\n    Slice the pork against the grain into thin slices. Serve the pork with the potatoes and cabbage, and enjoy!\n\nThis recipe should serve 4-6 people."""
# The text of this response stops mid-word ("sau") as stored here.
output_2 = """Flæskesteg is a classic Danish roast pork dish that is usually served as a main course during special occasions or holidays. Here's a recipe that you can try at home:\n\nIngredients:\n\n    2 pork loins (about 3-4 pounds each)\n    2 tablespoons butter\n    2 teaspoons salt\n    1 teaspoon black pepper\n    2 tablespoons brown sugar\n    1/2 cup beer\n    2 tablespoons cider vinegar\n    1 large onion, sliced\n    2 cloves garlic, minced\n    1 tablespoon all-purpose flour\n    1/2 cup heavy cream\n    1/4 cup unsalted butter\n    1/2 cup breadcrumbs\n    1/4 cup chopped parsley\n\nInstructions:\n\n    Preheat the oven to 325°F (160°C).\n    Place the pork loins in a large roasting pan and rub them with the butter.\n    Sprinkle them with the salt, pepper, and brown sugar.\n    Roast the pork for 1 hour, or until it is golden brown and slightly pink in the center.\n    Remove the pork from the oven and cover it with foil.\n    In a small saucepan, combine the beer, vinegar, onion, and garlic. Bring to a boil, reduce the heat to low, and simmer for 10 minutes.\n    Remove the pan from the heat and let it cool for a few minutes.\n    Whisk the flour into the cooled beer mixture until it forms a thick paste.\n    In a separate saucepan, whisk the heavy cream until it thickens slightly.\n    Pour the cream into the beer mixture and stir until well combined.\n    Bring the mixture back to a simmer and cook for 2-3 minutes, or until it thickens slightly.\n    Remove the pan from the heat and let it cool for a few minutes.\n    Stir the butter into the sauce until it melts and combines with the sauce.\n    Remove the pork from the foil and pour the sau"""

# Second example: replaces all three variables assigned above.
instruction="How do you become an author?"
output_1="Becoming an author involves a combination of talent, hard work, and dedication. Here are some steps you can take to become an author:\n\n1. Write regularly: Practice your writing skills by writing every day, even if it's just for a few minutes. Write about anything that interests you, and don't be afraid to experiment with different genres and styles.\n\n2. Read widely: Reading widely will help you to develop your own writing style and to gain a deeper understanding of the craft of writing. Read books in your chosen genre, as well as books from other genres and from different time periods.\n\n3. Develop your ideas: Come up with ideas for stories, articles, or essays that you want to write. You can get ideas from your own life experiences, from current events, or from your own imagination.\n\n4. Revise and edit: Once you have written a draft, take the time to revise and edit it carefully. This will help you to identify areas that need improvement and to make your writing more cohesive and polished.\n\n5. Get feedback: Share your writing with others and ask for feedback. This can be from friends, family, or from writing groups or workshops. Use their feedback to improve your writing.\n\n6. Submit your work: Once you are confident in your writing, start submitting it to publishers, magazines, or websites that accept unsolicited submissions. You may need to submit many pieces before you are published, but perseverance is key.\n\n7. Promote your work: Once your work is published, promote it through social media, websites, and other channels. This will help you to build a following and to sell more copies of your book.\n\nRemember, becoming an author is a long-term process that requires dedication and hard work. Don't be discouraged by rejections or setbacks, and keep writing and improving your craft."
#rlhf
output_2="Becoming an author involves a combination of hard work, dedication, and persistence. Here are some steps you can take to become an author:\n\nWrite regularly: The key to becoming an author is to write regularly. Set aside time each day or each week to work on your writing projects. You can start by writing for a few minutes a day and gradually increase the amount of time you spend writing.\n\nStudy the craft of writing: Learn the basics of grammar, punctuation, and syntax. Read widely in your chosen genre to learn from established authors. Take writing workshops, attend conferences, and join writing groups to get feedback on your work and learn from other writers.\n\nDevelop a writing routine: Establish a routine for writing, such as setting aside a specific time each day or each week to work on your writing. This can help you build discipline and make writing a priority in your life.\n\nWrite what you love: Write about topics and themes that you are passionate about. If you enjoy what you are writing, it will show in your work and it will be easier to maintain your motivation.\n\nDevelop a thick skin: Writing is subjective, and not everyone will love your work. Learn to develop a thick skin and ignore negative feedback. Focus on improving your craft and creating work that you are proud of.\n\nPublish your work: Once you have written a manuscript, consider self-publishing or traditional publishing. Self-publishing allows you to control the process and get your work out there quickly, while traditional publishing can provide more recognition and support.\n\nRemember, becoming an author is a process that takes time and effort. Don't be discouraged by rejection or setbacks, and keep writing and improving your craft."
# Assemble the pairwise-comparison prompt: a system section and a user section,
# each wrapped in <|im_start|>/<|im_end|> tags, with the current `instruction`,
# `output_1` and `output_2` interpolated.
# In this f-string, doubled braces ({{ and }}) produce literal braces and \"""
# produces literal triple quotes around the interpolated texts. The template
# wording is runtime text and is intentionally left exactly as written.
input_prompt = f"""<|im_start|>system
You are a helpful assistant, that makes a leaderboard of models based on the quality of their answers.
<|im_end|>
<|im_start|>user
I want you to create a leaderboard of different of large-language models. To do so, I will give you the instructions (prompts) given to the models, and the responses of two models. To make a leaderboard, first make a list ranking the models based on which responses would be preferred by humans, then give the resulting list of JSON to `make_leaderboard`.

Here is the prompt:
{{
    "instruction": \"""{instruction}\""",
}}

Here are the outputs of the models:
[
    {{
        "model": "model_1",
        "answer": \"""{output_1}\"""
    }},
    {{
        "model": "model_2",
        "answer": \"""{output_2}\"""
    }}
]

Now make the leaderboard by ranking the models by the quality of their answers, so that the model with rank 1 has the best output.
<|im_end|>
"""

# Show the fully rendered prompt for visual inspection.
print(input_prompt)

<|im_start|>system
You are a helpful assistant, that makes a leaderboard of models based on the quality of their answers.
<|im_end|>
<|im_start|>user
I want you to create a leaderboard of different of large-language models. To do so, I will give you the instructions (prompts) given to the models, and the responses of two models. To make a leaderboard, first make a list ranking the models based on which responses would be preferred by humans, then give the resulting list of JSON to `make_leaderboard`.

Here is the prompt:
{
    "instruction": """How do you become an author?""",
}

Here are the outputs of the models:
[
    {
        "model": "model_1",
        "answer": """Becoming an author involves a combination of talent, hard work, and dedication. Here are some steps you can take to become an author:

1. Write regularly: Practice your writing skills by writing every day, even if it's just for a few minutes. Write about anything that interests you, and don't be afraid to experiment wi

In [50]:

from openai import OpenAI
import openai

import os

# Read the API key from the environment instead of embedding it in the
# notebook. Any key that was previously hardcoded in this cell must be treated
# as leaked and revoked.
openai_api_keys = os.environ.get("OPENAI_API_KEY")
if not openai_api_keys:
    raise RuntimeError("Set the OPENAI_API_KEY environment variable before running this cell.")
base_url = openai.base_url

client = OpenAI(base_url=base_url,api_key=openai_api_keys)

# Request settings: temperature 0 and a forced call to `make_leaderboard`, so
# the ranking is returned as the JSON arguments of that function call.
client_kwargs = {
    "model": "gpt-4-1106-preview",
    "max_tokens": 100,
    "temperature": 0,
    "top_p":1.0,
    "function_call": {
      "name": "make_leaderboard"
    },
    "functions": [
      {
        "name": "make_leaderboard",
        "description": "Make a leaderboard of models given a list of the models ordered by the preference of their outputs.",
        "parameters": {
          "type": "object",
          "properties": {
            "ordered_models": {
              "type": "array",
              "description": "A list of models ordered by the preference of their outputs",
              "items": {
                "type": "object",
                "properties": {
                  "model": {
                    "type": "string",
                    "description": "The name of the model"
                  },
                  "rank": {
                    "type": "number",
                    "description": "Order of preference of the model, 1 has the best output"
                  }
                }
              }
            }
          }
        },
        # NOTE(review): JSON Schema expects `required` inside `parameters`;
        # here it sits at the function level. Left in place to keep the
        # request unchanged -- confirm the intended position.
        "required": ["ordered_models"]
      }
    ]
  }

# The whole rendered prompt (system and user sections included) is sent as a
# single user message.
response = client.chat.completions.create(
        messages=[{"role": "user", "content":input_prompt}],
        **client_kwargs
    )
# JSON string with the arguments the model supplied to `make_leaderboard`.
print(response.choices[0].message.function_call.arguments)


In [62]:
import tiktoken
# Round-trip check: encoding a string that contains made-up words with the
# tokenizer tiktoken selects for the configured model, then decoding it again,
# must reproduce the original text exactly.
a = "Lets try to fool vocabultary. Mean giahslflal tripallav vakunama tirakutti lafuddu"
enc = tiktoken.encoding_for_model(client_kwargs["model"])
assert enc.decode(enc.encode(a)) == a
