In [86]:
import os, sys, json, pprint, datetime
import requests

from dotenv import load_dotenv

load_dotenv()

True

In [92]:
def get_llm_response(messages: list, model: str="mistralai/mistral-7b-instruct:free") -> str:
    OPENROUTER_API_KEY = os.environ['OPENROUTER_API_KEY']

    response = requests.post(
        url="https://openrouter.ai/api/v1/chat/completions",
        headers={
            "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        },
        data = json.dumps({
            "model": model,
            "messages": messages
        })
    )

    # if response.status_code != 200:
    #     print(f"There was an error processing your request ({response.status_code})")
    #     print(response.json())
    #     return None
        
    # print("Response successfully received")
    # return response.json()['choices'][0]['message']['content']

    result = response.json()
    print(result.keys())
    return result

In [93]:
prompts_path = os.path.join('.', 'ifeval', 'data', 'input_response_data_gpt4_20231107_145030.jsonl')

def get_base_prompts(file: str=prompts_path) -> list:
    with open(file, mode='r', encoding='utf-8') as jsonl:
        prompts = jsonl.read().splitlines()
        # print(prompts)

        return_list = []
        for jline in prompts:
            return_dict = json.loads(jline)
            del return_dict['response']
            return_list.append(return_dict)

        return return_list

In [94]:
prompts = get_base_prompts()
pprint.pprint(prompts)

[{'prompt': 'Write a 300+ word summary of the wikipedia page '
            '"https://en.wikipedia.org/wiki/Raymond_III,_Count_of_Tripoli". Do '
            'not use any commas and highlight at least 3 sections that has '
            'titles in markdown format, for example *highlighted section part '
            '1*, *highlighted section part 2*, *highlighted section part 3*.'},
 {'prompt': 'I am planning a trip to Japan, and I would like thee to write an '
            'itinerary for my journey in a Shakespearean style. You are not '
            'allowed to use any commas in your response.'},
 {'prompt': 'Write a resume for a fresh high school graduate who is seeking '
            'their first job. Make sure to include at least 12 placeholder '
            'represented by square brackets, such as [address], [name].'},
 {'prompt': 'Write an email to my boss telling him that I am quitting. The '
            'email must contain a title wrapped in double angular brackets, '
            'i.e

In [95]:
def get_ifeval_input_data(model: str, prompts: list) -> list:
    # get llm responses to every prompt
    prompt_response_list = []
    for i, prompt in enumerate(prompts):
        prompt = prompt['prompt']
        print(f"{i+1}. Prompt: {prompt}")
        
        messages = [{"role": "user", "content": prompt}]
        # print(messages)

        response = {'error': 'error'}
        while 'error' in response:
            # keep requesting a response until the rate limit subsides
            response = get_llm_response(messages=messages, model=model)
            # print(f"\tResponse: {response}")

        prompt_response = {
            "prompt": prompt,
            "response": response
        }
        prompt_response_list.append(prompt_response)
    
    return prompt_response_list

In [109]:
def save_ifeval_input_data(model: str, prompt_responses: list) -> str:
    model_name = model.split('/')[-1]
    filename = '_'.join(['input_data', model_name, datetime.datetime.today().strftime('%F')]) + '.jsonl'
    save_filepath = os.path.join('.', 'instruction_following_eval', 'data', filename)

    with open(save_filepath, mode='w', encoding='utf-8') as save_file:
        for prompt_response in prompt_responses:
            print(prompt_response)
            save_file.write(json.dumps(prompt_response) + "\n")
    
    return save_filepath

## Generate and save responses

In [97]:
model = "mistralai/mistral-7b-instruct:nitro"

input_data = get_ifeval_input_data(model, prompts)
saved_file = save_ifeval_input_data(model, input_data)

1. Prompt: Write a 300+ word summary of the wikipedia page "https://en.wikipedia.org/wiki/Raymond_III,_Count_of_Tripoli". Do not use any commas and highlight at least 3 sections that has titles in markdown format, for example *highlighted section part 1*, *highlighted section part 2*, *highlighted section part 3*.
dict_keys(['choices', 'model', 'id', 'object', 'created', 'usage'])
2. Prompt: I am planning a trip to Japan, and I would like thee to write an itinerary for my journey in a Shakespearean style. You are not allowed to use any commas in your response.
dict_keys(['choices', 'model', 'id', 'object', 'created', 'usage'])
3. Prompt: Write a resume for a fresh high school graduate who is seeking their first job. Make sure to include at least 12 placeholder represented by square brackets, such as [address], [name].
dict_keys(['choices', 'model', 'id', 'object', 'created', 'usage'])
4. Prompt: Write an email to my boss telling him that I am quitting. The email must contain a title wr

In [110]:
# adjust response to only include response content
with open('/Users/dgwon/repos/wpi/cs534-project/DINOS/ifeval/instruction_following_eval/data/input_data_mistral-7b-instruct:nitro_2024-03-09.jsonl', mode='r', encoding='utf-8') as f:
    jlines = f.read().splitlines()
    adj_jlines = []
    for jline in jlines:
        line = json.loads(jline)
        line_obj = {
            "prompt": line["prompt"],
            "response": line['response']['choices'][0]['message']['content']
        }
        adj_jlines.append(line_obj)

saved_file = save_ifeval_input_data(model, adj_jlines)    

{'prompt': 'Write a 300+ word summary of the wikipedia page "https://en.wikipedia.org/wiki/Raymond_III,_Count_of_Tripoli". Do not use any commas and highlight at least 3 sections that has titles in markdown format, for example *highlighted section part 1*, *highlighted section part 2*, *highlighted section part 3*.', 'response': " **Raymond III, Count of Tripoli**\n\nRaymond III, born around 1187, was a pivotal figure in the Crusader States of the Levant in the late 12th century. He held the title of Count of Tripoli, a significant coastal city in present-day Lebanon.\n\n**Early Life and Succession**\n\nBorn to Raymond II and Constance of Antioch, Raymond III came to power following his father's death in 1152. He ruled under regency until reaching adulthood. In 1161, Raymond III established an alliance with the Kingdom of Jerusalem by marrying Alice of Montferrat, the sister of Raymond III of Tripoli and Baldwin IV of Jerusalem. Raymond III's reign thus started with a strong political 

## Analyze results

In [112]:
with open('./instruction_following_eval/data/eval_results_strict.jsonl', mode='r', encoding='utf-8') as f:
    jlines = f.read().splitlines()
    lines = [json.loads(jline) for jline in jlines]

    total = len(lines)
    correct = 0
    for line in lines:
        if line['follow_all_instructions'] == True:
            correct += 1
    
    print(f"Grade (strict): {(correct / total) * 100: .2f}%")


with open('./instruction_following_eval/data/eval_results_loose.jsonl', mode='r', encoding='utf-8') as f:
    jlines = f.read().splitlines()
    lines = [json.loads(jline) for jline in jlines]

    total = len(lines)
    correct = 0
    for line in lines:
        if line['follow_all_instructions'] == True:
            correct += 1
    
    print(f"Grade (loose): {(correct / total) * 100: .2f}%")

Grade (strict):  47.87%
Grade (loose):  53.23%
