In [1]:
import pandas as pd

In [2]:
# Load the InFoBench dataset from a local parquet file into a DataFrame.
train_df = pd.read_parquet("resource/IF/InfoBench/InFoBench.parquet")
# System message for the final evaluation step: it tells the judge to answer
# each question with YES or NO and spells out the rule for each choice.
# NOTE(review): SYS_MSG is not referenced by any other visible cell.
# NOTE(review): the literal ends with a stray `'''` before the closing `"""`
# and contains one typographic closing quote (”) after "second person?" --
# confirm whether both are intended before changing the prompt text.
SYS_MSG = """Based on the provided Input (if any) and Generated Text, answer the ensuing Questions with either a YES or NO choice. Your selection should be based on your judgment as well as the following rules:\n\n- YES: Select 'YES' if the generated text entirely fulfills the condition specified in the question. However, note that even minor inaccuracies exclude the text from receiving a 'YES' rating. As an illustration. consider a question that asks. \"Does each sentence in the generated text use a second person?” If even one sentence does not use the second person, the answer should NOT be 'YES'. To qualify for a 'YES' rating, the generated text must be entirely accurate and relevant to the question\n\n- NO: Opt for 'NO' if the generated text fails to meet the question's requirements or provides no information that could be utilized to answer the question. For instance, if the question asks. \"Is the second sentence in the generated text a compound sentence?\" and the generated text only has one sentence. it offers no relevant information to answer the question. Consequently, the answer should be 'NO'.'''"""

In [3]:
# System message substituted into the {system_message} slot of every template below.
system_message = "You are a conscientious assistant, and you must answer my questions exactly as my questions require."
# Plain-text prompt without model-specific control tokens.
# Placeholders: {system_message}, {INSTRUCTION}, {INPUT}.
initial_prompt = """
Instruction: {system_message}

Input: 
{INSTRUCTION}
{INPUT}
"""

# Same three placeholders wrapped in <s>[INST] ... [/INST] markers
# (presumably the Mixtral-Instruct chat format, going by the variable name).
# NOTE(review): the template ends with `</s>` right after `[/INST]`; confirm
# that closing the sequence at the end of a generation prompt is intended.
mixtral_instruct_initial_prompt = """<s>[INST]
{system_message}
{INSTRUCTION}
{INPUT}
[/INST] 
</s>"""

# Same placeholders with the system message enclosed in <<SYS>> ... <</SYS>>
# inside an [INST] block (presumably the Llama-2 chat format).
llama2_chat_initial_prompt = """
<s>[INST] <<SYS>>
{system_message}
<</SYS>>
{INSTRUCTION}
{INPUT}
[/INST]
"""

# Same placeholders split into system / user / assistant sections using
# <|start_header_id|> ... <|eot_id|> markers (presumably the Llama-3 chat format).
# NOTE(review): this variable is named `..._init_prompt` while its siblings use
# `..._initial_prompt`; the string also starts with a newline before
# <|begin_of_text|> -- confirm both are intended.
llama3_chat_init_prompt = """
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
{system_message}
<|eot_id|><|start_header_id|>user<|end_header_id|>
{INSTRUCTION}
{INPUT}
<|eot_id|><|start_header_id|>assistant<|end_header_id|>
"""

In [13]:
from langchain import PromptTemplate

# Instantiate one PromptTemplate per prompt format. The targets on the left
# line up one-to-one, in order, with the raw template strings on the right.
(
    initial_prompt_template,
    llama2_chat_initial_prompt_template,
    mixtral_instruct_initial_prompt_template,
    llama3_chat_initial_prompt_template,
) = (
    PromptTemplate.from_template(template_text)
    for template_text in (
        initial_prompt,
        llama2_chat_initial_prompt,
        mixtral_instruct_initial_prompt,
        llama3_chat_init_prompt,
    )
)


def generate_initial_prompts(row):
    """Render every prompt format for one dataset row and attach the results.

    Reads ``row["instruction"]`` and ``row["input"]``, fills each of the four
    module-level prompt templates with them (plus the shared
    ``system_message``), and stores the rendered prompts on the row.

    Args:
        row: A mapping-like row (e.g. a ``pandas.Series`` passed by
            ``DataFrame.apply(..., axis=1)``) with ``"instruction"`` and
            ``"input"`` entries.

    Returns:
        The same ``row`` object with these entries added:
        ``initial_prompt``, ``llama2_chat_initial_prompt``,
        ``mixtral_instruct_initial_prompt``, ``llama3_chat_initial_prompt``,
        ``INSTRUCTION`` (copy of ``instruction``) and ``INPUT`` (copy of
        ``input``).
    """
    instruction = row["instruction"]
    # Named `input_text` so the builtin `input` is not shadowed.
    input_text = row["input"]

    # Every template takes the same three placeholders.
    template_fields = {
        "INSTRUCTION": instruction,
        "INPUT": input_text,
        "system_message": system_message,
    }

    row["initial_prompt"] = initial_prompt_template.format(**template_fields)
    row["llama2_chat_initial_prompt"] = llama2_chat_initial_prompt_template.format(
        **template_fields
    )
    row["mixtral_instruct_initial_prompt"] = (
        mixtral_instruct_initial_prompt_template.format(**template_fields)
    )
    row["llama3_chat_initial_prompt"] = llama3_chat_initial_prompt_template.format(
        **template_fields
    )

    # Keep the upper-case columns consistent with the template placeholders:
    # INSTRUCTION holds the instruction and INPUT holds the input (the two
    # were previously assigned the wrong way round).
    row["INSTRUCTION"] = instruction
    row["INPUT"] = input_text
    return row


# Row-wise pass: each row is handed to generate_initial_prompts and the
# returned rows (with the extra prompt columns) replace train_df.
train_df = train_df.apply(generate_initial_prompts, axis="columns")

In [14]:
import numpy as np

# Sampling temperatures to sweep: 0.1 up to (but excluding) 2.0 in steps of 0.3.
# np.arange with a fractional step accumulates floating-point error (values
# such as 0.7000000000000001), which would leak into the exported CSV and break
# equality filters on the Temperature column, so round to one decimal place.
temperatures = np.round(np.arange(0.1, 2.0, 0.3), 1)
# Single-column frame, one row per temperature.
temperature_df = pd.DataFrame({"Temperature": temperatures})
# Cross join: pair every temperature with every row of train_df.
merged_df = pd.merge(temperature_df, train_df, how="cross")

In [15]:
# Persist the temperature x prompt grid as CSV.
merged_df.to_csv(path_or_buf="resource/IF/IF_experiment_prompt.csv")

In [12]:
# Display the (rows, columns) dimensions of train_df.
# NOTE(review): this cell's execution count is lower than that of the cells
# above it, so the recorded output may come from an earlier kernel state --
# confirm by re-running the notebook top to bottom.
train_df.shape

(500, 13)