In [1]:
import pandas as pd
from datasets import Dataset, DatasetDict
from datasets import load_dataset
import sys, re, os
sys.path.append("src/utils/")
import text_processing_utils
import evaluate_utils
from transformers import AutoTokenizer

  from pandas.core import (


In [2]:
logs_dir = os.getenv("LOGS_DIR")
data_hub_path = os.getenv("DATA_HUB_PATH")
subset = os.getenv("SUBSET")
dataset_name = data_hub_path.split("/")[-1]
finetuning_data_dir = os.path.join(logs_dir, f"finetuning_data/{os.path.basename(dataset_name)}_{os.path.basename(subset)}")

In [3]:
train_df = pd.read_csv(os.path.join(finetuning_data_dir, "finetuning_df_train.csv"))
test_df = pd.read_csv(os.path.join(finetuning_data_dir, "finetuning_df_test.csv"))

FileNotFoundError: [Errno 2] No such file or directory: 'logs/finetuning_data/OpenThoughts-correct_math/finetuning_df_train.csv'

In [None]:
planner_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
executor_tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B")

In [5]:
# Chat instruct planner
def prepare_dataset_for_chat_instruct_planner(df):
    def add_messages(row):
        row["messages"] = [
            {"role": "system", "content": row["SystemPrompt"]},
            {"role": "user", "content": row["UserPrompt"]},
            {"role": "assistant", "content": row["ExpectedOutput"]}
        ]
        return row

    df = df[df["SystemPrompt"].str.contains("You are the planner")]
    df["ExpectedOutput"] = df["ExpectedOutput"].apply(lambda x: x.replace("**Final Answer**\n", ""))
    dataset = Dataset.from_pandas(df)
    dataset = dataset.map(add_messages)
    return dataset, df

# R1-Distill planner
def prepare_dataset_for_R1_Distill_planner(df, planner_prompt):
    def add_messages(row):
        row["messages"] = [
            {"role": "user", "content": row["UserPrompt"]},
            {"role": "assistant", "content": row["ExpectedOutput"]}
        ]
        return row

    def convert_plan_to_R1_style(plan):
        execution = "<think>\n"+plan.strip()+"\n</think>"
        return execution
    
    df = df[df["SystemPrompt"].str.contains("You are the planner")]
    df["ExpectedOutput"] = df["ExpectedOutput"].apply(lambda x: convert_plan_to_R1_style(x))
    df["ExpectedOutput"] = df["ExpectedOutput"].apply(lambda x: x.replace("**Final Answer**\n", ""))
    df["UserPrompt"] = df.apply(lambda x: x["SystemPrompt"]+"\n\n"+x["UserPrompt"], axis=1)
    del df["SystemPrompt"]

    dataset = Dataset.from_pandas(df)
    dataset = dataset.map(add_messages)
    return dataset, df

# Chat instruct executor
def prepare_dataset_for_chat_instruct_executor(df, executor_prompt):
    def add_messages(row):
        row["messages"] = [
            {"role": "system", "content": row["SystemPrompt"]},
            {"role": "user", "content": row["UserPrompt"]},
            {"role": "assistant", "content": row["ExpectedOutput"]}
        ]
        return row

    df = df[df["SystemPrompt"].str.contains("You are one of the executors")]
    df["SystemPrompt"] = executor_prompt
    dataset = Dataset.from_pandas(df)
    dataset = dataset.map(add_messages)
    return dataset, df

# R1-Distill executor
def prepare_dataset_for_R1_Distill_executor(df, executor_prompt):
    def add_messages(row):
        row["messages"] = [
            {"role": "user", "content": row["UserPrompt"]},
            {"role": "assistant", "content": row["ExpectedOutput"]}
        ]
        return row
    def convert_execution_to_R1_style(execution):
        execution = re.sub(r'<execution.*?>', '', execution, flags=re.IGNORECASE)
        execution = re.sub(r'</execution.*?>', '', execution, flags=re.IGNORECASE)
        execution = "<think>\n"+execution.strip()+"\n</think>"
        return execution
    
    df = df[df["SystemPrompt"].str.contains("You are one of the executors")]
    df["ExpectedOutput"] = df["ExpectedOutput"].apply(lambda x: convert_execution_to_R1_style(x))
    del df["SystemPrompt"]
    df["UserPrompt"] = df["UserPrompt"].apply(lambda x: executor_prompt+"\n\n"+x)

    dataset = Dataset.from_pandas(df)
    dataset = dataset.map(add_messages)
    return dataset, df

In [6]:
executor_prompt = text_processing_utils.read_file_as_string("src/prompts/shortened_executor_prompt_r1_distill.txt")

dataset_dict = DatasetDict({
    "train": prepare_dataset_for_R1_Distill_executor(train_df, executor_prompt)[0],
    "test": prepare_dataset_for_R1_Distill_executor(test_df, executor_prompt)[0]
})
dataset_dict.push_to_hub("emilbiju/Shortened-Execution-Data-Math-think", "math")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["ExpectedOutput"] = df["ExpectedOutput"].apply(lambda x: convert_execution_to_R1_style(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["UserPrompt"] = df["UserPrompt"].apply(lambda x: executor_prompt+"\n\n"+x)


Map:   0%|          | 0/15889 [00:00<?, ? examples/s]

Map:   0%|          | 0/1779 [00:00<?, ? examples/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/16 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/710 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/emilbiju/Shortened-Execution-Data-Math-think/commit/47ed4cef141d1d32cacfc242bd5388a6fb068724', commit_message='Upload dataset', commit_description='', oid='47ed4cef141d1d32cacfc242bd5388a6fb068724', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/emilbiju/Shortened-Execution-Data-Math-think', endpoint='https://huggingface.co', repo_type='dataset', repo_id='emilbiju/Shortened-Execution-Data-Math-think'), pr_revision=None, pr_num=None)

In [None]:
dataset_dict = DatasetDict({
    "train": prepare_dataset_for_chat_instruct_executor(train_df)[0],
    "test": prepare_dataset_for_chat_instruct_executor(test_df)[0]
})
dataset_dict.push_to_hub("emilbiju/Execution-Data-Math", "math")

In [7]:
dataset_dict = DatasetDict({
    "train": prepare_dataset_for_chat_instruct_planner(train_df)[0],
    "test": prepare_dataset_for_chat_instruct_planner(test_df)[0]
})
dataset_dict.push_to_hub("emilbiju/Shortened-Planning-Data-Math", "math")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["ExpectedOutput"] = df["ExpectedOutput"].apply(lambda x: x.replace("**Final Answer**\n", ""))


Map:   0%|          | 0/7334 [00:00<?, ? examples/s]

Map:   0%|          | 0/835 [00:00<?, ? examples/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/8 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/751 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/emilbiju/Shortened-Planning-Data-Math/commit/d20d64233753c83a0ad4fc749c373c6dc36c0ca5', commit_message='Upload dataset', commit_description='', oid='d20d64233753c83a0ad4fc749c373c6dc36c0ca5', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/emilbiju/Shortened-Planning-Data-Math', endpoint='https://huggingface.co', repo_type='dataset', repo_id='emilbiju/Shortened-Planning-Data-Math'), pr_revision=None, pr_num=None)

In [None]:
prepare_dataset_for_R1_Distill_executor(train_df, executor_prompt)[0].head(10)

In [None]:
dataset_dict = DatasetDict({
    "train": prepare_dataset_for_R1_Distill_planner(train_df)[0],
    "test": prepare_dataset_for_R1_Distill_planner(test_df)[0]
})

dataset_dict.push_to_hub("emilbiju/Planning-Data-Math-Full-Soln-think", "math")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["ExpectedOutput"] = df["ExpectedOutput"].apply(lambda x: convert_plan_to_R1_style(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["UserPrompt"] = df.apply(lambda x: x["SystemPrompt"]+"\n\n"+x["UserPrompt"], axis=1)


Map:   0%|          | 0/8706 [00:00<?, ? examples/s]

Map:   0%|          | 0/945 [00:00<?, ? examples/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/9 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/749 [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/datasets/emilbiju/Planning-Data-Math-Full-Soln-think/commit/0c73d8b651169bec08a5e15e85d218f4f0637d20', commit_message='Upload dataset', commit_description='', oid='0c73d8b651169bec08a5e15e85d218f4f0637d20', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/emilbiju/Planning-Data-Math-Full-Soln-think', endpoint='https://huggingface.co', repo_type='dataset', repo_id='emilbiju/Planning-Data-Math-Full-Soln-think'), pr_revision=None, pr_num=None)

In [5]:
def filter_for_dagger_data(df):
    df = df[(df["ExpectedOutput"].str.contains(r"\$#\$", na=False)) | (df["ExpectedOutput"].str.contains("<Final_answer>", na=False))]
    df["ExpectedOutput"] = df["ExpectedOutput"].str.replace(r"\$#\$ ", "", regex=True)
    df["UserPrompt"] = df["UserPrompt"].str.replace(r"\$#\$ ", "", regex=True)
    return df

In [6]:
train_df = filter_for_dagger_data(train_df)
test_df = filter_for_dagger_data(test_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["ExpectedOutput"] = df["ExpectedOutput"].str.replace(r"\$#\$ ", "", regex=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["UserPrompt"] = df["UserPrompt"].str.replace(r"\$#\$ ", "", regex=True)


In [7]:
len(train_df)

2001

In [8]:
dataset_dict = DatasetDict({
    "train": prepare_dataset_for_chat_instruct_planner(train_df)[0],
    "test": prepare_dataset_for_chat_instruct_planner(test_df)[0]
})
dataset_dict.push_to_hub("emilbiju/Planning-Dagger-Data-Math", "math")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["ExpectedOutput"] = df["ExpectedOutput"].apply(lambda x: x.replace("**Final Answer**\n", ""))


Map:   0%|          | 0/846 [00:00<?, ? examples/s]

Map:   0%|          | 0/96 [00:00<?, ? examples/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/emilbiju/Planning-Dagger-Data-Math/commit/768bcc4a9fe38397d08a8e52541a5b4ea02c09c1', commit_message='Upload dataset', commit_description='', oid='768bcc4a9fe38397d08a8e52541a5b4ea02c09c1', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/emilbiju/Planning-Dagger-Data-Math', endpoint='https://huggingface.co', repo_type='dataset', repo_id='emilbiju/Planning-Dagger-Data-Math'), pr_revision=None, pr_num=None)

In [9]:
dataset_dict = DatasetDict({
    "train": prepare_dataset_for_R1_Distill_executor(train_df)[0],
    "test": prepare_dataset_for_R1_Distill_executor(test_df)[0]
})
dataset_dict.push_to_hub("emilbiju/Execution-Dagger-Data-Math-think", "math")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["ExpectedOutput"] = df["ExpectedOutput"].apply(lambda x: convert_execution_to_R1_style(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["UserPrompt"] = df["UserPrompt"].apply(lambda x: executor_prompt+"\n\n"+x)


Map:   0%|          | 0/1155 [00:00<?, ? examples/s]

Map:   0%|          | 0/110 [00:00<?, ? examples/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/emilbiju/Execution-Dagger-Data-Math-think/commit/3941ae60b7e2c0c25a4ab5e2b9d2bd3b2b872437', commit_message='Upload dataset', commit_description='', oid='3941ae60b7e2c0c25a4ab5e2b9d2bd3b2b872437', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/emilbiju/Execution-Dagger-Data-Math-think', endpoint='https://huggingface.co', repo_type='dataset', repo_id='emilbiju/Execution-Dagger-Data-Math-think'), pr_revision=None, pr_num=None)

In [97]:
check_df

Unnamed: 0,ProblemIdx,SystemPrompt,UserPrompt,ExpectedOutput,FinalAnswer,Phase,PromptCount
0,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,"<Plan_1>\nOkay, so Steve has this isosceles tr...",22.21,1.0,1
2,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,"<Plan_2>\nBased on execution_1.1:\nFirst, I ne...",22.21,2.0,5
8,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,<Plan_3>\nBased on execution_2.1:\nSo each of ...,22.21,3.0,1
10,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,"<Plan_4>\nBased on execution_3.1:\nNow, since ...",22.21,4.0,3
14,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,"<Plan_5>\nBased on execution_4.2:\nSimilarly, ...",22.21,5.0,2
...,...,...,...,...,...,...,...
22791,4999,You are the planner of an AI system that follo...,<Question>\nLark has forgotten her locker comb...,"<Plan_1>\nFirst, I'll break it down into each ...",2250,1.0,1
22793,4999,You are the planner of an AI system that follo...,<Question>\nLark has forgotten her locker comb...,<Plan_2>\nBased on execution_1.1:\nStarting wi...,2250,2.0,3
22797,4999,You are the planner of an AI system that follo...,<Question>\nLark has forgotten her locker comb...,"<Plan_3>\nBased on execution_2.1, execution_2....",2250,3.0,2
22800,4999,You are the planner of an AI system that follo...,<Question>\nLark has forgotten her locker comb...,<Plan_4>\nBased on execution_3.2:\nI think tha...,2250,4.0,0


In [12]:
ds = load_dataset("emilbiju/Plan-Execution-Data-Math-Full-Soln", "math")
df = ds["train"].to_pandas()

In [15]:
trajectory = text_processing_utils.read_file_as_string("logs/trajectory_data/OpenThoughts-correct_math/trajectory_data_1.txt")

In [16]:
print(trajectory)

<Question>
Steve has an isosceles triangle with base 8 inches and height 10 inches. He wants to cut it into eight pieces that have equal areas, as shown below. To the nearest hundredth of an inch what is the number of inches in the greatest perimeter among the eight pieces? [asy]
size(150);
defaultpen(linewidth(0.7));
draw((0,0)--(8,0));
for(int i = 0; i < 9; ++i){
draw((4,10)--(i,0));
}
draw((0,-0.5)--(8,-0.5),Bars(5));
label("$8''$",(0,-0.5)--(8,-0.5),S);
[/asy]
</Question>

<Plan_1>
Okay, so Steve has this isosceles triangle with a base of 8 inches and a height of 10 inches. He wants to cut it into eight pieces, each with equal area. The Asymptote code provided seems to show the triangle being divided by lines from the apex (the top vertex) down to the base, which is divided into eight equal segments. The question is asking for the number of inches in the greatest perimeter among these eight pieces, rounded to the nearest hundredth.

<prompt_1.1> Summarize the problem statement and 

In [36]:
def prepare_dataset_for_R1_Distill_full_trajectory(df):
    def add_messages(row):
        row["messages"] = [
            {"role": "user", "content": row["UserPrompt"]},
            {"role": "assistant", "content": row["ExpectedOutput"]}
        ]
        return row
    df["ExpectedOutput"] = df["ExpectedOutput"].apply(lambda x: x.replace("**Final Answer**\n", ""))
    dataset = Dataset.from_pandas(df)
    dataset = dataset.map(add_messages)
    return dataset, df

In [37]:
from tqdm import tqdm

In [38]:
import re
df = []
trajectory_dirs = ["logs/trajectory_data/OpenThoughts-correct_olympiads", "logs/trajectory_data/OpenThoughts-correct_math"]
# trajectory_dirs = ["logs/trajectory_data/OpenThoughts-correct_math"]

for trajectory_dir in trajectory_dirs:
    for filename in tqdm(sorted(os.listdir(trajectory_dir))):
        problem_idx = filename.split('.')[0].split('_')[-1]
        trajectory = text_processing_utils.read_file_as_string(os.path.join(trajectory_dir, filename))
        question_content = re.search(r"<Question>(.*?)</Question>", trajectory, re.DOTALL).group(1)
        trajectory = re.sub(r"<Question>.*?</Question>", "", trajectory, flags=re.DOTALL).strip()
        system_prompt = text_processing_utils.read_file_as_string("src/prompts/global_agent_prompt.txt")
        system_prompt += f"\n\n<Question>\n{question_content.strip()}\n</Question>\n\n"
        user_prompt = system_prompt
        expected_output = "<think>\n"+trajectory.strip()+"\n</think>"
        final_answer_line = evaluate_utils.get_final_answer_line(trajectory)
        if "boxed" in final_answer_line:
            final_answer = evaluate_utils.remove_boxed(evaluate_utils.last_boxed_only_string(final_answer_line))
        else:
            raise Exception(f"Final answer not found for {filename}")
        df.append((problem_idx, user_prompt, expected_output, final_answer))
df = pd.DataFrame(df, columns=["ProblemIdx", "UserPrompt", "ExpectedOutput", "FinalAnswer"])
df = df.sort_values(by="ProblemIdx", key=lambda col: col.astype(int))

100%|██████████| 3059/3059 [00:08<00:00, 371.13it/s]
100%|██████████| 1521/1521 [00:02<00:00, 721.14it/s]


In [39]:
from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(df, test_size=0.1, random_state=42)

In [40]:
dataset_dict = DatasetDict({
    "train": prepare_dataset_for_R1_Distill_full_trajectory(train_df)[0],
    "test": prepare_dataset_for_R1_Distill_full_trajectory(test_df)[0]
})
dataset_dict.push_to_hub("emilbiju/Trajectory-Data-MATH-Olympiad-Merged-Chains-with-prompt")

Map:   0%|          | 0/4122 [00:00<?, ? examples/s]

Map:   0%|          | 0/458 [00:00<?, ? examples/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/5 [00:00<?, ?ba/s]

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/emilbiju/Trajectory-Data-MATH-Olympiad-Merged-Chains-with-prompt/commit/e83b8fee14ec77818e99d4dd40ea20d1474a945f', commit_message='Upload dataset', commit_description='', oid='e83b8fee14ec77818e99d4dd40ea20d1474a945f', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/emilbiju/Trajectory-Data-MATH-Olympiad-Merged-Chains-with-prompt', endpoint='https://huggingface.co', repo_type='dataset', repo_id='emilbiju/Trajectory-Data-MATH-Olympiad-Merged-Chains-with-prompt'), pr_revision=None, pr_num=None)

In [None]:

df = df[df["ExpectedOutput"].str.contains("<Final_answer>")]
df.head()
system_prompt = text_processing_utils.read_file_as_string("src/prompts/global_agent_prompt.txt")
df["Trajectory"] = df.apply(lambda x: x["UserPrompt"]+"\n\n"+x["ExpectedOutput"], axis=1)

Unnamed: 0,ProblemIdx,SystemPrompt,UserPrompt,ExpectedOutput,FinalAnswer,Phase,__index_level_0__,messages
20,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,"<Final_answer>\nTherefore, the greatest perime...",22.21,7.0,20,[{'content': 'You are the planner of an AI sys...
28,2,You are the planner of an AI system that follo...,<Question>\nSimplify $\frac{1}{1+\sqrt{2}}\cdo...,"<Final_answer>\nTherefore, the simplified expr...",-1.0,3.0,7,[{'content': 'You are the planner of an AI sys...
42,5,You are the planner of an AI system that follo...,<Question>\nSuppose that all four of the numbe...,"<Final_answer>\nTherefore, the smallest possib...",6.0,6.0,13,[{'content': 'You are the planner of an AI sys...
52,7,You are the planner of an AI system that follo...,<Question>\nHow many positive integers less th...,"<Final_answer>\nTherefore, there are $\boxed{5...",52.0,4.0,9,[{'content': 'You are the planner of an AI sys...
70,10,You are the planner of an AI system that follo...,<Question>\nPalindromes are numbers that read ...,"<Final_answer>\nTherefore, the least possible ...",1221.0,7.0,17,[{'content': 'You are the planner of an AI sys...


In [None]:
def prepare_dataset_for_chat_instruct_full_trajectory_generator(df):
    def add_messages(row):
        row["messages"] = [
            {"role": "system", "content": row["SystemPrompt"]},
            {"role": "user", "content": row["UserPrompt"]},
            {"role": "assistant", "content": row["ExpectedOutput"]}
        ]
        return row

    df = df[df["ExpectedOutput"].str.contains("<Final_answer>")]
    system_prompt = text_processing_utils.read_file_as_string("src/prompts/global_agent_prompt.txt")
    df["Trajectory"] = df.apply(lambda x: x["UserPrompt"]+"\n\n"+x["ExpectedOutput"], axis=1)
    df["SystemPrompt"] = system_prompt

    del df["UserPrompt"]
    del df["ExpectedOutput"]

    dataset = Dataset.from_pandas(df)
    dataset = dataset.map(add_messages)
    return dataset, df

In [121]:
print(check_df[check_df["ExpectedOutput"].str.contains("Final Answer")]["ExpectedOutput"].iloc[4])

<Final_answer>
To find out how many eggs will be left over after selling them in cartons of 12, we first add up all the eggs that Abigail, Beatrice, and Carson have and then divide that total by 12. The remainder of this division will tell us how many eggs are left over.

Step 1: Add up all the eggs.
\[
37 + 49 + 14 = 100
\]

Step 2: Find the remainder when the total number of eggs is divided by 12. This can be done by looking at the congruence of each individual's eggs modulo 12 and then summing those congruences.

Abigail's eggs modulo 12:
\[
37 \equiv 1 \pmod{12}
\]
Beatrice's eggs modulo 12:
\[
49 \equiv 1 \pmod{12}
\]
Carson's eggs modulo 12:
\[
14 \equiv 2 \pmod{12}
\]

Step 3: Sum the congruences.
\[
1 + 1 + 2 = 4
\]

Therefore, when we divide the total number of eggs by 12, the remainder is 4. This means there will be 4 eggs left over after selling them in cartons of 12.

Final Answer:
\[
\boxed{4}
\]
</Final_answer>


In [114]:
check_df = prepare_dataset_for_chat_instruct_planner(train_df)[1]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["ExpectedOutput"] = df["ExpectedOutput"].apply(lambda x: x.replace("**Final Answer**\n", ""))


Map:   0%|          | 0/8706 [00:00<?, ? examples/s]

In [90]:
check_df

Unnamed: 0,ProblemIdx,SystemPrompt,UserPrompt,ExpectedOutput,FinalAnswer,Phase
0,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,"<Plan_1>\nOkay, so Steve has this isosceles tr...",22.21,1.0
2,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,"<Plan_2>\nBased on execution_1.1:\nFirst, I ne...",22.21,2.0
8,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,<Plan_3>\nBased on execution_2.1:\nSo each of ...,22.21,3.0
10,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,"<Plan_4>\nBased on execution_3.1:\nNow, since ...",22.21,4.0
14,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,"<Plan_5>\nBased on execution_4.2:\nSimilarly, ...",22.21,5.0
...,...,...,...,...,...,...
22791,4999,You are the planner of an AI system that follo...,<Question>\nLark has forgotten her locker comb...,"<Plan_1>\nFirst, I'll break it down into each ...",2250,1.0
22793,4999,You are the planner of an AI system that follo...,<Question>\nLark has forgotten her locker comb...,<Plan_2>\nBased on execution_1.1:\nStarting wi...,2250,2.0
22797,4999,You are the planner of an AI system that follo...,<Question>\nLark has forgotten her locker comb...,"<Plan_3>\nBased on execution_2.1, execution_2....",2250,3.0
22800,4999,You are the planner of an AI system that follo...,<Question>\nLark has forgotten her locker comb...,<Plan_4>\nBased on execution_3.2:\nI think tha...,2250,4.0


In [92]:
check_df["PromptCount"] = check_df["ExpectedOutput"].str.count("<prompt")
check_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  check_df["PromptCount"] = check_df["ExpectedOutput"].str.count("<prompt")


Unnamed: 0,ProblemIdx,SystemPrompt,UserPrompt,ExpectedOutput,FinalAnswer,Phase,PromptCount
0,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,"<Plan_1>\nOkay, so Steve has this isosceles tr...",22.21,1.0,1
2,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,"<Plan_2>\nBased on execution_1.1:\nFirst, I ne...",22.21,2.0,5
8,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,<Plan_3>\nBased on execution_2.1:\nSo each of ...,22.21,3.0,1
10,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,"<Plan_4>\nBased on execution_3.1:\nNow, since ...",22.21,4.0,3
14,1,You are the planner of an AI system that follo...,<Question>\nSteve has an isosceles triangle wi...,"<Plan_5>\nBased on execution_4.2:\nSimilarly, ...",22.21,5.0,2
...,...,...,...,...,...,...,...
22791,4999,You are the planner of an AI system that follo...,<Question>\nLark has forgotten her locker comb...,"<Plan_1>\nFirst, I'll break it down into each ...",2250,1.0,1
22793,4999,You are the planner of an AI system that follo...,<Question>\nLark has forgotten her locker comb...,<Plan_2>\nBased on execution_1.1:\nStarting wi...,2250,2.0,3
22797,4999,You are the planner of an AI system that follo...,<Question>\nLark has forgotten her locker comb...,"<Plan_3>\nBased on execution_2.1, execution_2....",2250,3.0,2
22800,4999,You are the planner of an AI system that follo...,<Question>\nLark has forgotten her locker comb...,<Plan_4>\nBased on execution_3.2:\nI think tha...,2250,4.0,0


In [93]:
check_df["PromptCount"].value_counts()

PromptCount
1     2800
0     2451
2     1541
3      896
4      530
5      247
6      124
7       44
8       29
9       10
10       8
11       5
15       4
13       2
31       2
22       2
89       1
20       1
17       1
24       1
60       1
81       1
64       1
37       1
28       1
29       1
21       1
Name: count, dtype: int64

In [95]:
print(check_df[check_df["PromptCount"] == 10]["ExpectedOutput"].iloc[0])

<Plan_1>
Okay, so I need to find an integer n between 0 and 6 such that n is congruent to 100000 modulo 7. Hmm, let's see. Modular arithmetic, right? So this means when 100000 is divided by 7, the remainder should be n. But 100000 is a big number. Dividing that directly by 7 would be tedious. There must be a smarter way to do this without having to perform long division on such a large number.

First, I remember that in modular arithmetic, especially modulo 7, we can break down the number into smaller parts and take the modulus at each step to simplify calculations. Maybe using properties of exponents or something related to the divisibility rules for 7? Wait, the number here is 100000. Let's write that out as 10^5. So 10^5 modulo 7. That seems manageable.

But how does 10 modulo 7 behave? Let's start there.
10 divided by 7 is 1 with a remainder of 3. So 10 ≡ 3 mod 7.

Then, 10^2 would be (10)^2 ≡ 3^2 = 9 ≡ 2 mod 7 because 9 divided by 7 is 1 with a remainder of 2. Okay, so 10^2 ≡ 2 mo

In [None]:
def prepare_dataset_for_chat_instruct_full_trajectory_generator(df):
    def add_messages(row):
        row["messages"] = [
            {"role": "system", "content": row["SystemPrompt"]},
            {"role": "user", "content": row["UserPrompt"]},
            {"role": "assistant", "content": row["ExpectedOutput"]}
        ]
        return row

    df = df[df["ExpectedOutput"].str.contains("<Final_answer>")]
    system_prompt = text_processing_utils.read_file_as_string("src/prompts/global_agent_prompt.txt")
    df["Trajectory"] = df.apply(lambda x: x["UserPrompt"]+"\n\n"+x["ExpectedOutput"], axis=1)
    df["SystemPrompt"] = system_prompt

    del df["UserPrompt"]
    del df["ExpectedOutput"]

    dataset = Dataset.from_pandas(df)
    dataset = dataset.map(add_messages)
    return dataset, df