## 1. Prepare training data

In [1]:
import json
import os

from tqdm import tqdm

from tapeagents.core import TrainingText
from math_agent import ActionExecutionFailure, MathAgent, MathTape
from tapeagents.llms import LLAMA

In [2]:
# We need the agent to cut tapes into training samples
# (its make_training_data() method is called on every accepted tape further down).
agent = MathAgent(
    llms={
        # A single LLM, registered under the "default" key.
        "default": LLAMA(
            base_url="https://api.together.xyz",
            model_name="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
            # NOTE(review): the tokenizer name lacks the "-Turbo" suffix used in model_name;
            # presumably both variants share one tokenizer — confirm.
            tokenizer_name="meta-llama/Meta-Llama-3.1-8B-Instruct",
        )
    }
)

In [6]:
def _has_repeated_steps(tape) -> bool:
    """Return True if two consecutive steps of *tape* have equal ``llm_view()`` output."""
    previous_view = None
    for step in tape:
        view = step.llm_view()
        if view == previous_view:
            return True
        previous_view = view
    return False


def get_training_samples_from_tapes(tapes_path: str) -> list[TrainingText]:
    """
    Make training samples from tapes that were solved successfully,
    do not contain action execution failures, and do not have repeated steps.

    Every ``*.json`` file directly inside ``tapes_path`` is loaded as a ``MathTape``.
    A tape is skipped (and counted) for the first of these reasons that applies:

    1. two consecutive steps have the same ``llm_view()`` (duplicate),
    2. it contains an ``ActionExecutionFailure`` step,
    3. its ``metadata.result`` does not report ``"solved"`` as truthy
       (a missing or empty result is treated as not solved).

    The remaining tapes are converted with the module-level ``agent``'s
    ``make_training_data``. A summary of the skip counters is printed at the end.

    Args:
        tapes_path: Directory containing the tape JSON files.

    Returns:
        The training samples of all accepted tapes, in sorted file-name order.

    Raises:
        OSError: If ``tapes_path`` cannot be listed or a tape file cannot be read.
    """
    training_samples: list[TrainingText] = []
    failures = 0
    not_solved = 0
    with_duplicates = 0
    # os.listdir() returns entries in arbitrary order; sort so that the resulting
    # sample order (and e.g. training_samples[0]) is reproducible between runs.
    tape_files = sorted(os.path.join(tapes_path, f) for f in os.listdir(tapes_path) if f.endswith(".json"))
    for tape_file in tqdm(tape_files):
        # JSON is UTF-8 by specification; do not depend on the platform default encoding.
        with open(tape_file, encoding="utf-8") as f:
            tape_dict = json.load(f)
        tape = MathTape.model_validate(tape_dict)

        # The duplicate check comes first, so a tape that both repeats steps and
        # contains failures is counted as a duplicate only.
        if _has_repeated_steps(tape):
            with_duplicates += 1
            continue

        step_types = {type(step) for step in tape}
        if ActionExecutionFailure in step_types:
            failures += 1
            continue

        # A tape without a recorded result cannot be confirmed as solved; count it
        # as not solved instead of failing with a KeyError/TypeError.
        result = tape.metadata.result
        if not (result and result.get("solved")):
            not_solved += 1
            continue

        training_samples.extend(agent.make_training_data(tape))
    print(f"Skipped failures: {failures}")
    print(f"Skipped not solved: {not_solved}")
    print(f"Skipped with duplicates: {with_duplicates}")
    print(f"Training samples: {len(training_samples)}")
    return training_samples

In [None]:
# Directory holding the recorded tapes (*.json files); relative to the current working directory.
tapes_path = "gsm8k/tuning/llama31_70b_train_t02/tapes"
# Filter the tapes and convert the accepted ones into training samples.
training_samples = get_training_samples_from_tapes(tapes_path)

In [None]:
# Show the first training sample: its prompt, a separator line, then its completion.
print(
    training_samples[0].prompt_str,
    "=====",
    training_samples[0].completion_str,
    sep="\n",
)

In [None]:
# We can store the training samples in a file and load them later:

# from tapeagents.finetune.data import load_samples, save_samples

# NOTE(review): this path starts with "/" (absolute), while tapes_path in the earlier cell is
# relative ("gsm8k/...") — confirm which location is intended before uncommenting.
# train_samples_file = "/gsm8k/tuning/llama31_70b_train_t02/training_samples_3k.jsonl"
# save_samples(training_samples, train_samples_file)

# training_samples = load_samples(train_samples_file)

# Sanity check: number of training samples currently held in memory.
print(len(training_samples))

## 2. Finetune Llama model on the training data

In [9]:
from tapeagents.finetune.finetune import load_config, run_finetuning_loop

In [None]:
# Load and inspect the LoRA tuning config for Llama 3.1 8B
cfg = load_config("llama31_8b")
# Last expression of the cell: the notebook displays the finetune section as a plain dict.
dict(cfg.finetune)

In [None]:
# Run the finetuning loop with the loaded config on the training samples prepared in section 1.
run_finetuning_loop(cfg=cfg, training_samples=training_samples)