In [1]:
import os
import sys
import torch
import importlib

from peft import LoraConfig
from dotenv import load_dotenv
from transformers import AutoModelForCausalLM, AutoTokenizer
from datasets import Dataset

In [2]:
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Load environment variables
load_dotenv()

device = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'
device

'mps'

In [4]:
import src.train
import src.model
import data.countdown

importlib.reload(src.train)
importlib.reload(src.model)
importlib.reload(data.countdown)

from src.train import sft_train_lora
from src.model import identify_target_modules
from data.countdown import Countdown
from data.format import chat_format_qa_instance, lm_format_qa_instance

In [None]:
dataset = Countdown(json_file=os.environ["COUNTDOWN_DATASET"])

use_chat_format = True

In [6]:
use_chat_format

True

In [7]:
few_shot_example = dataset[0]
few_shot_prompt = (
    f"Here is an example:\n"
    f"{few_shot_example['question']}"
    f"{few_shot_example['answer']}"
)

In [8]:
few_shot_example

{'question': 'The target is: 100\nThe available numbers are: [70, 63, 75, 32]\nDescribe how to reach the target using the given numbers.',
 'answer': "The search path used for this problem was: Current State: 100:[70, 63, 75, 32], Operations: []\nExploring Operation: 75-70=5, Resulting Numbers: [63, 32, 5]\nGenerated Node #0,0: 100:[63, 32, 5] Operation: 75-70=5\nMoving to Node #0,0\nCurrent State: 100:[63, 32, 5], Operations: ['75-70=5']\nExploring Operation: 63+32=95, Resulting Numbers: [5, 95]\nGenerated Node #0,0,0: 100:[5, 95] Operation: 63+32=95\nMoving to Node #0,0,0\nCurrent State: 100:[5, 95], Operations: ['75-70=5', '63+32=95']\nExploring Operation: 5+95=100, Resulting Numbers: [100]\n100,100 equal: Goal Reached\n\nThe optimal path was: Current State: 100:[70, 63, 75, 32], Operations: []\nExploring Operation: 70-63=7, Resulting Numbers: [75, 32, 7]\nGenerated Node #2: [75, 32, 7] from Operation: 70-63=7\nCurrent State: 100:[75, 32, 7], Operations: ['70-63=7']\nExploring Opera

In [9]:
few_shot_prompt

"Here is an example:\nThe target is: 100\nThe available numbers are: [70, 63, 75, 32]\nDescribe how to reach the target using the given numbers.The search path used for this problem was: Current State: 100:[70, 63, 75, 32], Operations: []\nExploring Operation: 75-70=5, Resulting Numbers: [63, 32, 5]\nGenerated Node #0,0: 100:[63, 32, 5] Operation: 75-70=5\nMoving to Node #0,0\nCurrent State: 100:[63, 32, 5], Operations: ['75-70=5']\nExploring Operation: 63+32=95, Resulting Numbers: [5, 95]\nGenerated Node #0,0,0: 100:[5, 95] Operation: 63+32=95\nMoving to Node #0,0,0\nCurrent State: 100:[5, 95], Operations: ['75-70=5', '63+32=95']\nExploring Operation: 5+95=100, Resulting Numbers: [100]\n100,100 equal: Goal Reached\n\nThe optimal path was: Current State: 100:[70, 63, 75, 32], Operations: []\nExploring Operation: 70-63=7, Resulting Numbers: [75, 32, 7]\nGenerated Node #2: [75, 32, 7] from Operation: 70-63=7\nCurrent State: 100:[75, 32, 7], Operations: ['70-63=7']\nExploring Operation: 7

In [10]:
def format_with_few_shot(example, use_chat_format=True):
    task_description = (
        "You are tasked to solve arithmetic reasoning problems. "
        "Given a set of numbers and a target, describe the steps in the path to reach the target using those numbers."
    )
    guidelines = (
        "Using arithmetic operations such as addition (+), subtraction (-), multiplication (*) and division (/), "
        "use the initial set of numbers to gather new numbers that eventually reach the target in the end."
    )

    # Format the dataset using the appropriate format
    if use_chat_format:
        return [
                {"role": "user", "content": f"{task_description}\n{guidelines}\n{few_shot_prompt}\n{example["question"]}"},
                {"role": "assistant", "content": example["answer"]}
            ]
        
    else:
        return (
            f"### Question {task_description}\n{guidelines}\n{few_shot_prompt}\n{example["question"]}\n"
            f"### Answer {example["answer"]}"
        )

In [11]:
formatted_data = [format_with_few_shot(example, use_chat_format) for example in dataset]

In [12]:
if use_chat_format:
    MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
else:
    MODEL_NAME = "facebook/opt-125m"

In [13]:
MODEL_NAME

'meta-llama/Llama-3.2-1B-Instruct'

In [None]:
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, token=os.environ["HF_TOKEN"])
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=os.environ["HF_TOKEN"])

# Create a Dataset object with formatted text
dataset = Dataset.from_dict({"chat": formatted_data})
dataset = dataset.map(
    lambda x: {"formatted_text": tokenizer.apply_chat_template(x["chat"], tokenize=False, add_generation_prompt=False)})

Map:   0%|          | 0/172552 [00:00<?, ? examples/s]

In [15]:
dataset[0]

{'chat': [{'content': "You are tasked to solve arithmetic reasoning problems. Given a set of numbers and a target, describe the steps in the path to reach the target using those numbers.\nUsing arithmetic operations such as addition (+), subtraction (-), multiplication (*) and division (/), use the initial set of numbers to gather new numbers that eventually reach the target in the end.\nHere is an example:\nThe target is: 100\nThe available numbers are: [70, 63, 75, 32]\nDescribe how to reach the target using the given numbers.The search path used for this problem was: Current State: 100:[70, 63, 75, 32], Operations: []\nExploring Operation: 75-70=5, Resulting Numbers: [63, 32, 5]\nGenerated Node #0,0: 100:[63, 32, 5] Operation: 75-70=5\nMoving to Node #0,0\nCurrent State: 100:[63, 32, 5], Operations: ['75-70=5']\nExploring Operation: 63+32=95, Resulting Numbers: [5, 95]\nGenerated Node #0,0,0: 100:[5, 95] Operation: 63+32=95\nMoving to Node #0,0,0\nCurrent State: 100:[5, 95], Operati

In [16]:
len(dataset)

172552

In [17]:
target_modules = identify_target_modules(model, name_segment='self_attn')
print(target_modules)

['model.layers.0.self_attn.q_proj', 'model.layers.0.self_attn.k_proj', 'model.layers.0.self_attn.v_proj', 'model.layers.0.self_attn.o_proj', 'model.layers.1.self_attn.q_proj', 'model.layers.1.self_attn.k_proj', 'model.layers.1.self_attn.v_proj', 'model.layers.1.self_attn.o_proj', 'model.layers.2.self_attn.q_proj', 'model.layers.2.self_attn.k_proj', 'model.layers.2.self_attn.v_proj', 'model.layers.2.self_attn.o_proj', 'model.layers.3.self_attn.q_proj', 'model.layers.3.self_attn.k_proj', 'model.layers.3.self_attn.v_proj', 'model.layers.3.self_attn.o_proj', 'model.layers.4.self_attn.q_proj', 'model.layers.4.self_attn.k_proj', 'model.layers.4.self_attn.v_proj', 'model.layers.4.self_attn.o_proj', 'model.layers.5.self_attn.q_proj', 'model.layers.5.self_attn.k_proj', 'model.layers.5.self_attn.v_proj', 'model.layers.5.self_attn.o_proj', 'model.layers.6.self_attn.q_proj', 'model.layers.6.self_attn.k_proj', 'model.layers.6.self_attn.v_proj', 'model.layers.6.self_attn.o_proj', 'model.layers.7.sel

In [18]:
lora_config = LoraConfig(
    target_modules=target_modules,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

In [19]:
sft_train_lora(
    base_model=model,
    train_dataset=dataset,
    eval_dataset=dataset,
    tokenizer=tokenizer,
    adapter_name="sft_lora",
    response_template="### Answer:",
    lora_config=lora_config,
)

Map:   0%|          | 0/172552 [00:00<?, ? examples/s]

Map:   0%|          | 0/172552 [00:00<?, ? examples/s]



RuntimeError: WandbCallback requires wandb to be installed. Run `pip install wandb`.