In [1]:
from unsloth import FastLanguageModel
import torch
from unsloth.chat_templates import get_chat_template, train_on_responses_only

model_name = "unsloth/Llama-3.2-1B-Instruct"
dtype = torch.bfloat16 
load_in_4bit = False 

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name, # or choose "unsloth/Llama-3.2-1B"
    # max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    trust_remote_code=True
)
model = FastLanguageModel.get_peft_model(
        model,
        r = 32, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
        target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj",],
        lora_alpha = 64,
        lora_dropout = 0, # Supports any, but = 0 is optimized
        bias = "none",    # Supports any, but = "none" is optimized
        # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
        use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
        random_state = 3407,
        use_rslora = False,  # We support rank stabilized LoRA
        loftq_config = None, # And LoftQ
)

  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
Are you certain you want to do remote code execution?
==((====))==  Unsloth 2024.9.post4: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: NVIDIA A100 80GB PCIe. Max memory: 79.325 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu121. CUDA = 8.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unsloth 2024.9.post4 patched 16 layers with 16 QKV layers, 16 O layers and 16 MLP layers.


In [8]:
from unsloth.chat_templates import get_chat_template, train_on_responses_only
from datasets import Dataset
import functools
import json
from finetune import _get_bfcl_tokenized_ds

tokenizer = get_chat_template(
        tokenizer,
        chat_template = "llama-3.1",
    )
train_data = []
with open('train.json', 'r') as file:
    for line in file:
        json_obj = json.loads(line.strip())
        json_obj['Functions'] = json_obj['Functions'][0] if isinstance(json_obj['Functions'],list) else json_obj['Functions']
        json_obj['Output'] = json_obj['Output'][0] if isinstance(json_obj['Output'],list) else json_obj['Output']
        train_data.append(json_obj)
with open('test.json', 'r') as file:
    test_data = json.load(file)
# train_data = _process_bfcl_train_data(train_data)

In [9]:
train_data[5000]

{'Instruction': 'Can you provide me with images and videos of a specific location with latitude 40.712776 and longitude -74.005974?\n',
 'Functions': "{'name': 'RapidAPI', 'api_name': 'requests.get', 'description': 'Geocoding places Info with images & videos.', 'parameters': [{'name': 'lat', 'description': 'Latitude in decimal degrees (wgs84)', 'type': 'NUMBER'}, {'name': 'lng', 'description': 'Longitude in decimal degrees (wgs84)', 'type': 'NUMBER'}, {'name': 'version', 'description': '', 'type': 'string'}, {'name': 'lang', 'description': 'Prefered language of content.', 'type': 'STRING'}]}\n",
 'Output': 'response = requests.get("https://geocoding-places.p.rapidapi.com/get_geocoding_images/v1", headers={"X-RapidAPI-Key": "SIGN-UP-FOR-KEY", "X-RapidAPI-Host": "geocoding-places.p.rapidapi.com"}, params={"lat": "40.712776", "lng": "-74.005974", "version": "v1", "lang": "en"})'}

In [10]:
train_ds = Dataset.from_list(train_data)
json_or_yaml = "json"
train_ds = train_ds.map(functools.partial(_get_bfcl_tokenized_ds,tokenizer=tokenizer,json_or_yaml=json_or_yaml),batched=True)

Map: 100%|██████████| 12125/12125 [00:02<00:00, 5737.67 examples/s]


In [12]:
print(train_ds['prompt'][500])

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose. If none of the function can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections.
<|eot_id|><|start_header_id|>user<|end_header_id|>

#### Question: I want to export the current configuration of my Anthos cluster named "my-cluster" to the output directory "my-dir" using project "my-project".
Here is a list of functions that you can invoke:
{
 "name": "alpha anthos",
 "api_name": "gcloud.alpha.anthos.export",
 "description": "Export current configuration of an Anthos cluster",
 "parameters": [
  {
   "name": "cluster"
  },
  {
   "name": "project"

## REMAINING WORK
1. Evaluation for BFCL and xLAM dataset
2. finetuning scripts and integration with weights and biases

In [13]:
from unsloth.chat_templates import train_on_responses_only
from transformers import DataCollatorForSeq2Seq
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_ds,
    dataset_text_field = "prompt",
    max_seq_length = 3072,
    dataset_num_proc = 8,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 32,
        gradient_accumulation_steps = 1,
        # warmup_steps = 5,
        num_train_epochs = 1, # Set this for 1 full training run.
        # max_steps = 60,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "cosine",
        seed = 3407,
        output_dir = "outputs",
        save_safetensors=True
    ),
)
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>system<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)
trainer_stats = trainer.train()

Map (num_proc=8): 100%|██████████| 12125/12125 [00:03<00:00, 3321.55 examples/s]
Map: 100%|██████████| 12125/12125 [00:02<00:00, 4117.46 examples/s]
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 12,125 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 32 | Gradient Accumulation steps = 1
\        /    Total batch size = 32 | Total steps = 379
 "-____-"     Number of trainable parameters = 22,544,384


Step,Training Loss
1,1.239
2,0.7833
3,0.8227
4,0.5844
5,0.3279
6,0.5394
7,0.47
8,0.5237
9,0.4468
10,0.4595


In [14]:
print(tokenizer.decode(trainer.train_dataset[100]["input_ids"]))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose. If none of the function can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections.
<|eot_id|><|start_header_id|>user<|end_header_id|>

#### Question: I want to replace null elements in two arrays, arr1 and arr2, with corresponding fill values fill1 and fill2.
Here is a list of functions that you can invoke:
{
 "name": "pyarrow",
 "api_name": "fill_null",
 "description": "Replace null elements in an array",
 "parameters": {
  "required": [
   {
    "name": "values",
    "description": "Array, ChunkedArray, or Scalar-like object. Each null element i

In [15]:
space = tokenizer(" ", add_special_tokens = False).input_ids[0]
tokenizer.decode([space if x == -100 else x for x in trainer.train_dataset[500]["labels"]])

'                                                                                                                                                                                                                                                                            \n\ngcloud.alpha.anthos.export("my-cluster", "--project=my-project", "--output-directory=my-dir")<|eot_id|>'

In [16]:
import json

with open('test.json', 'r') as file:
    test_data = json.load(file)

In [17]:
idx = 100
test_data[idx]['function']

{'name': 'Waterfall Finder',
 'api_call': 'waterfall.find_highest',
 'description': 'Find the highest waterfall in the world.',
 'parameters': {'type': 'object', 'properties': {}, 'required': []}}

In [18]:
test_data[idx]['model_answer']

'waterfall.find_highest()'

In [19]:
import yaml

yaml_func = yaml.dump(test_data[idx]['function'])

In [20]:
print(yaml_func)

api_call: waterfall.find_highest
description: Find the highest waterfall in the world.
name: Waterfall Finder
parameters:
  properties: {}
  required: []
  type: object



In [21]:
import finetune
idx = 10
prompt_messages = finetune._create_messages(
    test_data[idx]['question'],
    functions=json.dumps(test_data[idx]['function'],indent=1),
    # functions=yaml_func,
    output=""
)

In [22]:
prompt_messages

[{'role': 'system',
  'content': 'You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose. If none of the function can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections.\n'},
 {'role': 'user',
  'content': '#### Question: Find the lyrics of the song \'Shape of You\' by Ed Sheeran.Here is a list of functions that you can invoke:\n{\n "name": "Lyrics Finder",\n "api_call": "lyrics.find",\n "description": "Retrieve the lyrics of a specific song.",\n "parameters": {\n  "type": "object",\n  "properties": {\n   "song": {\n    "type": "string",\n    "description": "The name of the song."\n   },\n   "artist": {\n    "type": "string",\n    "description": "The name of the artist."\n   }\n  },\n  "required": [\n   "song",\n   "artis

In [23]:
prompt = tokenizer.apply_chat_template(
    prompt_messages,
    tokenize=False,
    add_generation_prompt=True
)

In [24]:
print(prompt)

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose. If none of the function can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections.
<|eot_id|><|start_header_id|>user<|end_header_id|>

#### Question: Find the lyrics of the song 'Shape of You' by Ed Sheeran.Here is a list of functions that you can invoke:
{
 "name": "Lyrics Finder",
 "api_call": "lyrics.find",
 "description": "Retrieve the lyrics of a specific song.",
 "parameters": {
  "type": "object",
  "properties": {
   "song": {
    "type": "string",
    "description": "The name of the song."
   },
   "artist": {
    "type": "string",
    "d

In [25]:
model.dtype, model.device

(torch.bfloat16, device(type='cuda', index=0))

In [26]:
model.config.torch_dtype = torch.bfloat16

In [27]:
import os
os.environ['CUDA_LAUNCH_BLOCKING']='1'

In [28]:
import unsloth
unsloth.FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    prompt
], return_tensors = "pt").to("cuda:0")

outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True, do_sample=False)
out = tokenizer.batch_decode(outputs)

In [29]:
print(out[0])

<|begin_of_text|><|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

You are an expert in composing functions. You are given a question and a set of possible functions. Based on the question, you will need to make one or more function/tool calls to achieve the purpose. If none of the function can be used, point it out. If the given question lacks the parameters required by the function, also point it out. You should only return the function call in tools call sections.
<|eot_id|><|start_header_id|>user<|end_header_id|>

#### Question: Find the lyrics of the song 'Shape of You' by Ed Sheeran.Here is a list of functions that you can invoke:
{
 "name": "Lyrics Finder",
 "api_call": "lyrics.find",
 "description": "Retrieve the lyrics of a specific song.",
 "parameters": {
  "type": "object",
  "properties": {
   "song": {
    "type": "string",
    "description": "The name of the song."
   },
   "artist": {
    "type":

In [30]:
from finetune import parse_python_function_call

# parse_python_function_call(out[0].)
python_output = out[0][out[0].rfind("<|end_header_id|>",1)+19:-10].strip()

In [31]:
python_output

'lyrics.find(song="Shape of You", artist="Ed Sheeran")'

In [32]:
import ast

def process_ast_node(node):
    if isinstance(node, (ast.Constant, ast.Constant, ast.Constant)):
        return node.value
    elif isinstance(node, ast.List):
        return [process_ast_node(elt) for elt in node.elts]
    else:
        return ast.unparse(node)

def parse_python_function_call(call_str):
    tree = ast.parse(call_str)
    expr = tree.body[0].value

    def extract_function_name(node):
        if isinstance(node, ast.Name):
            return node.id
        elif isinstance(node, ast.Attribute):
            return f"{extract_function_name(node.value)}.{node.attr}"
        else:
            return ast.unparse(node)
    # return expr
    function_name = extract_function_name(expr.func)

    parameters = {}
    noNameParam = []

    # Process positional arguments
    for arg in expr.args:
        noNameParam.append(process_ast_node(arg))

    # Process keyword arguments
    for kw in expr.keywords:
        parameters[kw.arg] = process_ast_node(kw.value)

    if noNameParam:
        parameters["None"] = noNameParam
        
    function_dict = {"name": function_name, "arguments": parameters}
    return function_dict

# Test the function
# call_str = "gcloud.active-directory.domains.trusts.update(DOMAIN='my-other-domain.com', target_dns_ip_addresses=['10.177.0.3'], target_domain_name='my-target-domain.com')"
result = parse_python_function_call(python_output)
print(result)

{'name': 'lyrics.find', 'arguments': {'song': 'Shape of You', 'artist': 'Ed Sheeran'}}


In [33]:
parse_python_function_call(test_data[11]['model_answer'])

{'name': 'news.get_headlines', 'arguments': {'source': 'CNN'}}

In [39]:

po = 'lyrics.find(song="Shape of You", artist="EdSheeran")'

In [37]:
def evaluation(model_answer,gt_answer):
    model_answer = parse_python_function_call(model_answer)
    gt_answer = parse_python_function_call(gt_answer)

    if model_answer['name'] != gt_answer['name']:
        return 0.0
    args_score = 0
    for model_answer_arg,model_answer_val in model_answer['arguments'].items():
        if model_answer_arg not in gt_answer['arguments'] or gt_answer['arguments'][model_answer_arg] != model_answer_val:
            args_score+=0
        else:
            args_score+=1
    return args_score/len(model_answer['arguments'])
evaluation(python_output,test_data[10]['model_answer'])

1.0

In [40]:
evaluation(po,test_data[10]['model_answer'])

0.5