In [None]:
import torch

In [None]:
# Sanity check that a CUDA device is visible before any model loading is attempted;
# the model is later placed on device index 0 via `device_map`.
torch.cuda.is_available()

In [None]:
import os

from huggingface_hub import login

# Never commit an access token to the notebook. The token is read from the
# HF_TOKEN environment variable; when the variable is unset `token` is None and
# `login` falls back to its interactive prompt.
# NOTE(review): the token that used to be hardcoded in this cell has been
# exposed and should be revoked in the Hugging Face account settings.
token = os.environ.get("HF_TOKEN")
login(token)

In [None]:
!pip3 install datasets
!pip install -q -U bitsandbytes
!pip install -q -U git+https://github.com/huggingface/peft.git
!pip install -q -U git+https://github.com/huggingface/accelerate.git
!pip install -q -U git+https://github.com/lvwerra/trl.git

In [None]:
from datasets import load_dataset

# Load the function-calling dataset from the Hugging Face Hub. Later cells read
# the 'query', 'tools' and 'answers' fields of rows in its 'train' split.
ds = load_dataset("0xayman/single-function-calls-dataset-28K")
# Bare expression so the notebook displays the dataset summary.
ds

In [None]:
# Hub repo id used below to fetch the LoRA config and the tokenizer.
# NOTE: the name `model` is rebound further down to the loaded model object, so
# the cells that pass `model` to `from_pretrained(...)` as a repo id only work
# when run before that rebinding (i.e. in top-to-bottom order on a fresh kernel).
model = "0xayman/phi3.5-mini-instruct-fc-v2"

In [None]:
from peft import get_peft_model
import torch
import transformers
from peft import LoraConfig
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer
from transformers import AutoTokenizer

In [None]:
# Read the adapter's LoRA configuration from the repo named by `model` (still a
# string at this point). Its `base_model_name_or_path` is used below to choose
# which base checkpoint to load.
lora_config = LoraConfig.from_pretrained(model)

In [None]:
# bitsandbytes quantization settings passed to the base-model load below.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # load the weights in 4-bit precision
    bnb_4bit_use_double_quant=True,         # enable double quantization
    bnb_4bit_quant_type="nf4",              # use the NF4 4-bit quantization type
    bnb_4bit_compute_dtype=torch.bfloat16   # run computations in bfloat16
)

In [None]:
# Tokenizer is loaded from the same repo id as the LoRA config (`model` is
# still the repo id string here).
tokenizer = AutoTokenizer.from_pretrained(model)

In [None]:
# Load the base checkpoint named in the LoRA config, quantized according to
# `bnb_config`, with the whole model mapped to device index 0.
# NOTE: this rebinds `model` from the repo id string to the loaded model object.
model = AutoModelForCausalLM.from_pretrained(
    lora_config.base_model_name_or_path,
    quantization_config=bnb_config,
    device_map={"":0})

In [None]:
from peft import PeftModel

# Load the *trained* adapter weights onto the quantized base model.
# `get_peft_model(model, lora_config)` only attaches a freshly initialised LoRA
# adapter built from the config, so the fine-tuned weights stored in the adapter
# repo would never be loaded and every generation below would effectively come
# from the plain base model.
# The repo id is repeated here because `model` no longer holds the id string at
# this point (it was rebound to the loaded base model in the previous cell).
ADAPTER_REPO_ID = "0xayman/phi3.5-mini-instruct-fc-v2"
model = PeftModel.from_pretrained(model, ADAPTER_REPO_ID)

In [None]:
from IPython.display import display, Markdown

In [None]:
import json

In [None]:
from transformers import pipeline

In [None]:
!pip install lm-format-enforcer
!pip install transformers torch lm-format-enforcer huggingface_hub optimum
!pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/

In [None]:
from pydantic import BaseModel
from lmformatenforcer import JsonSchemaParser
from lmformatenforcer.integrations.transformers import build_transformers_prefix_allowed_tokens_fn

In [None]:
# Example of a reference answer (a one-element list holding a single function call):
# [{"name": "linear_regression_prediction", "arguments": {"x_values": [1, 2, 3, 4, 5], "y_values": [2, 4, 6, 8, 10], "target_x": 7}}]

# Shape of one function call the model is asked to produce. Its JSON schema is
# embedded in the prompt and also drives the constrained-decoding parser.
# NOTE: documented with `#` comments rather than a docstring on purpose, because
# pydantic copies a class docstring into the generated schema as its description,
# which would change the prompt text.
class AnswerSchema(BaseModel):
  # Name of the selected function.
  name: str
  # Keyword arguments for the selected function, keyed by argument name.
  arguments: dict

In [None]:
# Text-generation pipeline around the loaded model and tokenizer.
pipe = pipeline('text-generation', model=model, tokenizer=tokenizer)

# Build a token filter from AnswerSchema's JSON schema. It is passed to the
# pipeline as `prefix_allowed_tokens_fn` on every generation call.
parser = JsonSchemaParser(AnswerSchema.schema())
prefix_function = build_transformers_prefix_allowed_tokens_fn(pipe.tokenizer, parser)

In [None]:
def make_inference(query, tools, max_new_tokens=200):
    """Ask the model to pick one function from `tools` for `query`.

    Builds the instruction prompt, runs the module-level `pipe` with the
    schema-constrained `prefix_function`, and returns only the newly generated
    text.

    Args:
        query: The user question, interpolated into the prompt as-is.
        tools: Description of the available functions, interpolated into the
            prompt as-is.
        max_new_tokens: Upper bound on generated tokens (defaults to 200, the
            value that was previously hard-coded).

    Returns:
        The generated completion with surrounding whitespace stripped. It is
        expected to be a JSON object string, but it is not parsed or validated
        here; callers should `json.loads` it themselves.
    """
    # The prompt wording (including its spelling and spacing) is kept exactly
    # as it was: changing it would change what the model is conditioned on.
    prompt = f"""Your task is to select one of the provided functions to answer the user question.
    you should select the most relevant function and extract its arguements from the user's question.\n

    You have access to the following functions:\n
    {tools} \n

    Your should follow the following rules:\n
    1. Your output should ALWAYS be a valid JSON object.\n
    2. Do not make up information.\n
    3. You cannot use functions that are not provided for you.\n
    4. You should pick up only one function to answer the user question.\n
    5. Do not add any extra text before or after the JSON object.\n
    6. Answer only the provided question. do not make up question.\n
    7. Do not repeat the answer. \n

    Your output json object should follow this schema:\n
    {AnswerSchema.schema_json()}
    Begin!\n

    Question: {query} \n
    Answer:
    """

    # return_full_text=False makes the pipeline return only the completion.
    # Slicing the full text with len(prompt) is fragile: if decoding does not
    # reproduce the prompt character-for-character the slice lands in the
    # wrong place and corrupts the JSON.
    outputs = pipe(
        prompt,
        prefix_allowed_tokens_fn=prefix_function,
        max_new_tokens=max_new_tokens,
        return_full_text=False,
    )

    return outputs[0]['generated_text'].strip()

In [None]:
# Index of the training row used for the single-example smoke test below.
EXAMPLE_INDEX = 4515

# Fetch the row once instead of looking it up separately for every field.
sample_row = ds['train'][EXAMPLE_INDEX]
query = sample_row['query']
tools = sample_row['tools']
answers = sample_row['answers']

# Show the reference answer for comparison with the model output.
answers

In [None]:
# Run the model on the sample example and display the raw text it returned.
response = make_inference(query, tools)
response

In [None]:
# Check that the raw response parses as JSON (raises if it does not).
json.loads(response)

In [None]:
import random

# Evaluate on a fixed, reproducible subset: a dedicated seeded generator makes
# "Restart & Run All" select the same examples every time, so success rates are
# comparable between runs.
EVAL_SEED = 42
EVAL_SAMPLE_SIZE = 1000

train_split = ds['train']
# Never ask for more rows than the split contains (random.sample would raise).
sample_size = min(EVAL_SAMPLE_SIZE, len(train_split))
# Sample row indices instead of first materialising the whole split as a list.
sampled_indices = random.Random(EVAL_SEED).sample(range(len(train_split)), sample_size)
random_examples = [train_split[i] for i in sampled_indices]

In [None]:
from tqdm import tqdm

In [None]:
def _call_matches(predicted, expected):
    """Return True when `predicted` names the same function as `expected` with identical arguments."""
    if not isinstance(predicted, dict) or not isinstance(expected, dict):
        return False
    if predicted.get('name') != expected.get('name'):
        return False
    # Full dict equality: missing, extra and differing arguments all count as a mismatch.
    return predicted.get('arguments', {}) == expected.get('arguments', {})


def measure_success_rate(examples):
    """Run the model over `examples` and score its function calls.

    Args:
        examples: Sized iterable of dicts with 'query', 'tools' and 'answers'
            keys. 'answers' is a JSON string encoding a list whose first
            element is the reference call ({'name': ..., 'arguments': ...}).

    Returns:
        A 4-tuple `(valid_json_rate, success_rate_overall, wrong_json,
        wrong_function)`:
          * valid_json_rate: fraction of examples whose output parsed as JSON.
          * success_rate_overall: fraction of examples whose parsed output has
            the reference function name and exactly the reference arguments.
          * wrong_json: examples for which inference or JSON parsing failed.
          * wrong_function: examples that produced valid JSON but did not
            match the reference call (wrong name or wrong arguments).
        Both rates are 0.0 when `examples` is empty.
    """
    valid_json_count = 0
    correct_count = 0
    wrong_json = []
    wrong_function = []

    for example in tqdm(examples, desc="Processing examples", unit="example"):
        try:
            response = json.loads(make_inference(example['query'], example['tools']))
        except Exception:
            # Best effort: any inference or parsing failure is recorded as an
            # invalid-JSON example. `Exception` (not a bare except) keeps
            # KeyboardInterrupt able to stop a long evaluation run.
            wrong_json.append(example)
            continue
        valid_json_count += 1

        answer = json.loads(example['answers'])[0]
        if _call_matches(response, answer):
            correct_count += 1
        else:
            wrong_function.append(example)

    total = len(examples)
    if total == 0:
        # Nothing was evaluated; avoid dividing by zero.
        return 0.0, 0.0, wrong_json, wrong_function

    valid_json_rate = valid_json_count / total
    success_rate_overall = correct_count / total
    return valid_json_rate, success_rate_overall, wrong_json, wrong_function

In [None]:
# Run the evaluation over the sampled examples; this performs one model
# generation per example.
valid_json_rate, success_rate_overall, wrong_json, wrong_function = measure_success_rate(random_examples)

In [None]:
# Display both rates: (fraction of parseable outputs, fraction of correct calls).
valid_json_rate, success_rate_overall

In [None]:
# Number of examples returned in `wrong_function`.
len(wrong_function)

In [None]:
# Display the examples returned in `wrong_function` for manual inspection.
# NOTE(review): this prints the entire list; consider slicing if it is large.
wrong_function