In [1]:
import json
import torch
import openai
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, TextStreamer
from pprint import pprint

## Hosted Gorilla Model

In [2]:
# def get_gorilla_response(prompt, functions=[], model="gorilla-openfunctions-v0"):
#     openai.api_key = "EMPTY"
#     openai.api_base = "http://luigi.millennium.berkeley.edu:8000/v1"
#     # openai.api_base = "https://limcheekin-gorilla-openfunctions-v1-gguf.hf.space/v1"
#     try:
#         completion = openai.ChatCompletion.create(
#             model=model,
#             temperature=0.0,
#             messages=[{"role": "user", "content": prompt}],
#             functions=functions,
#         )
#         return completion.choices[0].message.content
#     except Exception as e:
#         print(e, model, prompt)

## Local Model

In [3]:
# Device setup
# device: str = "cuda:1"
torch_dtype = torch.float32

In [4]:
_MODEL_PATH = "gorilla-openfunctions-v0"

In [5]:
tokenizer = AutoTokenizer.from_pretrained(_MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(
    _MODEL_PATH,
    device_map="auto",
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
model.hf_device_map

{'model.embed_tokens': 0,
 'model.layers.0': 0,
 'model.layers.1': 0,
 'model.layers.2': 0,
 'model.layers.3': 0,
 'model.layers.4': 0,
 'model.layers.5': 0,
 'model.layers.6': 0,
 'model.layers.7': 0,
 'model.layers.8': 0,
 'model.layers.9': 0,
 'model.layers.10': 0,
 'model.layers.11': 0,
 'model.layers.12': 0,
 'model.layers.13': 0,
 'model.layers.14': 0,
 'model.layers.15': 0,
 'model.layers.16': 1,
 'model.layers.17': 1,
 'model.layers.18': 1,
 'model.layers.19': 1,
 'model.layers.20': 1,
 'model.layers.21': 1,
 'model.layers.22': 1,
 'model.layers.23': 1,
 'model.layers.24': 1,
 'model.layers.25': 1,
 'model.layers.26': 1,
 'model.layers.27': 1,
 'model.layers.28': 1,
 'model.layers.29': 1,
 'model.layers.30': 1,
 'model.layers.31': 1,
 'model.norm': 1,
 'lm_head': 1}

In [7]:
# streamer = TextStreamer(tokenizer=tokenizer, skip_prompt=True)

In [8]:
# pipe = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     max_new_tokens=128,
#     batch_size=16,
#     torch_dtype=torch_dtype,
#     streamer=streamer,
# )

In [9]:
def get_prompt(user_query: str, functions: list = []) -> str:
    return f"USER: <<question>> {user_query} <<function>> {json.dumps(functions)}\nASSISTANT: "

In [10]:
query = 'call an ola cab from bhajanpura to nodia sector 62 in twenty two minutes.'
# query = 'call an ola cab from bhajanpura to nodia sector 62 in twenty two minutes and then from uber for the same route in 10 minutes.'
functions = {
        "name": "Call Cab Function",
        "api_name": "cab.ride",
        "description": "Find suitable ride for customers given the location, type of ride, and the amount of time the customer is willing to wait as parameters.",
        "parameters":  {
            "start_loc": {
                "type": "string",
                # "default": "current_location",
                "description": "location of the starting place of the uber ride"
            },
            "end_loc": {
                "type": "string",
                "description": "location of the ending place of the uber ride"
            },
            "type": {
                "type": "string",
                "enum": ["normal", "plus", "premium"],
                # "default": "normal",
                "description": "types of uber ride user is ordering"
            },
            "time": {
                "type": "integer",
                # "default": "now",
                "description": "the amount of time in minutes the customer is willing to wait"
            },
            "platform": {
                "type": "string",
                # "default": "uber",
                "enum": ["uber", "ola"],
                "description": "the platform the user is ordering the ride from"
            }
        },
        "required": ["start_loc", "end_loc", "type", "time", "platform"]
    }

In [11]:
# model.generation_config.temperature = 0.2
# model.generation_config.temperature = 0.01

In [26]:
prompt = get_prompt(query, functions)
prompt

'USER: <<question>> call an ola cab from bhajanpura to nodia sector 62 in twenty two minutes. <<function>> {"name": "Call Cab Function", "api_name": "cab.ride", "description": "Find suitable ride for customers given the location, type of ride, and the amount of time the customer is willing to wait as parameters.", "parameters": {"start_loc": {"type": "string", "description": "location of the starting place of the uber ride"}, "end_loc": {"type": "string", "description": "location of the ending place of the uber ride"}, "type": {"type": "string", "enum": ["normal", "plus", "premium"], "description": "types of uber ride user is ordering"}, "time": {"type": "integer", "description": "the amount of time in minutes the customer is willing to wait"}, "platform": {"type": "string", "enum": ["uber", "ola"], "description": "the platform the user is ordering the ride from"}}, "required": ["start_loc", "end_loc", "type", "time", "platform"]}\nASSISTANT: '

In [27]:
# pipe(prompt, max_new_tokens=512, return_full_text=False, do_sample=True)
# prompt_ids = tokenizer.encode(prompt, return_tensors="pt")
tokenized_prompt = tokenizer([prompt], return_tensors="pt")
if tokenized_prompt['input_ids'].device != model.device:
    print(f"Moving prompt to {model.device}")
    prompt_ids = tokenized_prompt['input_ids'].to(model.device)
    attention_mask = tokenized_prompt['attention_mask'].to(model.device)

Moving prompt to cuda:0


In [28]:
prompt_ids, attention_mask

(tensor([[    1,  3148,  1001, 29901,  3532, 12470,  6778,  1246,   385,   288,
            433,  7776,   515,   289, 29882,  1175,   273, 29886,  2002,   304,
          18778,   423, 17535, 29871, 29953, 29906,   297, 10081,  1023,  6233,
          29889,  3532,  2220,  6778,  8853,   978,  1115,   376,  5594, 11680,
           6680,   613,   376,  2754, 29918,   978,  1115,   376, 29883,   370,
          29889,  2426,   613,   376,  8216,  1115,   376, 12542, 13907, 22203,
            363, 20330,  2183,   278,  4423, 29892,  1134,   310, 22203, 29892,
            322,   278,  5253,   310,   931,   278, 11962,   338, 17762,   304,
           4480,   408,  4128, 19602,   376, 16744,  1115,  8853,  2962, 29918,
           2029,  1115,  8853,  1853,  1115,   376,  1807,   613,   376,  8216,
           1115,   376,  5479,   310,   278,  6257,  2058,   310,   278,   318,
            495, 22203, 10758,   376,   355, 29918,  2029,  1115,  8853,  1853,
           1115,   376,  1807,   613,   

In [35]:
model

AttributeError: 'function' object has no attribute 'parameters'

In [30]:
output = model.generate(
    prompt_ids,
    # max_length=512,
    # do_sample=True,
    do_sample=False,
    # top_p=0.9,
    # top_k=0,
    # temperature=0.01,
    # num_return_sequences=1,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    # no_repeat_ngram_size=3,
    # repetition_penalty=2.0,
    # length_penalty=1.0,
    num_beams=1,
    # early_stopping=True,
    # use_cache=True,
    # bad_words_ids=[[tokenizer.eos_token_id]]
)

In [25]:
output_str = tokenizer.decode(output[0], skip_special_tokens=True)
output_str

'USER: <<question>> List all the devices in the house. <<function>> [{"name": "Check Battery", "api_name": "devices.battery", "description": "Check the battery of a device", "parameters": {"devices": {"name": "devices", "description": "list of devices to check the battery of", "value": "list"}}, "required": ["devices"]}, {"name": "Get Devices", "api_name": "devices.get", "description": "Get the list of devices", "parameters": [], "returns": "list of devices"}]\nASSISTANT:  devices.battery()'

In [17]:
# query2 = 'List all the devices in the house and their battery percentage.'
query2 = 'List all the devices in the house.'
functions2 = [
    {
        "name": "Check Battery",
        "api_name": "devices.battery",
        "description": "Check the battery of a device",
        "parameters":  {
            "devices": {
                "name": "devices",
                "description": "list of devices to check the battery of",
                "value": "list",
                # "default": "all"
            },
        },
        # "returns": "battery percentage of the device"
        "required": ["devices"]
    },
    {
        "name": "Get Devices",
        "api_name": "devices.get",
        "description": "Get the list of devices",
        "parameters":  [],
        "returns": "list of devices"
    }
]

In [18]:
prompt2 = get_prompt(query2, functions2)
prompt2

'USER: <<question>> List all the devices in the house. <<function>> [{"name": "Check Battery", "api_name": "devices.battery", "description": "Check the battery of a device", "parameters": {"devices": {"name": "devices", "description": "list of devices to check the battery of", "value": "list"}}, "required": ["devices"]}, {"name": "Get Devices", "api_name": "devices.get", "description": "Get the list of devices", "parameters": [], "returns": "list of devices"}]\nASSISTANT: '

In [19]:
tokenized_prompt2 = tokenizer([prompt2], return_tensors="pt")
if tokenized_prompt2['input_ids'].device != model.device:
    print(f"Moving prompt to {model.device}")
    prompt_ids2 = tokenized_prompt2['input_ids'].to(model.device)
    attention_mask2 = tokenized_prompt2['attention_mask'].to(model.device)

Moving prompt to cuda:0


In [20]:
attention_mask2

tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
       device='cuda:0')

In [21]:
output = model.generate(
    prompt_ids2,
    # max_length=512,
    # do_sample=True,
    do_sample=False,
    # top_p=0.9,
    # top_k=0,
    # temperature=0.01,
    # num_return_sequences=1,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    # no_repeat_ngram_size=3,
    # repetition_penalty=2.0,
    # length_penalty=1.0,
    num_beams=1,
    # early_stopping=True,
    # use_cache=True,
    # bad_words_ids=[[tokenizer.eos_token_id]]
)

In [22]:
output_str = tokenizer.decode(output[0], skip_special_tokens=True)
output_str

'USER: <<question>> List all the devices in the house. <<function>> [{"name": "Check Battery", "api_name": "devices.battery", "description": "Check the battery of a device", "parameters": {"devices": {"name": "devices", "description": "list of devices to check the battery of", "value": "list"}}, "required": ["devices"]}, {"name": "Get Devices", "api_name": "devices.get", "description": "Get the list of devices", "parameters": [], "returns": "list of devices"}]\nASSISTANT:  devices.battery()'