## How to run

Run the following three sections in order:
  - Importing and loading the data
  - Loading the model
  - Functions
   
   
Then set your own query in the Inference section and call `predict_answer(query)` to get the solution.

# Importing and loading the data

In [None]:
!pip install transformers ctransformers[cuda] sentence-transformers faiss-cpu accelerate bitsandbytes -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m30.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.6/17.6 MB[0m [31m63.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import torch
import json
from transformers import AutoModelForCausalLM, AutoTokenizer

In [None]:
!wget -O tools.json "https://drive.usercontent.google.com/download?id=1RSC_94KKmvtWnI15RVZkLCy4eTVhxGU_&export=download&authuser=0&confirm=t&uuid=394a0e2f-bb02-4027-8224-ef451c182f49&at=APZUnTVfd2focJ6u3DYRiDeKWBUr:1702403341998"
!wget -O examples.json "https://drive.usercontent.google.com/download?id=1n9eede9tNiqfPfWyc3lx1AHgqJgxLFQ5&export=download&authuser=0&confirm=t&uuid=eabc4504-aca7-4dfc-a6d9-52d357149ee6&at=APZUnTWmVVzr0utprDNGk8VXTU6f:1702403392202"
!wget -O claude_examples_ps_tools_all.json "https://drive.usercontent.google.com/download?id=17orzUW3_n31pZFyL0F-ekGH5iKlh2SUX&export=download&authuser=0&confirm=t&uuid=61097f63-24fe-4c51-9710-77d7fd204c2b&at=APZUnTXUxVA_067GuWmlaIrm74Io:1702403397617"
def _load_json(path):
    """Read *path* as UTF-8 text and return the parsed JSON document."""
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


# The wget commands above write their output to the current working directory,
# so read the files back from there instead of a hard-coded "/content" prefix
# (which only exists on Colab, where it is also the default working directory).
query_data = _load_json('examples.json')
tool_data = _load_json('tools.json')
tool_query_data = _load_json('claude_examples_ps_tools_all.json')

# Loading the model

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub checkpoint used for every generation call in this notebook.
model_name = "HuggingFaceH4/zephyr-7b-beta"
# Throw-away prompt; only used by the smoke test at the bottom of this cell.
prompt = "Tell me about gravity"

# `model` and `tokenizer` are module-level globals read by the cells below.
# load_in_4bit / device_map are passed straight through to from_pretrained.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_4bit=True)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
# NOTE(review): the inputs are pinned to "cuda:0"; presumably that is where
# device_map="auto" places the first layers — confirm on multi-GPU or CPU hosts.
model_inputs = tokenizer(prompt, return_tensors="pt").to("cuda:0")

# Smoke test: generate with the default generation settings ...
output = model.generate(**model_inputs)

# ... and print the decoded text so a broken model load is noticed immediately.
print(tokenizer.decode(output[0], skip_special_tokens=True))

# Functions

In [None]:
def simple_prompt(tool_data,query_data,query):
  """Build the few-shot instruction text that is placed in the user turn.

  Args:
    tool_data: Tool catalogue; interpolated into the prompt via ``str()``.
    query_data: Indexable collection of worked examples. Entries 0, 3 and 2
      (in that order) are used as few-shot examples; each must provide the
      keys ``'query'`` and ``'answer'``.
    query: The user query the model should answer.

  Returns:
    The prompt text as a single string.

  Raises:
    IndexError / KeyError: if ``query_data`` has fewer than four entries or
      an example lacks ``'query'`` / ``'answer'``.
  """
  # Few-shot examples, listed in the order they appear in the prompt.
  first, second, third = query_data[0], query_data[3], query_data[2]
  return f"""
  You are given the following TOOLS with their DESCRIPTIONS, ARGUMENTS to the tool in question with their DESCRIPTIONS.
  {tool_data}.
   To reference the value of the ith tool in the chain, use $$PREV[i] as argument value, where i refers to the output of the tool positioned in the ith place in the JSON.
   EXAMPLE QUERY 1: {first['query']}
   ANSWER: {first['answer']}
   EXAMPLE QUERY 2: {second['query']}
   ANSWER: {second['answer']}
   EXAMPLE QUERY 3: {third['query']}
   ANSWER: {third['answer']}
  The above are solutions to some queries.
  It is your task to answer the following QUERY. Think step by step. ONLY OUTPUT THE ANSWER TO THE QUERY AND THE ANSWER SHOULD BE IN JSON FORMAT. DO NOT DEVIATE from the instructions and keep in mind that the output answer should be in correct json format.
   QUERY: {query}."""

In [None]:
def prompt_template(query):
  """Wrap the few-shot instructions for *query* in the Zephyr chat format.

  The system and user turns are each terminated with the end-of-sequence
  marker ``</s>``, as in the chat template published for the Zephyr
  checkpoints; the original hand-written prompt omitted these markers.

  Args:
    query: The user query to be answered with a chain of tool calls.

  Returns:
    The full prompt string, ending with an open ``<|assistant|>`` turn so
    that generation continues as the assistant.
  """
  user_turn = simple_prompt(tool_data,query_data,query)
  return (
    "<|system|>\n"
    "You are a highly intelligent AI assistant that can help users with questions that can be answered by the use of a set of TOOLS. You specialize in identifying tools with arguments to solve the query.</s>\n"
    "<|user|>\n"
    f"{user_turn}</s>\n"
    "<|assistant|>\n"
  )

In [None]:
# Python program for insert and search
# operation in a Trie
import numpy as np

class TrieNode:
    """One node of the token trie."""

    def __init__(self):
        # Sparse ``token id -> TrieNode`` mapping. A dict keeps each node small
        # (the previous vocabulary-sized list cost one slot per vocabulary
        # entry per node) and also accepts ids beyond ``len(tokenizer.vocab)``.
        self.children = {}

        # isEndOfWord is True if node represent the end of the word
        self.isEndOfWord = False


class Trie:
    """Prefix tree over tokenizer token ids.

    Keys are strings; they are tokenized on the way in and the leading
    (beginning-of-sequence) id is dropped before walking the tree.

    Args:
        tokenizer: Optional tokenizer to use. It must be callable as
            ``tokenizer(text)['input_ids']`` and provide ``decode(token_id)``.
            When omitted, the module-level ``tokenizer`` is looked up each
            time it is needed, which is what the original implementation did.
    """

    def __init__(self, tokenizer=None):
        self._tokenizer = tokenizer
        self.root = self.getNode()

    def getNode(self):
        """Return a fresh, empty node."""
        return TrieNode()

    def _tokToIndex(self, ch):
        """Map a token id to its child key (identity; kept for compatibility)."""
        return ch

    def _resolve_tokenizer(self):
        """Return the injected tokenizer, else the module-level ``tokenizer``."""
        if self._tokenizer is not None:
            return self._tokenizer
        return tokenizer

    def _token_ids(self, key):
        """Tokenize *key* and drop the first id (the one the tokenizer prepends)."""
        return self._resolve_tokenizer()(key)['input_ids'][1:]

    def insert(self, key):
        """Add *key* to the trie, creating nodes along its token path."""
        node = self.root
        for token in self._token_ids(key):
            node = node.children.setdefault(token, self.getNode())
        node.isEndOfWord = True

    def get(self, key):
        """Return the decoded tokens that may follow the prefix *key*.

        Returns:
            A list of decoded single tokens, ordered by ascending token id.
            The list is empty both when *key* is a complete entry with no
            continuation and when *key* is not a prefix of any entry.
            (The unknown-prefix case used to return ``False``; an empty list
            is equally falsy but stays safe under list concatenation.)
        """
        node = self.root
        for token in self._token_ids(key):
            node = node.children.get(token)
            if node is None:
                return []
        decode_token = self._resolve_tokenizer().decode
        return [decode_token(token) for token in sorted(node.children)]

In [None]:
# Index every tool name, token by token, so that decoding can be restricted
# to names that actually exist in the tool catalogue.
tool_name_trie = Trie()
for tool in tool_data["tools"]:
    tool_name_trie.insert(tool["tool_name"])

In [None]:
# One trie of argument names per tool, keyed by the tool's name.
argument_name_trie = {}
for tool in tool_data["tools"]:
    arg_trie = argument_name_trie[tool["tool_name"]] = Trie()
    for argument in tool["argument_list"]:
        arg_trie.insert(argument["argument_name"])

In [None]:
def predict(input, next_tokens = None):
  """Greedy variant of ``predict`` based on the raw next-token scores.

  NOTE: a later definition of ``predict`` in this file rebinds the name, so
  once both cells have run this version is no longer the one being called.

  Args:
    input: Tokenized prompt exposing ``input_ids`` and ``attention_mask``.
    next_tokens: Optional list of candidate token ids.

  Returns:
    With ``next_tokens``: a tensor holding the position (within
    ``next_tokens``) of the candidate with the highest score for the next
    step. Without: the decoded text of a default ``generate`` call.
  """
  torch.cuda.empty_cache()
  ids = input.input_ids
  mask = input.attention_mask
  if not next_tokens:
    generated = model.generate(input_ids = ids, attention_mask = mask)
    return tokenizer.decode(generated.cpu()[0])
  step = model.generate(input_ids = ids, attention_mask = mask, max_new_tokens = 1, output_scores = True, return_dict_in_generate=True)
  # scores[0] is the first (and only) generated step; [0] selects the single batch row.
  candidate_scores = step.scores[0][0].cpu()[next_tokens]
  return torch.argmax(candidate_scores, dim = -1).cpu()

In [None]:
def predict(input, next_tokens = None, *, max_new_tokens = 30, temperature = 0.4, stop_token_id = 28739):
  """Sample from the model, optionally restricted to a set of next tokens.

  Args:
    input: Tokenized prompt exposing ``input_ids`` and ``attention_mask``
      (a single sequence is assumed).
    next_tokens: Optional non-empty list of token ids. When given, exactly
      one token is sampled and it is constrained to this list.
    max_new_tokens: Token budget for the unconstrained branch.
    temperature: Sampling temperature used by both branches.
    stop_token_id: Token id that ends unconstrained generation early.
      The default 28739 is the value that was previously hard-coded;
      presumably the id of a closing double quote — TODO confirm.

  Returns:
    With ``next_tokens``: the integer index into ``next_tokens`` of the
    sampled token (the first index if the id occurs more than once).
    Without: a 1-D CPU tensor holding only the newly generated token ids.

  Raises:
    ValueError: if the sampled token is not contained in ``next_tokens``.
  """
  torch.cuda.empty_cache()
  input_ids = input.input_ids
  attention_mask = input.attention_mask

  if next_tokens:
    allowed = list(next_tokens)

    def restrict_decode_vocab(batch_id, input_id):
      # Same candidate set regardless of batch row or prefix.
      return allowed

    sequences = model.generate(max_new_tokens=1, input_ids = input_ids, attention_mask = attention_mask, prefix_allowed_tokens_fn=restrict_decode_vocab, do_sample = True, temperature = temperature).cpu()
    # generate() returns prompt + new token ids (not logits); the last
    # position is the sampled token. Convert to a plain int before searching.
    sampled_id = int(sequences.flatten()[-1])
    return allowed.index(sampled_id)

  sequences = model.generate(input_ids = input_ids, attention_mask = attention_mask, max_new_tokens = max_new_tokens, eos_token_id=stop_token_id, do_sample = True, temperature = temperature).cpu()
  # Strip the prompt so that only the continuation is returned.
  return sequences[0][len(input_ids[0]):]

In [None]:
def add(model_inputs,t1,t2):
  """Append token ids and attention-mask entries to a tokenized batch.

  The new values are created with the dtype and on the device of the tensors
  they are appended to, so the helper works wherever the inputs live instead
  of assuming a device named "cuda", and an empty list is a valid no-op.

  Args:
    model_inputs: Mapping with attribute access (e.g. the tokenizer output)
      holding ``input_ids`` and ``attention_mask`` tensors of shape
      ``(1, seq_len)``.
    t1: List of token ids to append.
    t2: List of attention-mask values to append (same length as ``t1``).

  Returns:
    The same ``model_inputs`` object, updated in place.
  """
  ids = model_inputs.input_ids
  mask = model_inputs.attention_mask
  extra_ids = torch.tensor([t1], dtype=ids.dtype, device=ids.device)
  extra_mask = torch.tensor([t2], dtype=mask.dtype, device=mask.device)
  model_inputs["input_ids"] = torch.cat((ids, extra_ids), dim=1)
  model_inputs["attention_mask"] = torch.cat((mask, extra_mask), dim=1)
  return model_inputs

In [None]:
def decode(prompt):
  """Generate a tool-call answer for `prompt` with a hand-written state machine.

  The fixed parts of the answer (brackets, key names, separators) are appended
  by the code itself; the model is only asked to choose between the allowed
  continuations (tool names and argument names come from the tries) or to
  produce an argument value freely.

  States:
    START - open the answer list, then END or NT
    END   - close the answer list and stop
    NT    - start a new tool entry and pick the first tool-name token
    CT    - continue (or auto-complete) the tool name, then SAL
    SAL   - start the argument list, then EAL or CAN
    EAL   - end the argument list and the tool entry, then END or CTL
    CTL   - add a separator before the next tool, then NT
    CAN   - start a new argument and pick the first argument-name token
    CA    - continue (or auto-complete) the argument name, then AV
    AV    - start the argument value and generate it
    CAV   - append the generated value, then EAV
    EAV   - close the argument entry, then CAL or EAL
    CAL   - add a separator before the next argument, then CAN

  Two views of the text are kept in sync by hand: `input` (a string) and
  `model_inputs` (token ids). NOTE(review): the integer lists passed to add()
  are presumably the tokenizer's ids for the text appended to `input` just
  before them; they are hard-coded and tied to this tokenizer - confirm
  before changing the model.

  Args:
    prompt: Full prompt text, already in the model's chat format.

  Returns:
    The text appended after `prompt`, i.e. the generated answer.
  """
  state = "START"
  input = prompt
  model_inputs = tokenizer(input, return_tensors="pt").to("cuda:0")
  while True:
    torch.cuda.empty_cache()
    # Progress trace: prints the answer built so far on every state transition.
    print(input[len(prompt):])
    match state:
      case "START":
        # this state starts the answer
        input += '['                                         # adding the answer to the input
        model_inputs=add(model_inputs,[733],[1])             # add function adds the given list to the model_inputs
        options = ["]", '{"tool_name":"']                    # these are the options LLM can choose
        next_tokens = []
        for op in options:
          next_tokens.append(tokenizer.encode(op)[1])        # tokenizing the options and only selecting the first token
        model_pred = predict(model_inputs, next_tokens)      # index (into options) of the continuation chosen by the LLM
        if model_pred == 0:                                  # changing the state according to the LLM prediction
          state = "END"                                      # going to END state if the model prediction is ']'
        else:
          state = "NT"                                       # going to NT state if the model prediction is '{"tool_name":"'
      case "END":
        # this state ends the answer
        input += ']'
        model_inputs=add(model_inputs,[4709],[1])
        break
      case "NT":
        # this state starts adding new tool
        input+='{"tool_name":"'
        model_inputs=add(model_inputs,[9830, 6462, 28730, 861, 10549],[1,1,1,1,1])
        options=tool_name_trie.get("")                       # gets the first token of all the tools
        next_tokens = []
        for op in options:
          next_tokens.append(tokenizer.encode(op)[1])
        model_pred = predict(model_inputs, next_tokens)
        state="CT"                                           # changing the state
        next_tool_token=options[model_pred]
        tool_name = next_tool_token                          # storing the tool name for future use
      case "CT":
        # this state continue adding the tool name to the answer
        input+=next_tool_token
        t=[tokenizer(next_tool_token).input_ids[1]]
        model_inputs=add(model_inputs,t,[1])
        options=tool_name_trie.get(tool_name) + ['"']       # gives the continuation token for the tool
        flag = False                                        # to check if we need to be in the same state or to continue the while loop to change the state
        while(len(options) == 2):                           # autofills the tool name if there is no other tool starting with the next_tool_token
          next_tool_token=options[0]
          input+=next_tool_token
          tool_name += next_tool_token
          t=[tokenizer(next_tool_token).input_ids[1]]
          model_inputs=add(model_inputs,t,[1])
          temp = tool_name_trie.get(tool_name)
          if temp:                                          # continues the auto-fill if there are continuation tokens to the existing tool name
            options= temp + ['"']
          else:                                             # else changes the state if the tool name gets completed
            state = "SAL"
            flag = True
            break
        if flag:
          flag = False
          continue
        next_tokens = []
        for op in options:
          next_tokens.append(tokenizer.encode(op)[1])
        model_pred = predict(model_inputs, next_tokens)
        if options[model_pred]=='"':                        # changing the state according to the LLM prediction
          state="SAL"
        else:
          state="CT"
          next_tool_token=options[model_pred]
          tool_name += next_tool_token                      # adding the new token to the tool name
      case "SAL":
        # this state starts the argument list in the respective tool
        # NOTE(review): no closing '"' is appended after the tool name before
        # this text, so the emitted answer may not be valid JSON - confirm.
        input+=',"arguments":['
        model_inputs=add(model_inputs,[28705, 862, 16684, 1264, 28792],[1,1,1,1,1])
        options=[']','{"argument_name":"']
        next_tokens = []
        for op in options:
          next_tokens.append(tokenizer.encode(op)[1])
        model_pred = predict(model_inputs, next_tokens)
        if model_pred==0:                                   # changing the state according to the LLM prediction
          state="EAL"
        else:
          state="CAN"
      case "EAL":
        # this state ends the argument list
        input+=']}'
        model_inputs=add(model_inputs,[4709, 28752],[1,1])
        options=[',',']']
        next_tokens = []
        for op in options:
          next_tokens.append(tokenizer.encode(op)[1])
        model_pred = predict(model_inputs, next_tokens)
        if model_pred==1:                                   # changing the state according to the LLM prediction
          state="END"
        else:
          state="CTL"
      case "CTL":
        # this state continue adding new tools to the existing tool list
        input+=','
        model_inputs=add(model_inputs,[1200],[1])
        state="NT"
      case "CAN":
        # this state start creating the argument name
        # NOTE(review): the text below starts with '"{' whereas the option
        # offered in SAL is '{"argument_name":"'; this looks like a typo, but
        # the hard-coded ids on the next line may have been derived from this
        # exact string - confirm both together before changing either.
        input+='"{argument_name":"'
        model_inputs=add(model_inputs,[25002, 14635, 28730, 861, 10549],[1,1,1,1,1])
        arg_name = ""                                       # storing the argument name for the future usage
        options=argument_name_trie[tool_name].get("")       # gets the first token of all the arguments
        next_tokens = []
        for op in options:
          next_tokens.append(tokenizer.encode(op)[1])
        model_pred = predict(model_inputs, next_tokens)
        if options[model_pred]=='"':                        # changing the state according to the LLM prediction
          state="AV"
        else:
          state="CA"
          next_argument_token=options[model_pred]
          arg_name += next_argument_token
      case "CA":
        # this state continue creating the argument name
        input+=next_argument_token
        t=[tokenizer(next_argument_token).input_ids[1]]
        model_inputs=add(model_inputs,t,[1])
        options=argument_name_trie[tool_name].get(arg_name) + ['"'] # gives the continuation token for the argument name

        flag = False                            # to check if we need to be in the same state or to continue the while loop to change the state
        while(len(options) == 2):               # autofills the argument name if there is no other argument name starting with the next_argument_token
          next_argument_token=options[0]
          input+=next_argument_token
          arg_name += next_argument_token
          t=[tokenizer(next_argument_token).input_ids[1]]
          model_inputs=add(model_inputs,t,[1])
          temp = argument_name_trie[tool_name].get(arg_name)
          if temp:                              # keep auto-filling while the argument name still has continuation tokens
            options= temp + ['"']
          else:                                 # argument name is complete: move on to its value
            state = "AV"
            flag = True
            break
        if flag:
          flag = False
          continue

        next_tokens = []
        for op in options:
          next_tokens.append(tokenizer.encode(op)[1])
        model_pred = predict(model_inputs, next_tokens)
        if options[model_pred]=='"':          # changing the state according to the LLM prediction
          state="AV"
        else:
          state="CA"
          next_argument_token=options[model_pred]
          arg_name += next_argument_token
      case "AV":
        # this state starts adding the argument value to the answer
        # NOTE(review): as in SAL, no closing '"' is appended after the
        # argument name before this text - confirm the resulting JSON.
        input+=',"argument_value":'
        model_inputs=add(model_inputs,[28705, 862, 14635, 28730, 1431, 1264],[1,1,1,1,1,1])
        model_pred = predict(model_inputs)    # unconstrained call: returns the newly generated token ids
        next_argument_value_token=tokenizer.decode(model_pred, skip_special_tokens = True)
        state="CAV"
      case "CAV":
        # this state continue adding the argument value to the answer
        input+=next_argument_value_token
        t=model_pred.tolist()                 # token ids generated in the AV state
        model_inputs=add(model_inputs,t,[1 for i in range(len(t))])
        state="EAV"
      case "EAV":
        # this state ends the argument value
        input+='"}'
        model_inputs=add(model_inputs,[ 345, 28752],[1,1])
        # The generated text is only inspected to decide the next state; it is not appended to the answer.
        model_pred = predict(model_inputs)
        next_argument_value_token=tokenizer.decode(model_pred, skip_special_tokens = True)
        if next_argument_value_token==",":    # changing the state according to the LLM prediction
          state="CAL"
        else:
          state="EAL"
      case "CAL":
        # this state continue adding new arguments to the existing argument list
        input+=','
        model_inputs=add(model_inputs,[1200],[1])
        state="CAN"
  return input[len(prompt):]

In [None]:
def predict_answer(query):
    """Return the generated tool-call answer for a natural-language *query*.

    The query is first wrapped in the chat prompt and the result is then
    handed to the constrained decoder.
    """
    full_prompt = prompt_template(query)
    return decode(full_prompt)

# Inference

In [None]:
# Natural-language request to answer; replace with your own query.
query="Summarize my P1 issues in triage"

In [None]:
# Run constrained decoding; `answer` holds the text generated after the prompt.
answer = predict_answer(query)