In [1]:
import json
import google.generativeai as palm

my_config = json.load(open('config.json'))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
palm.configure(api_key=my_config['palm_api_key'])

In [3]:
models = [m for m in palm.list_models() if 'generateText' in m.supported_generation_methods]
model = models[0].name
print(model)

models/text-bison-001


In [51]:
embeddings_model = "models/embedding-gecko-001"

In [5]:
import os.path

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow

SCOPES = ['https://www.googleapis.com/auth/generative-language.tuning']

def load_creds():
    """Converts `oauth-client-id.json` to a credential object.
    
    This function caches the generated tokens to minimize the use of the
    consent screen.
    """
    creds = None
    # The file token.json stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'client_secret_56510766963.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return creds

# load_creds()

In [6]:
arg_prompt = """
I have a argument for an function call. I want to summarize it and get its data type and possible values.
For Example:

Input:{"name": "ticket.severity","description": "Filters for tickets with any of the provided severities. Allowed values: blocker, high, low, medium","type": "array of strings"}
Ouput: {"desc": "Filters for tickets with any of the provided severities", "type": "Array[String]", "allowed": ["blocker", "high", "low", "medium"]}

Input:{"name": "query","description": "The search string, could be for example customer's name, part name, user name.","type": "string"}
Output: {"desc": "The search string, could be for example customer's name, part name, user name.", "type": "String"}

Input: {"name": "limit","description": "The maximum number of works to return. The default is '50'","type": "integer (int32)"}
Output: {"desc": "The maximum number of works to return.", "type": "int32"}

Input: {"name": "applies_to_part", "description": "Filters for work belonging to any of the provided parts", "type": "array of strings", "example": [ "FEAT-123", "ENH-123", "PROD-123", "CAPL-123" ]}
Output: {"desc": "Filters for work belonging to any of the provided parts.", "type": "Array[String]", "example": ["FEAT-123", "ENH-123", "PROD-123", "CAPL-123"]}

solve for the following argument ensuring output is valid json:
Input: %s
Output:
"""

tools = json.load(open('tools.json'))
arguments_description = {}

if os.path.exists('refined_arguments_description.json'):
    arguments_description = json.load(open('refined_arguments_description.json'))
else:
    for tool in tools["tools"]:
        arguments_description[tool["name"]] = []
        for argument in tool.get("arguments" ,[]):
            output = palm.generate_text(
                model=model,
                prompt=arg_prompt % argument,
                temperature=0,
                max_output_tokens=800,
            )
            arguments_description[tool["name"]].append({argument["name"]: eval(output.result)}) 
    json.dump(arguments_description, open('refined_arguments_description.json', 'w'))

In [7]:
refined_arguments_description = json.load(open('refined_arguments_description.json'))

In [182]:
class KnowledgeItem:
    description: str
    tool: str

    def __init__(self, description: str, tool: str) -> None:
        self.description = description
        self.tool = tool
    
    def __str__(self) -> str:
        return f"Know <{self.description} from [{self.tool}]>"
    
    def __repr__(self) -> str:
        return str(self)

In [183]:
knowledge = []

l = list(refined_arguments_description.keys())

for tool in refined_arguments_description:
    if len(refined_arguments_description[tool]) == 0:
        tool_names = [t['name'] for t in tools['tools']]
        index = tool_names.index(tool)
        tool_description = tools['tools'][index]['description'].split('Returns ')[1]
        knowledge.append(KnowledgeItem(tool_description, tool))

print(knowledge)

[Know <the ID of the current sprint from [get_sprint_id]>, Know <the ID of the current user from [who_am_i]>]


In [175]:
# tool_to_get = "works_list"
for tool_to_get in tools['tools']:
    tool_to_get = tool_to_get['name']
    primary_arguments = refined_arguments_description[tool_to_get][:3]
    pp(primary_arguments)

[{'applies_to_part': {'desc': 'Filters for work belonging to any of the '
                              'provided parts.',
                      'type': 'Array[String]',
                      'example': ['FEAT-123',
                                  'ENH-123',
                                  'PROD-123',
                                  'CAPL-123']}},
 {'created_by': {'desc': 'Filters for work created by any of these users',
                 'type': 'Array[String]',
                 'example': ['DEVU-123']}},
 {'owned_by': {'desc': 'Filters for work owned by any of these users',
               'type': 'Array[String]',
               'example': ['DEVU-123']}}]
[{'objects': {'desc': 'List of objects to summarize',
              'type': 'Array[Object]',
              'example': [{'name': 'object1', 'type': 'object1'},
                          {'name': 'object2', 'type': 'object2'}]}}]
[{'objects': {'desc': 'A list of objects to be prioritized',
              'type': 'Array[Object]',
  

In [8]:
tools_description = ""

for tool in tools["tools"]:
    tools_description += "\n" + f"{tool['name']}:{tool['description'].split('.')[0]}" 
    for argument in refined_arguments_description[tool["name"]][:1]:
        tools_description += " with args:"
        for arg, props in argument.items():
            tools_description += f"\n\t{arg}:{props['desc'].split('.')[0]}"
            # if 'example' in props.keys():
            #     tools_description += f" like: {props['example'][:2]}"
    


print(tools_description)
print(len(tools_description))


works_list:Returns a list of work items matching the request with args:
	applies_to_part:Filters for work belonging to any of the provided parts
summarize_objects:Summarizes a list of objects with args:
	objects:List of objects to summarize
prioritize_objects:Returns a list of objects sorted by priority with args:
	objects:A list of objects to be prioritized
add_work_items_to_sprint:Adds the given work items to the sprint with args:
	work_ids:A list of work item IDs to be added to the sprint
get_sprint_id:Returns the ID of the current sprint
get_similar_work_items:Returns a list of work items that are similar to the given work item with args:
	work_id:The ID of the work item for which you want to find similar items
search_object_by_name:Given a search string, returns the id of a matching object in the system of record with args:
	query:The search string, could be for example customer's name, part name, user name
create_actionable_tasks_from_text:Given a text, extracts actionable insig

In [9]:
tool_getter_prompt = """
You are a bot for a company to manage their work items using some tools
Here are some actions you can perform with the tool:%s

Using these as context you solve the relevant tasks strictly using these actions in the correct context as follows

Input: List all high severity tickets coming in from slack from customer Cust123 and generate a summary of them
Output: [("Get parts from customer Cust123","search_object_by_name"),("List all high severity tickets coming in from slack from them","works_list"),("Generate a summary of them","summarize_objects")]

Input: Given a customer meeting transcript T, create action items and add them to my current sprint
Output: [("Create action items from T","create_actionable_tasks_from_text"),("Add them to my current sprint","add_tasks_to_sprint")]

Input: Prioritize my P0 issues and add them to the current sprint
Output: [("Get my P0 issues","works_list"),("Prioritize them","prioritize_objects"),("Add them to the current sprint","add_work_items_to_sprint")]

Input: What did the dog eat for breakfast?
Output: []

Give list for:
Input: %s
Output:
"""

In [10]:
input_prompt = """What are my all issues in the triage stage under part FEAT-123? Summarize them."""
# '[("Get my issues in the triage stage under part FEAT-123", "works_list"),("Summarize them", "summarize_objects")]'

input_prompt = """Get all work items similar to TKT-123, summarize them, create issues from that summary, and prioritize them"""

final_prompt = tool_getter_prompt % (tools_description, input_prompt)

print(len(final_prompt))

completion = palm.generate_text(
    model=model,
    prompt=final_prompt,
    temperature=0,
    # The maximum length of the response
    max_output_tokens=800,
)

completion.result

2383


'[("Get all work items similar to TKT-123","get_similar_work_items"),("Summarize them","summarize_objects"),("Create issues from that summary","create_actionable_tasks_from_text"),("Prioritize them","prioritize_objects")]'

In [46]:
input_prompt = """List all high severity tickets coming in from slack from customer Cust123 and generate a summary of them."""

completion = palm.generate_text(
    model=model,
    prompt=tool_getter_prompt % (tools_description,input_prompt),
    temperature=0,
    max_output_tokens=800,
)

completion.result

'[("Get parts from customer Cust123","search_object_by_name"),("List all high severity tickets coming in from slack from them","works_list"),("Generate a summary of them","summarize_objects")]'

In [47]:
instructions = eval(completion.result)

In [52]:
import numpy as np

def cosine_similarity(embeddings1, embeddings2):
    vector1 = np.array(embeddings1)
    vector2 = np.array(embeddings2)

    # Calculate dot product
    dot_product = np.dot(vector1, vector2)

    # Calculate magnitudes
    magnitude1 = np.linalg.norm(vector1)
    magnitude2 = np.linalg.norm(vector2)

    # Calculate cosine similarity
    similarity = dot_product / (magnitude1 * magnitude2)

    return similarity

In [None]:
palm.generate_text(
    model=embeddings_model,
    prompt="Get all work items similar to TKT-123, summarize them, create issues from that summary, and prioritize them",
    temperature=0,
    max_output_tokens=800,
)

In [75]:
def elaborate_args(args: list[dict]):
    response = ""
    for arg in args:
        for name, props in arg.items():
            response += f"{name}:{props['desc']}"
            if 'allowed' in props.keys():
                response += f" allowing: {props['allowed']}"
            response += "\n\t"

    return response


def get_tool_arguments(instruction):
    directive = instruction[0]
    tool_to_be_used = instruction[1]

    tool_arguments = refined_arguments_description[tool_to_be_used]

    prompt = f"""We want to do:{directive}, using the tool {tool_to_be_used} with the following arguments:\n\t{elaborate_args(tool_arguments)}
    """

    print(prompt)

get_tool_arguments(instructions[1])
    

We want to do:List all high severity tickets coming in from slack from them, using the tool works_list with the following arguments:
	applies_to_part:Filters for work belonging to any of the provided parts.
	created_by:Filters for work created by any of these users
	owned_by:Filters for work owned by any of these users
	issue.rev_orgs:Filters for issues with any of the provided Rev organizations
	issue.priority:Filters for issues with any of the provided priorities allowing: ['p0', 'p1', 'p2', 'p3']
	limit:The maximum number of works to return.
	stage.name:Filters for records in the provided stage(s) by name
	ticket.needs_response:Filters for tickets that need a response
	ticket.severity:Filters for tickets with any of the provided severities allowing: ['blocker', 'high', 'low', 'medium']
	ticket.source_channel:Filters for tickets with any of the provided source channels
	types:Filters for work of the provided types. allowing: ['issue', 'ticket', 'task']
	
    


In [None]:
argument_embeddings = json.load(open('argument_embeddings.json'))

In [55]:
directive_embeddings = palm.generate_embeddings(
    model=embeddings_model,
    text="Get parts from customer Cust123",
)

for arg in argument_embeddings['search_object_by_name']:
    arg_embedding = argument_embeddings['search_object_by_name'][arg]
    print(arg, cosine_similarity(arg_embedding, directive_embeddings['embedding']))

query 0.6966728323092024


In [59]:
import spacy
from spacy import displacy
nlp = spacy.load("en_core_web_lg")

doc = nlp("Get parts from Cust123")

print([(ent.text, ent.label_) for ent in doc.ents])

[]


In [60]:
list(doc.noun_chunks)

[parts, Cust123]

In [50]:
doc2 = nlp("List all high severity tickets from them")

for token in doc2:
    print(token.text, token.dep_, token.head.text, token.head.pos_,
            [child for child in token.children])
chunks = list(doc2.noun_chunks)
cat_text = ""
for chunk in chunks:
    cat_text += chunk.text + " "

print(doc2.adjectives)

List ROOT List NOUN [tickets]
all det tickets NOUN []
high amod tickets NOUN []
severity compound tickets NOUN []
tickets dobj List NOUN [all, high, severity, from]
from prep tickets NOUN [them]
them pobj from ADP []


AttributeError: 'spacy.tokens.doc.Doc' object has no attribute 'adjectives'

In [33]:
print(" ".join(map(list(doc.noun_chunks), str())))




In [17]:
args = refined_arguments_description['works_list']

In [None]:
list(palm.list_models())

In [27]:
argument_embeddings = {}
x = None
for tool in refined_arguments_description:
    argument_embeddings[tool] = {}
    for argument in refined_arguments_description[tool]:
        for arg, props in argument.items():
            argument_ = arg + ":" + props['desc']
            # print(argument_)
            embeddings = palm.generate_embeddings(
                model="models/embedding-gecko-001",
                text=argument_
            )
            argument_embeddings[tool][arg] = embeddings['embedding']

In [29]:
json.dump(argument_embeddings, open('argument_embeddings.json', 'w'), indent=2)

In [39]:
info_embeddings = palm.generate_embeddings(
    model="models/embedding-gecko-001",
    text="List all high severity tickets them "
)

In [44]:
import numpy as np

def cosine_similarity(embeddings1, embeddings2):
    vector1 = np.array(embeddings1)
    vector2 = np.array(embeddings2)

    # Calculate dot product
    dot_product = np.dot(vector1, vector2)

    # Calculate magnitudes
    magnitude1 = np.linalg.norm(vector1)
    magnitude2 = np.linalg.norm(vector2)

    # Calculate cosine similarity
    similarity = dot_product / (magnitude1 * magnitude2)

    return similarity

In [45]:
# import numpy as np

for arg in argument_embeddings['works_list']:
    arg_embedding = argument_embeddings['works_list'][arg]
    print(arg, cosine_similarity(arg_embedding, info_embeddings['embedding']))

applies_to_part 0.6344296627448915
created_by 0.5870074218989871
owned_by 0.6118433946207997
issue.rev_orgs 0.660691662266941
issue.priority 0.7029635073056274
limit 0.6032555833625599
stage.name 0.5941191389196038
ticket.needs_response 0.7088068220806242
ticket.severity 0.8144742037308171
ticket.source_channel 0.6731756104675004
types 0.6195709153384679


In [30]:
print(doc2)
for arg in args:
    for name, props in arg.items():
        print(f"{name} : {props['desc']}")
        relevant_noun_chunks = [(chunk, chunk.similarity(nlp(props['desc']))) for chunk in doc2.noun_chunks if chunk.similarity(nlp(props['desc'])) > 0.3]
        print(relevant_noun_chunks)

        # print(doc2.noun_chunks[0].similarity(nlp(props['desc'])))
        print()

List all high severity tickets from them
applies_to_part : Filters for work belonging to any of the provided parts.
[(all high severity tickets, 0.6258663710735963), (them, 0.5119058029487383)]

created_by : Filters for work created by any of these users
[(all high severity tickets, 0.5973284370231899), (them, 0.44365882280241603)]

owned_by : Filters for work owned by any of these users
[(all high severity tickets, 0.5853890601086897), (them, 0.42142880469493155)]

issue.rev_orgs : Filters for issues with any of the provided Rev organizations
[(all high severity tickets, 0.5996469246404537), (them, 0.37242701507888104)]

issue.priority : Filters for issues with any of the provided priorities
[(all high severity tickets, 0.6185587946715899), (them, 0.39029457688801417)]

limit : The maximum number of works to return.
[(all high severity tickets, 0.5937899029368208), (them, 0.4160570717768777)]

stage.name : Filters for records in the provided stage(s) by name
[(all high severity ticket

In [270]:
input_fields = {
    'applies_to_part': 'Filters for work belonging to any of the provided parts.',
    'created_by': 'Filters for work created by any of these users.',
    'owned_by': 'Filters for work owned by any of these users.',
    'issue.rev_orgs': 'Filters for issues with any of the provided Rev organizations.',
    'issue.priority': 'Filters for issues with any of the provided priorities.',
    'limit': 'The maximum number of works to return.',
    'stage.name': 'Filters for records in the provided stage(s) by name.',
    'ticket.needs_response': 'Filters for tickets that need a response.',
    'ticket.severity': 'Filters for tickets with any of the provided severities.',
    'ticket.source_channel': 'Filters for tickets with any of the provided source channels.',
    'types': 'Filters for work of the provided types.'
}

# Given noun chunk
noun_chunk = "all high severity tickets them"

# Process the noun chunk with spaCy
noun_doc = nlp(noun_chunk)

# Define a function to calculate similarity between the noun chunk and each input field description
def calculate_similarity(description):
    x = noun_doc.similarity(nlp(description))
    print(description, x)
    return x

# Find the most relevant input field
most_relevant_field = max(input_fields, key=lambda field: calculate_similarity(input_fields[field]))

# Print the result
print(f"The most relevant input field for '{noun_chunk}' is: {most_relevant_field}")


Filters for work belonging to any of the provided parts. 0.6678973009450858
Filters for work created by any of these users. 0.6391649774712338
Filters for work owned by any of these users. 0.6248281978477502
Filters for issues with any of the provided Rev organizations. 0.6137762123198153
Filters for issues with any of the provided priorities. 0.6312085020893078
The maximum number of works to return. 0.6117348495352216
Filters for records in the provided stage(s) by name. 0.366006611664165
Filters for tickets that need a response. 0.6691003652021462
Filters for tickets with any of the provided severities. 0.6597274816160735
Filters for tickets with any of the provided source channels. 0.6549307538800886
Filters for work of the provided types. 0.5990385488555882
The most relevant input field for 'all high severity tickets them' is: ticket.needs_response


In [269]:
d = nlp('Filters for issues with any of the provided Rev organizations.')
list(d.noun_chunks)

[Filters, issues, any, the provided Rev organizations]