In [1]:
from imports import *
from pipeline import Pipeline, PipeConfig

In [2]:
# Load the tool specifications and the query dataset from disk.
# The encoding is pinned to UTF-8 so the files decode the same way on every
# platform; without it open() falls back to the locale's default encoding.
with open('devrev_tools.json', encoding='utf-8') as f:
    tools = json.load(f)

with open('devrev_data.json', encoding='utf-8') as f:
    data = json.load(f)

In [3]:
# Hand-written (query, solution) pairs handed to the pipeline alongside the tools.
# Each solution is an ordered list of tool calls. The placeholder "$$PREV[i]"
# stands for the output of the i-th call (0-based) of the same solution, so a
# step may only reference indices smaller than its own position.
examples = [
    {
        "query": "Allocate the work items I own that are at the 'QA' stage and with 'medium' severity to the current sprint, after prioritizing and summarizing them.",
        "solution": [
            {
                # Step 0: resolve the current user.
                "tool_name": "who_am_i",
                "arguments": []
            },
            {
                # Step 1: list the matching work items owned by that user.
                "tool_name": "works_list",
                "arguments": [
                    {
                        "argument_name": "owned_by",
                        "argument_value": ["$$PREV[0]"]
                    },
                    {
                        "argument_name": "stage.name",
                        "argument_value": ["QA"]
                    },
                    {
                        "argument_name": "type",
                        "argument_value": ["ticket"]
                    },
                    {
                        "argument_name": "ticket.severity",
                        "argument_value": ["medium"]
                    }
                ]
            },
            {
                # Step 2: prioritize the work items returned by step 1.
                "tool_name": "prioritize_objects",
                "arguments": [
                    {
                        "argument_name": "objects",
                        "argument_value": "$$PREV[1]"
                    }
                ]
            },
            {
                # Step 3: summarize the prioritized items from step 2.
                "tool_name": "summarize_objects",
                "arguments": [
                    {
                        "argument_name": "objects",
                        "argument_value": "$$PREV[2]"
                    }
                ]
            },
            {
                # Step 4: look up the current sprint.
                "tool_name": "get_sprint_id",
                "arguments": []
            },
            {
                # Step 5: add the listed work items to the sprint from step 4.
                "tool_name": "add_work_items_to_sprint",
                "arguments": [
                    {
                        "argument_name": "work_ids",
                        "argument_value": "$$PREV[1]"
                    },
                    {
                        "argument_name": "sprint_id",
                        "argument_value": "$$PREV[4]"
                    }
                ]
            }
        ]
    },

    {
        "query": "Create actionable tasks from the summary of all 'high' severity tickets associated with 'REV-456', get similar work items to those tasks, prioritize by severity, and filter out those that don't need immediate action.",
        "solution": [
            {
                "tool_name": "works_list",
                "arguments": [
                    {
                        "argument_name": "ticket.severity",
                        "argument_value": ["high"]
                    },
                    {
                        "argument_name": "ticket.rev_org",
                        "argument_value": ["REV-456"]
                    },
                    {
                        "argument_name": "type",
                        "argument_value": ["ticket"]
                    },
                    {
                        "argument_name": "ticket.needs_response",
                        "argument_value": True
                    }
                ]
            },
            {
                "tool_name": "summarize_objects",
                "arguments": [
                    {
                        "argument_name": "objects",
                        "argument_value": "$$PREV[0]"
                    }
                ]
            },
            {
                "tool_name": "create_actionable_tasks_from_text",
                "arguments": [
                    {
                        "argument_name": "text",
                        "argument_value": "$$PREV[1]"
                    }
                ]
            },
            {
                "tool_name": "get_similar_work_items",
                "arguments": [
                    {
                        "argument_name": "work_id",
                        "argument_value": "$$PREV[2]"
                    }
                ]
            },
            {
                "tool_name": "prioritize_objects",
                "arguments": [
                    {
                        "argument_name": "objects",
                        "argument_value": "$$PREV[3]"
                    }
                ]
            }
        ]
    }
]

# Pipeline-1: GPT-3.5

In [4]:
# Build the pipeline object from the PipeConfig imported from the pipeline module.
pipe = Pipeline(PipeConfig)

In [5]:
import litellm
# Switch litellm's module-level verbose flag off
# (presumably to keep debug logging out of the cell outputs — confirm).
litellm.set_verbose=False

In [8]:
# Run the pipeline once on a sample query, passing the loaded tool
# specifications and the hand-written examples defined earlier.
query = "Prioritize open tasks, find similar work items for the task and create tasks based on their summary."
output = pipe(query,tools,examples)

In [9]:
output

{'embed_calls': 1,
 'return_tools': [],
 'solution': {'query': 'Prioritize open tasks, find similar work items for the task and create tasks based on their summary.',
  'solution': [{'tool_name': 'works_list',
    'arguments': [{'argument_name': 'status', 'argument_value': ['open']},
     {'argument_name': 'type', 'argument_value': ['task']}]},
   {'tool_name': 'get_similar_work_items',
    'arguments': [{'argument_name': 'work_id',
      'argument_value': '$$PREV[0]'}]},
   {'tool_name': 'summarize_objects',
    'arguments': [{'argument_name': 'objects',
      'argument_value': '$$PREV[1]'}]},
   {'tool_name': 'create_actionable_tasks_from_text',
    'arguments': [{'argument_name': 'text', 'argument_value': '$$PREV[2]'}]}]},
 'time_taken': 5.324092149734497,
 'openai_model': 'gpt-3.5-turbo-1106',
 'embedding_model': 'text-embedding-ada-002',
 'num_embedding_tokens_query': 20,
 'num_embedding_tokens_tool': 0,
 'num_output_tokens': 244,
 'num_input_tokens': 1095,
 'embedding_cost_query'

# Examples retriever

In [None]:
from imports import *
import numpy as np
import copy

class Retriever:
    """Rank tools by embedding similarity to a query.

    Tools are embedded once, keyed by their 'tool_name', and cached on the
    instance together with their embedding. Each call embeds the query and
    scores it against every cached tool.
    """

    def __init__(self, model_name):
        """Remember the embedding model name and start with an empty index."""
        self.model_name = model_name
        self._tools = []          # indexed tool dicts (deep copies, with 'embedding')
        self._tool_names = set()  # names already indexed, used to skip re-embedding

    def embed(self, texts):
        """Embed a list of texts and return one embedding row per text.

        Assumes EmbeddingBackend accepts a list of texts and exposes the
        result as ``.data``, a sequence of items with an 'embedding' entry.
        """
        embeddings_data = EmbeddingBackend(self.model_name, texts).data
        return np.array([item['embedding'] for item in embeddings_data])

    def get_doc(self, tool):
        """Build the text embedded for a tool: its description followed by
        the description of each argument, separated by single spaces."""
        parts = [tool['tool_description']]
        parts.extend(arg['arg_description'] for arg in tool['args'])
        return ' '.join(parts)

    def index(self, tools):
        """Embed and cache every tool whose name has not been indexed yet.

        Returns:
            A pair ``(embed_calls, docs_embeded)``: the number of newly
            embedded tools and their documents joined by spaces. When
            nothing new had to be embedded the pair is ``(0, [])``.
        """
        # Copy only the tools we are going to keep, so callers' dicts are
        # never mutated when the embedding is attached below.
        new_tools = [copy.deepcopy(tool) for tool in tools
                     if tool['tool_name'] not in self._tool_names]
        if not new_tools:
            return 0, []

        docs_to_embed = [self.get_doc(tool) for tool in new_tools]
        # All new documents are embedded in a single batch.
        embeddings = self.embed(docs_to_embed)

        for tool, embedding in zip(new_tools, embeddings):
            tool['embedding'] = embedding
            self._tool_names.add(tool['tool_name'])
            self._tools.append(tool)
        return len(new_tools), ' '.join(docs_to_embed)

    def embedding_similarity(self, query_embedding, tool_embedding):
        """Return the dot product of the two embedding vectors."""
        return (query_embedding.T @ tool_embedding)

    def __call__(self, query, tools=None, k=8):
        """Return the ``k`` indexed tools most similar to ``query``.

        Args:
            query: Text to embed and match against the indexed tools.
            tools: Optional list of tool dicts. New ones are indexed first
                and the result is restricted to tools named in this list.
                When None, every previously indexed tool is a candidate.
            k: Maximum number of tools to return.

        Returns:
            A dict with 'embed_calls', 'return_tools' (indexed tool dicts,
            each carrying 'embedding' and 'similarity'), 'query_embeded'
            and 'docs_embeded'.
        """
        # Defaults for the tools=None path; previously these names were left
        # unassigned and the call crashed before scoring anything.
        embed_calls, docs_embeded = 0, []
        if tools is not None:
            embed_calls, docs_embeded = self.index(tools)

        query_embed = self.embed([query])[0]  # Embed the single query text
        embed_calls += 1
        query_embeded = query

        # Score every indexed tool against the query.
        for tool in self._tools:
            tool['similarity'] = self.embedding_similarity(query_embed, tool['embedding'])

        # Most similar first; sorted() already returns a new list.
        return_tools = sorted(self._tools, key=lambda x: x['similarity'], reverse=True)
        if tools is not None:
            # Filter by name: the indexed dicts carry extra 'embedding' and
            # 'similarity' keys, so they never compare equal to the raw
            # input dicts and a membership test on the dicts drops them all.
            requested_names = {tool['tool_name'] for tool in tools}
            return_tools = [tool for tool in return_tools
                            if tool['tool_name'] in requested_names]
        return {'embed_calls': embed_calls,
                'return_tools': return_tools[:k],
                'query_embeded': query_embeded,
                'docs_embeded': docs_embeded}

    @staticmethod
    def strip_embeddings(tools):
        """Return deep copies of ``tools`` without 'embedding' and
        'similarity'. Tools lacking either key are passed through."""
        stripped_tools = copy.deepcopy(tools)
        for tool in stripped_tools:
            tool.pop('embedding', None)
            tool.pop('similarity', None)
        return stripped_tools

In [5]:
def embed(texts):
    """Embed ``texts`` with the model named by PipeConfig.embedding_model.

    Assumes EmbeddingBackend takes a list of texts and exposes the results
    as ``.data``, one item per text with an 'embedding' entry. Returns a
    NumPy array with one embedding per input text.
    """
    response = EmbeddingBackend(PipeConfig.embedding_model, texts)
    vectors = []
    for record in response.data:
        vectors.append(record['embedding'])
    return np.array(vectors)

In [8]:
# Attach an embedding to every dataset entry under the 'embed' key.
# All query texts are embedded in one batched call instead of one backend
# request per entry; this relies on the backend returning embeddings in input
# order, the same assumption Retriever.index makes. Skipped when data is empty.
if data:
    query_embeddings = embed([query['query'] for query in data])
    for query, query_embedding in zip(data, query_embeddings):
        query['embed'] = query_embedding

In [23]:
# Rank the dataset queries by similarity to a new, unseen query and print
# them from most to least similar together with their scores.
new_query = 'Prioritise my top P0 issues and add them to the current sprint'
new_query_embed = embed([new_query])[0]
scores = []
for query in data:
    # Dot product between the stored query embedding and the new query's embedding.
    scores.append(query['embed'].T @ new_query_embed)
# argsort orders ascending, so reverse it to put the highest score first.
idx = np.argsort(scores)[::-1]
for i in idx:
    print(data[i]['query'],scores[i])

Prioritize my P0 issues and add them to the current sprint 0.9820559665613069
Given a customer meeting transcript T , create action items and add them to my current sprint 0.8447304038034076
Get all work items similar to TKT-123, summarize them, create issues from that summary, and prioritize them 0.8161371033004452
What are my all issues in the triage stage under part FEAT-123? Summarize them. 0.8020886654641636
Summarize work items similar to don:core:dvrv-us-1:devo/0:issue/1 0.7932433821950118
Summarize high severity tickets from the customer UltimateCustomer 0.7864176972938117
List all high severity tickets coming in from slack from customer Cust123 and generate a summary of them. 0.7665894492504293
What is the meaning of life? 0.7144496811530685


In [24]:
examples[0]

NameError: name 'examples' is not defined

In [None]:
class ExampleRetreiev