In [1]:
!sudo apt-get install -y pciutils
!curl -fsSL https://ollama.com/install.sh | sh # download ollama api
from IPython.display import clear_output

import os
import threading
import subprocess
import requests
import json

def ollama():
    os.environ['OLLAMA_HOST'] = '0.0.0.0:11434'
    os.environ['OLLAMA_ORIGINS'] = '*'
    subprocess.Popen(["ollama", "serve"])

ollama_thread = threading.Thread(target=ollama)
ollama_thread.start()

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  libpci3 pci.ids
The following NEW packages will be installed:
  libpci3 pci.ids pciutils
0 upgraded, 3 newly installed, 0 to remove and 49 not upgraded.
Need to get 343 kB of archives.
After this operation, 1,581 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 pci.ids all 0.0~2022.01.22-1 [251 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libpci3 amd64 1:3.7.0-6 [28.9 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 pciutils amd64 1:3.7.0-6 [63.6 kB]
Fetched 343 kB in 2s (200 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 3.)
debconf: falling back to frontend: Readline
debconf: unable to initializ

In [2]:
from IPython.display import clear_output
!ollama pull llama3.1:8b
# !ollama run hf.co/meta-llama/Llama-3.2-1B-Instruct
clear_output()

In [32]:
import json
import requests
from typing import List, Dict, Any
from pydantic import BaseModel
# import ollama

# Pydantic models for tool and output structures
class ToolArgument(BaseModel):
    arg_name: str
    arg_type: str
    is_array: bool
    is_required: bool

class ToolOutput(BaseModel):
    arg_type: str
    is_array: bool
    is_required: bool

class Tool(BaseModel):
    tool_name: str
    tool_description: str
    args: List[ToolArgument]
    output: ToolOutput

class Argument(BaseModel):
    argument_name: str
    argument_value: Any

class ToolCall(BaseModel):
    tool_name: str
    arguments: List[Argument]

In [5]:
def load_tools(file_path: str) -> List[Tool]:
    with open(file_path, 'r') as f:
        tools_data = json.load(f)
    return [Tool(**tool) for tool in tools_data]

def query_ollama(prompt):
    url = "http://localhost:11434/api/generate"
    # "http://localhost:11434/api/chat"
    data={
            "model": "llama3.1:8b",
            "prompt": prompt,
            "stream": False,
        }
    response = requests.post(url, data=json.dumps(data))
    if response.status_code == 200:
        # print(response.text)
        return response.json()['response']
    else:
        raise Exception(f"Ollama API request failed with status code {response.status_code}")


In [60]:
tools = load_tools('/content/tools.json')


In [61]:
file_name = '/content/data (1).json'

with open(file_name, 'r') as file:
    ground_truth= file.read()

print(type(ground_truth))
# print(json_string)


<class 'str'>


In [62]:
def process_query(query, tools):
    tools_description = "\n".join([f"{tool.tool_name}: {tool.tool_description}" for tool in tools])
    # args = "\n".join([", ".join([f"{tool}: {arg.arg_name} ({arg.arg_type})" for arg in tool.args]) for tool in tools])


    prompt = f"""
You are a query solver. You will be given tools{tools}. Using those tools, you have to solve the query.
To solve the query{query}, at each point, you have to ask sub-questions. These sub-questions are "What is the next tool to use, its arguments and argument values?". Look at answers to the previous sub-questions, which will give you context of how the current set of tools have been chosen so far. Compare this to the greater context, which is, how to solve the next question.

Some important points :
1) Tool description and argument description are very important. Read them to understand what exactly a certain tool generates as output or what inputs a tool can get.
2) Output of tools in the previous step is input to tool for current statement. Compare descriptions, types and examples to get a huge clue.
3) While using a tool, understand all it's argument from its description, type and what values it give ouputs and then select the required argument.
3) Always check if authentication tools like "who_am_i", "team_id", "get_sprint_id", etc. are needed at any point. But don't just see whether the tools are needed also see if any of the argument is required, then only go for these authentication tools.
4) Take care of "type" argument in "works_list" is issues, tickets or tasks are explicitly mentioned.
5) Stop once you feel the task is complete and no further tools are needed to solve the query.
6) To answer the query, you are only allowed to use the tools that we have provided.
7) Understand the query and it's intent then, if the question is simply unsolvable using any tool we have, only return [].
8) Use "$$PREV[i]" to reference the output of the i-th previous tool call.
9) Try not to use a tool more than once while solving a particular query.
10)Try to use minimum number of tools required to solve the query completely, don't add extra tools other than what is required.
11) Only provide the JSON output, without any additional explanations in this format
Whenever a query comes go through all the above points to solve it and follow the above steps carefully, but the output should be just in json format as given below: **
Follow this format for output:
[
    {{
        "tool_name": "<tool_name>",
        "arguments": [
            {{
                "argument_name": "<arg_name>",
                "argument_value": <value>
            }},
            ...
        ]
    }},
    ...
]
.
Refer to these examples for better clarity:

1. Query: Check dependencies for ’FEAT-123’ and ’BUG-987’ then add to sprint
   Output: [
    {{
      "tool_name": "validate_work_dependency",
      "arguments": [
        {{
          "argument_name": "work_item_ids",
          "argument_value": [
            "FEAT-123",
            "BUG-987"
          ]
        }}
      ]
    }},
    {{
      "tool_name": "get_sprint_id",
      "arguments": []
    }},
    {{
      "tool_name": "add_work_items_to_sprint",
      "arguments": [
        {{
          "argument_name": "work_ids",
          "argument_value": [
            "FEAT-123",
            "BUG-987"
          ]
        }},
        {{
          "argument_name": "sprint_id",
          "argument_value": "$$PREV[1]"
        }}
      ]
    }}
  ]

2. Query: Find issues or tasks created by users ’DEVU-123’ or ’DEVU-456’
   Output: [
  "tool_name": "works_list",
  "arguments": [
    {{
      "argument_name": "created_by",
      "argument_value": [
        "DEVU-123",
        "DEVU-456"
      ]
    }},
    {{
      "argument_name": "type",
      "argument_value": [
        "issue",
        "task"
      ]
    }}
  ]
}}
]
3. Query: Assign validated ’TASK-789’ and ’ISSUE-321’ to current user and sync with calendar
   Output:[
    {{
      "tool_name": "who_am_i",
      "arguments": []
    }},
    {{
      "tool_name": "validate_work_dependency",
      "arguments": [
        {{
          "argument_name": "work_item_ids",
          "argument_value": [
            "TASK-789",
            "ISSUE-321"
          ]
        }}
      ]
    }},
    {{
      "tool_name": "assign_work_items",
      "arguments": [
        {{
          "argument_name": "work_item_ids",
          "argument_value": [
            "TASK-789",
            "ISSUE-321"
          ]
        }},
        {{
          "argument_name": "user_id",
          "argument_value": "$$PREV[0]"
        }}
      ]
    }},
    {{
      "tool_name": "sync_work_items_with_calendar",
      "arguments": [
        {{
          "argument_name": "user_id",
          "argument_value": "$$PREV[0]"
        }},
        {{
          "argument_name": "work_item_ids",
          "argument_value": [
            "TASK-789",
            "ISSUE-321"
          ]
        }}
      ]
    }}
    ]
4.  Query:  "What is the meaning of life?"
    Output:[]
"""
    ollama_output = query_ollama(prompt)
    return ollama_output

In [None]:
answer = "str"
user_query ="a"
i = 0
list_queries  = ["List work items owned by 'DEVU-789' needing response for organization 'REV-654'","List issues with 'blocker' severity categorized as tickets","Find issues or tasks created by users 'DEVU-123' or 'DEVU-456'","Fetch 'p3' priority work items that need customer response for 'REV-333'","list their high-severity tickets for 'Globex' user","Summarize tickets from 'support' channel","Prioritize tasks for part 'ENH-789'","Fetch 'medium' severity work items in 'QA Review' or 'Testing'","Summarize work items similar to don:core:dvrv-us-1:devo/0:issue/1","What is the meaning of life?","Who is the president of India?","Prioritize my P0 issues and add them to the current sprint","Summarize high severity tickets from the customer UltimateCustomer","What are my all issues in the triage stage under part FEAT-123? Summarize them.","List all high severity tickets coming in from slack from customer Cust123 and generate a summary of them.","Given a customer meeting transcript T , create action items and add them to my current sprint","Find work items related to 'FEAT-123' and prioritize them.","Find work items with a 'severe' priority and summarize them.","Get all work items similar to TKT-123, summarize them, create issues from that summary, and prioritize them",]
while i < len(list_queries):
  user_query = list_queries[i]
  i+=1
  # user_query = input("Enter your query (or 'quit' to exit): ")
  tool_calls = process_query(user_query, tools)
  answer += str(tool_calls)
  print(tool_calls)

In [None]:
print(answer)

In [65]:
import json
import re

def extract_json_blocks(input_str):
    # Step 1: Remove extraneous square brackets that might surround the JSON block
    input_str = input_str.replace("[", "").replace("]", "")

    # Step 2: Regular expression to extract valid JSON-like structures (blocks with curly braces)
    json_blocks = re.findall(r'\{[^}]*\}', input_str, re.DOTALL)

    valid_jsons = []
    for block in json_blocks:
        try:
            # Attempt to load each block as JSON (wrap them in list format if needed)
            json_data = json.loads(block)
            valid_jsons.append(json_data)
        except json.JSONDecodeError:
            # Ignore any invalid JSON blocks
            continue

    return valid_jsons

# Test input string
input_str = answer

# Extract valid JSON blocks
answer = extract_json_blocks(input_str)

# Output the result
print(answer)


[{'argument_name': 'needs_response', 'argument_value': True}, {'argument_name': 'type', 'argument_value': 'tickets'}, {'argument_name': 'ticket_need_response', 'argument_value': True}, {'argument_name': 'type', 'argument_value': 'issue'}, {'argument_name': 'type', 'argument_value': '$$PREV0.type'}, {'argument_name': 'ticket_need_response', 'argument_value': True}, {'argument_name': 'sprint_id', 'argument_value': '$$PREV1'}, {'argument_name': 'ticket_severity', 'argument_value': 'high'}, {'argument_name': 'severity_type', 'argument_value': '$$PREV1.severity'}, {'argument_name': 'type', 'argument_value': '$$PREV2.type'}, {'argument_name': 'due_on', 'argument_value': '$$PREV3.due_on'}, {'argument_name': 'severity_type', 'argument_value': '$$PREV5.severity_type'}, {'argument_name': 'ticket_source_channel', 'argument_value': 'support'}, {'argument_name': 'severity', 'argument_value': 'medium'}, {'argument_name': 'similarity', 'argument_value': 'don:core:dvrv-us-1:devo/0:issue/1'}, {'argumen

In [66]:
ground_truth = extract_json_blocks(ground_truth)
print(ground_truth)

[{'argument_name': 'ticket_need_response', 'argument_value': True}, {'argument_name': 'ticket_rev_org', 'argument_value': 'REV-654'}, {'argument_name': 'type', 'argument_value': 'ticket'}, {'argument_name': 'ticket_need_response', 'argument_value': True}, {'argument_name': 'issue.rev_orgs', 'argument_value': 'REV-333'}, {'argument_name': 'ticket_severity', 'argument_value': 'high'}, {'argument_name': 'type', 'argument_value': 'ticket'}, {'argument_name': 'type', 'argument_value': 'ticket'}, {'argument_name': 'type', 'argument_value': 'task'}, {'argument_name': 'issue_priority', 'argument_value': 'p0'}, {'argument_name': 'type', 'argument_value': 'issue'}, {'argument_name': 'sprint_id', 'argument_value': '$$PREV3'}, {'argument_name': 'ticket_severity', 'argument_value': 'high'}, {'argument_name': 'type', 'argument_value': 'ticket'}, {'argument_name': 'created_by', 'argument_value': '$$PREV0'}, {'argument_name': 'stage_name', 'argument_value': 'triage'}, {'argument_name': 'type', 'argume

In [12]:
!pip install nltk
!pip install rouge-score
!pip install rouge

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=15d7c661f51cef82ff6868fd36ee8d451115582c5473ad31d2af1815d3e362d4
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2
Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


**Rouge** **Score**

In [73]:
import json
from nltk.translate.bleu_score import sentence_bleu
from rouge import Rouge

def evaluate_json_bleu_rouge_list(llm_responses, ground_truths):
    # Initialize lists to store scores for each pair
    bleu_scores = []
    rouge_scores_list = []

    rouge = Rouge()

    # # Ensure that both input lists have the same length
    # if len(llm_responses) != len(ground_truths):
    #     raise ValueError("The length of llm_responses and ground_truths must be the same.")

    # Iterate over the responses and ground truths
    for llm_response, ground_truth in zip(llm_responses, ground_truths):
        # Convert JSON objects to strings for comparison
        llm_response_str = json.dumps(llm_response, sort_keys=True)
        ground_truth_str = json.dumps(ground_truth, sort_keys=True)

        # BLEU score evaluation
        reference = [ground_truth_str.split()]  # ground truth as reference
        candidate = llm_response_str.split()  # LLM response as candidate
        bleu_score = sentence_bleu(reference, candidate)

        # ROUGE score evaluation
        rouge_scores = rouge.get_scores(llm_response_str, ground_truth_str, avg=True)

        # Append the scores for the current pair
        bleu_scores.append(bleu_score)
        rouge_scores_list.append(rouge_scores)

    # Return the BLEU and ROUGE scores for all pairs
    return bleu_scores, rouge_scores_list

# Example usage:
llm_responses = answer
ground_truths = ground_truth

bleu, rouge = evaluate_json_bleu_rouge_list(llm_responses, ground_truths)
print("ROUGE Scores:", rouge[0]['rouge-l'])


ROUGE Scores: {'r': 0.75, 'p': 0.75, 'f': 0.749999995}
