# Swallow System

### Queries that focus on: timings, performance, task duration, system state, errors.

In [152]:
import os
# Report only whether the key is configured. Printing the value itself would
# store the secret in the notebook's saved output.
print("SAMBANOVA_API_KEY is set" if os.getenv("SAMBANOVA_API_KEY") else "SAMBANOVA_API_KEY is not set")

[redacted: API key removed from saved output]


In [151]:
import os
from getpass import getpass

# Credentials must not be stored in the notebook, so the key is typed in at run
# time instead of being written into the cell. Submitting an empty value keeps
# whatever key the environment already provides; re-running the cell with a new
# value rotates the key.
# NOTE(review): the key that used to be hardcoded here was saved in plain text
# and should be treated as compromised.
_entered_key = getpass("SambaNova API key (leave blank to keep the current one): ").strip()
if _entered_key:
    os.environ["SAMBANOVA_API_KEY"] = _entered_key
elif not os.environ.get("SAMBANOVA_API_KEY"):
    print("Warning: SAMBANOVA_API_KEY is not set.")

In [78]:
from flowcept import Flowcept
from workflow import Workflow
from qa_chain import QAChain

In [79]:
# Execute the workflow once and keep the value it returns; presumably this is
# the ID of the recorded run (the cell output prints a Workflow_id line).
workflow_id = Workflow.run()

Input I = 12
I → H: 12 → 43
H → E: 43 → 462.25
H → F: 43 → 39.344631145812
H → G: 43 → 38
E → D: 462.25 → 213674.0625
F → C: 39.344631145812 → 10.672359527074835
G → B: 38 → 234.2477321128211
D,C,B → A: (213674.0625, 10.672359527074835, 234.2477321128211) → 71306.32753054664

Final Result: I(12) → A(71306.3275)
Workflow_id=0e5ae5e3-4f21-4836-a4b9-044b423fde2f


In [80]:
# Pin the workflow ID explicitly. This literal overrides whatever Workflow.run()
# returned, so re-running that cell does not change which workflow is queried.
workflow_id = '0e5ae5e3-4f21-4836-a4b9-044b423fde2f'
# Build the QA object for that workflow; `qa` is what every query cell below uses.
qa = QAChain().build(workflow_id)

In [130]:
import time
import pandas as pd
import re

def cot_prompt(query):
    """Wrap ``query`` in the Chain-of-Thought scaffold.

    The query is placed between a "think step by step" lead-in and a closing
    request to explain the reasoning.
    """
    template = "Let's think step by step. {} Please explain your reasoning process."
    return template.format(query)

def benchmark_query_with_tracking(qa_chain, base_query_id, query, runs=5, use_cot=False, context=None):
    """
    Ask the same query several times and summarise the tracked timings.

    Every run goes through ``qa_chain.ask`` under its own query ID. The response
    time and character count are then read back from the last row of
    ``qa_chain.query_df`` that carries that ID.

    Args:
        qa_chain: Object exposing ``ask(prompt, query_id=..., context=...)`` and a
            ``query_df`` DataFrame with ``Query_ID``, ``Response_Time`` and
            ``Response_Chars`` columns.
        base_query_id: Base ID such as "DF-DL-Q01-NS-LC-Swallow-R03". A trailing
            "-R<number>" sets the first run number (3 here); without it the runs
            start at 1. A run marker that is not at the very end of the ID
            (e.g. "...-R1-Swallow") is not parsed and stays part of the ID.
        query: The actual query text.
        runs: Number of times to run the query; must be at least 1.
        use_cot: Whether to wrap the query with ``cot_prompt``. When True, the
            "NS" segment of the ID is renamed to "CoT".
        context: Custom context forwarded to ``qa_chain.ask``. ``None`` lets
            ``ask`` use its own default; any other value, including an empty
            string, is forwarded as given.

    Returns:
        dict with the (possibly CoT-renamed) ``base_query_id``, the ``query``, the
        ``prompt_used``, ``use_cot``, the per-run records under ``runs``, and the
        ``average_response_time_sec`` / ``average_char_count`` over those runs.

    Raises:
        ValueError: If ``runs`` is smaller than 1.
    """
    # Fail before any request is sent; an empty run list cannot be averaged.
    if runs < 1:
        raise ValueError(f"runs must be at least 1, got {runs}")

    prompt = cot_prompt(query) if use_cot else query

    # MODIFY QUERY ID ON COT
    # Only a whole dash-delimited "NS" segment is renamed, so an "NS" that is
    # merely part of another segment is left alone.
    if use_cot:
        base_query_id = re.sub(r'(?<![^-])NS(?![^-])', 'CoT', base_query_id)

    # EXTRACT STARTING ITERATION NUM (only recognised at the end of the ID)
    run_match = re.search(r'-R(\d+)$', base_query_id)
    if run_match:
        start_run = int(run_match.group(1))
        # REMOVE RUN SUFFIX ONLY AT END
        base_id_clean = re.sub(r'-R\d+$', '', base_query_id)
    else:
        start_run = 1
        base_id_clean = base_query_id

    results = []
    for current_run in range(start_run, start_run + runs):
        query_id = f"{base_id_clean}-R{current_run:02d}"

        # `is not None` (rather than truthiness) so an explicit empty context
        # is passed through instead of silently becoming the default context.
        if context is not None:
            result = qa_chain.ask(prompt, query_id=query_id, context=context)
        else:
            result = qa_chain.ask(prompt, query_id=query_id)

        # RESPONSE AND TIMING INFO: latest tracked row for this query ID
        tracked = qa_chain.query_df
        query_row = tracked[tracked['Query_ID'] == query_id].iloc[-1]

        results.append({
            "query_id": query_id,
            "run": current_run,
            "response_time_sec": query_row['Response_Time'],
            "response": result["result"],
            "char_count": query_row['Response_Chars']
        })

    avg_time = sum(r["response_time_sec"] for r in results) / len(results)
    avg_char_count = sum(r["char_count"] for r in results) / len(results)

    return {
        "base_query_id": base_query_id,
        "query": query,
        "prompt_used": prompt,
        "use_cot": use_cot,
        "runs": results,
        "average_response_time_sec": avg_time,
        "average_char_count": avg_char_count
    }

def run_query_suite(qa_chain, query_suite):
    """
    Benchmark every configuration in ``query_suite`` and collect the summaries.

    For each entry a banner with its base ID and query is printed, the entry is
    passed to ``benchmark_query_with_tracking``, and the average response time
    and character count of the returned summary are printed.

    Args:
        qa_chain: Your QAChain instance
        query_suite: List of dictionaries. ``base_id`` and ``query`` are
            required; ``runs`` (default 5), ``use_cot`` (default False) and
            ``context`` (default None) are optional.

    Returns:
        List with one summary dict per configuration, in suite order.

    Example:
        suite = [
            {"base_id": "DF-DL-Q01-NS-LC-Swallow-R01",
             "query": "What is the data lineage?",
             "runs": 3, "use_cot": False, "context": None},
        ]
    """
    divider = "=" * 60
    suite_results = []

    for entry in query_suite:
        print("\n" + divider)
        print("Running: {}".format(entry["base_id"]))
        print("Query: {}".format(entry["query"]))
        print(divider)

        summary = benchmark_query_with_tracking(
            qa_chain=qa_chain,
            base_query_id=entry["base_id"],
            query=entry["query"],
            runs=entry.get("runs", 5),
            use_cot=entry.get("use_cot", False),
            context=entry.get("context"),
        )
        suite_results.append(summary)

        print("Average Response Time: {:.2f}s".format(summary["average_response_time_sec"]))
        print("Average Character Count: {:.0f}".format(summary["average_char_count"]))

    return suite_results

def generate_query_ids_for_batch(base_id, runs, start_run=1):
    """
    Build the per-run query IDs for a batch, e.g. for batch accuracy updates.

    A trailing "-R<number>" on ``base_id`` is stripped first; the run numbers
    come from ``start_run`` only.

    Args:
        base_id: Base query ID (a trailing run suffix is ignored)
        runs: Number of IDs to generate
        start_run: Run number of the first ID

    Returns:
        List of IDs of the form "<base>-R<two-digit run number>".
    """
    stem = re.sub(r'-R\d+$', '', base_id)
    return [f"{stem}-R{run_number:02d}" for run_number in range(start_run, start_run + runs)]


In [82]:
import pandas as pd

# DF TO INSTANCE
# Attach an empty tracking table to `qa`. One row is meant to describe one
# query: its ID, text and length, the response text and length, the response
# time, and an accuracy score.
qa.query_df = pd.DataFrame(columns=[
    'Query_ID', 'Query_Text', 'Query_Chars', 'Response_Text', 
    'Response_Chars', 'Response_Time', 'Accuracy'
])

# METHODS
def update_accuracy(self, query_id, accuracy_score):
    """Set ``Accuracy`` on every tracked row whose ``Query_ID`` equals ``query_id``.

    Prints a confirmation when at least one row matched, otherwise a
    "not found" message; the table is left untouched in that case.
    """
    matches = self.query_df['Query_ID'].eq(query_id)
    if not matches.any():
        print(f"Query ID {query_id} not found")
        return
    self.query_df.loc[matches, 'Accuracy'] = accuracy_score
    print(f"Updated accuracy for {query_id}: {accuracy_score}")

def export_queries(self, filename="query_results.csv"):
    """Write the tracked queries to ``filename`` as CSV (index omitted) and print the path."""
    tracked = self.query_df
    tracked.to_csv(filename, index=False)
    print("Query results exported to {}".format(filename))

# METHODS IN INSTANCE
import types
# Bind the two helpers to this particular `qa` object (instance attributes),
# so they can be called as qa.update_accuracy(...) / qa.export_queries(...).
qa.update_accuracy = types.MethodType(update_accuracy, qa)
qa.export_queries = types.MethodType(export_queries, qa)

# TEST
# Show the freshly created (still empty) tracking table.
print(qa.query_df)

Empty DataFrame
Columns: [Query_ID, Query_Text, Query_Chars, Response_Text, Response_Chars, Response_Time, Accuracy]
Index: []


In [83]:
import types

def ask(self, query, query_id=None, context=None):
    """
    Send ``query`` to the QA chain, log the exchange, and return the raw result.

    The context is prepended to the query as "<context>. <query>". The call to
    ``self.qa_chain`` is timed, and one row (ID, query text and length, response
    text and length, response time, empty accuracy) is appended to
    ``self.query_df``. The query text and length that are logged refer to
    ``query`` alone, without the context.

    Args:
        query: Question text.
        query_id: ID stored with the row. If omitted, "Q_<n>" is generated from
            the current number of tracked rows.
        context: Text placed in front of the query. ``None`` selects the built-in
            description of the task documents; an empty string sends the bare
            query with no prefix.

    Returns:
        The object returned by ``self.qa_chain``; its "result" entry holds the
        response text.
    """
    # Local imports keep the function usable when it is bound from a fresh cell.
    from time import perf_counter
    import pandas as pd

    # If no query_id provided, auto-generate one
    if query_id is None:
        query_id = f"Q_{len(self.query_df) + 1}"

    if context is None:
        context = "Each document represents a task. All tasks belong to a same workflow execution trace. "
        context += "The time the task started is stored in the started_at. The time the task ended is stored in the ended_at. The task duration is ended_at - started_at for each task "

    # An explicitly empty context means "no context": send the query as-is
    # rather than with a stray leading ". ".
    full_query = f"{context}. {query}" if context else query

    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way wall-clock time() can.
    t0 = perf_counter()
    result = self.qa_chain({"query": full_query})
    response_time = perf_counter() - t0

    response_text = result["result"]

    new_row = {
        'Query_ID': query_id,
        'Query_Text': query,
        'Query_Chars': len(query),
        'Response_Text': response_text,
        'Response_Chars': len(response_text),
        'Response_Time': response_time,
        'Accuracy': None  # To be filled manually
    }

    self.query_df = pd.concat([self.query_df, pd.DataFrame([new_row])], ignore_index=True)

    print(f"Q: {query}")
    print(response_text)
    print(f"---------------- I took {response_time:.1f} s to answer this.")
    print("\n\n")

    return result

# Replace the ask method on your existing instance: the function is bound to
# `qa` as an instance attribute, so qa.ask(...) now resolves to the tracking
# version defined in this cell.
qa.ask = types.MethodType(ask, qa)

# Zero-Shot Prompts

In [148]:
# Clear the query log: the empty slice drops every row but keeps the columns.
# Run this only when a fresh log is wanted; previously recorded rows are discarded.
qa.query_df = qa.query_df.iloc[0:0]

In [149]:
# Display the current query log.
qa.query_df

Unnamed: 0,Query_ID,Query_Text,Query_Chars,Response_Text,Response_Chars,Response_Time,Accuracy


In [63]:
# Persist the query log to query_data.csv (index column omitted).
# NOTE(review): in top-to-bottom order this cell comes after the cell that
# empties qa.query_df, so a straight "Run All" would overwrite query_data.csv
# with an empty table. Confirm that is intended before re-running.
qa.query_df.to_csv("query_data.csv", index=False)

### What was in the input value?

In [109]:
# Zero-shot Q01, run 1.
result = qa.ask(
    "What was in the input value?",
    query_id="DF-DL-Q01-NS-LC-Swallow-R01",
)

Q: What was in the input value?
The input value was **43**. 

You can find this information in the 'used' section of the first document, under the key 'arg_0'.
---------------- I took 6.1 s to answer this.





In [110]:
# Zero-shot Q01, run 2.
result = qa.ask(
    "What was in the input value?",
    query_id="DF-DL-Q01-NS-LC-Swallow-R02",
)

Q: What was in the input value?
The input value was **43**. 

You can find this information in the 'used' section of the first document, under the key 'arg_0'.
---------------- I took 6.3 s to answer this.





In [111]:
# Zero-shot Q01, run 3.
result = qa.ask(
    "What was in the input value?",
    query_id="DF-DL-Q01-NS-LC-Swallow-R03",
)

Q: What was in the input value?
The input value was **43**. 

You can find this information in the 'used' section of the first document, under the key 'arg_0'.
---------------- I took 6.1 s to answer this.





### How was the initial input I transformed in the first step?

In [112]:
# Zero-shot Q02, run 1.
result = qa.ask(
    "How was the initial input I transformed in the first step?",
    query_id="DF-DL-Q02-NS-LC-Swallow-R01",
)

Q: How was the initial input I transformed in the first step?
I don't know.
---------------- I took 5.9 s to answer this.





In [113]:
# Zero-shot Q02, run 2.
result = qa.ask(
    "How was the initial input I transformed in the first step?",
    query_id="DF-DL-Q02-NS-LC-Swallow-R02",
)

Q: How was the initial input I transformed in the first step?
I don't know.
---------------- I took 6.2 s to answer this.





In [114]:
# Zero-shot Q02, run 3.
result = qa.ask(
    "How was the initial input I transformed in the first step?",
    query_id="DF-DL-Q02-NS-LC-Swallow-R03",
)

Q: How was the initial input I transformed in the first step?
I don't know.
---------------- I took 5.9 s to answer this.





### What intermediate values were generated from input 'I'?

In [115]:
# Zero-shot Q03, run 1.
result = qa.ask(
    "What intermediate values were generated from input 'I'?",
    query_id="DF-DL-Q03-NS-LC-Swallow-R01",
)

Q: What intermediate values were generated from input 'I'?
The intermediate values generated from input 'I' are:

* **43** 

This value is found in the 'generated' field of the document with task_id '1750258167.173979'.
---------------- I took 6.4 s to answer this.





In [116]:
# Zero-shot Q03, run 2.
result = qa.ask(
    "What intermediate values were generated from input 'I'?",
    query_id="DF-DL-Q03-NS-LC-Swallow-R02",
)

Q: What intermediate values were generated from input 'I'?
The intermediate values generated from input 'I' are:

* **43** 

This value is found in the 'generated' field of the document with task_id '1750258167.173979'.
---------------- I took 6.2 s to answer this.





In [117]:
# Zero-shot Q03, run 3.
result = qa.ask(
    "What intermediate values were generated from input 'I'?",
    query_id="DF-DL-Q03-NS-LC-Swallow-R03",
)

Q: What intermediate values were generated from input 'I'?
The intermediate values generated from input 'I' are:

* **43** 

This value is found in the 'generated' field of the document with task_id '1750258167.173979'.
---------------- I took 6.3 s to answer this.





### What tasks contributed data to the final score 'A'?

In [56]:
# Zero-shot Q08, run 1.
result = qa.ask(
    "What tasks contributed data to the final score 'A'?",
    query_id="DF-DL-Q08-NS-LC-Swallow-R01",
)

Q: What tasks contributed data to the final score 'A'?
I don't know.
---------------- I took 5.9 s to answer this.





In [57]:
# Zero-shot Q08, run 2.
result = qa.ask(
    "What tasks contributed data to the final score 'A'?",
    query_id="DF-DL-Q08-NS-LC-Swallow-R02",
)

Q: What tasks contributed data to the final score 'A'?
I don't know.
---------------- I took 6.0 s to answer this.





In [58]:
# Zero-shot Q08, run 3.
result = qa.ask(
    "What tasks contributed data to the final score 'A'?",
    query_id="DF-DL-Q08-NS-LC-Swallow-R03",
)

Q: What tasks contributed data to the final score 'A'?
I don't know.
---------------- I took 5.9 s to answer this.





In [122]:
# Data-lineage questions Q04-Q07: three runs each, no Chain of Thought, and
# context left as None.
query_suite = [
    {"base_id": base_id, "query": question, "runs": 3, "use_cot": False, "context": None}
    for base_id, question in [
        ("DF-DL-Q04-NS-LC-Swallow-R01",
         "What were the outputs of the tasks 'H_TO_E', 'H_TO_F', and 'H_TO_G'?"),
        # NOTE(review): the closing quote after 'D is missing; kept verbatim
        # because the recorded runs used exactly this text.
        ("DF-DL-Q05-NS-LC-Swallow-R01",
         "How did the value of 'B' change compared to the value of 'D?"),
        ("DF-DL-Q06-NS-LC-Swallow-R01",
         "Trace the lineage of the final output 'A', back to the input 'I'."),
        ("DF-DL-Q07-NS-LC-Swallow-R01",
         "Trace the lineage of 'C', back to the input 'I'."),
    ]
]

# Run all queries in the suite
all_results = run_query_suite(qa, query_suite)


Running: DF-DL-Q04-NS-LC-Swallow-R01
Query: What were the outputs of the tasks 'H_TO_E', 'H_TO_F', and 'H_TO_G'?
Q: What were the outputs of the tasks 'H_TO_E', 'H_TO_F', and 'H_TO_G'?
I don't have information about the tasks 'H_TO_E', 'H_TO_F', and 'H_TO_G' in the provided data.
---------------- I took 6.2 s to answer this.



Q: What were the outputs of the tasks 'H_TO_E', 'H_TO_F', and 'H_TO_G'?
I don't have information about the tasks 'H_TO_E', 'H_TO_F', and 'H_TO_G' in the provided data.
---------------- I took 6.4 s to answer this.



Q: What were the outputs of the tasks 'H_TO_E', 'H_TO_F', and 'H_TO_G'?
I don't have information about the tasks 'H_TO_E', 'H_TO_F', and 'H_TO_G' in the provided data.
---------------- I took 6.1 s to answer this.



Average Response Time: 6.20s
Average Character Count: 95

Running: DF-DL-Q05-NS-LC-Swallow-R01
Query: How did the value of 'B' change compared to the value of 'D?
Q: How did the value of 'B' change compared to the value of 'D?
I don't 

## Dataflow Transformation logic Queries

In [125]:
# Transformation-logic questions Q01-Q08: a single run each, no Chain of
# Thought, and context left as None.
query_suite = [
    {"base_id": base_id, "query": question, "runs": 1, "use_cot": False, "context": None}
    for base_id, question in [
        ("DF-TL-Q01-NS-LC-Swallow",
         "Which functions process data sequentially and which process data in parallel?"),
        ("DF-TL-Q02-NS-LC-Swallow",
         "Is there any data transformation that involves conditional branching within a function?"),
        ("DF-TL-Q03-NS-LC-Swallow",
         "Which task applied a logarithmic transformation?"),
        ("DF-TL-Q04-NS-LC-Swallow",
         "Are there any tasks that square or root their inputs? Which ones?"),
        ("DF-TL-Q05-NS-LC-Swallow",
         "Did any task apply a nonlinear transformation to its input?"),
        ("DF-TL-Q06-NS-LC-Swallow",
         "How does 'H_TO_F' process the input from 'H'?"),
        ("DF-TL-Q07-NS-LC-Swallow",
         "How are the inputs of 'E_TO_D', 'F_TO_C', and 'G_TO_B' combined?"),
        ("DF-TL-Q08-NS-LC-Swallow",
         "Describe each transformation from task to task."),
    ]
]

# Run all queries in the suite
all_results = run_query_suite(qa, query_suite)


Running: DF-TL-Q01-NS-LC-Swallow
Query: Which functions process data sequentially and which process data in parallel?
Q: Which functions process data sequentially and which process data in parallel?
I can't answer that question based on the provided data. 

Here's why:

* **Lack of Information about Task Relationships:** The data only provides information about individual tasks, but doesn't indicate how they are connected or ordered within the workflow. 
* **No Indication of Parallelism:** There are no fields or values that explicitly state whether tasks are executed in parallel or sequentially.

To determine if tasks are processed sequentially or in parallel, you would need additional information about the workflow structure, such as:

* **Dependencies:**  Does one task depend on the completion of another?
* **Concurrency Control:** Are there mechanisms in place to allow multiple tasks to run at the same time?
* **Workflow Diagram:** A visual representation of the workflow would clea

In [None]:
import pandas as pd

# Load the exported query log that is about to be patched in place.
existing_df = pd.read_csv("query_data.csv")

# Replacement rows. The keys use the query-log column names so the values land
# in the existing columns; any log column not listed here becomes NaN in the
# replaced rows.
new_df = pd.DataFrame({
    'Query_ID': ['NEW-Q01'],
    'Query_Text': ['What is the new question?'],
    'Response_Text': ['42'],
    'Response_Time': [5.678]
    # You can add more rows if needed
})

# Align both frames on one column layout. sort=False keeps the CSV's existing
# column order (the default union sorts the column labels).
all_columns = existing_df.columns.union(new_df.columns, sort=False)
existing_df = existing_df.reindex(columns=all_columns)
new_df = new_df.reindex(columns=all_columns)

# Overwrite at most the first 15 rows, and never more rows than either frame
# actually has (a shorter CSV would otherwise cause a shape mismatch).
num_rows_to_replace = min(len(new_df), len(existing_df), 15)
existing_df.iloc[:num_rows_to_replace] = new_df.iloc[:num_rows_to_replace].to_numpy()

# Save back to CSV
existing_df.to_csv("query_data.csv", index=False)


In [None]:
# Broke due to token limit. It got 2/3 of the iterations for the final query in this set. Just throw in another API key to fix this for now. 

## Dataflow: Inputs and Outputs of Tasks

In [153]:
# Inputs/outputs questions: three runs each, no Chain of Thought, and context
# left as None.
# NOTE(review): the "-R1-" marker sits in the middle of these IDs, so the
# end-anchored run-suffix pattern in benchmark_query_with_tracking does not
# pick it up; the generated IDs end in "...-R1-Swallow-R01" and so on.
query_suite = [
    {"base_id": base_id, "query": question, "runs": 3, "use_cot": False, "context": None}
    for base_id, question in [
        # Disabled entry:
        # ("DF-INOP-Q01-NS-LC-R1-Swallow",
        #  "What tasks take the value 'H' as input?"),
        ("DF-INOP-Q02-NS-LC-R1-Swallow",
         "Was the output of I_TO_H used directly by multiple downstream tasks? If so what tasks?"),
        ("DF-INOP-Q03-NS-LC-R1-Swallow",
         "Which tasks produce intermediate values that are consumed by other tasks?"),
        ("DF-INOP-Q04-NS-LC-R1-Swallow",
         "How many values are combined to produce the final output 'A'?"),
        ("DF-INOP-Q05-NS-LC-R1-Swallow",
         "Did any task produce more than one output?"),
        ("DF-INOP-Q06-NS-LC-R1-Swallow",
         "What is the data type of the output produced by 'I_TO_H'?"),
        ("DF-INOP-Q07-NS-LC-R1-Swallow",
         "What are the output values of 'E_TO_D', 'F_TO_C', and 'G_TO_B'?"),
        ("DF-INOP-Q08-NS-LC-R1-Swallow",
         "How can 'H_TO_G's output change given the input?"),
    ]
]

# Run all queries in the suite
all_results = run_query_suite(qa, query_suite)


Running: DF-INOP-Q02-NS-LC-R1-Swallow
Query: Was the output of I_TO_H used directly by multiple downstream tasks? If so what tasks?


RuntimeError: ('Sambanova /complete call failed with status code 429.', '{"error":{"code":null,"message":"Rate limit exceeded","param":null,"type":"rate_limit_exceeded"}}\n.')

In [127]:
# Show the last rows of the query log to check what was recorded most recently.
qa.query_df.tail()

Unnamed: 0,Query_ID,Query_Text,Query_Chars,Response_Text,Response_Chars,Response_Time,Accuracy
48,DF-TL-Q04-NS-LC-Swallow-R01,Are there any tasks that square or root their ...,65,I can't tell you which tasks square or root th...,274,6.375111,
49,DF-TL-Q05-NS-LC-Swallow-R01,Did any task apply a nonlinear transformation ...,59,I don't know.,13,6.017566,
50,DF-TL-Q06-NS-LC-Swallow-R01,How does 'H_TO_F' process the input from 'H'?,45,I don't know.,13,6.108374,
51,DF-TL-Q07-NS-LC-Swallow-R01,"How are the inputs of 'E_TO_D', 'F_TO_C', and ...",64,I don't know.,13,6.481077,
52,DF-TL-Q08-NS-LC-Swallow-R01,Describe each transformation from task to task.,47,"Based on the provided data, here's a descripti...",1801,8.203693,


## Dataflow: Function Level Tracing

In [67]:
# Function-level tracing questions Q01-Q08: three runs each, no Chain of
# Thought, and context left as None.
query_suite = [
    {"base_id": base_id, "query": question, "runs": 3, "use_cot": False, "context": None}
    for base_id, question in [
        ("DF-FLT-Q01-NS-LC-R1-Swallow",
         "What is the role of the 'DCB_TO_A' function in data-flow?"),
        ("DF-FLT-Q02-NS-LC-R1-Swallow",
         "How does the task 'H_TO_G' alter downstream values?"),
        ("DF-FLT-Q03-NS-LC-R1-Swallow",
         "Are there any functions that perform conditional branching or dynamic transformations?"),
        ("DF-FLT-Q04-NS-LC-R1-Swallow",
         "How many tasks/ functions does the data flow pass through before the final output?"),
        ("DF-FLT-Q05-NS-LC-R1-Swallow",
         "Which functions produce outputs that are inputs to more than one downstream task?"),
        ("DF-FLT-Q06-NS-LC-R1-Swallow",
         "What relationship exists between the output of 'H_TO_E' and the input 'E_TO_D'?"),
        ("DF-FLT-Q07-NS-LC-R1-Swallow",
         "Which functions handle parallel data transformations?"),
        ("DF-FLT-Q08-NS-LC-R1-Swallow",
         "Which functions process data sequentially, and which process data in parallel?"),
    ]
]

# Run all queries in the suite
all_results = run_query_suite(qa, query_suite)


Running: DF-FLT-Q01-NS-LC-R1-Swallow
Query: What is the role of the 'DCB_TO_A' function in data-flow?
Q: What is the role of the 'DCB_TO_A' function in data-flow?
I don't know.
---------------- I took 6.4 s to answer this.



Q: What is the role of the 'DCB_TO_A' function in data-flow?
I don't know.
---------------- I took 6.0 s to answer this.



Q: What is the role of the 'DCB_TO_A' function in data-flow?
I don't know.
---------------- I took 6.0 s to answer this.



Average Response Time: 6.13s
Average Character Count: 13

Running: DF-FLT-Q02-NS-LC-R1-Swallow
Query: How does the task 'H_TO_G' alter downstream values?
Q: How does the task 'H_TO_G' alter downstream values?
I don't have enough information to answer that question. 

The provided documents contain information about tasks, their execution times, and some system metrics, but they don't explain how tasks interact with each other or how the output of one task influences the input of another. 

To understand how 'H_TO_G' alt

In [None]:
# To Run on Monday

In [None]:
# Monday, Finish NS Queries. Put them all in a csv from this df so I can better analyze them.
# Begin / Finish FS and CoT. Ask Renan whether I should do FS-WIK-WCON-ROLE | FS-NOWIK-WCON-ROLE | FS-NOWIK-NOWCON-ROLE | FS-WIK-NOWCON-NOROLE | FS-WIK-WCON-NOROLE type of thing, or whether that is too complicated.
# Also ask him whether I should do a true no-shot, as in NO CONTEXT at all, since the runs above were run with limited context (what he wrote in Flowcept). Would that be worth doing?
# 

# Scheduling Data Queries

In [99]:
# Scheduling questions Q01-Q08: three runs each, no Chain of Thought, and
# context left as None.
# Spelling in Q06 ("Were there") and Q08 ("executed in parallel") is corrected
# so that typos in the prompt do not influence the answers being evaluated.
query_suite = [
    {"base_id": base_id, "query": question, "runs": 3, "use_cot": False, "context": None}
    for base_id, question in [
        ("S-NA-Q01-NS-LC-R1-Swallow",
         "On which node or machine did the task G_TO_B run?"),
        ("S-NA-Q02-NS-LC-R1-Swallow",
         "Where was the task DCB_TO_A executed?"),
        ("S-NA-Q03-NS-LC-R1-Swallow",
         "Did all the tasks in the workflow run on the same node?"),
        ("S-NA-Q04-NS-LC-R1-Swallow",
         "How long did each task wait before it was scheduled?"),
        ("S-NA-Q05-NS-LC-R1-Swallow",
         "Which task had the longest delay between being ready and being scheduled?"),
        ("S-NA-Q06-NS-LC-R1-Swallow",
         "Were there any idle periods on any node during execution?"),
        ("S-NA-Q07-NS-LC-R1-Swallow",
         "Did the scheduler assign any tasks to the same resource consecutively?"),
        ("S-NA-Q08-NS-LC-R1-Swallow",
         "Were any tasks executed in parallel across different processes?"),
    ]
]

# Run all queries in the suite
all_results = run_query_suite(qa, query_suite)


Running: S-NA-Q01-NS-LC-R1-Swallow
Query: On which node or machine did the task G_TO_B run?


RuntimeError: ('Sambanova /complete call failed with status code 429.', '{"error":{"code":null,"message":"Rate limit exceeded","param":null,"type":"rate_limit_exceeded"}}\n.')

In [None]:
# Need to change API KEY TO RUN AGAIN. RAN OUTTA TOKENS

# Telemetry Queries: Task Duration & Timing

In [None]:
# Task duration and timing questions Q01-Q08: three runs each, no Chain of
# Thought, and context left as None.
query_suite = [
    {"base_id": base_id, "query": question, "runs": 3, "use_cot": False, "context": None}
    for base_id, question in [
        ("T-TDT-Q01-NS-LC-R1-Swallow",
         "How long did the 'F_TO_C' task take to execute?"),
        ("T-TDT-Q02-NS-LC-R1-Swallow",
         "Order the tasks from longest to shortest execution time."),
        ("T-TDT-Q03-NS-LC-R1-Swallow",
         "Which task had the largest influence on overall runtime?"),
        ("T-TDT-Q04-NS-LC-R1-Swallow",
         "What was the runtime of the entire workflow?"),
        ("T-TDT-Q05-NS-LC-R1-Swallow",
         "Which stage of the workflow was most time-consuming?"),
        ("T-TDT-Q06-NS-LC-R1-Swallow",
         "What is the standard deviation of task durations across the workflow?"),
        ("T-TDT-Q07-NS-LC-R1-Swallow",
         "What was the average duration of tasks in the parallel stage?"),
        ("T-TDT-Q08-NS-LC-R1-Swallow",
         "How much time elapsed between 'I_TO_H' and 'G_TO_B'?"),
    ]
]

# Run all queries in the suite
all_results = run_query_suite(qa, query_suite)

# Resource Usage

In [None]:
# Resource-usage questions Q01-Q08: three runs each, no Chain of Thought, and
# context left as None.
query_suite = [
    {"base_id": base_id, "query": question, "runs": 3, "use_cot": False, "context": None}
    for base_id, question in [
        ("T-RU-Q01-NS-LC-R1-Swallow",
         "What was the peak memory usage during the task 'G_TO_B'?"),
        ("T-RU-Q02-NS-LC-R1-Swallow",
         "What task had the highest CPU utilization?"),
        ("T-RU-Q03-NS-LC-R1-Swallow",
         "Which task used the most RAM?"),
        ("T-RU-Q04-NS-LC-R1-Swallow",
         "What were the resource usage metrics for 'E_TO_D'?"),
        ("T-RU-Q05-NS-LC-R1-Swallow",
         "Did any tasks exceed their allocated memory limits?"),
        ("T-RU-Q06-NS-LC-R1-Swallow",
         "Did any task experience memory spikes?"),
        ("T-RU-Q07-NS-LC-R1-Swallow",
         "Was the system under high load during the execution of the parallel stage?"),
        ("T-RU-Q08-NS-LC-R1-Swallow",
         "I want the mean, median, and mode of CPU usage across the workflow by task."),
    ]
]

# Run all queries in the suite
all_results = run_query_suite(qa, query_suite)

# Errors & Exceptions

In [None]:
# Error and exception questions Q01-Q08: three runs each, no Chain of Thought,
# and context left as None.
# Q03 spelling is corrected ("occurred") so a typo in the prompt does not
# influence the answers being evaluated.
query_suite = [
    {"base_id": base_id, "query": question, "runs": 3, "use_cot": False, "context": None}
    for base_id, question in [
        ("T-EE-Q01-NS-LC-R1-Swallow",
         "Did any task fail with an exception during execution?"),
        ("T-EE-Q02-NS-LC-R1-Swallow",
         "Were any runtime errors logged during the execution of the workflow?"),
        ("T-EE-Q03-NS-LC-R1-Swallow",
         "What exception occurred in the 'H_TO_G' function?"),
        ("T-EE-Q04-NS-LC-R1-Swallow",
         "Did any function raise a ValueError or TypeError?"),
        ("T-EE-Q05-NS-LC-R1-Swallow",
         "Which task caused the workflow to terminate early?"),
        ("T-EE-Q06-NS-LC-R1-Swallow",
         "Were any partial results produced despite an error in one branch?"),
        ("T-EE-Q07-NS-LC-R1-Swallow",
         "Were any downstream tasks skipped due to a failure upstream?"),
        ("T-EE-Q08-NS-LC-R1-Swallow",
         "How many tasks reported failures in the telemetry logs, and what were their associated error messages?"),
    ]
]

# Run all queries in the suite
all_results = run_query_suite(qa, query_suite)

# Control Flow: Execution Order

In [None]:
# Control-flow / execution-order suite (IDs CF-EO-Q01 ... CF-EO-Q08).
# Every entry shares the same settings: 3 runs, no Chain-of-Thought, context left
# as None. Only the question text and the zero-padded question number vary, so the
# entries are generated from the list of questions instead of being copy-pasted.
query_suite = [
    {
        "base_id": f"CF-EO-Q{number:02d}-NS-LC-R1-Swallow",
        "query": text,
        "runs": 3,
        "use_cot": False,
        "context": None,
    }
    for number, text in enumerate(
        [
            "What was the first task executed in the workflow?",
            "What was the final task executed in the workflow?",
            "What step was executed after 'I_TO_H'?",
            # Task names are quoted on both sides so the model sees a well-formed identifier.
            "Did 'F_TO_C' execute before or after 'G_TO_B'?",
            "In what order were D,C, and B produced?",
            "List the complete execution order of all tasks in the workflow?",
            "Were any steps executed more than once? ",
            "Which steps ran in parallel after H was computed?",
        ],
        start=1,  # question numbering begins at Q01
    )
]

# Run all queries in the suite
all_results = run_query_suite(qa, query_suite)

# Dependencies

In [None]:
# Control-flow / dependency suite (IDs CF-D-Q01 ... CF-D-Q08).
# Every entry shares the same settings: 3 runs, no Chain-of-Thought, context left
# as None. Only the question text and the zero-padded question number vary, so the
# entries are generated from the list of questions instead of being copy-pasted.
query_suite = [
    {
        "base_id": f"CF-D-Q{number:02d}-NS-LC-R1-Swallow",
        "query": text,
        "runs": 3,
        "use_cot": False,
        "context": None,
    }
    for number, text in enumerate(
        [
            "What are the direct predecessors of 'I_TO_H'?",
            "Which task(s) could not start until 'H_TO_F' was complete?",
            "Which earlier task did 'DCB_TO_A' depend on?",
            "Which task triggered the execution of 'H_TO_E'?",
            # "part of", not "apart of": the question asks about membership in a path.
            "Was 'E_TO_D' part of a sequential or branched path?",
            "How is 'G_TO_B' related to 'I_TO_H' in the control-flow path?",
            "What dependencies exist between the H node and the final node result of A? ",
            "How many tasks depend on another task?",
        ],
        start=1,  # question numbering begins at Q01
    )
]

# Run all queries in the suite
all_results = run_query_suite(qa, query_suite)

# Failure & Error Handling

In [None]:
# Control-flow / failure-and-error-handling suite (IDs CF-FEH-Q01 ... CF-FEH-Q08).
# All entries use 3 runs, no Chain-of-Thought, and context=None; only the question
# text and the zero-padded question number differ between entries.
query_suite = [
    {
        "base_id": f"CF-FEH-Q{number:02d}-NS-LC-R1-Swallow",
        "query": text,
        "runs": 3,
        "use_cot": False,
        "context": None,
    }
    for number, text in enumerate(
        [
            "Did any task in the workflow fail?",
            "Did any tasks raise an error during execution?",
            "Were any tasks skipped entirely?",
            "Was the workflow able to complete successfully despite any failure?",
            "If 'H_TO_G' had failed, what downstream tasks would have been affected?",
            "What tasks failed and what were their immediate predecessors?",
            "What is the reason that 'H_TO_F' would fail?",
            "Are there any tasks more liable to fail than others, if so why?",
        ],
        start=1,  # question numbering begins at Q01
    )
]

# Run all queries in the suite
all_results = run_query_suite(qa, query_suite)

In [None]:
# Run once, after all of the suites above have been executed, to write the results out as CSV.
# NOTE(review): `df` is not defined in any of the visible cells, and the output path is the
# literal string "qa.query_df" (no .csv extension). Presumably the intent was to export the
# DataFrame tracked by the QAChain instance -- confirm against the QAChain class before running.
df.to_csv("qa.query_df", index=False)

In [8]:
# Single ad-hoc question about the number of tasks.
# NOTE(review): `context` is the string 'None' (not the None object) and the query starts
# with a space; the recorded warning shows the context being formatted into the query text,
# so both end up in the prompt -- confirm this is intended.
q = qa.ask(" How many tasks are there?", context = 'None')

  result = self.qa_chain({"query": f"{context}. {query}"})


Q:  How many tasks are there?
There are 3 tasks.
---------------- I took 7.1 s to answer this.





In [15]:
# Baseline benchmark of the task-count question with Chain-of-Thought disabled,
# then print the two summary metrics read from the returned mapping.
# NOTE(review): `benchmark_query` is not defined in the visible cells -- confirm it is
# defined earlier in the notebook so this cell survives Restart & Run All.
result = benchmark_query(qa, "How many tasks are there?", use_cot=False)
print(result["average_char_count"])
print(result["average_response_time_sec"])


Q: How many tasks are there?
There are 3 tasks.
---------------- I took 7.6 s to answer this.



Q: How many tasks are there?
There are 3 tasks.
---------------- I took 6.5 s to answer this.



Q: How many tasks are there?
There are 3 tasks.
---------------- I took 4.7 s to answer this.



Q: How many tasks are there?
There are 3 tasks.
---------------- I took 7.2 s to answer this.



Q: How many tasks are there?
There are 3 tasks.
---------------- I took 7.4 s to answer this.



18.0
6.693453311920166


#### Few-Shot "How many tasks are there?" with added context

In [64]:
# Few-shot variant of the task-count question: two worked Q/A examples (Q1, Q2) precede
# the real question (Q3), whose answer slot "A3:" is left open for the model to fill in.
result = benchmark_query(qa,"""
Q1: @task def a(): ... @task def b(): ... How many tasks? 
A1: 2

Q2: @flowcept_task def foo(): return 1 
@flowcept_task def bar(): return 2 
How many tasks? 
A2: 2

Q3: @flowcept_task def I_TO_H(...), H_TO_E(...), ..., DCB_TO_A(...) 
How many tasks? 
A3:
""")
print(result["average_char_count"])
print(result["average_response_time_sec"])

Q: 
Q1: @task def a(): ... @task def b(): ... How many tasks? 
A1: 2

Q2: @flowcept_task def foo(): return 1 
@flowcept_task def bar(): return 2 
How many tasks? 
A2: 2

Q3: @flowcept_task def I_TO_H(...), H_TO_E(...), ..., DCB_TO_A(...) 
How many tasks? 
A3:

A3: 8
---------------- I took 7.2 s to answer this.



Q: 
Q1: @task def a(): ... @task def b(): ... How many tasks? 
A1: 2

Q2: @flowcept_task def foo(): return 1 
@flowcept_task def bar(): return 2 
How many tasks? 
A2: 2

Q3: @flowcept_task def I_TO_H(...), H_TO_E(...), ..., DCB_TO_A(...) 
How many tasks? 
A3:

A3: 8
---------------- I took 6.1 s to answer this.



Q: 
Q1: @task def a(): ... @task def b(): ... How many tasks? 
A1: 2

Q2: @flowcept_task def foo(): return 1 
@flowcept_task def bar(): return 2 
How many tasks? 
A2: 2

Q3: @flowcept_task def I_TO_H(...), H_TO_E(...), ..., DCB_TO_A(...) 
How many tasks? 
A3:

A3: 8
---------------- I took 6.1 s to answer this.



Q: 
Q1: @task def a(): ... @task def b(): ... How ma

In [19]:
# Same few-shot task-count prompt as the benchmark cell, asked once directly through qa.ask.
q = qa.ask("""
Q1: @task def a(): ... @task def b(): ... How many tasks? 
A1: 2

Q2: @flowcept_task def foo(): return 1 
@flowcept_task def bar(): return 2 
How many tasks? 
A2: 2

Q3: @flowcept_task def I_TO_H(...), H_TO_E(...), ..., DCB_TO_A(...) 
How many tasks? 
A3:
""")

Q: 
Q1: @task def a(): ... @task def b(): ... How many tasks? 
A1: 2

Q2: @flowcept_task def foo(): return 1 
@flowcept_task def bar(): return 2 
How many tasks? 
A2: 2

Q3: @flowcept_task def I_TO_H(...), H_TO_E(...), ..., DCB_TO_A(...) 
How many tasks? 
A3:

A3: 8
---------------- I took 5.9 s to answer this.





#### ReAct "How many tasks are there?" With context and prompting for reasoning through the response

In [22]:
# Prompt that embeds the eight @flowcept_task function definitions and a pre-written
# step-by-step enumeration of them; step 4 and the "Answer:" slot are left blank for the
# model to complete.
react_prompt = """
You are an expert in analyzing Python workflows. Your job is to count how many tasks are present in a given code.

Here is the code:
@flowcept_task
def I_TO_H(input_value): return (input_value * 3) + 7
@flowcept_task
def H_TO_E(h_value): return (h_value ** 2) / 4
@flowcept_task
def H_TO_F(h_value): return math.sqrt(abs(h_value)) * 6
@flowcept_task
def H_TO_G(h_value): return h_value - 5 if h_value > 10 else h_value + 3
@flowcept_task
def E_TO_D(e_value): return e_value ** 2 - 1
@flowcept_task
def F_TO_C(f_value): return math.log(f_value) + 7
@flowcept_task
def G_TO_B(g_value): return g_value ** 1.5
@flowcept_task
def DCB_TO_A(d, c, b): return (d + c + b) / 3

Step-by-step reasoning:
1. I will count the number of functions decorated with @flowcept_task.
2. Each such function represents a unique task in the workflow.
3. Let’s count them:
   - I_TO_H
   - H_TO_E
   - H_TO_F
   - H_TO_G
   - E_TO_D
   - F_TO_C
   - G_TO_B
   - DCB_TO_A
4. 

Answer:
"""

# Ask the prompt once; no separate context argument is passed.
q = qa.ask(react_prompt)

Q: 
You are an expert in analyzing Python workflows. Your job is to count how many tasks are present in a given code.

Here is the code:
@flowcept_task
def I_TO_H(input_value): return (input_value * 3) + 7
@flowcept_task
def H_TO_E(h_value): return (h_value ** 2) / 4
@flowcept_task
def H_TO_F(h_value): return math.sqrt(abs(h_value)) * 6
@flowcept_task
def H_TO_G(h_value): return h_value - 5 if h_value > 10 else h_value + 3
@flowcept_task
def E_TO_D(e_value): return e_value ** 2 - 1
@flowcept_task
def F_TO_C(f_value): return math.log(f_value) + 7
@flowcept_task
def G_TO_B(g_value): return g_value ** 1.5
@flowcept_task
def DCB_TO_A(d, c, b): return (d + c + b) / 3

Step-by-step reasoning:
1. I will count the number of functions decorated with @flowcept_task.
2. Each such function represents a unique task in the workflow.
3. Let’s count them:
   - I_TO_H
   - H_TO_E
   - H_TO_F
   - H_TO_G
   - E_TO_D
   - F_TO_C
   - G_TO_B
   - DCB_TO_A
4. 

Answer:

There are **8** tasks in the given c

## Given the final result A, what were the values of tasks D, C, and B used to compute it? Zero-Shot, Few-Shot

In [65]:
# Zero Shot: the bare question is benchmarked with no examples or extra context,
# then the two summary metrics are printed.
result = benchmark_query(qa, "Given the final result A, what were the values of tasks D, C, and B used to compute it?")
print(result["average_char_count"])
print(result["average_response_time_sec"])


Q: Given the final result A, what were the values of tasks D, C, and B used to compute it?
Based on the provided data, here's how to determine the values of tasks D, C, and B used to compute the final result A:

* **Understanding the Data Structure:**

Each document represents a task execution with details like task ID, activity ID, start/end times, and input/output values (`used` and `generated`).

* **Tracing the Workflow:**

1. **Identify Task A:** Look for the document with `activity_id` equal to the final activity in your workflow (likely "A" based on your question).

2. **Find Predecessors:**  Examine the `used` field of Task A. This will contain the input values it received. These values should correspond to the outputs (`generated`) of tasks D, C, and B.

3. **Locate Tasks D, C, and B:** Search for documents with `activity_id` matching "D", "C", and "B".  Their `generated` fields will hold the values used by Task A.

**Example:**

Let's say Task A's `used` field contains:

```j

In [66]:
# Few-Shot: the question is preceded by a short explanation of the @flowcept_task
# decorator and the list of the eight task signatures in the workflow.
result = benchmark_query(qa, """This workflow uses @flowcept_task decorators to define each task. 
For example, @flowcept_task def step_one(): ... is a task.
Workflow:
@flowcept_task def I_TO_H(...)  
@flowcept_task def H_TO_E(...)  
@flowcept_task def H_TO_F(...)  
@flowcept_task def H_TO_G(...)  
@flowcept_task def E_TO_D(...)  
@flowcept_task def F_TO_C(...)  
@flowcept_task def G_TO_B(...)  
@flowcept_task def DCB_TO_A(...)  
Given the final result A, what were the values of tasks D, C, and B used to compute it?""")
print(result["average_char_count"])
print(result["average_response_time_sec"])


Q: This workflow uses @flowcept_task decorators to define each task. 
For example, @flowcept_task def step_one(): ... is a task.
Workflow:
@flowcept_task def I_TO_H(...)  
@flowcept_task def H_TO_E(...)  
@flowcept_task def H_TO_F(...)  
@flowcept_task def H_TO_G(...)  
@flowcept_task def E_TO_D(...)  
@flowcept_task def F_TO_C(...)  
@flowcept_task def G_TO_B(...)  
@flowcept_task def DCB_TO_A(...)  
Given the final result A, what were the values of tasks D, C, and B used to compute it?
Based on the provided data, here are the values used to compute the final result A:

* **Task D:**  The value of task D is **10.672359527074835**. This is found in the `generated` section of the task with `activity_id` 'E_TO_D'.

* **Task C:** The value of task C is **234.2477321128211**. This is found in the `generated` section of the task with `activity_id` 'F_TO_C'.

* **Task B:** The value of task B is **38**. This is found in the `generated` section of the task with `activity_id` 'G_TO_B'. 


Let 

In [127]:
# CoT: same question with Chain-of-Thought enabled and an explicit context object.
# NOTE(review): `WFC` is not defined in the visible cells -- confirm it is assigned
# earlier in the notebook so this cell survives Restart & Run All.
result = benchmark_query(qa, "Given the final result A, what were the values of tasks D, C, and B used to compute it?", use_cot=True, context = WFC )
print(result["average_char_count"])
print(result["average_response_time_sec"])

Q: 

Given the final result A, what were the values of tasks D, C, and B used to compute it?

You are analyzing a mathematical workflow composed of multiple tasks. 
Each task processes data and passes it to the next task. 
Tasks have names like I_TO_H and H_TO_G. Data flows through tasks in a directed graph based on dependencies. Workflow = workflow.py

To answer questions, follow these reasoning steps:
1. Identify the task being asked about.
2. Trace its inputs: which task produced the data it consumed?
3. Trace its outputs: what does it return and where does it go?
4. Use task definitions to compute or infer results.
5. If the workflow failed, identify the last successful task, then check inputs and errors in the failed task.

Always explain your reasoning step by step before providing a final answer.

Let's break down how to find the values of tasks D, C, and B used to compute the final result A.

**1. Identify the Task Being Asked About**

We're looking for the values of tasks D, C

## What was the initial input I, and what task transformations were performed to arrive at output A?

In [67]:
# Zero-Shot: the bare provenance question (initial input I and the transformations
# leading to A) is benchmarked with no examples or extra context.
result = benchmark_query(qa, "What was the initial input I, and what task transformations were performed to arrive at output A?")
print(result["average_char_count"])
print(result["average_response_time_sec"])


Q: What was the initial input I, and what task transformations were performed to arrive at output A?
I can't answer that question. The provided data doesn't contain information about the initial input I or the task transformations performed to arrive at output A.
---------------- I took 7.1 s to answer this.



Q: What was the initial input I, and what task transformations were performed to arrive at output A?
I can't answer that question. The provided data doesn't contain information about the initial input I or the task transformations performed to arrive at output A.
---------------- I took 6.3 s to answer this.



Q: What was the initial input I, and what task transformations were performed to arrive at output A?
I can't answer that question. The provided data doesn't contain information about the initial input I or the task transformations performed to arrive at output A.
---------------- I took 6.3 s to answer this.



Q: What was the initial input I, and what task transformation

In [128]:
# Few-Shot: the provenance question is preceded by the @flowcept_task explanation and
# the list of the eight task signatures.
# NOTE(review): the prompt text ends with "?)" -- the closing parenthesis sits inside the
# string and has no matching opener; confirm whether it should be removed.
result = benchmark_query(qa, """This workflow uses @flowcept_task decorators to define each task. 
For example, @flowcept_task def step_one(): ... is a task
Workflow:
@flowcept_task def I_TO_H(...)  
@flowcept_task def H_TO_E(...)  
@flowcept_task def H_TO_F(...)  
@flowcept_task def H_TO_G(...)  
@flowcept_task def E_TO_D(...)  
@flowcept_task def F_TO_C(...)  
@flowcept_task def G_TO_B(...)  
@flowcept_task def DCB_TO_A(...)  

**Question:** Given final output A, what was the initial input I and what transformations were performed step-by-step to produce A?)""")
print(result["average_char_count"])
print(result["average_response_time_sec"])


Q: This workflow uses @flowcept_task decorators to define each task. 
For example, @flowcept_task def step_one(): ... is a task
Workflow:
@flowcept_task def I_TO_H(...)  
@flowcept_task def H_TO_E(...)  
@flowcept_task def H_TO_F(...)  
@flowcept_task def H_TO_G(...)  
@flowcept_task def E_TO_D(...)  
@flowcept_task def F_TO_C(...)  
@flowcept_task def G_TO_B(...)  
@flowcept_task def DCB_TO_A(...)  

**Question:** Given final output A, what was the initial input I and what transformations were performed step-by-step to produce A?)
Based on the provided workflow definition, here's how the transformations occur step-by-step to produce output A from initial input I:

1. **I_TO_H:** The initial input `I` is processed by the task `I_TO_H`.

2. **H_TO_E, H_TO_F, H_TO_G:** The output from `I_TO_H` (let's call it `H`) is then split into three parallel paths:
   - `H_TO_E` processes `H` to produce `E`.
   - `H_TO_F` processes `H` to produce `F`.
   - `H_TO_G` processes `H` to produce `G`.

3. 

In [23]:
# Follow-up question asking the model to justify the count of 8 and name the tasks.
q = qa.ask('How do you get 8 tasks, and what are they?')

Q: How do you get 8 tasks, and what are they?
You provided 8 documents, each representing a task. 

Here are the tasks and their durations:

1. **Task ID:** 1749217820.748001
   - **Activity ID:** DCB_TO_A
   - **Duration:** 2 milliseconds (ended_at - started_at)

2. **Task ID:** 1749217820.7363381
   - **Activity ID:** F_TO_C
   - **Duration:** 2 milliseconds

3. **Task ID:** 1749217820.7026298
   - **Activity ID:** I_TO_H
   - **Duration:** 20 milliseconds

4. **Task ID:** 1749217820.7422442
   - **Activity ID:** G_TO_B
   - **Duration:** 3 milliseconds

5. **Task ID:** 1749217820.748001
   - **Activity ID:** DCB_TO_A
   - **Duration:** 2 milliseconds

6. **Task ID:** 1749217820.7363381
   - **Activity ID:** F_TO_C
   - **Duration:** 2 milliseconds

7. **Task ID:** 1749217820.7026298
   - **Activity ID:** I_TO_H
   - **Duration:** 20 milliseconds

8. **Task ID:** 1749217820.7422442
   - **Activity ID:** G_TO_B
   - **Duration:** 3 milliseconds 


Let me know if you have any other que

In [8]:
# Ask for the full I -> A path in a single free-form question (no context argument).
q = qa.ask('Tell me the entire workflow, how it goes from I to A ?')

Q: Tell me the entire workflow, how it goes from I to A ?
Based on the provided data, here's how the workflow progresses from I to A:

1. **I_TO_H:** This task starts at `2025-06-06 13:50:20.730000` and ends at `2025-06-06 13:50:20.733000`. It has an `activity_id` of "I_TO_H".

2. **F_TO_C:** This task starts at `2025-06-06 13:50:20.736000` and ends at `2025-06-06 13:50:20.738000`. It has an `activity_id` of "F_TO_C".

3. **DCB_TO_A:** This task starts at `2025-06-06 13:50:20.748000` and ends at `2025-06-06 13:50:20.750000`. It has an `activity_id` of "DCB_TO_A".

**Therefore, the workflow progresses as follows:**

I_TO_H -> F_TO_C -> DCB_TO_A 

Let me know if you have any other questions about the workflow!
---------------- I took 7.6 s to answer this.





# Which tasks were executed in parallel after task H, and what were their output values?

In [83]:
# Zero-shot: the bare question about the parallel branch after H is benchmarked with
# no examples or extra context, then the two summary metrics are printed.
result = benchmark_query(qa, "Which tasks were executed in parallel after task H, and what were their output values?")
print(result["average_char_count"])
print(result["average_response_time_sec"])


Q: Which tasks were executed in parallel after task H, and what were their output values?
I don't know.
---------------- I took 7.1 s to answer this.



Q: Which tasks were executed in parallel after task H, and what were their output values?
I don't know.
---------------- I took 6.0 s to answer this.



Q: Which tasks were executed in parallel after task H, and what were their output values?
I don't know.
---------------- I took 5.8 s to answer this.



Q: Which tasks were executed in parallel after task H, and what were their output values?
I don't know.
---------------- I took 6.0 s to answer this.



Q: Which tasks were executed in parallel after task H, and what were their output values?
I don't know.
---------------- I took 5.8 s to answer this.



13.0
6.168262147903443


In [100]:
# Few-shot: the parallel-branch question is preceded by the @flowcept_task explanation
# and the list of the eight task signatures.
result = benchmark_query(qa,"""This workflow uses @flowcept_task decorators to define each task. 
For example, @flowcept_task def I_TO_H(input_value): ... is a task.

Workflow:
@flowcept_task def I_TO_H(...)  
@flowcept_task def H_TO_E(...)  
@flowcept_task def H_TO_F(...)  
@flowcept_task def H_TO_G(...)  
@flowcept_task def E_TO_D(...)  
@flowcept_task def F_TO_C(...)  
@flowcept_task def G_TO_B(...)  
@flowcept_task def DCB_TO_A(...)  

Which tasks were executed in parallel after task H, and what were their output values?""")
print(result["average_char_count"])
print(result["average_response_time_sec"])

Q: This workflow uses @flowcept_task decorators to define each task. 
For example, @flowcept_task def step_one(): ... is a task.

Workflow:
@flowcept_task def I_TO_H(...)  
@flowcept_task def H_TO_E(...)  
@flowcept_task def H_TO_F(...)  
@flowcept_task def H_TO_G(...)  
@flowcept_task def E_TO_D(...)  
@flowcept_task def F_TO_C(...)  
@flowcept_task def G_TO_B(...)  
@flowcept_task def DCB_TO_A(...)  

Which tasks were executed in parallel after task H, and what were their output values?
Based on the provided workflow definition, the tasks executed in parallel after task H are:

* **H_TO_E**
* **H_TO_F**
* **H_TO_G**

The output values for these tasks are:

* **H_TO_E:**  `arg_0` = 39.344631145812
* **H_TO_F:** `arg_0` = 10.672359527074835
* **H_TO_G:** `arg_0` = 234.2477321128211
---------------- I took 6.7 s to answer this.



Q: This workflow uses @flowcept_task decorators to define each task. 
For example, @flowcept_task def step_one(): ... is a task.

Workflow:
@flowcept_task def

# CoT Prompts

In [121]:
# Chain-of-thought query about H_TO_G, passing WFC as the extra context.
# NOTE(review): the recorded output shows the context rendered as
# `<function Workflow.run at 0x...>`, i.e. a function repr rather than workflow
# text -- confirm what WFC is bound to.
h_to_g_prompt_wfc = cot_prompt("Track, trace, and provide the H_TO_G task and input.", context=WFC)
qcot = qa.ask(h_to_g_prompt_wfc)

Q: 
<function Workflow.run at 0x7fa34e8cc2c0>
Track, trace, and provide the H_TO_G task and input.

You are analyzing a mathematic workflow composed of multiple tasks. 
Each task processes data and passes it to the next task. 
Tasks have names like I_TO_H and H_TO_G. Data flows through tasks in a directed graph based on dependencies. Workflow = workflow.py

To answer questions, follow these reasoning steps:
1. Identify the task being asked about.
2. Trace its inputs: which task produced the data it consumed?
3. Trace its outputs: what does it return and where does it go?
4. Use task definitions to compute or infer results.
5. If the workflow failed, identify the last successful task, then check inputs and errors in the failed task.

Always explain your reasoning step by step before providing a final answer.

Let's break down the task you're asking about and trace its inputs and outputs.

**1. Identify the task:**

You're asking about the `H_TO_G` task.

**2. Trace its inputs:**

Lookin

In [19]:
# Same H_TO_G query, but with the literal string 'None' as context.
# NOTE(review): this is the string 'None', not the None object; the recorded
# output shows "None" echoed verbatim in the prompt -- confirm that is intended.
h_to_g_prompt_none = cot_prompt("Track, trace, and provide the H_TO_G task and input.", context='None')
qcot = qa.ask(h_to_g_prompt_none)

Q: 
None
Track, trace, and provide the H_TO_G task and input.

You are analyzing a workflow composed of multiple tasks. 
Each task processes data and passes it to the next task. 
Tasks have names like I_TO_H and H_TO_G. Data flows through tasks in a directed graph based on dependencies. Workflow = workflow.py

To answer questions, follow these reasoning steps:
1. Identify the task being asked about.
2. Trace its inputs: which task produced the data it consumed?
3. Trace its outputs: what does it return and where does it go?
4. Use task definitions to compute or infer results.
5. If the workflow failed, identify the last successful task, then check inputs and errors in the failed task.

Always explain your reasoning step by step before providing a final answer.

Let's break down the problem and find the H_TO_G task and its input.

1. **Identify the Task:** We're looking for the task named "H_TO_G".

2. **Trace its Inputs:** To find the input for H_TO_G, we need to look for the task that

In [18]:
# Chain-of-thought query on whether E_TO_D sits on a sequential or branched
# path, relying on cot_prompt's default context.
# NOTE(review): "apart of" is likely a typo for "a part of"; left unchanged so
# the recorded output still matches the prompt that produced it.
e_to_d_prompt = cot_prompt("Was 'E_TO_D' apart of a sequential or branched path?")
qcot = qa.ask(e_to_d_prompt)

Q: 
<function Workflow.run at 0x7fd408c10400>
Was 'E_TO_D' apart of a sequential or branched path?

You are analyzing a mathematical workflow composed of multiple tasks. 
Each task processes data and passes it to the next task. 
Tasks have names like I_TO_H and H_TO_G. Data flows through tasks in a directed graph based on dependencies. Workflow = workflow.py

To answer questions, follow these reasoning steps:
1. Identify the task being asked about.
2. Trace its inputs: which task produced the data it consumed?
3. Trace its outputs: what does it return and where does it go?
4. Use task definitions to compute or infer results.
5. If the workflow failed, identify the last successful task, then check inputs and errors in the failed task.

Always explain your reasoning step by step before providing a final answer.

Let's analyze the workflow to determine if 'E_TO_D' was part of a sequential or branched path.

1. **Identify the Task:** We are looking for the task named 'E_TO_D'.

2. **Trace 

In [22]:
# E_TO_D path question again, this time with WFC passed explicitly as context.
# NOTE(review): the recorded output shows the context as a
# `<function Workflow.run at 0x...>` repr -- confirm what WFC is bound to.
e_to_d_prompt_wfc = cot_prompt("Was 'E_TO_D' apart of a sequential or branched path?", context=WFC)
qcot = qa.ask(e_to_d_prompt_wfc)

Q: 
<function Workflow.run at 0x7fd408c10400>
Was 'E_TO_D' apart of a sequential or branched path?

You are analyzing a mathematical workflow composed of multiple tasks. 
Each task processes data and passes it to the next task. 
Tasks have names like I_TO_H and H_TO_G. Data flows through tasks in a directed graph based on dependencies. Workflow = WFC, which is in your context parameter.

To answer questions, follow these reasoning steps:
1. Identify the task being asked about.
2. Trace its inputs: which task produced the data it consumed?
3. Trace its outputs: what does it return and where does it go?
4. Use task definitions to compute or infer results.
5. If the workflow failed, identify the last successful task, then check inputs and errors in the failed task.

Always explain your reasoning step by step before providing a final answer.

Let's analyze the workflow to determine if 'E_TO_D' was part of a sequential or branched path.

1. **Identify the Task:** We are looking at the task 

In [31]:
# E_TO_D path question with the textual workflow summary WFS as context.
# NOTE(review): WFS is assigned in a later cell of this notebook; unless it is
# also defined earlier, this cell raises NameError on a fresh
# Restart-and-Run-All.
e_to_d_prompt_wfs = cot_prompt("Was 'E_TO_D' apart of a sequential or branched path?", context=WFS)
qcot = qa.ask(e_to_d_prompt_wfs)

Q: 
 I_TO_H is the entry point, branching into three parallel paths: H_TO_E, H_TO_F, and H_TO_G. These produce inputs for E_TO_D, F_TO_C, and G_TO_B respectively, which then converge into a final aggregation function DCB_TO_A.  
Was 'E_TO_D' apart of a sequential or branched path?

You are analyzing a mathematical workflow composed of multiple tasks. 
Each task processes data and passes it to the next task. 
Tasks have names like I_TO_H and H_TO_G. Data flows through tasks in a directed graph based on dependencies. The entire workflow is stored in 'WFC', which is in your context parameter.

To answer questions, follow these reasoning steps:
1. Identify the task being asked about.
2. Trace its inputs: which task produced the data it consumed?
3. Trace its outputs: what does it return and where does it go?
4. Use task definitions to compute or infer results.
5. If the workflow failed, identify the last successful task, then check inputs and errors in the failed task.

Always explain your

In [32]:
# Plain-English summary of the workflow's shape, passed to cot_prompt as `context`.
# The leading space and the two trailing spaces are part of the original value and are kept.
WFS = (
    " I_TO_H is the entry point, branching into three parallel paths: "
    "H_TO_E, H_TO_F, and H_TO_G. "
    "These produce inputs for E_TO_D, F_TO_C, and G_TO_B respectively, "
    "which then converge into a final aggregation function DCB_TO_A.  "
)

In [34]:
# Chain-of-thought query on how G_TO_B relates to I_TO_H, using cot_prompt's
# default context. The surrounding spaces in the query are kept as written.
g_to_b_prompt = cot_prompt(" How is 'G_TO_B' related to 'I_TO_H' in the control-flow path? ")
qcot = qa.ask(g_to_b_prompt)

Q: 

 How is 'G_TO_B' related to 'I_TO_H' in the control-flow path? 

You are analyzing a mathematical workflow composed of multiple tasks. 
Each task processes data and passes it to the next task. 
Tasks have names like I_TO_H and H_TO_G. Data flows through tasks in a directed graph based on dependencies. The entire workflow is stored in 'WFC', which is in your context parameter.

To answer questions, follow these reasoning steps:
1. Identify the task being asked about.
2. Trace its inputs: which task produced the data it consumed?
3. Trace its outputs: what does it return and where does it go?
4. Use task definitions to compute or infer results.
5. If the workflow failed, identify the last successful task, then check inputs and errors in the failed task.

Always explain your reasoning step by step before providing a final answer.

Let's analyze the relationship between 'G_TO_B' and 'I_TO_H' in the control-flow path.

1. **Identify the tasks:** We are looking at 'G_TO_B' and 'I_TO_H'.

In [35]:
# Same G_TO_B / I_TO_H question, with the textual workflow summary WFS as context.
g_to_b_prompt_wfs = cot_prompt(" How is 'G_TO_B' related to 'I_TO_H' in the control-flow path? ", context=WFS)
qcot = qa.ask(g_to_b_prompt_wfs)

Q: 
 I_TO_H is the entry point, branching into three parallel paths: H_TO_E, H_TO_F, and H_TO_G. These produce inputs for E_TO_D, F_TO_C, and G_TO_B respectively, which then converge into a final aggregation function DCB_TO_A.  
 How is 'G_TO_B' related to 'I_TO_H' in the control-flow path? 

You are analyzing a mathematical workflow composed of multiple tasks. 
Each task processes data and passes it to the next task. 
Tasks have names like I_TO_H and H_TO_G. Data flows through tasks in a directed graph based on dependencies. The entire workflow is stored in 'WFC', which is in your context parameter.

To answer questions, follow these reasoning steps:
1. Identify the task being asked about.
2. Trace its inputs: which task produced the data it consumed?
3. Trace its outputs: what does it return and where does it go?
4. Use task definitions to compute or infer results.
5. If the workflow failed, identify the last successful task, then check inputs and errors in the failed task.

Always e

In [36]:
# Role-primed variant: an "HPC provenance expert" preamble (describing the
# 'used' / 'generated' fields and 'workflow_id') precedes the same
# G_TO_B / I_TO_H control-flow question. The string's internal indentation is
# part of the prompt sent to the model and is kept as written.
provenance_expert_query = """ You are an expert in HPC workflow provenance analysis with a deep knowledge of data lineage tracing, workflow management, and computing systems. 
                        You are analyzing provenance data from a complex workflow consisting of multiple tasks.
                        Workflow provenance in HPC refers to a comprehensive way of tracking and recording all computational processes, transformations, resource utilization, and dependencies within a workflow.
                        Within the workflow, a task object has its own task provenance. 
                        Input data is stored in the 'used' field whereas the created output data is stored in the 'generated' field. Tasks with the same 'workflow_id' indicate that they belong to the same workflow execution trace.
                        Your job is to understand the complex workflow in detail so that you can accurately respond to the query.
                        
                        How is 'G_TO_B' related to 'I_TO_H' in the control-flow path? """
qcot = qa.ask(cot_prompt(provenance_expert_query))

Q: 

 You are an expert in HPC workflow provenance analysis with a deep knowledge of data lineage tracing, workflow management, and computing systems. 
                        You are analyzing provenance data from a complex workflow consisting of multiple tasks.
                        Workflow provenance in HPC refers to a comprehensive way of tracking and recording all computational processes, transformations, resource utilization, and dependencies within a workflow.
                        Within the workflow, a task object has its own task provenance. 
                        Input data is stored in the 'used' field whereas the created output data is stored in the 'generated' field. Tasks with the same 'workflow_id' indicate that they belong to the same workflow execution trace.
                        Your job is to understand the complex workflow in detail so that you can accurately respond to the query.

                        How is 'G_TO_B' related to 'I_TO_H' in the control