In [1]:
import numpy as np

from flowcept import TaskQueryAPI
from datetime import datetime
from flowcept.analytics.analytics_utils import clean_dataframe

[flowcept][DEBUG][mac132633.ornl.gov][pid=21850][thread=7901387840][function=_build_logger][flowcept's base log is set up!]
IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html


In [2]:
def ingest_mock_data():
    """
    Insert a fresh copy of the sample workflow into the database and return its id.

    This function is here just to enable the execution of the functions we are
    showing in this notebook. Each record of the sample file is shallow-copied,
    stripped of its stored ``_id`` (when present), given a new random ``task_id``,
    and tagged with one newly generated ``workflow_id`` shared by all copies.

    Returns
    -------
    str
        The ``workflow_id`` assigned to every inserted document.

    Raises
    ------
    AssertionError
        If the number of ids returned by the insert differs from the number of
        records in the sample file.
    """
    # Imports are kept local so this helper cell stays self-contained.
    import json
    from uuid import uuid4
    from flowcept import DBAPI
    db_api = DBAPI()
    test_data_path = '../tests/api/sample_data_with_telemetry_and_rai.json' # This sample data contains a workflow composed of 9 tasks.
    # JSON interchange files are UTF-8; do not depend on the platform default encoding.
    with open(test_data_path, encoding="utf-8") as f:
        base_data = json.load(f)

    wf_id = str(uuid4())
    docs = []
    for d in base_data:
        new_doc = d.copy()
        # Drop the stored id so the copy is inserted as a new document; records
        # without an "_id" are accepted instead of raising KeyError.
        new_doc.pop("_id", None)
        new_doc["task_id"] = str(uuid4())
        new_doc["workflow_id"] = wf_id
        docs.append(new_doc)

    # NOTE(review): this goes through the private `_dao` handle of DBAPI; prefer a
    # public insert method if the API offers one.
    inserted_ids = db_api._dao.insert_many(docs)
    assert len(inserted_ids) == len(base_data)
    return wf_id

In [6]:
# Ingest the sample workflow and keep its id; the query cells below read `wf_id`.
# The data only needs to be ingested once, but `wf_id` must still be defined in a
# fresh kernel: run this cell, or assign the id of a previously ingested workflow.
wf_id = ingest_mock_data()
wf_id

'100faab4-ff4c-4f78-92a7-6f20ec1fad83'

In [3]:
# Query API instance shared by the query cells that follow.
query_api = TaskQueryAPI()

## Very Simple query returning a DataFrame

In [7]:
# Restrict the query to the tasks of the workflow ingested above.
_filter = dict(workflow_id=wf_id)
df = query_api.df_query(
    _filter,
    calculate_telemetry_diff=True,
)

In [11]:
# Preview the first three rows of the query result.
df.head(3)

Unnamed: 0,task_id,submitted_at,activity_id,workflow_id,utc_timestamp,adapter_id,user,campaign_id,sys_name,node_name,...,telemetry_diff.network.netio_per_interface.bridge100.bytes_sent,telemetry_diff.network.netio_per_interface.bridge100.bytes_recv,telemetry_diff.network.netio_per_interface.bridge100.packets_sent,telemetry_diff.network.netio_per_interface.bridge100.packets_recv,telemetry_diff.network.netio_per_interface.bridge100.errin,telemetry_diff.network.netio_per_interface.bridge100.errout,telemetry_diff.network.netio_per_interface.bridge100.dropin,telemetry_diff.network.netio_per_interface.bridge100.dropout,status,elapsed_time
0,6b1209fe-e078-4572-b082-db14d0de025e,2024-02-09 01:05:28.202881024,wrapper,100faab4-ff4c-4f78-92a7-6f20ec1fad83,2024-02-09 01:06:27.422988032,dask,root,super_campaign,Darwin,MAC132633,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,FINISHED,59.133646
1,8646acc7-bdd7-4504-bfb4-3768b97912d6,2024-02-09 01:05:28.206701056,wrapper,100faab4-ff4c-4f78-92a7-6f20ec1fad83,2024-02-09 01:06:29.350380800,dask,root,super_campaign,Darwin,MAC132633,...,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,FINISHED,61.062001
2,d36a4538-7e52-49df-b8c9-332506160d5b,2024-02-09 01:05:28.210365952,wrapper,100faab4-ff4c-4f78-92a7-6f20ec1fad83,2024-02-09 01:08:17.270892032,dask,root,super_campaign,Darwin,MAC132633,...,1024.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,FINISHED,168.981788


## Query Returning the Top K tasks

In [27]:
# Check that the column used as the primary sort key below is present.
'generated.loss' in df.columns

True

In [28]:
# Sort specification: (column, direction) pairs, both ascending.
sort = [
    (column, TaskQueryAPI.ASC)
    for column in (
        "generated.loss",
        "generated.responsible_ai_metrics.params",
    )
]
df = query_api.df_get_top_k_tasks(
    filter=_filter, sort=sort, k=3, calculate_telemetry_diff=False
)
# Display only the columns whose names contain "used." or "generated.".
df.filter(regex='used[.]|generated[.]')

Unnamed: 0,used.conv_in_outs,used.conv_kernel_sizes,used.conv_pool_sizes,used.fc_in_outs,used.softmax_dims,used.max_epochs,generated.loss,generated.accuracy,generated.responsible_ai_metrics.shap_sum,generated.responsible_ai_metrics.flops,generated.responsible_ai_metrics.params,generated.responsible_ai_metrics.max_width,generated.responsible_ai_metrics.depth,generated.responsible_ai_metrics.n_fc_layers,generated.responsible_ai_metrics.n_cv_layers,generated.responsible_ai_metrics.convolutional_layers,generated.responsible_ai_metrics.fully_connected_layers
0,"[[1, 10], [10, 20]]","[1, 28]","[1, 1]","[[20, 50], [50, 100]]","[None, 1]",1,0.014729,40.75,0.0,21880190.0,162990.0,100.0,12.0,5.0,7.0,"Sequential(\n (0): Conv2d(1, 10, kernel_size=...","Sequential(\n (0): Linear(in_features=20, out..."
6,"[[1, 30], [30, 60], [60, 90], [90, 120]]","[1, 1, 1, 28]","[1, 1, 1, 1]","[[120, 50], [50, 100]]","[None, 1]",1,0.018208,10.09,0.0,1810793000.0,8485880.0,120.0,20.0,5.0,15.0,"Sequential(\n (0): Conv2d(1, 30, kernel_size=...","Sequential(\n (0): Linear(in_features=120, ou..."
3,"[[1, 20], [20, 40], [40, 60]]","[1, 1, 28]","[1, 1, 1]","[[60, 50], [50, 100]]","[None, 1]",1,0.018242,10.28,0.0,324195700.0,1890690.0,100.0,16.0,5.0,11.0,"Sequential(\n (0): Conv2d(1, 20, kernel_size=...","Sequential(\n (0): Linear(in_features=60, out..."


## Query Returning the Top K tasks using quantile thresholds

This query filters tasks based on quantiles (it lists only the occurrences whose `cpu_times.user` is below the 50% quantile, i.e., the median) and then sorts them by CPU time, loss, and FLOPs.

In [32]:
# One clause: (column, comparison operator, threshold). Per the text above, the
# threshold 0.5 is a quantile (the median), not a raw value.
clauses = [("telemetry_diff.process.cpu_times.user", "<", 0.5)]
# Sort specification: (column, direction) pairs, all ascending.
sort = [
    (column, TaskQueryAPI.ASC)
    for column in (
        "telemetry_diff.process.cpu_times.user",
        "generated.loss",
        "generated.responsible_ai_metrics.flops",
    )
]
df = query_api.df_get_tasks_quantiles(
    filter=_filter,
    clauses=clauses,
    sort=sort,
    clean_dataframe=True,
    calculate_telemetry_diff=True,
)
df

Number of columns originally: 334
Number of columns later: 58


Unnamed: 0,used.max_epochs,generated.loss,generated.accuracy,generated.responsible_ai_metrics.shap_sum,generated.responsible_ai_metrics.flops,generated.responsible_ai_metrics.params,generated.responsible_ai_metrics.max_width,generated.responsible_ai_metrics.depth,generated.responsible_ai_metrics.n_fc_layers,generated.responsible_ai_metrics.n_cv_layers,...,telemetry_diff.network.netio_per_interface.en0.bytes_sent,telemetry_diff.network.netio_per_interface.en0.packets_sent,telemetry_diff.network.netio_per_interface.utun4.bytes_sent,telemetry_diff.network.netio_per_interface.utun4.bytes_recv,telemetry_diff.network.netio_per_interface.utun4.packets_sent,telemetry_diff.network.netio_per_interface.utun4.packets_recv,telemetry_diff.network.netio_per_interface.vmenet0.bytes_sent,telemetry_diff.network.netio_per_interface.vmenet0.packets_sent,telemetry_diff.network.netio_per_interface.bridge100.bytes_sent,telemetry_diff.network.netio_per_interface.bridge100.packets_sent
0,1.0,0.014729,40.75,0.0,21880190.0,162990.0,100.0,12.0,5.0,7.0,...,243712.0,534.0,199680.0,205824.0,507.0,780.0,0.0,1.0,0.0,2.0
1,1.0,0.040326,11.35,0.0,47275140.0,359840.0,400.0,16.0,9.0,7.0,...,245760.0,546.0,200704.0,208896.0,519.0,806.0,0.0,1.0,0.0,2.0
2,1.0,0.058157,11.35,0.0,5405073000.0,42184840.0,4000.0,24.0,17.0,7.0,...,626688.0,1355.0,514048.0,493568.0,1288.0,1960.0,0.0,5.0,1024.0,10.0
3,1.0,0.018242,10.28,0.0,324195700.0,1890690.0,100.0,16.0,5.0,11.0,...,1671168.0,2556.0,812032.0,799744.0,1994.0,3078.0,1024.0,10.0,2048.0,20.0


## Correlation Analysis