### This Jupyter notebook provides examples of how the pipeline metadata stored with CMF tracking layer can be queried with CMF query layer.

In [1]:
import ml_metadata as mlmd
from ml_metadata.metadata_store import metadata_store
from ml_metadata.proto import metadata_store_pb2

In [2]:
connection_config = metadata_store_pb2.ConnectionConfig()
connection_config.sqlite.filename_uri = 'mlmd'
connection_config.sqlite.connection_mode = 3 # READWRITE_OPENCREATE
store = metadata_store.MetadataStore(connection_config)

In [5]:
ids = store.get_executions()
print(ids)

[id: 1
type_id: 12
properties {
  key: "Context_ID"
  value {
    int_value: 2
  }
}
properties {
  key: "Context_Type"
  value {
    string_value: "active_learning/Initial_select"
  }
}
properties {
  key: "Execution"
  value {
    string_value: "[\'initial_select.py\', \'configs/MIAOD-GRAY.py\', \'--work_directory\', \'work_dirs/test\', \'--labeled_next\', \'work_dirs/test/X_L_0.npy\', \'--unselected\', \'work_dirs/test/X_U_0.npy\']"
  }
}
properties {
  key: "Git_End_Commit"
  value {
    string_value: ""
  }
}
properties {
  key: "Git_Repo"
  value {
    string_value: "/tmp/remote"
  }
}
properties {
  key: "Git_Start_Commit"
  value {
    string_value: "62bc1f873091a1ddee42228601857e722f05b92a"
  }
}
properties {
  key: "Pipeline_Type"
  value {
    string_value: "active_learning"
  }
}
properties {
  key: "Pipeline_id"
  value {
    int_value: 1
  }
}
custom_properties {
  key: "config"
  value {
    string_value: "Config (path: configs/MIAOD-GRAY.py): {\'model\': {\'type\': \'Re

In [14]:
import pandas as pd
from cmflib import cmfquery


### Initialize the library and get all the stages in the pipeline
Point the library to the metadata file. <br>
The `get_pipeline_stages` call point to the different stages in the pipeline.

In [15]:
query = cmfquery.CmfQuery("./mlmd")
pipelines = query.get_pipeline_names()
stages = query.get_pipeline_stages(pipelines[0])
display(stages)

['active_learning/Initial_select',
 'active_learning/Train',
 'active_learning/Cycle_Select']

### Query the Executions in each stage

In [16]:
for stage in stages:
    executions = query.get_all_executions_in_stage(stage)
    print(f"Displaying execution for stage {stage}")
    display(executions)

Displaying execution for stage active_learning/Initial_select


Unnamed: 0,Context_ID,Context_Type,Execution,Git_End_Commit,Git_Repo,Git_Start_Commit,Pipeline_Type,Pipeline_id,config,env,id
0,2,active_learning/Initial_select,"['initial_select.py', 'configs/MIAOD-GRAY.py',...",,/tmp/remote,62bc1f873091a1ddee42228601857e722f05b92a,active_learning,1,Config (path: configs/MIAOD-GRAY.py): {'model'...,"{'sys.platform': 'linux', 'Python': '3.8.15 (d...",1


Displaying execution for stage active_learning/Train


Unnamed: 0,Context_ID,Context_Type,Execution,Git_End_Commit,Git_Repo,Git_Start_Commit,Pipeline_Type,Pipeline_id,env,id,seed
0,3,active_learning/Train,"['cycle_train.py', 'configs/MIAOD-GRAY.py', '-...",,/tmp/remote,2684533ec003bc7ab828bc7223c98200a88dfa04,active_learning,1,"{'sys.platform': 'linux', 'Python': '3.8.15 (d...",6,666
1,3,active_learning/Train,"['cycle_train.py', 'configs/MIAOD-GRAY.py', '-...",,/tmp/remote,5462a60d81b5b72eeb60c99b8decde79a63b4c04,active_learning,1,"{'sys.platform': 'linux', 'Python': '3.8.15 (d...",4,666
2,3,active_learning/Train,"['cycle_train.py', 'configs/MIAOD-GRAY.py', '-...",,/tmp/remote,62bc1f873091a1ddee42228601857e722f05b92a,active_learning,1,"{'sys.platform': 'linux', 'Python': '3.8.15 (d...",2,666


Displaying execution for stage active_learning/Cycle_Select


Unnamed: 0,Context_ID,Context_Type,Execution,Git_End_Commit,Git_Repo,Git_Start_Commit,Pipeline_Type,Pipeline_id,config,env,id
0,4,active_learning/Cycle_Select,"['cycle_select.py', 'configs/MIAOD-GRAY.py', '...",,/tmp/remote,62c61d5acb2f080a5dc96305c57ee9f8845a6fbb,active_learning,1,<class 'mmcv.utils.config.Config'>,"{'sys.platform': 'linux', 'Python': '3.8.15 (d...",7
1,4,active_learning/Cycle_Select,"['cycle_select.py', 'configs/MIAOD-GRAY.py', '...",,/tmp/remote,2684533ec003bc7ab828bc7223c98200a88dfa04,active_learning,1,<class 'mmcv.utils.config.Config'>,"{'sys.platform': 'linux', 'Python': '3.8.15 (d...",5
2,4,active_learning/Cycle_Select,"['cycle_select.py', 'configs/MIAOD-GRAY.py', '...",,/tmp/remote,5462a60d81b5b72eeb60c99b8decde79a63b4c04,active_learning,1,<class 'mmcv.utils.config.Config'>,"{'sys.platform': 'linux', 'Python': '3.8.15 (d...",3


### Get all  artifacts of an execution. 
<b>input parameter - execution_id</b><br>
<b>output parameter - artifacts</b><br>


In [17]:

executions = query.get_all_executions_in_stage(stages[0])
print(f"Displaying the artifacts for execution with id {executions.iloc[0]['id']} belonging to {stages[0]}")
artifacts = query.get_all_artifacts_for_execution(executions.iloc[0]["id"])
display(artifacts)

Displaying the artifacts for execution with id 1 belonging to active_learning/Initial_select


Unnamed: 0,Background_no_bounding_boxes,Commit,Remote,create_time_since_epoch,cycle,event,git_repo,gray_scale,id,labeled,last_update_time_since_epoch,mean,median,name,type,uri,url
0,True,5ccd6cefba4a460ea5a9edad75a1e4af.dir,,1680746922564,,INPUT,/tmp/remote,True,1,,1680788260710,,,hdcdatasets/hdc_single/JPEGImages:5ccd6cefba4a...,Dataset,5ccd6cefba4a460ea5a9edad75a1e4af.dir,active_learning:/tmp/myremote/5c/cd6cefba4a460...
1,,84e52a5da234c849655b276561889960,,1680746925160,0.0,OUTPUT,/tmp/remote,,2,True,1680788263123,,,work_dirs/test/X_L_0.npy:84e52a5da234c849655b2...,numpy,84e52a5da234c849655b276561889960,active_learning:/tmp/myremote/84/e52a5da234c84...
2,,0eef7540aa910839ad064798927b5b78,/tmp/myremote/0e/ef7540aa910839ad064798927b5b78,1680746928083,,OUTPUT,/tmp/remote,,3,,1680746928083,-1.0,-1.0,slice-0:0eef7540aa910839ad064798927b5b78,Dataslice,0eef7540aa910839ad064798927b5b78,
3,,60c0ebf589d21493fcd7ee6744558063,,1680746930631,0.0,OUTPUT,/tmp/remote,,4,False,1680788518007,,,work_dirs/test/X_U_0.npy:60c0ebf589d21493fcd7e...,numpy,60c0ebf589d21493fcd7ee6744558063,active_learning:/tmp/myremote/60/c0ebf589d2149...


### Get all executions for an artifact (pass the artifact full name as the input parameter)

In [18]:
#Provide the artifact in name:hash format
artifacts = query.get_all_artifacts_for_execution(executions.iloc[0]['id'])
for index, art in artifacts.iterrows():
    if art["event"] == "INPUT":
        artifact_name = art["name"]
        break
linked = query.get_all_executions_for_artifact(artifact_name)
display(linked)

Unnamed: 0,Type,execution_id,execution_name,pipeline,stage
0,INPUT,1,,active_learning,active_learning/Initial_select
1,INPUT,8,,active_learning_demo,active_learning_demo/Initial_select
2,INPUT,9,,active_learning_demo,active_learning_demo/Initial_select


### Get all the parent artifacts of an artifact. Provides the artifact lineage chain

In [19]:
artifacts = query.get_all_artifacts_for_execution(2)
for index, art in artifacts.iterrows():
    if art["event"] == "OUTPUT":
        artifact_name = art["name"]
        break
print(f"Parent artifact of {artifact_name}")
linked = query.get_all_parent_artifacts(artifact_name)
display(linked)

Parent artifact of metrics_1:34d783f5cd5f76dcd4edb6d63779c07c:2:47ae49f0-d420-11ed-a37a-d4c9efcecfde


Unnamed: 0,Background_no_bounding_boxes,Commit,Remote,create_time_since_epoch,cycle,git_repo,gray_scale,id,labeled,last_update_time_since_epoch,mean,median,name,type,uri,url
0,,84e52a5da234c849655b276561889960,,1680746925160,0.0,/tmp/remote,,2,True,1680788263123,,,work_dirs/test/X_L_0.npy:84e52a5da234c849655b2...,numpy,84e52a5da234c849655b276561889960,active_learning:/tmp/myremote/84/e52a5da234c84...
1,,60c0ebf589d21493fcd7ee6744558063,,1680746930631,0.0,/tmp/remote,,4,False,1680788518007,,,work_dirs/test/X_U_0.npy:60c0ebf589d21493fcd7e...,numpy,60c0ebf589d21493fcd7ee6744558063,active_learning:/tmp/myremote/60/c0ebf589d2149...
2,True,5ccd6cefba4a460ea5a9edad75a1e4af.dir,,1680746922564,,/tmp/remote,True,1,,1680788260710,,,hdcdatasets/hdc_single/JPEGImages:5ccd6cefba4a...,Dataset,5ccd6cefba4a460ea5a9edad75a1e4af.dir,active_learning:/tmp/myremote/5c/cd6cefba4a460...
3,,0eef7540aa910839ad064798927b5b78,/tmp/myremote/0e/ef7540aa910839ad064798927b5b78,1680746928083,,/tmp/remote,,3,,1680746928083,-1.0,-1.0,slice-0:0eef7540aa910839ad064798927b5b78,Dataslice,0eef7540aa910839ad064798927b5b78,


###  Get all  child artifacts of an artifact. It provides the lineage chain of successors

In [20]:
artifacts = query.get_all_artifacts_for_execution(1)
for index, art in artifacts.iterrows():
    if art["event"] == "INPUT":
        artifact_name = art["name"]
        break

print(f"Child artifact of {artifact_name}")
linked = query.get_all_child_artifacts(artifact_name)
display(linked)


Child artifact of hdcdatasets/hdc_single/JPEGImages:5ccd6cefba4a460ea5a9edad75a1e4af.dir


Unnamed: 0,Commit,Name,Remote,backbone,bbox_head,create_time_since_epoch,cycle,git_repo,id,labeled,...,metrics_name,model_framework,model_name,model_type,name,neck,type,uri,url,val/mAP
0,84e52a5da234c849655b276561889960,,,,,1680746925160,0.0,/tmp/remote,2,True,...,,,,,work_dirs/test/X_L_0.npy:84e52a5da234c849655b2...,,numpy,84e52a5da234c849655b276561889960,active_learning:/tmp/myremote/84/e52a5da234c84...,
1,0eef7540aa910839ad064798927b5b78,,/tmp/myremote/0e/ef7540aa910839ad064798927b5b78,,,1680746928083,,/tmp/remote,3,,...,,,,,slice-0:0eef7540aa910839ad064798927b5b78,,Dataslice,0eef7540aa910839ad064798927b5b78,,
2,60c0ebf589d21493fcd7ee6744558063,,,,,1680746930631,0.0,/tmp/remote,4,False,...,,,,,work_dirs/test/X_U_0.npy:60c0ebf589d21493fcd7e...,,numpy,60c0ebf589d21493fcd7ee6744558063,active_learning:/tmp/myremote/60/c0ebf589d2149...,
3,34d783f5cd5f76dcd4edb6d63779c07c,metrics_1,,,,1680747062501,,,5,,...,,,,,metrics_1:34d783f5cd5f76dcd4edb6d63779c07c:2:4...,,Step_Metrics,34d783f5cd5f76dcd4edb6d63779c07c,,
4,,,,,,1680747399557,,,6,,...,mAP:1094e374-d421-11ed-a37a-d4c9efcecfde:2,,,,mAP:1094e374-d421-11ed-a37a-d4c9efcecfde:2,,Metrics,1094e374-d421-11ed-a37a-d4c9efcecfde,,0.123142
5,8f328f08a1cc232c1aaf92a7ec038405,metrics_1,,,,1680747401867,,,7,,...,,,,,metrics_1:8f328f08a1cc232c1aaf92a7ec038405:2:1...,,Step_Metrics,8f328f08a1cc232c1aaf92a7ec038405,,
6,43b7ae48099741419a1cf520b8ac8fae,,,,,1680747406594,,,8,,...,,mmdet,resnet50,RetinaNet,work_dirs/test/cycle_0.pth:43b7ae48099741419a1...,,Model,43b7ae48099741419a1cf520b8ac8fae,active_learning:/tmp/myremote/43/b7ae480997414...,
7,13840834c91f4a2f1925dc7cd6949688,metrics_1,,,,1680788698779,,,26,,...,,,,,metrics_1:13840834c91f4a2f1925dc7cd6949688:10:...,,Step_Metrics,13840834c91f4a2f1925dc7cd6949688,,
8,,,,,,1680789037554,,,27,,...,mAP:02c34248-d482-11ed-a37a-d4c9efcecfde:10,,,,mAP:02c34248-d482-11ed-a37a-d4c9efcecfde:10,,Metrics,02c34248-d482-11ed-a37a-d4c9efcecfde,,0.123142
9,16de902f650a94db87d7c83f74b87eef,metrics_1,,,,1680789039975,,,28,,...,,,,,metrics_1:16de902f650a94db87d7c83f74b87eef:10:...,,Step_Metrics,16de902f650a94db87d7c83f74b87eef,,


### Get all the parent artifacts of an artifact. Provides the artifact lineage chain of predecessors

In [None]:

linked = query.get_all_parent_artifacts(linked.iloc[0]["name"])
display(linked)

In [None]:
linked = query.get_all_child_artifacts(linked.iloc[0]["name"])
display(linked)

### Get immediate child artifacts of an artifact. 

In [None]:
linked = query.get_one_hop_child_artifacts(artifact_name)
display(linked)

### Get all child artifacts 

In [None]:
linked = query.get_all_child_artifacts(artifact_name)
display(linked)

In [None]:
# Provide Execution id corresponding to a Evaluate stage
linked = query.get_all_artifacts_for_execution(4)
for index, row in linked.iterrows():
    if row["type"] == "Step_Metrics":
        break



### Get artifact 

In [9]:
artifact = query.get_artifact(row["name"])
display(artifact)


NameError: name 'row' is not defined

In [10]:
artifacts = query.get_artifact("metrics_1:fd4f4f1a73f77d23be7ae7c7f72c834a:6:f8e54b50-d426-11ed-a37a-d4c9efcecfde")
display(artifacts)

Unnamed: 0,id,type,uri,name,create_time_since_epoch,last_update_time_since_epoch,Name,Commit
0,19,Step_Metrics,fd4f4f1a73f77d23be7ae7c7f72c834a,metrics_1:fd4f4f1a73f77d23be7ae7c7f72c834a:6:f...,1680749936798,1680749936798,metrics_1,fd4f4f1a73f77d23be7ae7c7f72c834a


In [21]:
df = query.read_dataslice("slice_1")
df = df.drop("Uncertinity", axis = 1)
display(df)

Unnamed: 0_level_0,cluster_distance
Path,Unnamed: 1_level_1
hdcdatasets/hdc_single/JPEGImages/000107.jpg,0.45611964897696833
hdcdatasets/hdc_single/JPEGImages/000174.jpg,0.9033231539802643
hdcdatasets/hdc_single/JPEGImages/000211.jpg,0.4343754968307887
hdcdatasets/hdc_single/JPEGImages/000228.jpg,0.5006266907917188
hdcdatasets/hdc_single/JPEGImages/000242.jpg,0.8113594841043066
...,...
hdcdatasets/hdc_single/JPEGImages/2011_003066.jpg,0.4562636710967408
hdcdatasets/hdc_single/JPEGImages/2011_003194.jpg,0.7199558878656946
hdcdatasets/hdc_single/JPEGImages/2011_003230.jpg,0.5480589223524482
hdcdatasets/hdc_single/JPEGImages/2011_003238.jpg,0.7264216245977809


In [22]:
df = query.get_artifact("slice_2:6267c2069f4b3e5aea8ee2f8c012c4de")
display(df)
df1 = df.drop(df.columns.difference(['name','median','mean']), axis=1, inplace=False)
df = query.get_artifact("slice_1:3eef0041cb7851dc19f5509201246d18")
df2 = df.drop(df.columns.difference(['name','median','mean']), axis=1, inplace=False)

df = query.get_artifact("slice_3:e95c79451b72ff42ee4a168c84cea684")
df3 = df.drop(df.columns.difference(['name','median','mean']), axis=1, inplace=False)
df = pd.concat([df2, df1, df3], axis=1)
print(df)

Unnamed: 0,id,type,uri,name,create_time_since_epoch,last_update_time_since_epoch,Remote,mean,median,git_repo,Commit
0,18,Dataslice,6267c2069f4b3e5aea8ee2f8c012c4de,slice_2:6267c2069f4b3e5aea8ee2f8c012c4de,1680749417271,1680749417271,/tmp/myremote/62/67c2069f4b3e5aea8ee2f8c012c4de,6.0782277e-06,6.097564e-06,/tmp/remote,6267c2069f4b3e5aea8ee2f8c012c4de


                                       name          mean         median  \
0  slice_1:3eef0041cb7851dc19f5509201246d18  7.785772e-08  7.7003534e-08   

                                       name           mean        median  \
0  slice_2:6267c2069f4b3e5aea8ee2f8c012c4de  6.0782277e-06  6.097564e-06   

                                       name         median           mean  
0  slice_3:e95c79451b72ff42ee4a168c84cea684  1.3451602e-06  1.3491577e-06  


In [23]:
#df = query.get_metrics("metrics_1:388f94930ac804605b96ba8127d214ef:4:30980540-d424-11ed-a37a-d4c9efcecfde")
df = pd.read_parquet("metrics_1")
display(df)

Unnamed: 0_level_0,train/l_det_cls,train/l_det_loc,train/l_imgcls,train/L_det,train/grad_norm,learning_rate,momentum,val/mAP
SequenceNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,0.089573,0.0,1.313254,1.402827,0.086545,0.001,0.9,
2,0.086868,0.0,1.313254,1.400122,0.103774,0.001,0.9,
3,,,,,,0.001,0.9,0.0
