### This Jupyter notebook provides examples of how the pipeline metadata stored with the CMF tracking layer can be queried with the CMF query layer.

In [1]:
import pandas as pd
from cmflib import cmfquery


### Initialize the library and get all the stages in the pipeline
Point the library to the metadata file. <br>
The `get_pipeline_stages` call returns the stages of the given pipeline.

In [2]:
# Point the query layer at the metadata file.
query = cmfquery.CmfQuery("./mlmd")

# The rest of the notebook works on the first pipeline found in that file.
pipelines = query.get_pipeline_names()
first_pipeline = pipelines[0]
stages = query.get_pipeline_stages(first_pipeline)
display(stages)

['active_learning/Initial_select',
 'active_learning/Train',
 'active_learning/Cycle_Select']

### Query the Executions in each stage

In [None]:
# Show the executions recorded for every stage listed in `stages`.
# Note: after the loop, `executions` still holds the result for the last stage.
for stage in stages:
    executions = query.get_all_executions_in_stage(stage)
    print(f"Displaying execution for stage {stage}")
    display(executions)

### Get all artifacts of an execution.
<b>input parameter - execution_id</b><br>
<b>output parameter - artifacts</b><br>


In [None]:

# Take the first execution of the first stage and list every artifact linked to it.
executions = query.get_all_executions_in_stage(stages[0])
first_execution_id = executions.iloc[0]["id"]  # looked up once, reused below
print(f"Displaying the artifacts for execution with id {first_execution_id} belonging to {stages[0]}")
artifacts = query.get_all_artifacts_for_execution(first_execution_id)
display(artifacts)

### Get all executions for an artifact (pass the artifact full name as the input parameter)

In [None]:
# Provide the artifact in name:hash format.
# The artifact is taken from the first row of `executions` (set by an earlier cell).
artifacts = query.get_all_artifacts_for_execution(executions.iloc[0]["id"])

# Pick the name of the first artifact whose event is INPUT. `next` with a default
# yields None when nothing matches (including an empty frame), instead of leaving
# `artifact_name` unbound or pointing at a value from a previously run cell.
artifact_name = next(
    (art["name"] for _, art in artifacts.iterrows() if art["event"] == "INPUT"),
    None,
)
if artifact_name is None:
    # Fail loudly here: every later lineage cell depends on `artifact_name`.
    raise LookupError("No INPUT artifact found for the selected execution")

linked = query.get_all_executions_for_artifact(artifact_name)
display(linked)

### Get all the parent artifacts of an artifact. Provides the artifact lineage chain

In [None]:
# Execution id 2 is hard-coded; presumably it is valid for the sample metadata
# file used by this notebook - adjust it when querying a different store.
artifacts = query.get_all_artifacts_for_execution(2)

# Pick the name of the first artifact whose event is OUTPUT. Using `next` with a
# default avoids silently reusing the `artifact_name` left behind by an earlier
# cell when this execution has no OUTPUT artifact.
artifact_name = next(
    (art["name"] for _, art in artifacts.iterrows() if art["event"] == "OUTPUT"),
    None,
)
if artifact_name is None:
    raise LookupError("No OUTPUT artifact found for execution 2")

print(f"Parent artifact of {artifact_name}")
linked = query.get_all_parent_artifacts(artifact_name)
display(linked)

### Get all child artifacts of an artifact. It provides the lineage chain of successors.

In [None]:
# Execution id 1 is hard-coded; presumably it is valid for the sample metadata
# file used by this notebook - adjust it when querying a different store.
artifacts = query.get_all_artifacts_for_execution(1)

# Pick the name of the first artifact whose event is INPUT. Using `next` with a
# default avoids silently reusing the `artifact_name` left behind by an earlier
# cell when this execution has no INPUT artifact.
artifact_name = next(
    (art["name"] for _, art in artifacts.iterrows() if art["event"] == "INPUT"),
    None,
)
if artifact_name is None:
    raise LookupError("No INPUT artifact found for execution 1")

print(f"Child artifact of {artifact_name}")
linked = query.get_all_child_artifacts(artifact_name)
display(linked)


### Get all the parent artifacts of an artifact. Provides the artifact lineage chain of predecessors

In [None]:

# Re-root the lineage query on the first artifact of the previous result.
# NOTE: `linked` is both the input and the output of this cell, so re-running it
# on its own feeds the cell its own previous output.
first_linked_name = linked.iloc[0]["name"]
linked = query.get_all_parent_artifacts(first_linked_name)
display(linked)

In [None]:
# Query the child artifacts of the first artifact in the previous result.
# NOTE: `linked` is both the input and the output of this cell, so re-running it
# on its own feeds the cell its own previous output.
first_linked_name = linked.iloc[0]["name"]
linked = query.get_all_child_artifacts(first_linked_name)
display(linked)

### Get immediate child artifacts of an artifact. 

In [None]:
# `artifact_name` is whatever an earlier cell assigned last; this cell does not set it.
# Overwrites `linked` with the result of the one-hop child query.
linked = query.get_one_hop_child_artifacts(artifact_name)
display(linked)

### Get all child artifacts 

In [None]:
# Same starting artifact (`artifact_name`, set by an earlier cell) as the one-hop
# query above, but using the get_all_child_artifacts call instead.
linked = query.get_all_child_artifacts(artifact_name)
display(linked)

In [None]:
# Provide the execution id corresponding to an Evaluate stage.
# (4 is hard-coded; presumably valid for the sample metadata file - verify.)
linked = query.get_all_artifacts_for_execution(4)

# Select the first artifact record whose type is Step_Metrics; the next cell
# reads `row["name"]`. With a plain loop-and-break, a miss would leave `row`
# bound to the last record scanned (an artifact of some other type), so use
# `next` with a default and stop explicitly when nothing matches.
row = next(
    (record for _, record in linked.iterrows() if record["type"] == "Step_Metrics"),
    None,
)
if row is None:
    raise LookupError("No Step_Metrics artifact found for execution 4")



### Get artifact 

In [None]:
# `row` comes from the artifact scan in the previous cell.
step_metrics_name = row["name"]
artifact = query.get_artifact(step_metrics_name)
display(artifact)


In [None]:
# Fetch a single artifact directly by its full name.
# Note: this rebinds `artifacts`, replacing the value set by earlier cells.
full_artifact_name = "metrics_1:fd4f4f1a73f77d23be7ae7c7f72c834a:6:f8e54b50-d426-11ed-a37a-d4c9efcecfde"
artifacts = query.get_artifact(full_artifact_name)
display(artifacts)

In [3]:
# Load the "slice_1" data slice and remove one column before displaying it.
# The column label "Uncertinity" is kept exactly as written - presumably it
# matches the spelling stored in the slice; verify before "correcting" it.
df = query.read_dataslice("slice_1")
df = df.drop(columns=["Uncertinity"])
display(df)

Unnamed: 0_level_0,cluster_distance
Path,Unnamed: 1_level_1
hdcdatasets/hdc_single/JPEGImages/000107.jpg,0.45611964897696833
hdcdatasets/hdc_single/JPEGImages/000174.jpg,0.9033231539802643
hdcdatasets/hdc_single/JPEGImages/000211.jpg,0.4343754968307887
hdcdatasets/hdc_single/JPEGImages/000228.jpg,0.5006266907917188
hdcdatasets/hdc_single/JPEGImages/000242.jpg,0.8113594841043066
...,...
hdcdatasets/hdc_single/JPEGImages/2011_003066.jpg,0.4562636710967408
hdcdatasets/hdc_single/JPEGImages/2011_003194.jpg,0.7199558878656946
hdcdatasets/hdc_single/JPEGImages/2011_003230.jpg,0.5480589223524482
hdcdatasets/hdc_single/JPEGImages/2011_003238.jpg,0.7264216245977809


In [4]:
def _name_and_stats(artifact_df):
    """Return a copy of ``artifact_df`` without any column other than name/median/mean.

    The surviving columns keep their original relative order; any of the three
    labels that are not present in the frame are simply absent from the result.
    """
    wanted = ['name', 'median', 'mean']
    return artifact_df.drop(columns=artifact_df.columns.difference(wanted))


# slice_2 is fetched first and shown in full so all of its properties are visible.
slice_2 = query.get_artifact("slice_2:6267c2069f4b3e5aea8ee2f8c012c4de")
display(slice_2)
df1 = _name_and_stats(slice_2)

df2 = _name_and_stats(query.get_artifact("slice_1:3eef0041cb7851dc19f5509201246d18"))
df3 = _name_and_stats(query.get_artifact("slice_3:e95c79451b72ff42ee4a168c84cea684"))

# Column-wise concatenation, ordered slice_1, slice_2, slice_3: the three
# name/median/mean groups end up side by side in the same row(s).
df = pd.concat([df2, df1, df3], axis=1)
print(df)

Unnamed: 0,id,type,uri,name,create_time_since_epoch,last_update_time_since_epoch,median,mean,git_repo,Commit,Remote
0,18,Dataslice,6267c2069f4b3e5aea8ee2f8c012c4de,slice_2:6267c2069f4b3e5aea8ee2f8c012c4de,1680749417271,1680749417271,6.097564e-06,6.0782277e-06,/tmp/remote,6267c2069f4b3e5aea8ee2f8c012c4de,/tmp/myremote/62/67c2069f4b3e5aea8ee2f8c012c4de


                                       name          mean         median  \
0  slice_1:3eef0041cb7851dc19f5509201246d18  7.785772e-08  7.7003534e-08   

                                       name        median           mean  \
0  slice_2:6267c2069f4b3e5aea8ee2f8c012c4de  6.097564e-06  6.0782277e-06   

                                       name           mean         median  
0  slice_3:e95c79451b72ff42ee4a168c84cea684  1.3491577e-06  1.3451602e-06  


In [6]:
# Disabled alternative, kept for reference: presumably fetches the metrics
# through the query layer instead of reading the local file - TODO confirm.
#df = query.get_metrics("metrics_1:388f94930ac804605b96ba8127d214ef:4:30980540-d424-11ed-a37a-d4c9efcecfde")
# Read the metrics from the parquet data at the relative path "metrics_1".
df = pd.read_parquet("metrics_1")
display(df)

Unnamed: 0_level_0,train/l_det_cls,train/l_det_loc,train/l_imgcls,train/L_det,train/grad_norm,learning_rate,momentum,val/mAP
SequenceNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,0.089573,0.0,1.313254,1.402827,0.086545,0.001,0.9,
2,0.086868,0.0,1.313254,1.400122,0.103774,0.001,0.9,
3,,,,,,0.001,0.9,0.0
