### This Jupyter notebook provides examples of how the pipeline metadata stored with the CMF tracking layer can be queried with the CMF query layer.

In [40]:
import pandas as pd
from cmflib import cmfquery

### Initialize the library and get all the stages in the pipeline
Point the library to the metadata file. <br>
The `get_pipeline_stages` call returns the names of the stages in the given pipeline.

In [53]:
# Open the metadata store and list the stages of the first pipeline found in it.
query = cmfquery.CmfQuery("./mlmd")
pipelines = query.get_pipeline_names()
first_pipeline = pipelines[0]
stages = query.get_pipeline_stages(first_pipeline)
display(stages)

['Prepare', 'Featurize', 'Train', 'Evaluate']

### Query the Executions in each stage

In [54]:
# Walk the stages in the order returned by the query layer and show the
# executions recorded for each one.
for stage in stages:
    stage_executions = query.get_all_executions_in_stage(stage)
    print(f"Displaying execution for stage {stage}")
    display(stage_executions)

Displaying execution for stage Prepare


Unnamed: 0,Context_ID,Context_Type,Execution,Git_End_Commit,Git_Repo,Git_Start_Commit,Pipeline_Type,Pipeline_id,id,seed,split
0,2,Prepare,"['src/parse.py', 'artifacts/data.xml.gz', 'artifacts/parsed']",,/tmp/cmf/example_get_started/git_remote,8158283953c04affb8fe5ea6710656564ede7d3a,Test-env,1,1,20170428,0.2


Displaying execution for stage Featurize


Unnamed: 0,Context_ID,Context_Type,Execution,Git_End_Commit,Git_Repo,Git_Start_Commit,Pipeline_Type,Pipeline_id,id,max_features,ngrams
0,3,Featurize-execution,"['src/featurize.py', 'artifacts/parsed', 'artifacts/features']",,/tmp/cmf/example_get_started/git_remote,8158283953c04affb8fe5ea6710656564ede7d3a,Test-env,1,2,3000,2


Displaying execution for stage Train


Unnamed: 0,Context_ID,Context_Type,Execution,Git_End_Commit,Git_Repo,Git_Start_Commit,Pipeline_Type,Pipeline_id,id,min_split,n_est,seed
0,4,Train-execution,"['src/train.py', 'artifacts/features', 'artifacts/model']",,/tmp/cmf/example_get_started/git_remote,8158283953c04affb8fe5ea6710656564ede7d3a,Test-env,1,3,64,100,20170428


Displaying execution for stage Evaluate


Unnamed: 0,Context_ID,Context_Type,Execution,Git_End_Commit,Git_Repo,Git_Start_Commit,Pipeline_Type,Pipeline_id,id
0,5,Evaluate-execution,"['src/test.py', 'artifacts/model', 'artifacts/features', 'artifacts/tes...",,/tmp/cmf/example_get_started/git_remote,8158283953c04affb8fe5ea6710656564ede7d3a,Test-env,1,4


### Get all artifacts of an execution
<b>input parameter - execution_id</b><br>
<b>output parameter - artifacts</b><br>


In [56]:

# Fetch the executions of the first stage, then list every artifact attached
# to the first of those executions.
executions = query.get_all_executions_in_stage(stages[0])
first_execution = executions.iloc[0]
print(f"Displaying the artifacts for execution with id {executions.iloc[0]['id']} belonging to {stages[0]}")
artifacts = query.get_all_artifacts_for_execution(first_execution["id"])
display(artifacts)

Displaying the artifacts for execution with id 1 belonging to Prepare


Unnamed: 0,Commit,create_time_since_epoch,event,git_repo,id,last_update_time_since_epoch,name,type,uri,user-metadata1,user-metadata2
0,commit 03c25dfdb6c188b7b04f7e675dec072de192b851,1667255770792,INPUT,/tmp/cmf/example_get_started/git_remote,1,1667255778222,artifacts/data.xml.gz:236d9502e0283d91f689d7038b8508a2,Dataset,236d9502e0283d91f689d7038b8508a2,metadata_value,metadata_value
1,commit 4fba7197919fb85dd1a0899d2cf5c5c690ee607c,1667255774532,OUTPUT,/tmp/cmf/example_get_started/git_remote,2,1667255774532,artifacts/parsed/train.tsv:22ec7737f442cfc81e8c701fb58d1007,Dataset,22ec7737f442cfc81e8c701fb58d1007,,
2,commit 5dfd3ac63c950f6394e5b7cebd55343402c7fdb6,1667255776391,OUTPUT,/tmp/cmf/example_get_started/git_remote,3,1667255776391,artifacts/parsed/test.tsv:03e3627bda150c8cf51a55ef96ab3ede,Dataset,03e3627bda150c8cf51a55ef96ab3ede,,


### Get all executions for an artifact (pass the artifact full name as the input parameter)

In [57]:
# Provide the artifact in name:hash format.
# Take the first INPUT artifact of the first execution and list every
# execution linked to it.
artifacts = query.get_all_artifacts_for_execution(executions.iloc[0]['id'])
artifact_name = next(
    (art["name"] for _, art in artifacts.iterrows() if art["event"] == "INPUT"),
    None,
)
if artifact_name is None:
    # Stop here instead of continuing with an undefined or stale artifact name.
    raise ValueError("The execution has no INPUT artifact to look up")
linked = query.get_all_executions_for_artifact(artifact_name)
display(linked)

Unnamed: 0,Type,execution_id,execution_name,pipeline,stage
0,INPUT,1,,Test-env,Prepare


### Get all the parent artifacts of an artifact. Provides the artifact lineage chain

In [58]:
# Execution id 2 is the Featurize execution in the sample output shown earlier
# in this notebook; adjust it when running against a different metadata store.
featurize_execution_id = 2
artifacts = query.get_all_artifacts_for_execution(featurize_execution_id)
# Take the first OUTPUT artifact of that execution.
artifact_name = next(
    (art["name"] for _, art in artifacts.iterrows() if art["event"] == "OUTPUT"),
    None,
)
if artifact_name is None:
    # Without this check a missing OUTPUT row would silently reuse whatever
    # artifact_name an earlier cell left behind.
    raise ValueError("The execution has no OUTPUT artifact to trace")
print(f"Parent artifact of {artifact_name}")
linked = query.get_all_parent_artifacts(artifact_name)
display(linked)

Parent artifact of artifacts/features/train.pkl:5de5e987eadb4b86fc47604b59cb3725


Unnamed: 0,Commit,create_time_since_epoch,git_repo,id,last_update_time_since_epoch,name,type,uri,user-metadata1,user-metadata2
0,commit 4fba7197919fb85dd1a0899d2cf5c5c690ee607c,1667255774532,/tmp/cmf/example_get_started/git_remote,2,1667255774532,artifacts/parsed/train.tsv:22ec7737f442cfc81e8c701fb58d1007,Dataset,22ec7737f442cfc81e8c701fb58d1007,,
1,commit 5dfd3ac63c950f6394e5b7cebd55343402c7fdb6,1667255776391,/tmp/cmf/example_get_started/git_remote,3,1667255776391,artifacts/parsed/test.tsv:03e3627bda150c8cf51a55ef96ab3ede,Dataset,03e3627bda150c8cf51a55ef96ab3ede,,
2,commit 03c25dfdb6c188b7b04f7e675dec072de192b851,1667255770792,/tmp/cmf/example_get_started/git_remote,1,1667255778222,artifacts/data.xml.gz:236d9502e0283d91f689d7038b8508a2,Dataset,236d9502e0283d91f689d7038b8508a2,metadata_value,metadata_value


### Get all child artifacts of an artifact. It provides the lineage chain of successors

In [46]:
# Uses the artifact_name selected in an earlier cell.
print(f"Child artifact of {artifact_name}")
child_artifacts = query.get_all_child_artifacts(artifact_name)
# Keep the result under `linked` as well; the following cell reads its first row.
linked = child_artifacts
display(child_artifacts)


Child artifact of artifacts/features/train.pkl:5de5e987eadb4b86fc47604b59cb3725


Unnamed: 0,Commit,avg_prec,create_time_since_epoch,id,last_update_time_since_epoch,metrics_name,model_framework,model_name,model_type,name,roc_auc,type,uri
0,commit 8158283953c04affb8fe5ea6710656564ede7d3a,,1667255811813,6,1667255811813,,SKlearn,RandomForestClassifier:default,RandomForestClassifier,artifacts/model/model.pkl:5f6e4aa57cce9e3a0b2f12e5766d19be:3,,Model,5f6e4aa57cce9e3a0b2f12e5766d19be
1,,0.526754,1667255818388,7,1667255818388,metrics:878d492e-596c-11ed-99a3-b47af137252e:4,,,,metrics:878d492e-596c-11ed-99a3-b47af137252e:4,0.959238,Metrics,878d492e-596c-11ed-99a3-b47af137252e


### Get all the parent artifacts of the first artifact in the previous result. Provides the artifact lineage chain of predecessors

In [59]:

# Walk up the lineage from the first artifact of the previous result.
# NOTE: `linked` is both the input and the output of this cell, so running it
# a second time starts from a different artifact.
first_artifact_name = linked.iloc[0]["name"]
linked = query.get_all_parent_artifacts(first_artifact_name)
display(linked)

Unnamed: 0,Commit,create_time_since_epoch,git_repo,id,last_update_time_since_epoch,name,type,uri,user-metadata1,user-metadata2
0,commit 03c25dfdb6c188b7b04f7e675dec072de192b851,1667255770792,/tmp/cmf/example_get_started/git_remote,1,1667255778222,artifacts/data.xml.gz:236d9502e0283d91f689d7038b8508a2,Dataset,236d9502e0283d91f689d7038b8508a2,metadata_value,metadata_value


In [60]:
# Walk down the lineage from the first artifact of the previous result.
# NOTE: `linked` is both the input and the output of this cell, so running it
# a second time starts from a different artifact.
first_artifact_name = linked.iloc[0]["name"]
linked = query.get_all_child_artifacts(first_artifact_name)
display(linked)

Unnamed: 0,Commit,avg_prec,create_time_since_epoch,git_repo,id,last_update_time_since_epoch,metrics_name,model_framework,model_name,model_type,name,roc_auc,type,uri
0,commit 4fba7197919fb85dd1a0899d2cf5c5c690ee607c,,1667255774532,/tmp/cmf/example_get_started/git_remote,2,1667255774532,,,,,artifacts/parsed/train.tsv:22ec7737f442cfc81e8c701fb58d1007,,Dataset,22ec7737f442cfc81e8c701fb58d1007
1,commit 5dfd3ac63c950f6394e5b7cebd55343402c7fdb6,,1667255776391,/tmp/cmf/example_get_started/git_remote,3,1667255776391,,,,,artifacts/parsed/test.tsv:03e3627bda150c8cf51a55ef96ab3ede,,Dataset,03e3627bda150c8cf51a55ef96ab3ede
2,commit 4546b0679bcae18bd85893c69581db91da40495c,,1667255800206,/tmp/cmf/example_get_started/git_remote,4,1667255800206,,,,,artifacts/features/train.pkl:5de5e987eadb4b86fc47604b59cb3725,,Dataset,5de5e987eadb4b86fc47604b59cb3725
3,commit d67bedaa20e64e45fe9f553935d9ff0726f19b59,,1667255802382,/tmp/cmf/example_get_started/git_remote,5,1667255802382,,,,,artifacts/features/test.pkl:b1f98b4ebd09a0bdc72f1a8c102065dd,,Dataset,b1f98b4ebd09a0bdc72f1a8c102065dd
4,commit 8158283953c04affb8fe5ea6710656564ede7d3a,,1667255811813,,6,1667255811813,,SKlearn,RandomForestClassifier:default,RandomForestClassifier,artifacts/model/model.pkl:5f6e4aa57cce9e3a0b2f12e5766d19be:3,,Model,5f6e4aa57cce9e3a0b2f12e5766d19be
5,,0.526754,1667255818388,,7,1667255818388,metrics:878d492e-596c-11ed-99a3-b47af137252e:4,,,,metrics:878d492e-596c-11ed-99a3-b47af137252e:4,0.959238,Metrics,878d492e-596c-11ed-99a3-b47af137252e


### Get immediate child artifacts of an artifact. 

In [61]:
# One-hop query for the artifact_name selected in an earlier cell.
immediate_children = query.get_one_hop_child_artifacts(artifact_name)
display(immediate_children)

Unnamed: 0,Commit,create_time_since_epoch,id,last_update_time_since_epoch,model_framework,model_name,model_type,name,type,uri
0,commit 8158283953c04affb8fe5ea6710656564ede7d3a,1667255811813,6,1667255811813,SKlearn,RandomForestClassifier:default,RandomForestClassifier,artifacts/model/model.pkl:5f6e4aa57cce9e3a0b2f12e5766d19be:3,Model,5f6e4aa57cce9e3a0b2f12e5766d19be


### Get all child artifacts 

In [62]:
# Full child query for the same artifact_name, for comparison with the
# one-hop query.
all_children = query.get_all_child_artifacts(artifact_name)
display(all_children)

Unnamed: 0,Commit,avg_prec,create_time_since_epoch,id,last_update_time_since_epoch,metrics_name,model_framework,model_name,model_type,name,roc_auc,type,uri
0,commit 8158283953c04affb8fe5ea6710656564ede7d3a,,1667255811813,6,1667255811813,,SKlearn,RandomForestClassifier:default,RandomForestClassifier,artifacts/model/model.pkl:5f6e4aa57cce9e3a0b2f12e5766d19be:3,,Model,5f6e4aa57cce9e3a0b2f12e5766d19be
1,,0.526754,1667255818388,7,1667255818388,metrics:878d492e-596c-11ed-99a3-b47af137252e:4,,,,metrics:878d492e-596c-11ed-99a3-b47af137252e:4,0.959238,Metrics,878d492e-596c-11ed-99a3-b47af137252e


In [63]:
# Provide Execution id corresponding to a Evaluate stage
linked = query.get_all_artifacts_for_execution(4)
for index, row in linked.iterrows():
    if row["type"] == "Metrics":
        break



### Get a single artifact by name

In [64]:
# `row` comes from the Metrics search in the previous cell.
metrics_artifact_name = row["name"]
artifact = query.get_artifact(metrics_artifact_name)
display(artifact)


Unnamed: 0,id,type,uri,name,create_time_since_epoch,last_update_time_since_epoch,metrics_name,avg_prec,roc_auc
0,7,Metrics,878d492e-596c-11ed-99a3-b47af137252e,metrics:878d492e-596c-11ed-99a3-b47af137252e:4,1667255818388,1667255818388,metrics:878d492e-596c-11ed-99a3-b47af137252e:4,0.526754,0.959238
