# Playing with MLflow

## Logging details

First experiment: find out why the MLflow job created by the hello world notebook does not log the code, etc. I assume this is because we need to use `command` and submit the job through the `ml_client`, as explained in the hello world notebook, under the section `running script as a job`:

In [5]:
%%writefile hello_world_with_logs.py
import mlflow
from hello_world_core import hello_world, parse_args

def start_logging(args):
    """Select the experiment, open a run and log the name argument.

    The run is still active when this function returns.

    :param args: parsed arguments; only ``args.name`` is read.
    """
    experiment_name = "Hello World with logging"
    mlflow.set_experiment(experiment_name)
    mlflow.start_run()
    mlflow.log_param(key="name to log", value=args.name)
    
def finish_logging():
    """Close the MLflow run that is currently active."""
    mlflow.end_run()

def main():
    """Main function of the script.

    Parses the arguments, opens the MLflow run via ``start_logging``, calls
    ``hello_world`` with the parsed name and closes the run.

    If ``hello_world`` raises, the run is closed with status ``FAILED`` and
    the exception is re-raised, so a crashed script does not leave a run
    open or reported as successful.
    """
    args = parse_args()
    start_logging(args)
    try:
        hello_world(args.name)
    except Exception:
        # Record the failure on the run before propagating the error.
        mlflow.end_run(status="FAILED")
        raise
    finish_logging()

if __name__ == "__main__":
    main()

Writing hello_world_with_logs.py


In [8]:
import mlflow

In [10]:
mlflow.create_experiment?

Signature:
mlflow.create_experiment(
    name: str,
    artifact_location: Optional[str] = None,
    tags: Optional[Dict[str, Any]] = None,
) -> str
Docstring:
Create an experiment.

:param name: The experiment name, which must be unique and is case sensitive
:param artifact_location: The location to store run artifacts.
                          If not provided, the server picks an appropriate default.
:param tags: An optional dictionary of string keys and values to set as
                        tags on the experiment.
:r

In [9]:
mlflow.start_run?

Signature:
mlflow.start_run(
    run_id: str = None,
    experiment_id: Optional[str] = None,
    run_name: Optional[str] = None,
    nested: bool = False,
    tags: Optional[Dict[str, Any]] = None,
    description: Optional[str] = None,
) ->

In [6]:
# Standard imports
import os

# Third-party imports
import pandas as pd

# AML imports
from azure.ai.ml import command, MLClient
from azure.identity import DefaultAzureCredential

# Credentials used to reach the workspace
credential = DefaultAzureCredential()

# Workspace client, built from the local config file
ml_client = MLClient.from_config(credential=credential)

# Describe the job: what to run, where the code lives and in which environment
job = command(
    display_name="Hello World with logging and job",
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest",
    code="./",  # source code location: the root folder
    command="python hello_world_with_logs.py --name ${{inputs.name}}",
    inputs={
        "name": "Jaume",  # default value of our parameter
    },
)

# Send the job to the workspace (last expression, so the cell displays it)
ml_client.create_or_update(job)

Found the config file in: /config.json
[32mUploading data_science (12.66 MBs): 100%|██████████| 12658976/12658976 [00:00<00:00, 18557482.88it/s]
[39m



Experiment,Name,Type,Status,Details Page
data_science,jolly_malanga_wgt7b8mb36,command,Starting,Link to Azure Machine Learning studio


In the previous example there is one error: it seems that we cannot set an experiment name inside the script unless it is the same as the one passed to the `command` function. Since we didn't pass any experiment name to that function, we try doing so now:

In [7]:
# Same job as before, now with an explicit experiment name that matches
# the one the script sets
job = command(
    experiment_name="Hello World with logging",
    display_name="Hello World with logging and job",
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest",
    code="./",  # source code location: the root folder
    command="python hello_world_with_logs.py --name ${{inputs.name}}",
    inputs={
        "name": "Jaume",  # default value of our parameter
    },
)

# Send the job to the workspace (last expression, so the cell displays it)
ml_client.create_or_update(job)

[32mUploading data_science (12.66 MBs): 100%|██████████| 12664739/12664739 [00:00<00:00, 18366160.15it/s]
[39m



Experiment,Name,Type,Status,Details Page
Hello World with logging,joyful_brick_2zb5xmvktl,command,Starting,Link to Azure Machine Learning studio


## Logging experiments

Links:

https://mlflow.org/docs/2.0.0/tracking.html#logging-functions

https://mlflow.org/docs/2.0.0/tracking.html#managing-experiments-and-runs-with-the-tracking-service-api

In [1]:
%%writefile hello_world_experiments.py
import mlflow
from hello_world_core import hello_world, parse_args
    
def main():
    """Main function of the script.

    For each name: creates an experiment named after the loop index, opens a
    run, logs the name and its length, closes the run and calls hello_world.
    """
    
    names = ["John", "Mary", "Ana"]
    for idx, name in enumerate(names):
        # NOTE(review): the value returned by create_experiment is discarded
        # and start_run is called without an experiment_id, so nothing here
        # ties the run to the experiment just created -- presumably the run
        # lands in whichever experiment is active instead; confirm.
        mlflow.create_experiment (str(idx))
        mlflow.start_run()
        mlflow.log_param ("name to log", name)
        mlflow.log_metric ("length", len(name))
        mlflow.end_run ()
        # hello_world is called after the run has been closed
        hello_world (name)
    
if __name__ == "__main__":
    main()

Writing hello_world_experiments.py


In [2]:
# Standard imports
import os

# Third-party imports
import pandas as pd

# AML imports
from azure.ai.ml import command, MLClient
from azure.identity import DefaultAzureCredential

# Credentials used to reach the workspace
credential = DefaultAzureCredential()

# Workspace client, built from the local config file
ml_client = MLClient.from_config(credential=credential)

# Describe the job that runs the experiments script
job = command(
    display_name="Hello World with experiments",
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest",
    code="./",  # source code location: the root folder
    command="python hello_world_experiments.py",
)

# Send the job to the workspace (last expression, so the cell displays it)
ml_client.create_or_update(job)

Found the config file in: /config.json
Class AutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class AutoDeleteConditionSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseAutoDeleteSettingSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class IntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class ProtectionLevelSchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.
Class BaseIntellectualPropertySchema: This is an experimental class, and may change at any time. Please see https://aka.ms/azuremlexperimental for more information.

Experiment,Name,Type,Status,Details Page
data_science,witty_glove_syh5ltdkh6,command,Starting,Link to Azure Machine Learning studio


## start_run receives experiment

In [3]:
%%writefile hello_world_experiments.py
import mlflow
from hello_world_core import hello_world, parse_args
    
def main():
    """Main function of the script.

    For each name: creates an experiment named after the loop index, opens a
    run passing the value returned by create_experiment, logs the name and
    its length, closes the run and calls hello_world.
    """
    
    names = ["John", "Mary", "Ana"]
    for idx, name in enumerate(names):
        experiment = mlflow.create_experiment (str(idx))
        # NOTE(review): the first parameter in the start_run signature is
        # run_id, not experiment_id, so this positional argument is taken
        # as a run id -- pass it as experiment_id=... instead.
        mlflow.start_run(experiment)
        mlflow.log_param ("name to log", name)
        mlflow.log_metric ("length", len(name))
        mlflow.end_run ()
        # hello_world is called after the run has been closed
        hello_world (name)
    
if __name__ == "__main__":
    main()

Overwriting hello_world_experiments.py


In [4]:
# Describe the job that runs the rewritten experiments script
job = command(
    display_name="Hello World with experiments 2",
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest",
    code="./",  # source code location: the root folder
    command="python hello_world_experiments.py",
)

# Send the job to the workspace (last expression, so the cell displays it)
ml_client.create_or_update(job)

[32mUploading data_science (12.73 MBs): 100%|██████████| 12725904/12725904 [00:00<00:00, 16250029.70it/s]
[39m



Experiment,Name,Type,Status,Details Page
data_science,good_helmet_wgcgzlvs99,command,Starting,Link to Azure Machine Learning studio


## start_run receives the experiment under the `experiment_id` keyword

In [5]:
%%writefile hello_world_experiments_id.py
import mlflow
from hello_world_core import hello_world, parse_args
    
def _get_or_create_experiment_id(name):
    """Return the id of the experiment called ``name``, creating it if needed.

    ``mlflow.create_experiment`` requires a unique name, so calling it for
    an experiment that already exists (e.g. when the script is run a second
    time) fails. Looking the experiment up first makes the script re-runnable.
    """
    experiment = mlflow.get_experiment_by_name(name)
    if experiment is not None:
        return experiment.experiment_id
    return mlflow.create_experiment(name)


def main():
    """Main function of the script.

    For each name, logs the name and its length in a run that belongs to an
    experiment named after the loop index, then calls ``hello_world``.
    """
    names = ["John", "Mary", "Ana"]
    for idx, name in enumerate(names):
        experiment_id = _get_or_create_experiment_id(str(idx))
        # The context manager closes the run even if logging raises.
        with mlflow.start_run(experiment_id=experiment_id):
            mlflow.log_param("name to log", name)
            mlflow.log_metric("length", len(name))
        # hello_world is called after the run has been closed
        hello_world(name)
    
if __name__ == "__main__":
    main()

Writing hello_world_experiments_id.py


In [6]:
# Describe the job that runs the experiment_id variant of the script
job = command(
    display_name="Hello World with experiment_id",
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest",
    code="./",  # source code location: the root folder
    command="python hello_world_experiments_id.py",
)

# Send the job to the workspace (last expression, so the cell displays it)
ml_client.create_or_update(job)

[32mUploading data_science (12.74 MBs): 100%|██████████| 12741018/12741018 [00:02<00:00, 5305432.15it/s]
[39m



Experiment,Name,Type,Status,Details Page
data_science,lime_snail_kdddyl016h,command,Starting,Link to Azure Machine Learning studio


## using separate runs instead

In [7]:
%%writefile hello_world_runs.py
import mlflow
from hello_world_core import hello_world, parse_args
    
def _get_or_create_experiment_id(name):
    """Return the id of the experiment called ``name``, creating it if needed.

    ``mlflow.create_experiment`` requires a unique name, so calling it for
    an experiment that already exists (e.g. when the script is run a second
    time) fails. Looking the experiment up first makes the script re-runnable.
    """
    experiment = mlflow.get_experiment_by_name(name)
    if experiment is not None:
        return experiment.experiment_id
    return mlflow.create_experiment(name)


def main():
    """Main function of the script.

    Logs every name and its length in its own run, named after the loop
    index, inside the single experiment "experiment1", then calls
    ``hello_world`` for that name.
    """
    names = ["John", "Mary", "Ana"]
    experiment_id = _get_or_create_experiment_id("experiment1")
    for idx, name in enumerate(names):
        # The context manager closes the run even if logging raises.
        with mlflow.start_run(run_name=str(idx), experiment_id=experiment_id):
            mlflow.log_param("name to log", name)
            mlflow.log_metric("length", len(name))
        # hello_world is called after the run has been closed
        hello_world(name)
    
if __name__ == "__main__":
    main()

Writing hello_world_runs.py


In [9]:
# Describe the job that runs the separate-runs variant of the script
job = command(
    display_name="Hello World with runs",
    environment="AzureML-sklearn-1.0-ubuntu20.04-py38-cpu@latest",
    code="./",  # source code location: the root folder
    command="python hello_world_runs.py",
)

# Send the job to the workspace (last expression, so the cell displays it)
ml_client.create_or_update(job)

[32mUploading data_science (12.75 MBs): 100%|██████████| 12749252/12749252 [00:00<00:00, 18741643.65it/s]
[39m



Experiment,Name,Type,Status,Details Page
data_science,olive_shelf_r4fzsl1f0d,command,Starting,Link to Azure Machine Learning studio
