# MLflow step by step


## 1. Creating experiments

In [1]:
import mlflow

# Register a new experiment directly through the MLflow API. The call's
# return value (the id of the new experiment) is displayed as the cell output.
mlflow.create_experiment(
    name="testing-mlflow4",
    artifact_location="testing_mlflow1_artifacts",  # artifact location recorded on the experiment
    tags={"env": "dev", "version": "1.0.0"},  # key/value tags attached to the experiment
)

'773410609055695727'

In [2]:
# wrapper function to create mlflow experiment 
from typing import Any 
def create_mlflow_experiment(experiment_name:str,artifact_location:str,tags:dict[str,Any])-> str:
    '''
    Create an MLflow experiment, or reuse the existing one with the same name.

    Parameters:
    experiment_name: str
        Name of the experiment to create or look up.
    artifact_location: str
        Artifact location passed to ``mlflow.create_experiment``; only used
        when the experiment does not exist yet.
    tags: dict[str, Any]
        Tags passed to ``mlflow.create_experiment``; only used when the
        experiment does not exist yet.

    Returns:
    experiment_id: str
        Id of the newly created or already existing experiment.

    Raises:
        Any exception raised by ``mlflow.create_experiment`` is propagated
        unchanged (it is no longer reported as "already exists").
    '''
    # Look the name up first instead of wrapping the create call in a bare
    # ``except``: that used to hide every failure (bad arguments, unreachable
    # tracking store, ...) behind the "already exists" message and then crash
    # with AttributeError on ``None.experiment_id``.
    existing=mlflow.get_experiment_by_name(experiment_name)
    if existing is not None:
        print(f'Experiment {experiment_name} already exists')
        return existing.experiment_id

    return mlflow.create_experiment(
        name=experiment_name, artifact_location=artifact_location,tags=tags
    )

In [3]:
# Call the wrapper with explicit keyword arguments; the returned experiment id
# is displayed as the cell output.
create_mlflow_experiment(
    experiment_name='testing-mlflow',
    artifact_location='testing_mlflow1_artifacts',
    tags={'env':'prod','version':'2.0.0'},
)

'990925180578863486'

## 2. Retrieving info on existing experiments

In [4]:
import mlflow.entities


def get_mlflow_experiment(experiment_id:str=None,experiment_name:str=None)-> mlflow.entities.Experiment:
    '''
    Look up an MLflow experiment by id or, failing that, by name.

    When both arguments are given, the id is used and the name is ignored.

    Parameters:
    experiment_id: str
        Id of the experiment to retrieve.
    experiment_name: str
        Name of the experiment to retrieve; consulted only when no id is given.

    Returns:
    experiment: mlflow.entities.Experiment
        The result of the corresponding MLflow lookup call.

    Raises:
    ValueError
        If neither experiment_id nor experiment_name is provided.
    '''
    # The id takes precedence over the name.
    if experiment_id is not None:
        return mlflow.get_experiment(experiment_id)

    if experiment_name is not None:
        return mlflow.get_experiment_by_name(experiment_name)

    raise ValueError('Either experiment_id or experiment_name must be provided')

In [6]:
# Look the experiment up by name and show its main attributes.
experiment=get_mlflow_experiment(experiment_name='testing-mlflow')

# One print call; sep='\n' puts each attribute on its own output line.
print(
    f'Name : {experiment.name}',
    f'Experiment_id : {experiment.experiment_id}',
    f'Artifact Location : {experiment.artifact_location}',
    f'Tags : {experiment.tags}',
    f'Lifecycle_stage : {experiment.lifecycle_stage}',
    f'Creation timestamp: : {experiment.creation_time}',
    sep='\n',
)

Name : testing-mlflow
Experiment_id : 990925180578863486
Artifact Location : file:///d:/pythonProjects/MLOps_Krish_Naik/MLflow/step-by-step/testing_mlflow1_artifacts
Tags : {'env': 'prod', 'version': '2.0.0'}
Lifecycle_stage : active
Creation timestamp: : 1734999028643


## 3. Deleting an experiment

In [8]:
# Uncomment to delete an experiment (replace the id with one from your own tracking store):
#mlflow.delete_experiment(experiment_id='897722527704922899')

## 4. MLflow runs to train the ML models

In [9]:
# Method 1: select the experiment by name, open an unnamed run
# and record a single hyperparameter in it.
mlflow.set_experiment(experiment_name='run_test')
with mlflow.start_run():
    mlflow.log_param(key='learning_rate',value=0.05)


2024/12/23 18:10:54 INFO mlflow.tracking.fluent: Experiment with name 'run_test' does not exist. Creating a new experiment.


In [10]:
# Method 2: open a run with an explicit name and log one parameter.
with mlflow.start_run(run_name='mlflow_run'):
    mlflow.log_param(key='learning_rate',value=0.06)

In [11]:
# Method 3: bind the run object with `as run` so its metadata can be inspected.
with mlflow.start_run(run_name='mlflow_run') as run:
    mlflow.log_param(key='learning_rate',value=0.06)
    # Printed from inside the `with` block, i.e. while the run is still open.
    print(
        f'runID: {run.info.run_id}',
        f'run info: {run.info}',
        sep='\n',
    )

runID: f83e66bbcf984baa9d1aaceb1a18898d
run info: <RunInfo: artifact_uri='file:///d:/pythonProjects/MLOps_Krish_Naik/MLflow/step-by-step/mlruns/828504590665159851/f83e66bbcf984baa9d1aaceb1a18898d/artifacts', end_time=None, experiment_id='828504590665159851', lifecycle_stage='active', run_id='f83e66bbcf984baa9d1aaceb1a18898d', run_name='mlflow_run', run_uuid='f83e66bbcf984baa9d1aaceb1a18898d', start_time=1734999055951, status='RUNNING', user_id='Immortal_Pi'>


In [12]:
# Create the experiment (or get the id of the existing one) and make it the
# active experiment before starting a run.
experiment_id=create_mlflow_experiment(
    experiment_name='testing_mlflow3',
    artifact_location='testing_mlflow3_artifacts',
    tags={'env':'dev','version':'1.0.2'},
)
mlflow.set_experiment(experiment_id=experiment_id)

with mlflow.start_run(run_name='testing') as run:
    mlflow.log_param(key='learning_rate',value=0.01)

    # Run metadata as seen from inside the still-open run.
    print(
        f'run_id : {run.info.run_id}',
        f'experiment_id : {run.info.experiment_id}',
        f'status: {run.info.status}',
        f'start Time: {run.info.start_time}',
        f'end Time : {run.info.end_time}',
        f'lifecycle_stage : {run.info.lifecycle_stage}',
        sep='\n',
    )

run_id : 233d161d809e43a7a6a70b340f9f5019
experiment_id : 285179736425148203
status: RUNNING
start Time: 1734999056836
end Time : None
lifecycle_stage : active


## 5. Logging parameters

In [13]:
mlflow.set_experiment(experiment_name='logging_experiment')
with mlflow.start_run(run_name='logging_parameters') as run:
    # A single parameter ...
    mlflow.log_param(key='learning_rate',value=0.01)

    # ... and several at once from a dict. The name `parameters` is reused by
    # the metrics-logging cell further down, so it must stay as is.
    parameters=dict(
        learning_rate=0.01,
        epochs=10,
        batch_size=100,
        loss_function='mse',
        optimizer='adam',
    )
    mlflow.log_params(parameters)

    # Run metadata as seen from inside the still-open run.
    print(
        f'run_id : {run.info.run_id}',
        f'experiment_id : {run.info.experiment_id}',
        f'status: {run.info.status}',
        f'start Time: {run.info.start_time}',
        f'end Time : {run.info.end_time}',
        f'lifecycle_stage : {run.info.lifecycle_stage}',
        sep='\n',
    )



2024/12/23 18:11:05 INFO mlflow.tracking.fluent: Experiment with name 'logging_experiment' does not exist. Creating a new experiment.


run_id : 78d7de7f821b40f48681bb7b9ca343a4
experiment_id : 637476786088063623
status: RUNNING
start Time: 1734999065151
end Time : None
lifecycle_stage : active


## 6. Logging metrics

In [16]:
mlflow.set_experiment(experiment_name='logging_experiment')
with mlflow.start_run(run_name='logging_metrics') as run:
    mlflow.log_param(key='learning_rate2',value=0.01)
    # `parameters` is the dict built in the parameter-logging cell above.
    mlflow.log_params(parameters)

    # Several metrics logged in one call from a dict.
    metrics=dict(mse=0.01,mae=0.01,rmse=0.01,r2=0.01)
    mlflow.log_metrics(metrics)

    # Run metadata as seen from inside the still-open run.
    print(
        f'run_id : {run.info.run_id}',
        f'experiment_id : {run.info.experiment_id}',
        f'status: {run.info.status}',
        f'start Time: {run.info.start_time}',
        f'end Time : {run.info.end_time}',
        f'lifecycle_stage : {run.info.lifecycle_stage}',
        sep='\n',
    )

run_id : f006782fecfe424bbd8c3d931dcd4f0e
experiment_id : 637476786088063623
status: RUNNING
start Time: 1735000669212
end Time : None
lifecycle_stage : active


## 7. Logging artifacts
- An artifact is any output generated by the ML workflow.

In [19]:
experiment=get_mlflow_experiment(experiment_name='logging_experiment')

with mlflow.start_run(run_name='logging_artifact',experiment_id=experiment.experiment_id) as run:

    # Use one name for both writing and logging. The file used to be written
    # as 'helloWorld.txt' but logged as 'HelloWorld.txt', which only works on
    # case-insensitive filesystems.
    artifact_file='helloWorld.txt'
    with open(artifact_file,'w') as file:
        file.write('Helloworld !!')

    # Log the single file under the 'test_files' artifact path of the run.
    mlflow.log_artifact(local_path=artifact_file,artifact_path='test_files')

    #print run info
    print(f'run_id : {run.info.run_id}')
    print(f'experiment_id : {run.info.experiment_id}')
    print(f'status: {run.info.status}')
    print(f'start Time: {run.info.start_time}')
    print(f'end Time : {run.info.end_time}')
    print(f'lifecycle_stage : {run.info.lifecycle_stage}')


run_id : 61b4b0d2794f40e09306a7c5642df833
experiment_id : 637476786088063623
status: RUNNING
start Time: 1735001358133
end Time : None
lifecycle_stage : active


In [23]:
# second method 
experiment=get_mlflow_experiment(experiment_name='logging_experiment')

with mlflow.start_run(run_name='logging_artifact',experiment_id=experiment.experiment_id) as run:
    
    with open('./okay/helloWorld.txt','w') as file:
        file.write('Helloworld !!')

    mlflow.log_artifacts(local_dir='./okay',artifact_path='test_files') # give path to artifact
    
    #print run info
    print(f'run_id : {run.info.run_id}')
    print(f'experiment_id : {run.info.experiment_id}')
    print(f'status: {run.info.status}')
    print(f'start Time: {run.info.start_time}')
    print(f'end Time : {run.info.end_time}')
    print(f'lifecycle_stage : {run.info.lifecycle_stage}')

run_id : f7e2ed9022a54c2385d065ce817430d2
experiment_id : 637476786088063623
status: RUNNING
start Time: 1735002103425
end Time : None
lifecycle_stage : active


## 8. Logging images