In [1]:
# importing the necessary libraries
import pandas as pd
import mlflow
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

In [2]:
from experiement_utils import retrieve_experiment, create_experiment

In [4]:
# Train a random forest inside an MLflow run and let autologging record
# the parameters, training metrics and the fitted model.
experiment = retrieve_experiment(experiment_name="mlflow")

# Autologging is a global switch, so enable it once before the run is opened
# rather than inside the run context.
mlflow.autolog()

with mlflow.start_run(run_name="logging model", experiment_id=experiment.experiment_id) as run:
    # Synthetic binary-classification data; seeded so the split is repeatable.
    X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_redundant=5, random_state=42)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Seed the forest as well: without random_state every re-run would log a
    # different model even though the data and the split are fixed.
    random_forest = RandomForestClassifier(random_state=42)
    random_forest.fit(X_train, y_train)

    # predicting on the held-out split
    y_pred = random_forest.predict(X_test)

    # Displaying run info. These values are read while the run is still open,
    # which is why the status prints as RUNNING and the end time as None.
    print(f"Run ID: {run.info.run_id}")
    print(f"Experiment ID: {run.info.experiment_id}")
    print(f"Status: {run.info.status}")
    print(f"Start time: {run.info.start_time}")
    print(f"End time: {run.info.end_time}")
    print(f"Lifecycle stage: {run.info.lifecycle_stage}")

2025/05/10 17:32:38 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2025/05/10 17:32:38 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.


Run ID: 4af64c965d59405e96b451d3309cae41
Experiment ID: 276082829927872645
Status: RUNNING
Start time: 1746891158707
End time: None
Lifecycle stage: active


In [5]:
# Making predictions from the model logged by the training run.

import mlflow.sklearn

# Build the model URI from the run object created by the training cell instead
# of a hard-coded run ID: a pasted ID only exists in the tracking store of the
# session that produced it, so the notebook would fail on Restart & Run All.
# The artifact path ("model") is the same one the original URI pointed at.
logged_model = f"runs:/{run.info.run_id}/model"

# Regenerate the same seeded dataset and split to recover the held-out rows.
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_redundant=5, random_state=42)

_, X_test, _, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Load the model through the scikit-learn flavor. This returns the native
# scikit-learn estimator, not a generic PyFuncModel wrapper.
loaded_model = mlflow.sklearn.load_model(logged_model)

# Predict on the held-out NumPy array and wrap the labels in a DataFrame
# so they render as a table.
y_predictions = pd.DataFrame(loaded_model.predict(X_test), columns=['prediction'])

# Seed the sample so the displayed rows are the same on every run.
y_predictions.sample(n=10, random_state=42)

Unnamed: 0,prediction
56,1
164,0
2,1
111,0
64,0
144,0
78,1
40,0
53,0
138,1


In [8]:
# ========================= Nested Runs ==============================

# Experiment used for the nested-run demo. create_experiment is a project
# helper; its return value is used as the experiment id for every run below.
experiment_id = create_experiment(
    name="nested_experiment",
    artifact_location="nested_artifact",
    tags={"purpose": "testing nested runs"},
)

mlflow.set_experiment(experiment_name="nested_experiment")

# Keyword arguments shared by every run that is opened inside another run.
_nested_kwargs = {"nested": True, "experiment_id": experiment_id}

# Run hierarchy opened below:
#   parent
#   |-- child1
#   |   |-- child_11
#   |   `-- child_12
#   `-- child2
with mlflow.start_run(run_name="parent", experiment_id=experiment_id) as parent:
    print(f"Run ID: {parent.info.run_id}")

    # First child, which itself owns two grandchildren opened one after the other.
    with mlflow.start_run(run_name="child1", **_nested_kwargs) as child1:
        print(f"Child1 Run ID: {child1.info.run_id}")

        with mlflow.start_run(run_name="child_11", **_nested_kwargs) as child_11:
            print(f"Child_11 Run ID: {child_11.info.run_id}")

        with mlflow.start_run(run_name="child_12", **_nested_kwargs) as child_12:
            print(f"Child_12 Run ID: {child_12.info.run_id}")

    # Second child, a sibling of child1 directly under the parent.
    with mlflow.start_run(run_name="child2", **_nested_kwargs) as child2:
        print(f"Child2 Run ID: {child2.info.run_id}")

Experiment nested_experiment already exist
Run ID: 6c87495466bc420c992d9e15857cab48
Child1 Run ID: 2917dae054534058b00be52d17f2298d
Child_11 Run ID: 06edaa2388a141ce9022c79a67e51734
Child_12 Run ID: d10ed1cae1824327bd9edbd76f0eb68b
Child2 Run ID: 0d180d9c753143fb9c5660d5790f26c8
