In [0]:
import mlflow
import mlflow.sklearn
import pandas as pd
import matplotlib.pyplot as plt
from numpy import savetxt
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [0]:
# Load the diabetes dataset shipped with scikit-learn and separate features from target.
db = load_diabetes()
X, Y = db.data, db.target

# Hold out 20% of the observations for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [0]:
# Report how many observations ended up in each of the four splits.
for split_label, split_values in (
    ("X_train", X_train),
    ("X_test", X_test),
    ("y_train", y_train),
    ("y_test", y_test),
):
    print("Number of Observation in {}: ".format(split_label), len(split_values))


In [0]:
# Show which MLflow release this notebook is running against.
mlflow_version = mlflow.version.VERSION
print(mlflow_version)

In [0]:
#Hyperparameters

n_estimators = 100
max_depth = 5
max_features = 3
# A random forest is stochastic; pin the seed so re-running the notebook
# reproduces the same fitted model and predictions.
random_state = 42

# Fit the forest on the training split and predict the held-out test split.
rf = RandomForestRegressor(
    n_estimators=n_estimators,
    max_depth=max_depth,
    max_features=max_features,
    random_state=random_state,
)
rf.fit(X_train, y_train)
prediction = rf.predict(X_test)

With autologging enabled, the model parameters, the model score, and the fitted model are all stored automatically.

In [0]:
#Now we will log the model
# Enable scikit-learn autologging before the model is fitted inside the run below.
mlflow.sklearn.autolog()

with mlflow.start_run():
    #Hyperparameters
    n_estimators = 100
    max_depth = 5
    max_features = 3
    # Pin the seed so the logged run is reproducible across notebook re-runs.
    random_state = 42

    rf = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        max_features=max_features,
        random_state=random_state,
    )
    rf.fit(X_train, y_train)
    prediction = rf.predict(X_test)

#Now our new model is logged under the MLFlow Logging API experiment


In [0]:
#Custom logging

# Autologging stays active for the whole session once it has been switched on.
# Turn it off so this run records only what is logged explicitly below.
# This is harmless if autologging was never enabled.
mlflow.sklearn.autolog(disable=True)

with mlflow.start_run():
    #Set model parameters
    params = {
        "n_estimators": 100,
        "max_depth": 5,
        "max_features": 3,
        # Fixed seed: without it every run trains a different forest and the
        # logged metric is not reproducible.
        "random_state": 42,
    }

    #Create and train model
    rf = RandomForestRegressor(**params)
    rf.fit(X_train, y_train)

    #Use the model to make predictions on the test data
    prediction = rf.predict(X_test)

    #Log the model parameters for this run
    mlflow.log_params(params)

    #Define a metric to evaluate the model and log it for this run
    mse = mean_squared_error(y_test, prediction)
    mlflow.log_metric("mean_squared_error", mse)

    #Log the model created by this run
    mlflow.sklearn.log_model(rf, "random-forest-model")

    #Save the table of predicted values and log it as an artifact
    savetxt("prediction.csv", prediction, delimiter=",")
    mlflow.log_artifact("prediction.csv")

    #Residual = predicted value minus observed value, one per test observation
    residuals = prediction - y_test

    #Create the residual plot on an explicit figure so the saved image never
    #picks up content left on pyplot's implicit "current" figure
    fig, ax = plt.subplots()
    ax.plot(residuals)
    ax.set_xlabel("Observation")
    ax.set_ylabel("Residual")
    ax.set_title("Residual Plot")

    #Save the plot and log it as an artifact
    fig.savefig("residual.png")
    mlflow.log_artifact("residual.png")




Method 2: Setting a custom experiment location with `set_experiment`

In [0]:
#Instead of creating a model inside MLFLow Logging API Experiment, we can customize the location

experiment_name = "/Shared/diabetes_experiment/"
mlflow.set_experiment(experiment_name)

#Custom logging (same steps as the previous custom-logging run, now recorded
#under the experiment selected above)

# Autologging stays active for the whole session once it has been switched on.
# Turn it off so this run records only what is logged explicitly below.
# This is harmless if autologging is already disabled.
mlflow.sklearn.autolog(disable=True)

with mlflow.start_run():
    #Set model parameters
    params = {
        "n_estimators": 100,
        "max_depth": 5,
        "max_features": 3,
        # Fixed seed: without it every run trains a different forest and the
        # logged metric is not reproducible.
        "random_state": 42,
    }

    #Create and train model
    rf = RandomForestRegressor(**params)
    rf.fit(X_train, y_train)

    #Use the model to make predictions on the test data
    prediction = rf.predict(X_test)

    #Log the model parameters for this run
    mlflow.log_params(params)

    #Define a metric to evaluate the model and log it for this run
    mse = mean_squared_error(y_test, prediction)
    mlflow.log_metric("mean_squared_error", mse)

    #Log the model created by this run
    mlflow.sklearn.log_model(rf, "random-forest-model")

    #Save the table of predicted values and log it as an artifact
    savetxt("prediction.csv", prediction, delimiter=",")
    mlflow.log_artifact("prediction.csv")

    #Residual = predicted value minus observed value, one per test observation
    residuals = prediction - y_test

    #Create the residual plot on an explicit figure so the saved image never
    #picks up content left on pyplot's implicit "current" figure
    fig, ax = plt.subplots()
    ax.plot(residuals)
    ax.set_xlabel("Observation")
    ax.set_ylabel("Residual")
    ax.set_title("Residual Plot")

    #Save the plot and log it as an artifact
    fig.savefig("residual.png")
    mlflow.log_artifact("residual.png")




In [0]:
#Get experiment details
experiment_path = "/Shared/diabetes_experiment/"
# set_experiment activates the experiment and hands back its Experiment object.
experiment = mlflow.set_experiment(experiment_path)

# Read the metadata from the Experiment object. The experiment *name* is a
# plain string and has none of these attributes.
print("Experiment ID: ", experiment.experiment_id)
print("Experiment Artifact Location: ", experiment.artifact_location)
print("Tags {}".format(experiment.tags))
print("LifeCycle Stage", experiment.lifecycle_stage)


Method 3: Providing `experiment_id` to the `start_run` function

In [0]:
# Autologging stays active for the whole session once it has been switched on.
# Turn it off so this run records only what is logged explicitly below.
# This is harmless if autologging is already disabled.
mlflow.sklearn.autolog(disable=True)

# Take the ID from the Experiment object returned by set_experiment in the
# previous cell instead of a hard-coded, workspace-specific number, so the
# notebook also runs in other workspaces.
# NOTE(review): assumes the previously hard-coded ID 1786477809851533 referred
# to this same experiment — confirm.
with mlflow.start_run(experiment_id=experiment.experiment_id):
    #Set model parameters
    params = {
        "n_estimators": 100,
        "max_depth": 5,
        "max_features": 3,
        # Fixed seed: without it every run trains a different forest and the
        # logged metric is not reproducible.
        "random_state": 42,
    }

    #Create and train model
    rf = RandomForestRegressor(**params)
    rf.fit(X_train, y_train)

    #Use the model to make predictions on the test data
    prediction = rf.predict(X_test)

    #Log the model parameters for this run
    mlflow.log_params(params)

    #Define a metric to evaluate the model and log it for this run
    mse = mean_squared_error(y_test, prediction)
    mlflow.log_metric("mean_squared_error", mse)

    #Log the model created by this run
    mlflow.sklearn.log_model(rf, "random-forest-model")

    #Save the table of predicted values and log it as an artifact
    savetxt("prediction.csv", prediction, delimiter=",")
    mlflow.log_artifact("prediction.csv")

    #Residual = predicted value minus observed value, one per test observation
    residuals = prediction - y_test

    #Create the residual plot on an explicit figure so the saved image never
    #picks up content left on pyplot's implicit "current" figure
    fig, ax = plt.subplots()
    ax.plot(residuals)
    ax.set_xlabel("Observation")
    ax.set_ylabel("Residual")
    ax.set_title("Residual Plot")

    #Save the plot and log it as an artifact
    fig.savefig("residual.png")
    mlflow.log_artifact("residual.png")

