# MLOps Tutorial - Intro to MLflow

In [None]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import os
from dotenv import load_dotenv, find_dotenv
import mlflow
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load settings from a .env file (if one is found); MLFLOW_ENDPOINT is read
# from the environment further down when the tracking URI is configured.
dotenv_path = find_dotenv()
load_dotenv(dotenv_path)

# Fetch the wine dataset with pandas-backed fields and list what it contains.
data = load_wine(as_frame=True)
data.keys()

In [2]:
# Preview the combined table (13 feature columns plus `target`).
data["frame"]

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,target
0,14.23,1.71,2.43,15.6,127.0,2.80,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.20,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.50,16.8,113.0,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,13.71,5.65,2.45,20.5,95.0,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740.0,2
174,13.40,3.91,2.48,23.0,102.0,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750.0,2
175,13.27,4.28,2.26,20.0,120.0,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835.0,2
176,13.17,2.59,2.37,20.0,120.0,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840.0,2


In [3]:
# Keep a handle on the combined features + target table.
df = data["frame"]

In [4]:
# Display labels for the five candidate models, suffixed A through E.
# They are paired positionally with `classifiers` via zip().
names = [f"LogisticRegression {suffix}" for suffix in "ABCDE"]

In [5]:
# One settings dict per candidate, in the same order as `names`.
# Every model shares random_state=42, applied when the estimators are built.
_logreg_settings = [
    dict(penalty="l2", max_iter=10, C=0.5),
    dict(penalty="l2", max_iter=100, C=1),
    dict(penalty="l1", solver="liblinear", max_iter=50, C=0.5),
    dict(penalty="l1", solver="liblinear", max_iter=250, C=1),
    # NOTE(review): identical to the previous entry, so models D and E are
    # the same configuration -- confirm whether E was meant to differ.
    dict(penalty="l1", solver="liblinear", max_iter=250, C=1),
]

classifiers = [
    LogisticRegression(random_state=42, **settings)
    for settings in _logreg_settings
]

In [6]:
# Split the dataset bundle into the feature table and the class labels,
# then show both shapes as a sanity check.
X, y = data.data, data.target
X.shape, y.shape

((178, 13), (178,))

In [7]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# the same on every run.
_split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = _split

In [8]:
# Baseline sweep: fit every candidate and print its accuracy on the test split.
# `clf` stays bound to the last classifier after the loop; the next cell
# inspects it, so the loop variable names are kept as-is.
for name, clf in zip(names, classifiers):
    # fit() returns the estimator itself, so it can be chained into score().
    score = clf.fit(X_train, y_train).score(X_test, y_test)
    print(name, score)

LogisticRegression A 0.7222222222222222
LogisticRegression B 0.9722222222222222
LogisticRegression C 0.9722222222222222
LogisticRegression D 0.9722222222222222
LogisticRegression E 0.9722222222222222


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [9]:
# Show the full hyperparameter set of the last candidate in `classifiers`
# (the same object the preceding loop left bound to `clf`).
classifiers[-1].get_params()

{'C': 1,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 250,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l1',
 'random_state': 42,
 'solver': 'liblinear',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

In [10]:
# Point MLflow at the tracking server named by MLFLOW_ENDPOINT and select
# the experiment that the runs below are recorded under.
experiment_name = "Example"
tracking_uri = os.environ.get("MLFLOW_ENDPOINT")

mlflow.set_tracking_uri(uri=tracking_uri)
mlflow.set_experiment(experiment_name)

# Metric keys, listed in the same order as the values eval_metrics() returns.
metrics_names = ["accuracy", "precision", "recall", "f1"]

def eval_metrics(actual, pred):
    """Score predictions against the true labels.

    Args:
        actual: Ground-truth class labels.
        pred: Predicted class labels, aligned with ``actual``.

    Returns:
        A list ``[accuracy, precision, recall, f1]``; precision, recall and
        F1 are computed with ``average="weighted"``.
    """
    weighted = {"average": "weighted"}
    return [
        accuracy_score(actual, pred),
        precision_score(actual, pred, **weighted),
        recall_score(actual, pred, **weighted),
        f1_score(actual, pred, **weighted),
    ]

2024/06/03 00:39:49 INFO mlflow.tracking.fluent: Experiment with name 'Example' does not exist. Creating a new experiment.


In [11]:
from mlflow.models import infer_signature

# Train each candidate inside its own MLflow run and record its metrics,
# hyperparameters and the fitted model.
for name, clf in zip(names, classifiers):

    with mlflow.start_run(run_name=name):

        # Optional: wrapping the estimator as make_pipeline(StandardScaler(), clf)
        # scales the features first (the convergence warnings suggest scaling).
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        # Pair each score with its key and log them in one call. This also
        # avoids rebinding `name`, which the original inner loop shadowed.
        metrics = eval_metrics(y_test, y_pred)
        mlflow.log_metrics(dict(zip(metrics_names, metrics)))

        mlflow.log_params(clf.get_params())

        # Reuse the predictions computed above rather than predicting twice.
        signature = infer_signature(X_test, y_pred)

        model_info = mlflow.sklearn.log_model(
            sk_model=clf,
            artifact_path="model",
            signature=signature,
            # A few rows are enough to illustrate the input format; the
            # original stored the entire training frame as the example.
            input_example=X_train.head(),
        )

        print("Saved model!", model_info.model_uri)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Saved model! runs:/d493f2a37f3e480e863bbd8c9b1275b4/model


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Saved model! runs:/029ca6053eff480fbdf00235a517d738/model




Saved model! runs:/eeb9d7c10c3749b884a0f1b29cc44de2/model




Saved model! runs:/2e2acdc6e69c4b1d9dbf1634be64a0c5/model




Saved model! runs:/7a51d74037804f2b995458fda800e7cb/model


# Load Model from MLflow

In [13]:
import mlflow
import pandas as pd

# Use the URI of the model logged by the most recent run in this session
# rather than a run ID pasted from an earlier execution, so the cell keeps
# working after "Restart Kernel -> Run All". To load a different run,
# assign its "runs:/<run_id>/model" URI here instead.
logged_model = model_info.model_uri

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
loaded_model.predict(X_test)

Downloading artifacts: 100%|██████████| 6/6 [00:00<00:00, 30.13it/s]


array([0, 0, 2, 0, 1, 0, 1, 2, 1, 2, 1, 2, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 2, 2, 2, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0])