# Jupyter Notebook demonstrating the use of MLflow

### Import libraries

In [13]:
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import mlflow
from mlflow.models import infer_signature

### Load the Iris dataset

In [14]:
# With return_X_y=True the loader hands back a (features, labels) pair
iris = datasets.load_iris(return_X_y=True)
X, y = iris[0], iris[1]

### Split into train/test sets

In [15]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Define parameters for model training

In [16]:
# Hyperparameters handed to LogisticRegression and later logged to MLflow.
# "multi_class" is deliberately left out: scikit-learn deprecated it in 1.5,
# passing it explicitly (even as "auto") emits a FutureWarning, and omitting
# it selects the same default behaviour.
params = {"penalty": "l2", "solver": "lbfgs", "max_iter": 1000}

### Train a logistic regression model

In [17]:
# fit() returns the estimator itself, so construction and training are chained
lr = LogisticRegression(**params).fit(X_train, y_train)
lr  # bare last expression: render the fitted estimator as the cell output



0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


### Test the model and calculate accuracy

In [18]:
# Score the fitted model on the held-out test split
y_pred = lr.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model accuracy: {accuracy}")

Model accuracy: 1.0


### Log model with MLflow

In [19]:
# Point the MLflow client at the local tracking server and pick the experiment
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
mlflow.set_experiment(experiment_name="Iris Classification Experiment")

with mlflow.start_run():
    # Record the hyperparameters and the accuracy computed earlier
    mlflow.log_params(params=params)
    mlflow.log_metric(key="accuracy", value=accuracy)
    mlflow.set_tag(key="Training Info", value="Basic LR model for Iris dataset")

    # The signature is inferred from the training inputs and the model's
    # predictions on those same inputs
    signature = infer_signature(
        model_input=X_train,
        model_output=lr.predict(X_train),
    )

    # model_info is kept so a later cell can load the model via its URI
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        name="model",
        signature=signature,
    )


🏃 View run silent-cat-711 at: http://127.0.0.1:5000/#/experiments/1/runs/65f938b351714f49844f4d68e4acef3c
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1


### Log model with different parameters

In [20]:
# Second configuration: switch the solver to newton-cg.
# "multi_class" is deliberately left out: scikit-learn deprecated it in 1.5,
# passing it explicitly (even as "auto") emits a FutureWarning, and omitting
# it selects the same default behaviour.
params = {"solver": "newton-cg", "max_iter": 1000}

# Rebinds `lr`, so the cells below evaluate and log this new model
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)



0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'newton-cg'
,max_iter,1000


In [21]:
# Re-evaluate on the same test split using whatever model `lr` currently holds
y_pred = lr.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model accuracy: {accuracy}")

Model accuracy: 1.0


In [22]:
# Start a fresh run; the current `params`, `accuracy` and `lr` are what get recorded
with mlflow.start_run():
    mlflow.log_params(params=params)
    mlflow.log_metric(key="accuracy", value=accuracy)
    mlflow.set_tag(key="Training Info", value="Basic LR model for Iris dataset")

    # Signature built from the training inputs and the model's predictions on them
    signature = infer_signature(
        model_input=X_train,
        model_output=lr.predict(X_train),
    )

    # Rebinds model_info to this run's logged model
    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        name="model",
        signature=signature,
    )


🏃 View run dazzling-gnu-692 at: http://127.0.0.1:5000/#/experiments/1/runs/7ec8b0ca5b8f46ff8083b654019fc9c8
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/1


### Load a model from MLflow to use for prediction as a generic Python function model

In [26]:
# Load the logged model referenced by model_info through the generic pyfunc flavour
loaded_model = mlflow.pyfunc.load_model(model_uri=model_info.model_uri)
predictions = loaded_model.predict(X_test)

# Feature names are read from a fresh load_iris() call to label the columns
iris_features_name = datasets.load_iris().feature_names

# Test features side by side with the true and predicted class labels
results_df = pd.DataFrame(X_test, columns=iris_features_name).assign(
    actual_class=y_test,
    predicted_class=predictions,
)
results_df

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),actual_class,predicted_class
0,6.1,2.8,4.7,1.2,1,1
1,5.7,3.8,1.7,0.3,0,0
2,7.7,2.6,6.9,2.3,2,2
3,6.0,2.9,4.5,1.5,1,1
4,6.8,2.8,4.8,1.4,1,1
5,5.4,3.4,1.5,0.4,0,0
6,5.6,2.9,3.6,1.3,1,1
7,6.9,3.1,5.1,2.3,2,2
8,6.2,2.2,4.5,1.5,1,1
9,5.8,2.7,3.9,1.2,1,1
