In [1]:
# Enable IPython's autoreload extension in mode 2 so that edited modules are
# re-imported automatically before each cell runs (no kernel restart needed).
%load_ext autoreload
%autoreload 2

# MLflow Classification Recipe Notebook

This notebook runs the MLflow Classification Recipe on Databricks and inspects its results. For more information about the MLflow Classification Recipe, including usage examples, see the [Classification Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#classification-recipe) and the [Classification Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.classification.v1.recipe).

In [2]:
from mlflow.recipes import Recipe

# Create the recipe object using the "local" profile.
# NOTE(review): no recipe root path is passed here; presumably it is resolved
# from the notebook's working directory — confirm before running elsewhere.
r = Recipe(profile="local")

2024/10/24 20:27:09 INFO mlflow.recipes.recipe: Creating MLflow Recipe 'mlflow-classification-recipe-learning' with profile: 'local'


In [3]:
# Inspect the recipe as a whole (no step name is given).
# NOTE(review): no output was captured for this cell in the saved notebook;
# presumably it renders an overview of the recipe's steps — confirm.
r.inspect()

In [4]:
# Run the ingest step. The saved output shows the inferred column schema and a
# sample of the ingested rows.
# NOTE(review): Serum_Insulin is reported as a string column even though the
# sampled values look numeric — verify the source data / ingest configuration.
r.run("ingest")

2024/10/24 20:27:10 INFO mlflow.recipes.step: Running step ingest...


name,type
Pregnant,integer
Glucose,number
Diastolic_BP,number
Skin_Fold,number
Serum_Insulin,string
BMI,number
Diabetes_Pedigree,number
Age,integer
Class,integer

Pregnant,Glucose,Diastolic_BP,Skin_Fold,Serum_Insulin,BMI,Diabetes_Pedigree,Age,Class
6,148.0,72.0,35.0,,33.6,0.627,50,1
1,85.0,66.0,29.0,,26.6,0.351,31,0
8,183.0,64.0,,,23.3,0.672,32,1
1,89.0,66.0,23.0,94.0,28.1,0.167,21,0
0,137.0,40.0,35.0,168.0,43.1,2.288,33,1


In [5]:
# Run the split step. Per the saved log, the earlier ingest step is skipped
# because nothing changed since it last ran.
r.run("split")

2024/10/24 20:27:10 INFO mlflow.recipes.utils.execution: ingest: No changes. Skipping.


Run MLflow Recipe step: split
2024/10/24 20:27:11 INFO mlflow.recipes.step: Running step split...
  return bound(*args, **kwds)


In [6]:
# Fetch the "training_data" artifact and summarize it.
# describe() covers numeric columns only by default, which is why Serum_Insulin
# (ingested as a string column) is missing from the summary table.
training_data = r.get_artifact("training_data")
training_data.describe()

Unnamed: 0,Pregnant,Glucose,Diastolic_BP,Skin_Fold,BMI,Diabetes_Pedigree,Age,Class
count,590.0,587.0,561.0,418.0,582.0,590.0,590.0,590.0
mean,3.949153,121.943782,72.45098,28.937799,32.353093,0.478854,33.452542,0.349153
std,3.421314,30.530663,12.361439,10.575342,6.937941,0.342374,11.611727,0.477107
min,0.0,44.0,24.0,7.0,18.2,0.078,21.0,0.0
25%,1.0,100.0,64.0,21.0,27.5,0.24075,24.0,0.0
50%,3.0,117.0,72.0,29.0,32.0,0.368,29.5,0.0
75%,6.0,141.0,80.0,36.0,36.4,0.64675,41.0,1.0
max,15.0,199.0,122.0,99.0,67.1,2.42,81.0,1.0


In [7]:
# Run the transform step. Per the saved log, the ingest and split steps are
# skipped because nothing changed; the output lists column types and a row sample.
r.run("transform")

2024/10/24 20:27:12 INFO mlflow.recipes.utils.execution: ingest, split: No changes. Skipping.


Run MLflow Recipe step: transform
2024/10/24 20:27:13 INFO mlflow.recipes.step: Running step transform...


Name,Type
Pregnant,int64
Glucose,float64
Diastolic_BP,float64
Skin_Fold,float64
Serum_Insulin,object
BMI,float64
Diabetes_Pedigree,float64
Age,int64
Class,int64

Name,Type
Pregnant,int64
Glucose,float64
Diastolic_BP,float64
Skin_Fold,float64
Serum_Insulin,object
BMI,float64
Diabetes_Pedigree,float64
Age,int64
Class,int64

Pregnant,Glucose,Diastolic_BP,Skin_Fold,Serum_Insulin,BMI,Diabetes_Pedigree,Age,Class
1,89.0,66.0,23.0,94.0,28.1,0.167,21,0
5,116.0,74.0,,,25.6,0.201,30,0
10,115.0,,,,35.3,0.134,29,0
4,110.0,92.0,,,37.6,0.191,30,0
10,139.0,80.0,,,27.1,1.441,57,0


In [8]:
# Run the train step; the saved output reports training vs. validation metrics,
# the model's input/output schema, and the rows with the largest errors.
# NOTE(review): the saved metrics show perfect training scores (f1 = 1.0)
# against much lower validation scores (f1 = 0.644) — check for overfitting.
r.run("train")

2024/10/24 20:27:14 INFO mlflow.recipes.utils.execution: ingest, split, transform: No changes. Skipping.


Run MLflow Recipe step: train
2024/10/24 20:27:15 INFO mlflow.recipes.step: Running step train...
2024/10/24 20:27:16 INFO mlflow.store.db.utils: Creating initial MLflow database tables...
2024/10/24 20:27:16 INFO mlflow.store.db.utils: Updating database tables
INFO  [alembic.runtime.migration] Context impl SQLiteImpl.
INFO  [alembic.runtime.migration] Will assume non-transactional DDL.
INFO  [alembic.runtime.migration] Running upgrade  -> 451aebb31d03, add metric step
INFO  [alembic.runtime.migration] Running upgrade 451aebb31d03 -> 90e64c465722, migrate user column to tags
INFO  [alembic.runtime.migration] Running upgrade 90e64c465722 -> 181f10493468, allow nulls for metric values
INFO  [alembic.runtime.migration] Running upgrade 181f10493468 -> df50e92ffc5e, Add Experiment Tags Table
INFO  [alembic.runtime.migration] Running upgrade df50e92ffc5e -> 7ac759974ad8, Update run tags with larger limit
INFO  [alembic.runtime.migration] Running upgrade 7ac759974ad8 -> 89d4b8295536, create l

Metric,training,validation
f1_score,1.0,0.644068
accuracy_score,1.0,0.764045
example_count,590.0,89.0
false_negatives,0.0,14.0
false_positives,0.0,7.0
log_loss,0.134493,0.53118
precision_recall_auc,1.0,0.711533
precision_score,1.0,0.730769
recall_score,1.0,0.575758
roc_auc,1.0,0.786526

Name,Type
Pregnant,long
Glucose,double
Diastolic_BP,double
Skin_Fold,double
Serum_Insulin,string
BMI,double
Diabetes_Pedigree,double
Age,long

Name,Type
predicted_score_0,double
predicted_score_1,double
predicted_score,double
predicted_label,long

absolute_error,prediction,Class,Pregnant,Glucose,Diastolic_BP,Skin_Fold,Serum_Insulin,BMI,Diabetes_Pedigree,Age
0.43,0,0,0,165.0,76.0,43.0,255.0,47.9,0.259,26
0.39,1,1,12,84.0,72.0,31.0,,29.7,0.297,46
0.39,1,1,0,95.0,85.0,25.0,36.0,37.4,0.247,24
0.39,0,0,7,150.0,66.0,42.0,342.0,34.7,0.718,42
0.39,1,1,4,134.0,72.0,,,23.8,0.277,60
0.37,1,1,3,78.0,50.0,32.0,88.0,31.0,0.248,26
0.37,1,1,5,109.0,62.0,41.0,129.0,35.8,0.514,25
0.37,0,0,6,183.0,94.0,,,40.8,1.461,45
0.37,1,1,10,111.0,70.0,27.0,,27.5,0.141,40
0.37,1,1,3,107.0,62.0,13.0,48.0,22.9,0.678,23

Unnamed: 0,Latest
Model Rank,> 0
f1_score,0.644068
accuracy_score,0.764045
false_negatives,14
false_positives,7
log_loss,0.53118
precision_score,0.730769
recall_score,0.575758
roc_auc,0.786526
true_negatives,49


In [9]:
# Fetch the "model" artifact and print its summary; the saved output shows the
# artifact path, flavor, and run id.
trained_model = r.get_artifact("model")
print(trained_model)

mlflow.pyfunc.loaded_model:
  artifact_path: train/model
  flavor: mlflow.pyfunc.model
  run_id: 19adc81a76984690af101780df8c9e6f



In [10]:
# Run the evaluate step. The saved output compares validation and test metrics
# and shows f1_score (0.703704 on test) validated against a threshold of 0.7.
r.run("evaluate")

2024/10/24 20:27:33 INFO mlflow.recipes.utils.execution: ingest, split, transform, train: No changes. Skipping.


Run MLflow Recipe step: evaluate
2024/10/24 20:27:34 INFO mlflow.recipes.step: Running step evaluate...
2024/10/24 20:27:36 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/10/24 20:27:36 INFO mlflow.models.evaluation.default_evaluator: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/10/24 20:27:36 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/10/24 20:27:37 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/10/24 20:27:37 INFO mlflow.models.evaluation.default_evaluator: The evaluation dataset is inferred as binary dataset, positive label is 1, negative label is 0.
2024/10/24 20:27:37 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...


Metric,validation,test
f1_score,0.644068,0.703704
accuracy_score,0.764045,0.820225
example_count,89.0,89.0
false_negatives,14.0,10.0
false_positives,7.0,6.0
log_loss,0.53118,0.438676
precision_recall_auc,0.711533,0.733009
precision_score,0.730769,0.76
recall_score,0.575758,0.655172
roc_auc,0.786526,0.856609

metric,greater_is_better,value,threshold,validated
f1_score,True,0.703704,0.7,✅


In [11]:
# Run the register step. In the saved output the model is registered as
# 'Ensemble-Classifi' and version '1' is created.
r.run("register")

2024/10/24 20:27:39 INFO mlflow.recipes.utils.execution: ingest, split, transform, train, evaluate: No changes. Skipping.


Run MLflow Recipe step: register
2024/10/24 20:27:40 INFO mlflow.recipes.step: Running step register...
Successfully registered model 'Ensemble-Classifi'.
Created version '1' of model 'Ensemble-Classifi'.
