In [5]:
# Enable IPython's autoreload extension in mode 2 so edited modules are reloaded before cells run.
%load_ext autoreload
%autoreload 2

# AdaBoost

This notebook runs the MLflow Regression Recipe on Databricks and inspects its results. For more information about the MLflow Regression Recipe, including usage examples, see the [Regression Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#regression-recipe) and the [Regression Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.regression.v1.recipe).

In [6]:
from mlflow.recipes import Recipe

# Create the recipe with the "adab" profile; the following cells of this section all act on `r`.
r = Recipe(profile="adab")


2024/09/22 18:23:47 INFO mlflow.recipes.recipe: Creating MLflow Recipe 'student_performance' with profile: 'adab'


In [7]:
# Presumably discards cached step outputs so the steps below execute again
# (the ingest step that follows is run, not skipped) — confirm against the MLflow docs.
r.clean()

In [8]:
# Inspect the recipe as a whole: no step name is passed.
r.inspect()

In [9]:
# Execute the `ingest` step; its output previews the ingested schema and first rows.
r.run(step="ingest")

2024/09/22 18:23:53 INFO mlflow.recipes.step: Running step ingest...

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|##########| 1/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|##########| 1/1 [00:00<?, ?it/s]


name,type
race_ethnicity,string
parental_level_of_education,string
lunch,string
test_preparation_course,string
math_score,integer
reading_score,integer
writing_score,integer

race_ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
group B,bachelor's degree,standard,none,72,72,74
group C,some college,standard,completed,69,90,88
group B,master's degree,standard,none,90,95,93
group A,associate's degree,free/reduced,none,47,57,44
group C,some college,standard,none,76,78,75


In [10]:
# Execute the `split` step; unchanged upstream steps are skipped, as the log reports.
r.run(step="split")

2024/09/22 18:23:57 INFO mlflow.recipes.utils.execution: ingest: No changes. Skipping.


"Run MLflow Recipe step: split"
2024/09/22 18:24:00 INFO mlflow.recipes.step: Running step split...
  return bound(*args, **kwds)


In [11]:
# Execute the `transform` step, which produces the num_pipeline__/cat_pipeline__ feature columns listed in its output.
r.run(step="transform")

2024/09/22 18:24:16 INFO mlflow.recipes.utils.execution: ingest, split: No changes. Skipping.


"Run MLflow Recipe step: transform"
2024/09/22 18:24:19 INFO mlflow.recipes.step: Running step transform...


Name,Type
race_ethnicity,object
parental_level_of_education,object
lunch,object
test_preparation_course,object
math_score,int64
reading_score,int64
writing_score,int64

Name,Type
num_pipeline__reading_score,float64
num_pipeline__writing_score,float64
cat_pipeline__race_ethnicity_group A,float64
cat_pipeline__race_ethnicity_group B,float64
cat_pipeline__race_ethnicity_group C,float64
cat_pipeline__race_ethnicity_group D,float64
cat_pipeline__race_ethnicity_group E,float64
cat_pipeline__parental_level_of_education_associate's degree,float64
cat_pipeline__parental_level_of_education_bachelor's degree,float64
cat_pipeline__parental_level_of_education_high school,float64

num_pipeline__reading_score,num_pipeline__writing_score,cat_pipeline__race_ethnicity_group A,cat_pipeline__race_ethnicity_group B,cat_pipeline__race_ethnicity_group C,cat_pipeline__race_ethnicity_group D,cat_pipeline__race_ethnicity_group E,cat_pipeline__parental_level_of_education_associate's degree,cat_pipeline__parental_level_of_education_bachelor's degree,cat_pipeline__parental_level_of_education_high school,cat_pipeline__parental_level_of_education_master's degree,cat_pipeline__parental_level_of_education_some college,cat_pipeline__parental_level_of_education_some high school,cat_pipeline__lunch_free/reduced,cat_pipeline__lunch_standard,cat_pipeline__test_preparation_course_completed,cat_pipeline__test_preparation_course_none,math_score
6.554413158143052,6.177127029134177,0.0,2.5657440185169285,0.0,0.0,0.0,0.0,0.0,0.0,4.100355739219689,0.0,0.0,0.0,2.0915714079217245,0.0,2.0882729220707734,90
5.381518171949033,4.981554055753368,0.0,0.0,2.124650912267445,0.0,0.0,0.0,0.0,0.0,0.0,2.3885484287466188,0.0,0.0,2.0915714079217245,0.0,2.0882729220707734,76
5.726487285535509,5.180816217983503,0.0,2.5657440185169285,0.0,0.0,0.0,2.432328446512315,0.0,0.0,0.0,0.0,0.0,0.0,2.0915714079217245,0.0,2.0882729220707734,71
2.9667343768436973,2.590408108991752,0.0,2.5657440185169285,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3885484287466188,0.0,2.0915714079217294,0.0,0.0,2.0882729220707734,40
4.4156046539068985,4.450188289806342,0.0,0.0,0.0,2.28616326831259,0.0,0.0,0.0,2.521444771803698,0.0,0.0,0.0,2.0915714079217294,0.0,2.088272922070775,0.0,64


In [12]:
# Execute the `train` step; its output reports training and validation metrics for the fitted model.
r.run(step="train")

2024/09/22 18:24:23 INFO mlflow.recipes.utils.execution: ingest, split, transform: No changes. Skipping.


"Run MLflow Recipe step: train"
2024/09/22 18:24:26 INFO mlflow.recipes.step: Running step train...

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]
Downloading artifacts:  20%|##        | 1/5 [00:00<00:00, 1002.22it/s]
Downloading artifacts:  40%|####      | 2/5 [00:00<00:00, 997.34it/s] 
Downloading artifacts:  60%|######    | 3/5 [00:00<00:00, 1496.01it/s]
Downloading artifacts:  80%|########  | 4/5 [00:00<00:00, 1994.68it/s]
Downloading artifacts: 100%|##########| 5/5 [00:00<00:00, 1665.60it/s]
Downloading artifacts: 100%|##########| 5/5 [00:00<00:00, 1665.60it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]
Downloading artifacts:  20%|##        | 1/5 [00:00<?, ?it/s]
Downloading artifacts:  40%|####      | 2/5 [00:00<00:00, 2005.88it/s]
Downloading artifacts:  60%|######    | 3/5 [00:00<00:00, 1502.08it/s]
Downloading artifacts:  80%|########  | 4/5 [00:00<00:00, 1000.79it/s]
Downloading artifacts: 100%|##########| 5/5 [00:00<00:00, 1250.99it/s]
Downl

Metric,training,validation
r2_score,0.716436,0.605647
r2_score,0.716436,0.605647
example_count,803.0,113.0
max_error,19.0658,21.3364
mean_absolute_error,6.63693,7.14737
mean_absolute_percentage_error,0.109564,0.107552
mean_on_target,65.7522,69.6283
mean_squared_error,63.2307,80.0097
root_mean_squared_error,7.95177,8.94481
score,0.716436,0.605647

Name,Type
race_ethnicity,string
parental_level_of_education,string
lunch,string
test_preparation_course,string
reading_score,long
writing_score,long

Name,Type
-,"Tensor('float64', (-1,))"

absolute_error,prediction,math_score,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,reading_score,writing_score
19.06578947368421,60.93421052631579,80,group C,some college,free/reduced,none,64,66
18.340909090909093,57.34090909090909,39,group C,associate's degree,standard,none,64,57
18.336448598130843,73.33644859813084,55,group A,high school,standard,none,73,73
18.19178082191781,60.19178082191781,42,group C,some college,free/reduced,completed,66,69
18.16083916083916,54.83916083916084,73,group B,bachelor's degree,free/reduced,none,56,57
17.791489361702126,62.791489361702126,45,group C,some college,free/reduced,completed,73,70
17.663551401869157,73.33644859813084,91,group E,associate's degree,free/reduced,completed,73,80
17.596491228070178,64.40350877192982,82,group E,bachelor's degree,standard,none,62,62
17.42758620689655,62.42758620689655,45,group A,bachelor's degree,standard,none,59,64
17.333333333333336,52.333333333333336,35,group C,high school,free/reduced,none,61,54

Unnamed: 0,Latest
Model Rank,> 0
r2_score,0.605647
max_error,21.3364
mean_absolute_error,7.14737
mean_absolute_percentage_error,0.107552
mean_squared_error,80.0097
root_mean_squared_error,8.94481
Run Time,2024-09-22 18:24:30
Run ID,465fd54ff099404c9364b62d2c69ac46


In [13]:
# Execute the `evaluate` step: it reports validation/test metrics and checks r2_score against the 0.7 threshold.
r.run(step="evaluate")

2024/09/22 18:25:58 INFO mlflow.recipes.utils.execution: ingest, split, transform, train: No changes. Skipping.


"Run MLflow Recipe step: evaluate"
2024/09/22 18:26:01 INFO mlflow.recipes.step: Running step evaluate...
2024/09/22 18:26:10 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/09/22 18:26:10 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/09/22 18:26:13 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/09/22 18:26:13 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/09/22 18:26:13 INFO mlflow.tracking._tracking_service.client: \U0001f3c3 View run classy-eel-550 at: http://127.0.0.1:8080/#/experiments/361567546446726864/runs/465fd54ff099404c9364b62d2c69ac46.
2024/09/22 18:26:13 INFO mlflow.tracking._tracking_service.client: \U0001f9ea View experiment at: http://127.0.0.1:8080/#/experiments/361567546446726864.
Index(['prediction', 'target'], dtype='object')
Index(['prediction', 'target'], dtype='object')
Index(['prediction', 'target'], dtype='object'

Metric,validation,test
r2_score,0.605647,0.731733
r2_score,0.605647,0.731733
example_count,113.0,84.0
max_error,21.3364,31.129032
mean_absolute_error,7.14737,7.218448
mean_absolute_percentage_error,0.107552,1668960691402660.8
mean_on_target,69.6283,64.547619
mean_squared_error,80.0097,83.031173
root_mean_squared_error,8.94481,9.112144
score,0.605647,0.731733

metric,greater_is_better,value,threshold,validated
r2_score,True,0.731733,0.7,✅


In [14]:
# Execute the `register` step, which creates a new version of the `stud_perf_regressor` registered model.
r.run(step="register")

2024/09/22 18:26:15 INFO mlflow.recipes.utils.execution: ingest, split, transform, train, evaluate: No changes. Skipping.


"Run MLflow Recipe step: register"
2024/09/22 18:26:18 INFO mlflow.recipes.step: Running step register...
Registered model 'stud_perf_regressor' already exists. Creating a new version of this model...
2024/09/22 18:26:19 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: stud_perf_regressor, version 2
Created version '2' of model 'stud_perf_regressor'.


In [15]:
# Show the stored results of the `train` step (metrics, schemas, worst predictions, run summary).
r.inspect(step="train")

Metric,training,validation
r2_score,0.716436,0.605647
r2_score,0.716436,0.605647
example_count,803.0,113.0
max_error,19.0658,21.3364
mean_absolute_error,6.63693,7.14737
mean_absolute_percentage_error,0.109564,0.107552
mean_on_target,65.7522,69.6283
mean_squared_error,63.2307,80.0097
root_mean_squared_error,7.95177,8.94481
score,0.716436,0.605647

Name,Type
race_ethnicity,string
parental_level_of_education,string
lunch,string
test_preparation_course,string
reading_score,long
writing_score,long

Name,Type
-,"Tensor('float64', (-1,))"

absolute_error,prediction,math_score,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,reading_score,writing_score
19.06578947368421,60.93421052631579,80,group C,some college,free/reduced,none,64,66
18.340909090909093,57.34090909090909,39,group C,associate's degree,standard,none,64,57
18.336448598130843,73.33644859813084,55,group A,high school,standard,none,73,73
18.19178082191781,60.19178082191781,42,group C,some college,free/reduced,completed,66,69
18.16083916083916,54.83916083916084,73,group B,bachelor's degree,free/reduced,none,56,57
17.791489361702126,62.791489361702126,45,group C,some college,free/reduced,completed,73,70
17.663551401869157,73.33644859813084,91,group E,associate's degree,free/reduced,completed,73,80
17.596491228070178,64.40350877192982,82,group E,bachelor's degree,standard,none,62,62
17.42758620689655,62.42758620689655,45,group A,bachelor's degree,standard,none,59,64
17.333333333333336,52.333333333333336,35,group C,high school,free/reduced,none,61,54

Unnamed: 0,Latest
Model Rank,> 0
r2_score,0.605647
max_error,21.3364
mean_absolute_error,7.14737
mean_absolute_percentage_error,0.107552
mean_squared_error,80.0097
root_mean_squared_error,8.94481
Run Time,2024-09-22 18:24:30
Run ID,465fd54ff099404c9364b62d2c69ac46


In [16]:
# Fetch the recipe's training split and summarise its numeric score columns.
training_data = r.get_artifact(artifact_name="training_data")
training_data.describe()

Unnamed: 0,math_score,reading_score,writing_score
count,803.0,803.0,803.0
mean,65.752179,68.941469,67.933998
std,14.942019,14.503085,15.064926
min,18.0,23.0,15.0
25%,56.5,59.0,57.0
50%,66.0,70.0,69.0
75%,76.0,79.0,78.5
max,100.0,100.0,100.0


In [17]:
# Fetch the trained model artifact and print its text summary (artifact path, flavor, run id).
trained_model = r.get_artifact(artifact_name="model")
print(trained_model)

mlflow.pyfunc.loaded_model:
  artifact_path: train/model
  flavor: mlflow.pyfunc.model
  run_id: 465fd54ff099404c9364b62d2c69ac46



# CatBoost

This notebook runs the MLflow Regression Recipe on Databricks and inspects its results. For more information about the MLflow Regression Recipe, including usage examples, see the [Regression Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#regression-recipe) and the [Regression Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.regression.v1.recipe).

In [18]:
from mlflow.recipes import Recipe

# Rebind `r` to a recipe built with the "catb" profile; the following cells of this section act on it.
r = Recipe(profile="catb")


2024/09/22 18:26:25 INFO mlflow.recipes.recipe: Creating MLflow Recipe 'student_performance' with profile: 'catb'


In [19]:
# Presumably discards cached step outputs so the steps below execute again
# (the ingest step that follows is run, not skipped) — confirm against the MLflow docs.
r.clean()

In [20]:
# Inspect the recipe as a whole: no step name is passed.
r.inspect()

In [21]:
# Execute the `ingest` step; its output previews the ingested schema and first rows.
r.run(step="ingest")

2024/09/22 18:26:29 INFO mlflow.recipes.step: Running step ingest...

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|##########| 1/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|##########| 1/1 [00:00<?, ?it/s]


name,type
race_ethnicity,string
parental_level_of_education,string
lunch,string
test_preparation_course,string
math_score,integer
reading_score,integer
writing_score,integer

race_ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
group B,bachelor's degree,standard,none,72,72,74
group C,some college,standard,completed,69,90,88
group B,master's degree,standard,none,90,95,93
group A,associate's degree,free/reduced,none,47,57,44
group C,some college,standard,none,76,78,75


In [22]:
# Execute the `split` step; unchanged upstream steps are skipped, as the log reports.
r.run(step="split")

2024/09/22 18:26:32 INFO mlflow.recipes.utils.execution: ingest: No changes. Skipping.


"Run MLflow Recipe step: split"
2024/09/22 18:26:35 INFO mlflow.recipes.step: Running step split...
  return bound(*args, **kwds)


In [23]:
# Execute the `transform` step, which produces the num_pipeline__/cat_pipeline__ feature columns listed in its output.
r.run(step="transform")

2024/09/22 18:26:55 INFO mlflow.recipes.utils.execution: ingest, split: No changes. Skipping.


"Run MLflow Recipe step: transform"
2024/09/22 18:26:59 INFO mlflow.recipes.step: Running step transform...


Name,Type
race_ethnicity,object
parental_level_of_education,object
lunch,object
test_preparation_course,object
math_score,int64
reading_score,int64
writing_score,int64

Name,Type
num_pipeline__reading_score,float64
num_pipeline__writing_score,float64
cat_pipeline__race_ethnicity_group A,float64
cat_pipeline__race_ethnicity_group B,float64
cat_pipeline__race_ethnicity_group C,float64
cat_pipeline__race_ethnicity_group D,float64
cat_pipeline__race_ethnicity_group E,float64
cat_pipeline__parental_level_of_education_associate's degree,float64
cat_pipeline__parental_level_of_education_bachelor's degree,float64
cat_pipeline__parental_level_of_education_high school,float64

num_pipeline__reading_score,num_pipeline__writing_score,cat_pipeline__race_ethnicity_group A,cat_pipeline__race_ethnicity_group B,cat_pipeline__race_ethnicity_group C,cat_pipeline__race_ethnicity_group D,cat_pipeline__race_ethnicity_group E,cat_pipeline__parental_level_of_education_associate's degree,cat_pipeline__parental_level_of_education_bachelor's degree,cat_pipeline__parental_level_of_education_high school,cat_pipeline__parental_level_of_education_master's degree,cat_pipeline__parental_level_of_education_some college,cat_pipeline__parental_level_of_education_some high school,cat_pipeline__lunch_free/reduced,cat_pipeline__lunch_standard,cat_pipeline__test_preparation_course_completed,cat_pipeline__test_preparation_course_none,math_score
6.554413158143052,6.177127029134177,0.0,2.5657440185169285,0.0,0.0,0.0,0.0,0.0,0.0,4.100355739219689,0.0,0.0,0.0,2.0915714079217245,0.0,2.0882729220707734,90
5.381518171949033,4.981554055753368,0.0,0.0,2.124650912267445,0.0,0.0,0.0,0.0,0.0,0.0,2.3885484287466188,0.0,0.0,2.0915714079217245,0.0,2.0882729220707734,76
5.726487285535509,5.180816217983503,0.0,2.5657440185169285,0.0,0.0,0.0,2.432328446512315,0.0,0.0,0.0,0.0,0.0,0.0,2.0915714079217245,0.0,2.0882729220707734,71
2.9667343768436973,2.590408108991752,0.0,2.5657440185169285,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3885484287466188,0.0,2.0915714079217294,0.0,0.0,2.0882729220707734,40
4.4156046539068985,4.450188289806342,0.0,0.0,0.0,2.28616326831259,0.0,0.0,0.0,2.521444771803698,0.0,0.0,0.0,2.0915714079217294,0.0,2.088272922070775,0.0,64


In [24]:
# Execute the `train` step for the CatBoost profile.
# NOTE(review): in the recorded run this cell failed while importing catboost
# ("numpy.dtype size changed, may indicate binary incompatibility"). The installed
# catboost and numpy builds likely do not match — confirm and fix the environment before re-running.
r.run(step="train")

2024/09/22 18:27:03 INFO mlflow.recipes.utils.execution: ingest, split, transform: No changes. Skipping.


"Run MLflow Recipe step: train"
2024/09/22 18:27:06 INFO mlflow.recipes.step: Running step train...
2024/09/22 18:27:11 INFO mlflow.tracking._tracking_service.client: \U0001f3c3 View run selective-grouse-132 at: http://127.0.0.1:8080/#/experiments/813857939957099372/runs/642e5062535f4898a2190bb01a7378eb.
2024/09/22 18:27:11 INFO mlflow.tracking._tracking_service.client: \U0001f9ea View experiment at: http://127.0.0.1:8080/#/experiments/813857939957099372.
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "C:\Users\Dell\PycharmProjects\nycopendata\venv\lib\site-packages\mlflow\recipes\step.py", line 132, in run
    self.step_card = self._run(output_directory=output_directory)
  File "C:\Users\Dell\PycharmProjects\nycopendata\venv\lib\site-packages\mlflow\recipes\steps\train.py", line 369, in _run
    estimator = self._resolve_estimator(
  File "C:\Users\Dell\PycharmProjects\nycopendata\venv\lib\site-packages\mlflow\recipes\steps\train.py", line 698, in _re

MlflowException: Failed to run step 'train' of recipe 'student_performance':
The following error occurred while running step 'Step:train':
Traceback (most recent call last):
  File "C:\Users\Dell\PycharmProjects\nycopendata\venv\lib\site-packages\mlflow\recipes\step.py", line 132, in run
    self.step_card = self._run(output_directory=output_directory)
  File "C:\Users\Dell\PycharmProjects\nycopendata\venv\lib\site-packages\mlflow\recipes\steps\train.py", line 369, in _run
    estimator = self._resolve_estimator(
  File "C:\Users\Dell\PycharmProjects\nycopendata\venv\lib\site-packages\mlflow\recipes\steps\train.py", line 698, in _resolve_estimator
    return self._get_user_defined_estimator(
  File "C:\Users\Dell\PycharmProjects\nycopendata\venv\lib\site-packages\mlflow\recipes\steps\train.py", line 667, in _get_user_defined_estimator
    estimator = estimator_fn()
  File "C:\Users\Dell\PycharmProjects\nycopendata\student_performance\steps\train.py", line 91, in estimator_catb_fn
    from catboost import CatBoostRegressor
  File "C:\Users\Dell\PycharmProjects\nycopendata\venv\lib\site-packages\catboost\__init__.py", line 1, in <module>
    from .core import (
  File "C:\Users\Dell\PycharmProjects\nycopendata\venv\lib\site-packages\catboost\core.py", line 45, in <module>
    from .plot_helpers import save_plot_file, try_plot_offline, OfflineMetricVisualizer
  File "C:\Users\Dell\PycharmProjects\nycopendata\venv\lib\site-packages\catboost\plot_helpers.py", line 5, in <module>
    from . import _catboost
  File "_catboost.pyx", line 1, in init _catboost
ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

Last step status: 'StepStatus.FAILED'


In [None]:
# Execute the `evaluate` step for the model trained in this section.
# NOTE(review): this cell has no execution count; the cells of this section after the failed train step were not run.
r.run(step="evaluate")

In [None]:
# Execute the `register` step for the model trained in this section.
r.run(step="register")

In [None]:
# Show the stored results of the `train` step.
r.inspect(step="train")

In [None]:
# Fetch the recipe's training split and show its summary statistics.
training_data = r.get_artifact(artifact_name="training_data")
training_data.describe()

In [None]:
# Fetch the trained model artifact and print its text summary.
trained_model = r.get_artifact(artifact_name="model")
print(trained_model)

# Decision Tree

This notebook runs the MLflow Regression Recipe on Databricks and inspects its results. For more information about the MLflow Regression Recipe, including usage examples, see the [Regression Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#regression-recipe) and the [Regression Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.regression.v1.recipe).

In [25]:
from mlflow.recipes import Recipe

# Rebind `r` to a recipe built with the "decision_tree" profile; the following cells of this section act on it.
r = Recipe(profile="decision_tree")


2024/09/22 18:28:02 INFO mlflow.recipes.recipe: Creating MLflow Recipe 'student_performance' with profile: 'decision_tree'


In [26]:
# Presumably discards cached step outputs so the steps below execute again
# (the ingest step that follows is run, not skipped) — confirm against the MLflow docs.
r.clean()

In [27]:
# Inspect the recipe as a whole: no step name is passed.
r.inspect()

In [28]:
# Execute the `ingest` step; its output previews the ingested schema and first rows.
r.run(step="ingest")

2024/09/22 18:28:07 INFO mlflow.recipes.step: Running step ingest...

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|##########| 1/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|##########| 1/1 [00:00<?, ?it/s]


name,type
race_ethnicity,string
parental_level_of_education,string
lunch,string
test_preparation_course,string
math_score,integer
reading_score,integer
writing_score,integer

race_ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
group B,bachelor's degree,standard,none,72,72,74
group C,some college,standard,completed,69,90,88
group B,master's degree,standard,none,90,95,93
group A,associate's degree,free/reduced,none,47,57,44
group C,some college,standard,none,76,78,75


In [29]:
# Execute the `split` step; unchanged upstream steps are skipped, as the log reports.
r.run(step="split")

2024/09/22 18:28:10 INFO mlflow.recipes.utils.execution: ingest: No changes. Skipping.


"Run MLflow Recipe step: split"
2024/09/22 18:28:14 INFO mlflow.recipes.step: Running step split...
  return bound(*args, **kwds)


In [30]:
# Execute the `transform` step, which produces the num_pipeline__/cat_pipeline__ feature columns listed in its output.
r.run(step="transform")

2024/09/22 18:28:30 INFO mlflow.recipes.utils.execution: ingest, split: No changes. Skipping.


"Run MLflow Recipe step: transform"
2024/09/22 18:28:34 INFO mlflow.recipes.step: Running step transform...


Name,Type
race_ethnicity,object
parental_level_of_education,object
lunch,object
test_preparation_course,object
math_score,int64
reading_score,int64
writing_score,int64

Name,Type
num_pipeline__reading_score,float64
num_pipeline__writing_score,float64
cat_pipeline__race_ethnicity_group A,float64
cat_pipeline__race_ethnicity_group B,float64
cat_pipeline__race_ethnicity_group C,float64
cat_pipeline__race_ethnicity_group D,float64
cat_pipeline__race_ethnicity_group E,float64
cat_pipeline__parental_level_of_education_associate's degree,float64
cat_pipeline__parental_level_of_education_bachelor's degree,float64
cat_pipeline__parental_level_of_education_high school,float64

num_pipeline__reading_score,num_pipeline__writing_score,cat_pipeline__race_ethnicity_group A,cat_pipeline__race_ethnicity_group B,cat_pipeline__race_ethnicity_group C,cat_pipeline__race_ethnicity_group D,cat_pipeline__race_ethnicity_group E,cat_pipeline__parental_level_of_education_associate's degree,cat_pipeline__parental_level_of_education_bachelor's degree,cat_pipeline__parental_level_of_education_high school,cat_pipeline__parental_level_of_education_master's degree,cat_pipeline__parental_level_of_education_some college,cat_pipeline__parental_level_of_education_some high school,cat_pipeline__lunch_free/reduced,cat_pipeline__lunch_standard,cat_pipeline__test_preparation_course_completed,cat_pipeline__test_preparation_course_none,math_score
6.554413158143052,6.177127029134177,0.0,2.5657440185169285,0.0,0.0,0.0,0.0,0.0,0.0,4.100355739219689,0.0,0.0,0.0,2.0915714079217245,0.0,2.0882729220707734,90
5.381518171949033,4.981554055753368,0.0,0.0,2.124650912267445,0.0,0.0,0.0,0.0,0.0,0.0,2.3885484287466188,0.0,0.0,2.0915714079217245,0.0,2.0882729220707734,76
5.726487285535509,5.180816217983503,0.0,2.5657440185169285,0.0,0.0,0.0,2.432328446512315,0.0,0.0,0.0,0.0,0.0,0.0,2.0915714079217245,0.0,2.0882729220707734,71
2.9667343768436973,2.590408108991752,0.0,2.5657440185169285,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3885484287466188,0.0,2.0915714079217294,0.0,0.0,2.0882729220707734,40
4.4156046539068985,4.450188289806342,0.0,0.0,0.0,2.28616326831259,0.0,0.0,0.0,2.521444771803698,0.0,0.0,0.0,2.0915714079217294,0.0,2.088272922070775,0.0,64


In [31]:
# Execute the `train` step; its output reports training and validation metrics for the fitted model.
r.run(step="train")

2024/09/22 18:28:38 INFO mlflow.recipes.utils.execution: ingest, split, transform: No changes. Skipping.


"Run MLflow Recipe step: train"
2024/09/22 18:28:42 INFO mlflow.recipes.step: Running step train...

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]
Downloading artifacts:  20%|##        | 1/5 [00:00<00:00, 1001.27it/s]
Downloading artifacts:  40%|####      | 2/5 [00:00<00:00, 2002.53it/s]
Downloading artifacts:  60%|######    | 3/5 [00:00<00:00, 3003.80it/s]
Downloading artifacts:  80%|########  | 4/5 [00:00<00:00, 4005.06it/s]
Downloading artifacts: 100%|##########| 5/5 [00:00<00:00, 5006.33it/s]
Downloading artifacts: 100%|##########| 5/5 [00:00<00:00, 5006.33it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]
Downloading artifacts:  20%|##        | 1/5 [00:00<?, ?it/s]
Downloading artifacts:  40%|####      | 2/5 [00:00<?, ?it/s]
Downloading artifacts:  60%|######    | 3/5 [00:00<?, ?it/s]
Downloading artifacts:  80%|########  | 4/5 [00:00<?, ?it/s]
Downloading artifacts: 100%|##########| 5/5 [00:00<00:00, 5005.14it/s]
Downloading artifacts: 100%|#######

Metric,training,validation
r2_score,0.993899,0.33823
r2_score,0.993899,0.33823
example_count,803.0,113.0
max_error,17.0,28.0
mean_absolute_error,0.13325,9.73451
mean_absolute_percentage_error,0.00195701,0.145636
mean_on_target,65.7522,69.6283
mean_squared_error,1.36052,134.265
root_mean_squared_error,1.16641,11.5873
score,0.993899,0.33823

Name,Type
race_ethnicity,string
parental_level_of_education,string
lunch,string
test_preparation_course,string
reading_score,long
writing_score,long

Name,Type
-,"Tensor('float64', (-1,))"

absolute_error,prediction,math_score,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,reading_score,writing_score
17.0,63.0,80,group C,some college,free/reduced,none,64,66
17.0,63.0,46,group C,some college,free/reduced,none,64,66
9.5,78.5,88,group D,some college,standard,none,77,77
9.5,78.5,69,group D,some college,standard,none,77,77
8.5,70.5,62,group B,some college,standard,none,67,67
8.5,70.5,79,group B,some college,standard,none,67,67
7.0,88.0,95,group D,master's degree,standard,none,81,84
7.0,88.0,81,group D,master's degree,standard,none,81,84
5.5,67.5,73,group D,master's degree,standard,none,70,75
5.5,67.5,62,group D,master's degree,standard,none,70,75

Unnamed: 0,Latest
Model Rank,> 0
r2_score,0.33823
max_error,28
mean_absolute_error,9.73451
mean_absolute_percentage_error,0.145636
mean_squared_error,134.265
root_mean_squared_error,11.5873
Run Time,2024-09-22 18:28:45
Run ID,fa29f9be1bc3475ba5c8dd12d9cced00


In [32]:
# Execute the `evaluate` step: it reports validation/test metrics and checks r2_score against the 0.7 threshold.
r.run(step="evaluate")

2024/09/22 18:30:23 INFO mlflow.recipes.utils.execution: ingest, split, transform, train: No changes. Skipping.


"Run MLflow Recipe step: evaluate"
2024/09/22 18:30:27 INFO mlflow.recipes.step: Running step evaluate...
2024/09/22 18:30:38 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/09/22 18:30:38 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/09/22 18:30:40 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/09/22 18:30:40 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/09/22 18:30:41 INFO mlflow.tracking._tracking_service.client: \U0001f3c3 View run useful-zebra-288 at: http://127.0.0.1:8080/#/experiments/417741253264815420/runs/fa29f9be1bc3475ba5c8dd12d9cced00.
2024/09/22 18:30:41 INFO mlflow.tracking._tracking_service.client: \U0001f9ea View experiment at: http://127.0.0.1:8080/#/experiments/417741253264815420.
Index(['prediction', 'target'], dtype='object')
Index(['prediction', 'target'], dtype='object')
Index(['prediction', 'target'], dtype='objec

Metric,validation,test
r2_score,0.33823,0.525728
r2_score,0.33823,0.525728
example_count,113.0,84.0
max_error,28.0,34.0
mean_absolute_error,9.73451,9.535714
mean_absolute_percentage_error,0.145636,1608428438346605.5
mean_on_target,69.6283,64.547619
mean_squared_error,134.265,146.791667
root_mean_squared_error,11.5873,12.115761
score,0.33823,0.525728

metric,greater_is_better,value,threshold,validated
r2_score,True,0.525728,0.7,❌


In [33]:
# Execute the `register` step.
# NOTE(review): in the recorded run this raised MlflowException because the model did not pass
# validation in the evaluate step (test r2_score 0.525728 < threshold 0.7). The error message
# says the check can be bypassed by setting allow_non_validated_model to True.
r.run(step="register")

2024/09/22 18:30:43 INFO mlflow.recipes.utils.execution: ingest, split, transform, train, evaluate: No changes. Skipping.


"Run MLflow Recipe step: register"
2024/09/22 18:30:46 INFO mlflow.recipes.step: Running step register...
Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "C:\Users\Dell\PycharmProjects\nycopendata\venv\lib\site-packages\mlflow\recipes\step.py", line 132, in run
    self.step_card = self._run(output_directory=output_directory)
  File "C:\Users\Dell\PycharmProjects\nycopendata\venv\lib\site-packages\mlflow\recipes\steps\register.py", line 93, in _run
    raise MlflowException(
mlflow.exceptions.MlflowException: Model registration on runs:/fa29f9be1bc3475ba5c8dd12d9cced00/train/model failed because it is not validated. Bypass by setting allow_non_validated_model to True. 
make: *** [Makefile:58: steps/register/outputs/registered_model_version.json] Error 1


MlflowException: Failed to run step 'register' of recipe 'student_performance':
The following error occurred while running step 'Step:register':
Traceback (most recent call last):
  File "C:\Users\Dell\PycharmProjects\nycopendata\venv\lib\site-packages\mlflow\recipes\step.py", line 132, in run
    self.step_card = self._run(output_directory=output_directory)
  File "C:\Users\Dell\PycharmProjects\nycopendata\venv\lib\site-packages\mlflow\recipes\steps\register.py", line 93, in _run
    raise MlflowException(
mlflow.exceptions.MlflowException: Model registration on runs:/fa29f9be1bc3475ba5c8dd12d9cced00/train/model failed because it is not validated. Bypass by setting allow_non_validated_model to True. 

Last step status: 'StepStatus.FAILED'


In [None]:
# Show the stored results of the `train` step.
r.inspect(step="train")

In [None]:
# Fetch the recipe's training split and show its summary statistics.
training_data = r.get_artifact(artifact_name="training_data")
training_data.describe()

In [None]:
# Fetch the trained model artifact and print its text summary.
trained_model = r.get_artifact(artifact_name="model")
print(trained_model)

# Gradient Boost

This notebook runs the MLflow Regression Recipe on Databricks and inspects its results. For more information about the MLflow Regression Recipe, including usage examples, see the [Regression Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#regression-recipe) and the [Regression Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.regression.v1.recipe).

In [34]:
from mlflow.recipes import Recipe

# Build the recipe with the "gb" profile. The name `r` is rebound by every section of this
# notebook, so this cell must run before the other cells in this section; otherwise they
# operate on whichever profile was created last.
r = Recipe(profile="gb")


2024/09/22 18:34:06 INFO mlflow.recipes.recipe: Creating MLflow Recipe 'student_performance' with profile: 'gb'


In [35]:
r.clean()

In [36]:
r.inspect()

In [37]:
r.run("ingest")

2024/09/22 18:34:11 INFO mlflow.recipes.step: Running step ingest...

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|##########| 1/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|##########| 1/1 [00:00<?, ?it/s]


name,type
race_ethnicity,string
parental_level_of_education,string
lunch,string
test_preparation_course,string
math_score,integer
reading_score,integer
writing_score,integer

race_ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
group B,bachelor's degree,standard,none,72,72,74
group C,some college,standard,completed,69,90,88
group B,master's degree,standard,none,90,95,93
group A,associate's degree,free/reduced,none,47,57,44
group C,some college,standard,none,76,78,75


In [38]:
r.run("split")

2024/09/22 18:34:14 INFO mlflow.recipes.utils.execution: ingest: No changes. Skipping.


"Run MLflow Recipe step: split"
2024/09/22 18:34:17 INFO mlflow.recipes.step: Running step split...
  return bound(*args, **kwds)


In [39]:
r.run("transform")

2024/09/22 18:34:35 INFO mlflow.recipes.utils.execution: ingest, split: No changes. Skipping.


"Run MLflow Recipe step: transform"
2024/09/22 18:34:38 INFO mlflow.recipes.step: Running step transform...


Name,Type
race_ethnicity,object
parental_level_of_education,object
lunch,object
test_preparation_course,object
math_score,int64
reading_score,int64
writing_score,int64

Name,Type
num_pipeline__reading_score,float64
num_pipeline__writing_score,float64
cat_pipeline__race_ethnicity_group A,float64
cat_pipeline__race_ethnicity_group B,float64
cat_pipeline__race_ethnicity_group C,float64
cat_pipeline__race_ethnicity_group D,float64
cat_pipeline__race_ethnicity_group E,float64
cat_pipeline__parental_level_of_education_associate's degree,float64
cat_pipeline__parental_level_of_education_bachelor's degree,float64
cat_pipeline__parental_level_of_education_high school,float64

num_pipeline__reading_score,num_pipeline__writing_score,cat_pipeline__race_ethnicity_group A,cat_pipeline__race_ethnicity_group B,cat_pipeline__race_ethnicity_group C,cat_pipeline__race_ethnicity_group D,cat_pipeline__race_ethnicity_group E,cat_pipeline__parental_level_of_education_associate's degree,cat_pipeline__parental_level_of_education_bachelor's degree,cat_pipeline__parental_level_of_education_high school,cat_pipeline__parental_level_of_education_master's degree,cat_pipeline__parental_level_of_education_some college,cat_pipeline__parental_level_of_education_some high school,cat_pipeline__lunch_free/reduced,cat_pipeline__lunch_standard,cat_pipeline__test_preparation_course_completed,cat_pipeline__test_preparation_course_none,math_score
6.554413158143052,6.177127029134177,0.0,2.5657440185169285,0.0,0.0,0.0,0.0,0.0,0.0,4.100355739219689,0.0,0.0,0.0,2.0915714079217245,0.0,2.0882729220707734,90
5.381518171949033,4.981554055753368,0.0,0.0,2.124650912267445,0.0,0.0,0.0,0.0,0.0,0.0,2.3885484287466188,0.0,0.0,2.0915714079217245,0.0,2.0882729220707734,76
5.726487285535509,5.180816217983503,0.0,2.5657440185169285,0.0,0.0,0.0,2.432328446512315,0.0,0.0,0.0,0.0,0.0,0.0,2.0915714079217245,0.0,2.0882729220707734,71
2.9667343768436973,2.590408108991752,0.0,2.5657440185169285,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.3885484287466188,0.0,2.0915714079217294,0.0,0.0,2.0882729220707734,40
4.4156046539068985,4.450188289806342,0.0,0.0,0.0,2.28616326831259,0.0,0.0,0.0,2.521444771803698,0.0,0.0,0.0,2.0915714079217294,0.0,2.088272922070775,0.0,64


In [40]:
r.run("train")

2024/09/22 18:34:42 INFO mlflow.recipes.utils.execution: ingest, split, transform: No changes. Skipping.


"Run MLflow Recipe step: train"
2024/09/22 18:34:45 INFO mlflow.recipes.step: Running step train...

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]
Downloading artifacts:  20%|##        | 1/5 [00:00<?, ?it/s]
Downloading artifacts:  40%|####      | 2/5 [00:00<?, ?it/s]
Downloading artifacts:  60%|######    | 3/5 [00:00<00:00, 3000.93it/s]
Downloading artifacts:  80%|########  | 4/5 [00:00<00:00, 4001.24it/s]
Downloading artifacts: 100%|##########| 5/5 [00:00<00:00, 5001.55it/s]
Downloading artifacts: 100%|##########| 5/5 [00:00<00:00, 2499.59it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]
Downloading artifacts:  20%|##        | 1/5 [00:00<?, ?it/s]
Downloading artifacts:  40%|####      | 2/5 [00:00<?, ?it/s]
Downloading artifacts:  60%|######    | 3/5 [00:00<?, ?it/s]
Downloading artifacts:  80%|########  | 4/5 [00:00<?, ?it/s]
Downloading artifacts: 100%|##########| 5/5 [00:00<00:00, 1667.45it/s]
Downloading artifacts: 100%|##########| 5/5 [00:00<00:0

Metric,training,validation
r2_score,0.788805,0.625458
r2_score,0.788805,0.625458
example_count,803.0,113.0
max_error,21.1652,19.8283
mean_absolute_error,5.4971,7.12552
mean_absolute_percentage_error,0.0894693,0.107286
mean_on_target,65.7522,69.6283
mean_squared_error,47.0936,75.9903
root_mean_squared_error,6.86248,8.71724
score,0.788805,0.625458

Name,Type
race_ethnicity,string
parental_level_of_education,string
lunch,string
test_preparation_course,string
reading_score,long
writing_score,long

Name,Type
-,"Tensor('float64', (-1,))"

absolute_error,prediction,math_score,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,reading_score,writing_score
21.165189613801974,58.83481038619802,80,group C,some college,free/reduced,none,64,66
19.2969752720164,58.2969752720164,39,group C,associate's degree,standard,none,64,57
19.07502036583351,71.92497963416649,91,group C,some college,standard,none,74,76
18.78884473454576,54.21115526545424,73,group B,bachelor's degree,free/reduced,none,56,57
18.405846036090395,71.5941539639096,90,group C,high school,standard,none,75,69
18.15911438913037,66.15911438913037,48,group A,some high school,standard,none,66,65
18.05419621053917,79.05419621053917,61,group B,associate's degree,standard,completed,86,87
17.94071919154851,69.05928080845149,87,group C,associate's degree,free/reduced,none,73,72
17.85828491471935,70.14171508528065,88,group B,bachelor's degree,free/reduced,none,75,76
17.4897978009121,59.4897978009121,42,group C,some college,free/reduced,completed,66,69

Unnamed: 0,Latest
Model Rank,> 0
r2_score,0.625458
max_error,19.8283
mean_absolute_error,7.12552
mean_absolute_percentage_error,0.107286
mean_squared_error,75.9903
root_mean_squared_error,8.71724
Run Time,2024-09-22 18:34:49
Run ID,e3b2e7ab972d425290b5061944c667bc


In [41]:
# NOTE(review): the test-split mean_absolute_percentage_error printed below is ~1.5e15 while the
# other test metrics look normal; presumably a zero math_score in the test split inflates the
# percentage error — confirm before relying on MAPE for this recipe.
r.run("evaluate")

2024/09/22 18:36:06 INFO mlflow.recipes.utils.execution: ingest, split, transform, train: No changes. Skipping.


"Run MLflow Recipe step: evaluate"
2024/09/22 18:36:09 INFO mlflow.recipes.step: Running step evaluate...
2024/09/22 18:36:20 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/09/22 18:36:20 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/09/22 18:36:23 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/09/22 18:36:23 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/09/22 18:36:24 INFO mlflow.tracking._tracking_service.client: \U0001f3c3 View run amusing-gull-924 at: http://127.0.0.1:8080/#/experiments/804360702375255715/runs/e3b2e7ab972d425290b5061944c667bc.
2024/09/22 18:36:24 INFO mlflow.tracking._tracking_service.client: \U0001f9ea View experiment at: http://127.0.0.1:8080/#/experiments/804360702375255715.
Index(['prediction', 'target'], dtype='object')
Index(['prediction', 'target'], dtype='object')
Index(['prediction', 'target'], dtype='objec

Metric,validation,test
r2_score,0.625458,0.77334
r2_score,0.625458,0.77334
example_count,113.0,84.0
max_error,19.8283,28.093779
mean_absolute_error,7.12552,6.511462
mean_absolute_percentage_error,0.107286,1506227758963761.5
mean_on_target,69.6283,64.547619
mean_squared_error,75.9903,70.153578
root_mean_squared_error,8.71724,8.375773
score,0.625458,0.77334

metric,greater_is_better,value,threshold,validated
r2_score,True,0.77334,0.7,✅


In [42]:
r.run("register")

2024/09/22 18:36:25 INFO mlflow.recipes.utils.execution: ingest, split, transform, train, evaluate: No changes. Skipping.


"Run MLflow Recipe step: register"
2024/09/22 18:36:29 INFO mlflow.recipes.step: Running step register...
Registered model 'stud_perf_regressor' already exists. Creating a new version of this model...
2024/09/22 18:36:30 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: stud_perf_regressor, version 3
Created version '3' of model 'stud_perf_regressor'.


In [43]:
r.inspect("train")

Metric,training,validation
r2_score,0.788805,0.625458
r2_score,0.788805,0.625458
example_count,803.0,113.0
max_error,21.1652,19.8283
mean_absolute_error,5.4971,7.12552
mean_absolute_percentage_error,0.0894693,0.107286
mean_on_target,65.7522,69.6283
mean_squared_error,47.0936,75.9903
root_mean_squared_error,6.86248,8.71724
score,0.788805,0.625458

Name,Type
race_ethnicity,string
parental_level_of_education,string
lunch,string
test_preparation_course,string
reading_score,long
writing_score,long

Name,Type
-,"Tensor('float64', (-1,))"

absolute_error,prediction,math_score,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,reading_score,writing_score
21.165189613801974,58.83481038619802,80,group C,some college,free/reduced,none,64,66
19.2969752720164,58.2969752720164,39,group C,associate's degree,standard,none,64,57
19.07502036583351,71.92497963416649,91,group C,some college,standard,none,74,76
18.78884473454576,54.21115526545424,73,group B,bachelor's degree,free/reduced,none,56,57
18.405846036090395,71.5941539639096,90,group C,high school,standard,none,75,69
18.15911438913037,66.15911438913037,48,group A,some high school,standard,none,66,65
18.05419621053917,79.05419621053917,61,group B,associate's degree,standard,completed,86,87
17.94071919154851,69.05928080845149,87,group C,associate's degree,free/reduced,none,73,72
17.85828491471935,70.14171508528065,88,group B,bachelor's degree,free/reduced,none,75,76
17.4897978009121,59.4897978009121,42,group C,some college,free/reduced,completed,66,69

Unnamed: 0,Latest
Model Rank,> 0
r2_score,0.625458
max_error,19.8283
mean_absolute_error,7.12552
mean_absolute_percentage_error,0.107286
mean_squared_error,75.9903
root_mean_squared_error,8.71724
Run Time,2024-09-22 18:34:49
Run ID,e3b2e7ab972d425290b5061944c667bc


In [44]:
# Fetch the "training_data" artifact from the current recipe `r` and display its summary
# statistics (count, mean, std, min, quartiles, max) via describe().
training_data = r.get_artifact("training_data")
training_data.describe()

Unnamed: 0,math_score,reading_score,writing_score
count,803.0,803.0,803.0
mean,65.752179,68.941469,67.933998
std,14.942019,14.503085,15.064926
min,18.0,23.0,15.0
25%,56.5,59.0,57.0
50%,66.0,70.0,69.0
75%,76.0,79.0,78.5
max,100.0,100.0,100.0


In [45]:
# Fetch the "model" artifact from the current recipe `r` and print its text representation.
trained_model = r.get_artifact("model")
print(trained_model)

mlflow.pyfunc.loaded_model:
  artifact_path: train/model
  flavor: mlflow.pyfunc.model
  run_id: e3b2e7ab972d425290b5061944c667bc



# Linear Regression

This notebook runs the MLflow Regression Recipe on Databricks and inspects its results. For more information about the MLflow Regression Recipe, including usage examples, see the [Regression Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#regression-recipe) and the [Regression Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.regression.v1.recipe).

In [58]:
from mlflow.recipes import Recipe

# Build the recipe with the "linear_regression" profile. The name `r` is rebound by every
# section of this notebook, so this cell must run before the other cells in this section;
# otherwise they operate on whichever profile was created last.
r = Recipe(profile="linear_regression")


2024/09/22 19:29:42 INFO mlflow.recipes.recipe: Creating MLflow Recipe 'student_performance' with profile: 'linear_regression'


In [59]:
r.clean()

In [60]:
r.inspect()

In [61]:
r.run("ingest")

2024/09/22 19:29:48 INFO mlflow.recipes.step: Running step ingest...

Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|##########| 1/1 [00:00<?, ?it/s]
Downloading artifacts: 100%|##########| 1/1 [00:00<00:00, 998.64it/s]


name,type
gender,string
race_ethnicity,string
parental_level_of_education,string
lunch,string
test_preparation_course,string
math_score,integer
reading_score,integer
writing_score,integer

gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score
female,group B,bachelor's degree,standard,none,72,72,74
female,group C,some college,standard,completed,69,90,88
female,group B,master's degree,standard,none,90,95,93
male,group A,associate's degree,free/reduced,none,47,57,44
male,group C,some college,standard,none,76,78,75


In [62]:
r.run("split")

2024/09/22 19:29:50 INFO mlflow.recipes.utils.execution: ingest: No changes. Skipping.


"Run MLflow Recipe step: split"
2024/09/22 19:29:54 INFO mlflow.recipes.step: Running step split...
  return bound(*args, **kwds)


In [63]:
r.run("transform")

2024/09/22 19:30:12 INFO mlflow.recipes.utils.execution: ingest, split: No changes. Skipping.


"Run MLflow Recipe step: transform"
2024/09/22 19:30:15 INFO mlflow.recipes.step: Running step transform...


Name,Type
gender,object
race_ethnicity,object
parental_level_of_education,object
lunch,object
test_preparation_course,object
math_score,int64
reading_score,int64
writing_score,int64

Name,Type
num_pipeline__reading_score,float64
num_pipeline__writing_score,float64
cat_pipeline__race_ethnicity_group A,float64
cat_pipeline__race_ethnicity_group B,float64
cat_pipeline__race_ethnicity_group C,float64
cat_pipeline__race_ethnicity_group D,float64
cat_pipeline__race_ethnicity_group E,float64
cat_pipeline__parental_level_of_education_associate's degree,float64
cat_pipeline__parental_level_of_education_bachelor's degree,float64
cat_pipeline__parental_level_of_education_high school,float64

num_pipeline__reading_score,num_pipeline__writing_score,cat_pipeline__race_ethnicity_group A,cat_pipeline__race_ethnicity_group B,cat_pipeline__race_ethnicity_group C,cat_pipeline__race_ethnicity_group D,cat_pipeline__race_ethnicity_group E,cat_pipeline__parental_level_of_education_associate's degree,cat_pipeline__parental_level_of_education_bachelor's degree,cat_pipeline__parental_level_of_education_high school,cat_pipeline__parental_level_of_education_master's degree,cat_pipeline__parental_level_of_education_some college,cat_pipeline__parental_level_of_education_some high school,cat_pipeline__lunch_free/reduced,cat_pipeline__lunch_standard,cat_pipeline__test_preparation_course_completed,cat_pipeline__test_preparation_course_none,math_score
4.877402650988941,4.833118943827281,0.0,2.4893743678204605,0.0,0.0,0.0,0.0,3.140484624729267,0.0,0.0,0.0,0.0,0.0,2.080085818491918,0.0,2.088181635044545,72
3.861277098699578,2.873746399032437,3.488513040610848,0.0,0.0,0.0,0.0,2.394017201314638,0.0,0.0,0.0,0.0,0.0,2.0800858184919244,0.0,0.0,2.088181635044545,47
6.435461831165964,6.008742470704187,0.0,2.4893743678204605,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.4136220081486823,0.0,0.0,2.080085818491918,2.0881816350445512,0.0,88
2.912893249896173,2.547184308233297,0.0,2.4893743678204605,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.4136220081486823,0.0,2.0800858184919244,0.0,0.0,2.088181635044545,40
4.335469023101281,4.375932016708484,0.0,0.0,0.0,2.263400963400674,0.0,0.0,0.0,2.460964957427185,0.0,0.0,0.0,2.0800858184919244,0.0,2.0881816350445512,0.0,64


In [64]:
# NOTE(review): the training mean_absolute_percentage_error printed below is ~1.0e14, and
# training_data.describe() later in this section reports a math_score minimum of 0; presumably
# that zero target inflates the percentage error — confirm before relying on MAPE here.
r.run("train")

2024/09/22 19:30:19 INFO mlflow.recipes.utils.execution: ingest, split, transform: No changes. Skipping.


"Run MLflow Recipe step: train"
2024/09/22 19:30:23 INFO mlflow.recipes.step: Running step train...

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]
Downloading artifacts:  20%|##        | 1/5 [00:00<?, ?it/s]
Downloading artifacts:  40%|####      | 2/5 [00:00<?, ?it/s]
Downloading artifacts:  60%|######    | 3/5 [00:00<00:00, 3004.52it/s]
Downloading artifacts:  80%|########  | 4/5 [00:00<00:00, 4006.02it/s]
Downloading artifacts: 100%|##########| 5/5 [00:00<00:00, 5007.53it/s]
Downloading artifacts: 100%|##########| 5/5 [00:00<00:00, 5007.53it/s]

Downloading artifacts:   0%|          | 0/5 [00:00<?, ?it/s]
Downloading artifacts:  20%|##        | 1/5 [00:00<?, ?it/s]
Downloading artifacts:  40%|####      | 2/5 [00:00<00:00, 1989.24it/s]
Downloading artifacts:  60%|######    | 3/5 [00:00<00:00, 749.70it/s] 
Downloading artifacts:  80%|########  | 4/5 [00:00<00:00, 799.91it/s]
Downloading artifacts: 100%|##########| 5/5 [00:00<00:00, 833.00it/s]
Downloading artifacts: 100%

Metric,training,validation
r2_score,0.717934,0.667066
r2_score,0.717934,0.667066
example_count,786.0,104.0
max_error,22.875,21.25
mean_absolute_error,6.71295,6.01743
mean_absolute_percentage_error,103136000000000.0,0.0987494
mean_on_target,66.0802,65.8077
mean_squared_error,66.3651,60.3319
root_mean_squared_error,8.14648,7.76736
score,0.717934,0.667066

Name,Type
gender,string
race_ethnicity,string
parental_level_of_education,string
lunch,string
test_preparation_course,string
reading_score,long
writing_score,long

Name,Type
-,"Tensor('float64', (-1,))"

absolute_error,prediction,math_score,gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,reading_score,writing_score
22.875,61.875,39,female,group C,associate's degree,standard,none,64,57
21.5625,40.5625,19,female,group B,some college,standard,none,38,32
21.1875,65.8125,87,male,group C,associate's degree,free/reduced,none,73,72
20.8125,67.1875,88,male,group B,bachelor's degree,free/reduced,none,75,76
19.5625,71.4375,91,male,group E,associate's degree,free/reduced,completed,73,80
19.0,80.0,61,female,group B,associate's degree,standard,completed,86,87
18.9375,72.0625,91,male,group C,some college,standard,none,74,76
18.875,71.125,90,male,group C,high school,standard,none,75,69
18.5,58.5,40,female,group D,some high school,free/reduced,completed,65,64
18.4375,75.5625,94,male,group E,high school,standard,none,73,71

Unnamed: 0,Latest
Model Rank,> 0
r2_score,0.667066
max_error,21.25
mean_absolute_error,6.01743
mean_absolute_percentage_error,0.0987494
mean_squared_error,60.3319
root_mean_squared_error,7.76736
Run Time,2024-09-22 19:30:27
Run ID,a12c10ce350746a08b9e0d8c94ca195d


In [65]:
r.run("evaluate")

2024/09/22 19:31:52 INFO mlflow.recipes.utils.execution: ingest, split, transform, train: No changes. Skipping.


"Run MLflow Recipe step: evaluate"
2024/09/22 19:31:55 INFO mlflow.recipes.step: Running step evaluate...
2024/09/22 19:32:05 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/09/22 19:32:05 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/09/22 19:32:09 INFO mlflow.models.evaluation.default_evaluator: Computing model predictions.
2024/09/22 19:32:09 INFO mlflow.models.evaluation.default_evaluator: Testing metrics on first row...
2024/09/22 19:32:09 INFO mlflow.tracking._tracking_service.client: \U0001f3c3 View run rare-cod-315 at: http://127.0.0.1:8080/#/experiments/287848601967458510/runs/a12c10ce350746a08b9e0d8c94ca195d.
2024/09/22 19:32:09 INFO mlflow.tracking._tracking_service.client: \U0001f9ea View experiment at: http://127.0.0.1:8080/#/experiments/287848601967458510.
Index(['prediction', 'target'], dtype='object')
Index(['prediction', 'target'], dtype='object')
Index(['prediction', 'target'], dtype='object')


Metric,validation,test
r2_score,0.667618,0.746921
r2_score,0.667618,0.746921
example_count,104.0,110.0
max_error,21.25,20.875
mean_absolute_error,6.01022,6.323295
mean_absolute_percentage_error,0.0986193,0.09845
mean_on_target,65.8077,66.418182
mean_squared_error,60.2319,59.567436
root_mean_squared_error,7.76092,7.717994
score,0.667618,0.746921

metric,greater_is_better,value,threshold,validated
r2_score,True,0.746921,0.7,✅


In [66]:
r.run("register")

2024/09/22 19:32:11 INFO mlflow.recipes.utils.execution: ingest, split, transform, train, evaluate: No changes. Skipping.


"Run MLflow Recipe step: register"
2024/09/22 19:32:14 INFO mlflow.recipes.step: Running step register...
Registered model 'stud_perf_regressor' already exists. Creating a new version of this model...
2024/09/22 19:32:15 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: stud_perf_regressor, version 5
Created version '5' of model 'stud_perf_regressor'.


In [67]:
r.inspect("train")

Metric,training,validation
r2_score,0.717934,0.667066
r2_score,0.717934,0.667066
example_count,786.0,104.0
max_error,22.875,21.25
mean_absolute_error,6.71295,6.01743
mean_absolute_percentage_error,103136000000000.0,0.0987494
mean_on_target,66.0802,65.8077
mean_squared_error,66.3651,60.3319
root_mean_squared_error,8.14648,7.76736
score,0.717934,0.667066

Name,Type
gender,string
race_ethnicity,string
parental_level_of_education,string
lunch,string
test_preparation_course,string
reading_score,long
writing_score,long

Name,Type
-,"Tensor('float64', (-1,))"

absolute_error,prediction,math_score,gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,reading_score,writing_score
22.875,61.875,39,female,group C,associate's degree,standard,none,64,57
21.5625,40.5625,19,female,group B,some college,standard,none,38,32
21.1875,65.8125,87,male,group C,associate's degree,free/reduced,none,73,72
20.8125,67.1875,88,male,group B,bachelor's degree,free/reduced,none,75,76
19.5625,71.4375,91,male,group E,associate's degree,free/reduced,completed,73,80
19.0,80.0,61,female,group B,associate's degree,standard,completed,86,87
18.9375,72.0625,91,male,group C,some college,standard,none,74,76
18.875,71.125,90,male,group C,high school,standard,none,75,69
18.5,58.5,40,female,group D,some high school,free/reduced,completed,65,64
18.4375,75.5625,94,male,group E,high school,standard,none,73,71

Unnamed: 0,Latest
Model Rank,> 0
r2_score,0.667066
max_error,21.25
mean_absolute_error,6.01743
mean_absolute_percentage_error,0.0987494
mean_squared_error,60.3319
root_mean_squared_error,7.76736
Run Time,2024-09-22 19:30:27
Run ID,a12c10ce350746a08b9e0d8c94ca195d


In [68]:
# Fetch the "training_data" artifact from the current recipe `r` and display its summary
# statistics (count, mean, std, min, quartiles, max) via describe().
training_data = r.get_artifact("training_data")
training_data.describe()

Unnamed: 0,math_score,reading_score,writing_score
count,786.0,786.0,786.0
mean,66.080153,69.104326,68.005089
std,15.348684,14.771355,15.320773
min,0.0,17.0,10.0
25%,57.0,59.0,57.0
50%,66.0,70.0,69.0
75%,76.0,80.0,79.0
max,100.0,100.0,100.0


In [69]:
# Fetch the "model" artifact from the current recipe `r` and print its text representation.
trained_model = r.get_artifact("model")
print(trained_model)

mlflow.pyfunc.loaded_model:
  artifact_path: train/model
  flavor: mlflow.pyfunc.model
  run_id: a12c10ce350746a08b9e0d8c94ca195d



# Random Forest

This notebook runs the MLflow Regression Recipe on Databricks and inspects its results. For more information about the MLflow Regression Recipe, including usage examples, see the [Regression Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#regression-recipe) and the [Regression Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.regression.v1.recipe).

In [None]:
from mlflow.recipes import Recipe

# Build the recipe with the "random_forest" profile. The name `r` is rebound by every section
# of this notebook, so this cell must run before the other cells in this section; otherwise
# they operate on whichever profile was created last.
r = Recipe(profile="random_forest")


In [None]:
r.clean()

In [None]:
r.inspect()

In [None]:
r.run("ingest")

In [None]:
r.run("split")

In [None]:
r.run("transform")

In [None]:
r.run("train")

In [None]:
r.run("evaluate")

In [None]:
r.run("register")

In [None]:
r.inspect("train")

In [None]:
# Fetch the "training_data" artifact from the current recipe `r`; the cell's displayed value is
# the result of calling describe() on it.
training_data = r.get_artifact("training_data")
training_data.describe()

In [None]:
# Fetch the "model" artifact from the current recipe `r` and print its text representation.
trained_model = r.get_artifact("model")
print(trained_model)

# Stochastic Gradient Descent

This notebook runs the MLflow Regression Recipe on Databricks and inspects its results. For more information about the MLflow Regression Recipe, including usage examples, see the [Regression Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#regression-recipe) and the [Regression Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.regression.v1.recipe).

In [None]:
from mlflow.recipes import Recipe

# Build the recipe with the "sgd" profile. The name `r` is rebound by every section of this
# notebook, so this cell must run before the other cells in this section; otherwise they
# operate on whichever profile was created last.
r = Recipe(profile="sgd")


In [None]:
r.clean()

In [None]:
r.inspect()

In [None]:
r.run("ingest")

In [None]:
r.run("split")

In [None]:
r.run("transform")

In [None]:
r.run("train")

In [None]:
r.run("evaluate")

In [None]:
r.run("register")

In [None]:
r.inspect("train")

In [None]:
# Fetch the "training_data" artifact from the current recipe `r`; the cell's displayed value is
# the result of calling describe() on it.
training_data = r.get_artifact("training_data")
training_data.describe()

In [None]:
# Fetch the "model" artifact from the current recipe `r` and print its text representation.
trained_model = r.get_artifact("model")
print(trained_model)

# XG Boost

This notebook runs the MLflow Regression Recipe on Databricks and inspects its results. For more information about the MLflow Regression Recipe, including usage examples, see the [Regression Recipe overview documentation](https://mlflow.org/docs/latest/recipes.html#regression-recipe) and the [Regression Recipe API documentation](https://mlflow.org/docs/latest/python_api/mlflow.recipes.html#module-mlflow.recipes.regression.v1.recipe).

In [None]:
from mlflow.recipes import Recipe

# Build the recipe with the "xgb" profile. The name `r` is rebound by every section of this
# notebook, so this cell must run before the other cells in this section; otherwise they
# operate on whichever profile was created last.
r = Recipe(profile="xgb")


In [None]:
r.clean()

In [None]:
r.inspect()

In [None]:
r.run("ingest")

In [None]:
r.run("split")

In [None]:
r.run("transform")

In [None]:
r.run("train")

In [None]:
r.run("evaluate")

In [None]:
r.run("register")

In [None]:
r.inspect("train")

In [None]:
# Fetch the "training_data" artifact from the current recipe `r`; the cell's displayed value is
# the result of calling describe() on it.
training_data = r.get_artifact("training_data")
training_data.describe()

In [None]:
# Fetch the "model" artifact from the current recipe `r` and print its text representation.
trained_model = r.get_artifact("model")
print(trained_model)