# MLOps with MLflow

#### Import data

In [1]:
# Import PyCaret and echo its version so the notebook records which release
# produced the outputs that follow.
import pycaret
pycaret.__version__

'3.3.2'

In [2]:
# Fetch the 'diamond' sample dataset through PyCaret's dataset helper and keep
# it in `data`. The call also displays the first rows (see the output below);
# the 'Price' column is the one used as the regression target later on.
from pycaret.datasets import get_data
data = get_data('diamond')

Unnamed: 0,Carat Weight,Cut,Color,Clarity,Polish,Symmetry,Report,Price
0,1.1,Ideal,H,SI1,VG,EX,GIA,5169
1,0.83,Ideal,H,VS1,ID,ID,AGSL,3470
2,0.85,Ideal,H,SI1,EX,EX,GIA,3183
3,0.91,Ideal,E,SI1,VG,VG,GIA,4370
4,0.83,Ideal,G,SI1,EX,EX,GIA,3171


#### Preprocessing

In [4]:
# Configure the PyCaret regression experiment on the diamond data.
# NOTE(review): the wildcard import is kept because later cells call names it
# provides (compare_models, save_model); switch to explicit imports once the
# full list of names used by the notebook is known.
from pycaret.regression import *

# log_plots is not passed, so PyCaret's default for plot logging applies.
s = setup(
    data,
    target='Price',                     # column to predict
    session_id=123,                     # fixed session id so reruns are repeatable
    log_experiment='dagshub',           # send experiment tracking to DagsHub
    experiment_name='project_diamond',  # experiment name on the tracking server
    verbose=True,                       # display the setup summary table
)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,Price
2,Target type,Regression
3,Original data shape,"(6000, 8)"
4,Transformed data shape,"(6000, 29)"
5,Transformed train set shape,"(4200, 29)"
6,Transformed test set shape,"(1800, 29)"
7,Numeric features,1
8,Categorical features,6
9,Preprocess,True


2024/10/18 16:57:27 INFO mlflow.tracking.fluent: Experiment with name 'project_diamond' does not exist. Creating a new experiment.


#### Model training

In [5]:
# Train and evaluate the candidate regressors and keep the top-ranked one in
# `best`. In the recorded output the comparison table is ordered by R2 (Extra
# Trees Regressor first) and each model is logged as its own MLflow run.
best = compare_models()

Unnamed: 0,Model,MAE,MSE,RMSE,R2,RMSLE,MAPE,TT (Sec)
et,Extra Trees Regressor,719.6871,2033874.0284,1390.8879,0.9809,0.0786,0.0585,0.43
rf,Random Forest Regressor,725.2299,2336386.4997,1491.5615,0.9781,0.0785,0.0577,0.393
lightgbm,Light Gradient Boosting Machine,721.5692,2747712.106,1593.0198,0.9748,0.075,0.0551,0.114
gbr,Gradient Boosting Regressor,870.4619,2677900.4116,1616.3756,0.9744,0.099,0.075,0.145
dt,Decision Tree Regressor,919.7628,3495425.391,1821.5832,0.9664,0.1025,0.0744,0.063
llar,Lasso Least Angle Regression,2489.5031,14919265.5729,3837.0972,0.8571,0.6592,0.2962,0.066
ridge,Ridge Regression,2491.2859,14957594.1466,3840.8633,0.8568,0.647,0.2966,0.063
lasso,Lasso Regression,2490.799,14993880.8447,3845.5124,0.8565,0.6525,0.2961,0.08
br,Bayesian Ridge,2493.3298,14989750.3796,3844.9434,0.8565,0.6497,0.2967,0.06
lr,Linear Regression,2525.3029,15210864.15,3878.1299,0.8539,0.6506,0.3017,0.416


2024/10/18 17:00:03 INFO mlflow.tracking._tracking_service.client: 🏃 View run Extra Trees Regressor at: https://dagshub.com/Brycenvn/MLOps-course.mlflow/#/experiments/2/runs/65e4a5c38628470fb59b30df4ab08cd9.
2024/10/18 17:00:03 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Brycenvn/MLOps-course.mlflow/#/experiments/2.
2024/10/18 17:00:13 INFO mlflow.tracking._tracking_service.client: 🏃 View run Random Forest Regressor at: https://dagshub.com/Brycenvn/MLOps-course.mlflow/#/experiments/2/runs/b95484f8ce654a009d97be7061b3159a.
2024/10/18 17:00:13 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/Brycenvn/MLOps-course.mlflow/#/experiments/2.
2024/10/18 17:00:23 INFO mlflow.tracking._tracking_service.client: 🏃 View run Light Gradient Boosting Machine at: https://dagshub.com/Brycenvn/MLOps-course.mlflow/#/experiments/2/runs/39f1ce768ae842fbbb988f46eb65cfe8.
2024/10/18 17:00:23 INFO mlflow.tracking._tracking_ser

In [6]:
# Persist the preprocessing pipeline together with the selected model to disk
# under the name 'best_diamond_model' (PyCaret appends the .pkl extension).
# The trailing semicolon keeps the returned (pipeline, filename) tuple from
# being echoed as a long repr; the success message is still printed.
save_model(best, 'best_diamond_model');

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=Memory(location=None),
          steps=[('numerical_imputer',
                  TransformerWrapper(include=['Carat Weight'],
                                     transformer=SimpleImputer())),
                 ('categorical_imputer',
                  TransformerWrapper(include=['Cut', 'Color', 'Clarity',
                                              'Polish', 'Symmetry', 'Report'],
                                     transformer=SimpleImputer(strategy='most_frequent'))),
                 ('ordinal_encoding',
                  TransformerWrapper(include=['Rep...
                 ('onehot_encoding',
                  TransformerWrapper(include=['Cut', 'Color', 'Clarity',
                                              'Polish', 'Symmetry'],
                                     transformer=OneHotEncoder(cols=['Cut',
                                                                     'Color',
                                                                     'Clarity',
