In [1]:
import pandas as pd 
import mlflow 

from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_wine
from sklearn.metrics import accuracy_score 

In [2]:
print(f'The current MLFlow version: {mlflow.__version__}')

The current MLFlow version: 2.22.0


In [3]:
mlflow.set_tracking_uri("http://127.0.0.1:5001")
mlflow.set_experiment('wine_model')

<Experiment: artifact_location='mlflow-artifacts:/1', creation_time=1745739033781, experiment_id='1', last_update_time=1745739033781, lifecycle_stage='active', name='wine_model', tags={}>

In [4]:
wine = load_wine()
df_wine = pd.DataFrame(data=wine.data, columns=wine.feature_names)
df_wine.head(3)

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0


In [5]:
# Specify target and feature values 
y = wine.target
X = df_wine

# Fit model 
dt = DecisionTreeClassifier(max_depth=4)
dt.fit(X, y)

In [6]:
# Predict values 
score = accuracy_score(y, dt.predict(df_wine))
print(f'Accuracy score: {score:.4f}')

Accuracy score: 0.9888


In [7]:
# Train with Descion Tree 

with mlflow.start_run(): 
    # Log parameters and metrics 

    mlflow.set_tags({"Model": "decision-tree", "Train Data": "all-data"})

    tree_depth = 5 
    dt = DecisionTreeClassifier(max_depth=tree_depth)
    dt.fit(X, y)
    acc = accuracy_score(y, dt.predict(df_wine))

    mlflow.log_param('max_depth', tree_depth)
    mlflow.log_metric('accuracy', acc)

mlflow.end_run()

🏃 View run merciful-sloth-836 at: http://127.0.0.1:5001/#/experiments/1/runs/b23de22f35fb4836bbdb9c4c5e45128a
🧪 View experiment at: http://127.0.0.1:5001/#/experiments/1


In [8]:
from sklearn.ensemble import RandomForestClassifier

# Train with Random Forest 
with mlflow.start_run():

    mlflow.set_tags({"Model": "random-forest", "Train Data": "all-data"})

    ntree = 1000
    mtry = 4 

    mlflow.log_param('n_estimators', ntree)
    mlflow.log_param('max_features', mtry)

    rf = RandomForestClassifier(n_estimators = ntree, max_features = mtry, oob_score = True)
    rf.fit(X, y)
    # acc = rf.oob_score_
    acc = accuracy_score(y, rf.predict(X))

    mlflow.log_metric('accuracy', acc)

mlflow.end_run()


🏃 View run agreeable-sloth-563 at: http://127.0.0.1:5001/#/experiments/1/runs/2a99f032823d46e2bc77895008e1c558
🧪 View experiment at: http://127.0.0.1:5001/#/experiments/1


In [9]:
ntrees = [20, 40, 60, 80, 100]
mtrys = [3, 4, 5]

for i in ntrees:
    for j in mtrys:
        with mlflow.start_run():
            mlflow.set_tags({"Model": "randon-forest", "Train-data":"all-data"})

            mlflow.log_params({"n_estimators": i, "max_features": j})

            rf = RandomForestClassifier(n_estimators=i, max_features=j, oob_score=True)
            rf.fit(X, y)
            acc = rf.oob_score_

            mlflow.log_metric('accuracy', acc)
        mlflow.end_run()

🏃 View run bright-grouse-475 at: http://127.0.0.1:5001/#/experiments/1/runs/d16f72636bbd43eb85cc6efc9e45b0f3
🧪 View experiment at: http://127.0.0.1:5001/#/experiments/1
🏃 View run honorable-worm-305 at: http://127.0.0.1:5001/#/experiments/1/runs/01e7f93a84cd44a787b789827c2ba4e2
🧪 View experiment at: http://127.0.0.1:5001/#/experiments/1
🏃 View run gentle-newt-68 at: http://127.0.0.1:5001/#/experiments/1/runs/feb24daf58ab4452ad301d60f4aba58f
🧪 View experiment at: http://127.0.0.1:5001/#/experiments/1
🏃 View run powerful-fowl-161 at: http://127.0.0.1:5001/#/experiments/1/runs/eac5a2b3e0eb4d78bf001127c12f0925
🧪 View experiment at: http://127.0.0.1:5001/#/experiments/1
🏃 View run gaudy-croc-513 at: http://127.0.0.1:5001/#/experiments/1/runs/b5e0827e4b434b638dbdf5c15cf7e63a
🧪 View experiment at: http://127.0.0.1:5001/#/experiments/1
🏃 View run beautiful-snipe-901 at: http://127.0.0.1:5001/#/experiments/1/runs/99e0df4e76894fcaa1e22cc61c8332d6
🧪 View experiment at: http://127.0.0.1:5001/#/exp

## Training a model with mlflow and hyperopt

In [10]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score, train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True)


In [11]:
def objective(params): 
    with mlflow.start_run():
        
        classifier_type = params['type']
        del params['type']
        if classifier_type == 'dt': 
            clf = DecisionTreeClassifier(**params)
        elif classifier_type == 'rf':
            clf = RandomForestClassifier(**params)
        else:
            return 0
        acc = cross_val_score(clf, X, y).mean()

        mlflow.set_tag("Model", classifier_type)
        mlflow.log_params(params)
        mlflow.log_metric("accuracy", acc)
        mlflow.end_run()
        return {'loss': -acc, 'status': STATUS_OK}
    
search_space = hp.choice('classifier_type', [
    {
        'type': 'dt',
        'criterion': hp.choice('dtree_criterion', ['gini', 'entropy']),
        'max_depth': hp.choice('dtree_max_depth', [None, hp.randint('dtree_max_depth_int', 1, 10)]),
        'min_samples_split': hp.randint('dtree_min_samples_split', 2, 10)
    },
    {
        'type': 'rf',
        'n_estimators': hp.randint('rf_n_estimators', 20, 500),
        'max_features': hp.randint('rf_max_features', 2, 9),
        'criterion': hp.choice('criterion', ['gini', 'entropy'])
    }
])


algo = tpe.suggest
trials = Trials()

In [12]:
best_result = fmin(
    fn=objective,
    space=search_space,
    algo=algo,
    max_evals=32,
    trials=trials
)

🏃 View run rare-penguin-179 at: http://127.0.0.1:5001/#/experiments/1/runs/8c53d5fb845e46709102f1c853005629

🧪 View experiment at: http://127.0.0.1:5001/#/experiments/1

🏃 View run sneaky-perch-613 at: http://127.0.0.1:5001/#/experiments/1/runs/e137f3c924174482832a2862ab553b0e

🧪 View experiment at: http://127.0.0.1:5001/#/experiments/1                     

🏃 View run unruly-cow-121 at: http://127.0.0.1:5001/#/experiments/1/runs/1c82ec0a896446d0bb0fc1c3461e2527

🧪 View experiment at: http://127.0.0.1:5001/#/experiments/1                     

🏃 View run smiling-hog-189 at: http://127.0.0.1:5001/#/experiments/1/runs/3ed43936078047cc879be2005e74c397

🧪 View experiment at: http://127.0.0.1:5001/#/experiments/1                     

🏃 View run luminous-fish-859 at: http://127.0.0.1:5001/#/experiments/1/runs/fa6094182e214a57b012bc70cd3af433

🧪 View experiment at: http://127.0.0.1:5001/#/experiments/1                     

🏃 View run amazing-sponge-45 at: http://127.0.0.1:5001/#/experiments

In [13]:
best_result

{'classifier_type': np.int64(1),
 'criterion': np.int64(0),
 'rf_max_features': np.int64(2),
 'rf_n_estimators': np.int64(350)}

### Using Autologging 

In [14]:
import mlflow.sklearn
import mlflow.sklearn


with mlflow.start_run():
    mlflow.sklearn.autolog()
    tree_depth = 5
    dt = DecisionTreeClassifier(max_depth=tree_depth)
    dt.fit(X_train, y_train)
    mlflow.sklearn.autolog(disable=True)
mlflow.end_run()

🏃 View run adaptable-cod-817 at: http://127.0.0.1:5001/#/experiments/1/runs/065f6e059f1c45cfb052ac219ed3cfbb
🧪 View experiment at: http://127.0.0.1:5001/#/experiments/1


## Artifact Tracking and Model Registry 

In [17]:
import os 
os.makedirs('../save_data', exist_ok=True)
X_train.to_parquet('../save_data/x_train.parquet')
mlflow.log_artifact('../save_data/x_train.parquet')

In [18]:
X_test.to_parquet('../save_data/x_test.parquet')
mlflow.log_artifacts('../save_data/')