In [28]:
import os
import pickle

import matplotlib.pyplot as plt
import mlflow
from sklearn import datasets
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import average_precision_score, f1_score, plot_roc_curve, roc_auc_score
from sklearn.model_selection import cross_val_score, train_test_split

In [10]:
# Synthetic classification dataset: 500 samples with 10 features each.
# The fixed random_state keeps the generated data identical between runs.
x, y = datasets.make_classification(
    n_samples=500,
    n_features=10,
    class_sep=0.75,
    random_state=42,
)

# Hold out 10% of the samples for testing; stratifying on y keeps the class
# proportions the same in both splits.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    stratify=y,
    random_state=42,
)

### Parameter cell

- Parameters are set within a single cell
- That cell is tagged with 'parameters'
- The parameter values are then supplied as a dictionary from another notebook

In [1]:
# Default run parameters (this is the cell tagged 'parameters').

# MLflow experiment the run is recorded under (passed to mlflow.start_run).
experiment_id = '1'

# Hyper-parameters handed to GradientBoostingClassifier.
n_estimators = 500
max_depth = 2
lr = 1.0  # used as learning_rate

### MLflow tracking

- Tracking is initialized by `mlflow.start_run` with the respective experiment ID
- Metrics and parameters are logged as key-value pairs
- Artifacts, which are arbitrary files, are logged by passing their local path
- Everything logged inside the `with` block is attached to that run

In [22]:
# Train a gradient-boosting classifier and record its hyper-parameters,
# evaluation metrics and artifacts (ROC plot, pickled model, notebook) in a
# single MLflow run of the configured experiment.
with mlflow.start_run(experiment_id=experiment_id) as run:

    gb = GradientBoostingClassifier(n_estimators=n_estimators, max_depth=max_depth, learning_rate=lr)

    mlflow.log_param( 'Learning Rate', lr )
    mlflow.log_param( 'n_estimators', n_estimators )
    mlflow.log_param( 'max_depth', max_depth )

    # cross_val_score fits clones of `gb`, so the estimator itself still has
    # to be fitted on the full training split afterwards.
    cv = cross_val_score( gb, X=x_train, y=y_train, cv=5, n_jobs=-1, verbose=2 )
    gb.fit(x_train, y_train)

    # Hard class predictions, computed once, for the threshold-based F1 metric.
    y_pred = gb.predict(x_test)
    f1 = f1_score( y_test, y_pred )
    mlflow.log_metric( 'F1-Score', f1 )

    # ROC-AUC is a ranking metric: score it with the positive-class
    # probabilities. Passing hard labels would collapse the ROC curve to a
    # single threshold and disagree with the curve plotted below.
    y_score = gb.predict_proba(x_test)[:, 1]
    roc_auc = roc_auc_score( y_test, y_score )
    mlflow.log_metric( 'ROC-AUC Score', roc_auc )

    average_cv = cv.mean()
    mlflow.log_metric( 'Average CV score', average_cv )

    # ROC curve plot, saved locally and then attached to the run.
    # NOTE(review): plot_roc_curve was removed in scikit-learn 1.2;
    # RocCurveDisplay.from_estimator is its replacement if the environment
    # is upgraded.
    plot_path = 'artifacts_dir/plots/roc_auc_curve_{}_{}_{}.png'.format(n_estimators, max_depth, lr)
    os.makedirs(os.path.dirname(plot_path), exist_ok=True)  # savefig does not create directories
    plot_roc_curve( gb, x_test, y_test )
    plt.savefig(plot_path)
    mlflow.log_artifact(plot_path)

    # Pickled model. The context manager guarantees the file is flushed and
    # closed before MLflow copies it.
    model_path = 'artifacts_dir/models/model_{}_{}_{}.pckl'.format(n_estimators, max_depth, lr)
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    with open(model_path, 'wb') as model_file:
        pickle.dump(gb, model_file)
    mlflow.log_artifact( model_path )

    # The notebook file is not created by this cell; presumably it is written
    # by whatever executes this notebook with the given parameters -- confirm.
    nb_path = 'artifacts_dir/notebooks/boosting_example_{}_{}_{}.ipynb'.format(n_estimators, max_depth, lr)
    mlflow.log_artifact( nb_path )