In [1]:
import os
import pickle

import matplotlib.pyplot as plt
import mlflow
from sklearn import datasets
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import average_precision_score, f1_score, plot_roc_curve, roc_auc_score
from sklearn.model_selection import cross_val_score, train_test_split

## The typical workflow

### 1. Setting up a workflow with data and an arbitrary algorithm

In [2]:
# Synthetic binary classification problem; the fixed seed keeps the data identical across runs.
x, y = datasets.make_classification(
    n_samples=500,
    n_features=10,
    class_sep=0.75,
    random_state=42,
)

# Hold out 10% of the samples for testing, stratified on the labels.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=42,
    stratify=y,
)

### 2. Pre-defining parameters for modeling

In [1]:
# Hyper-parameters handed to GradientBoostingClassifier in the modeling cell.
# They are also embedded in the names of the metric, plot and model files.
lr = 1.0            # passed as learning_rate
max_depth = 2       # passed as max_depth
n_estimators = 500  # passed as n_estimators

### 3. Run the algorithm and store the results

In [22]:
# Fit model via Cross-Validation
# A fixed random_state keeps repeated runs with the same parameters comparable.
gb = GradientBoostingClassifier(
    n_estimators=n_estimators,
    max_depth=max_depth,
    learning_rate=lr,
    random_state=42,
)
# cross_val_score fits clones of the estimator, so `gb` still has to be
# fitted on the full training set before it can predict.
cv = cross_val_score(gb, X=x_train, y=y_train, cv=5, n_jobs=-1, verbose=2)
gb.fit(x_train, y_train)

# Generate metrics for evaluation
y_pred = gb.predict(x_test)
# ROC AUC is a ranking metric: it needs continuous scores (positive-class
# probability), not the thresholded labels returned by predict().
y_score = gb.predict_proba(x_test)[:, 1]
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_score)
average_cv = cv.mean()

# Save metrics in an arbitrary file
metrics_path = 'Metrics_Boosting_{}_{}_{}.txt'.format(n_estimators, max_depth, lr)
with open(metrics_path, 'w') as f:
    f.write('F1 Score: {}\n'.format(f1))
    f.write('ROC AUC Score: {}\n'.format(roc_auc))
    # No `scoring` was passed to cross_val_score, so the folds were scored with
    # the classifier's default metric (accuracy), not precision.
    f.write('Average CV Accuracy: {}\n'.format(average_cv))

# Save some plots
plot_path = 'artifacts_dir/plots/roc_auc_curve_{}_{}_{}.png'.format(n_estimators, max_depth, lr)
# Create the target directory up front; savefig does not create missing folders.
os.makedirs(os.path.dirname(plot_path), exist_ok=True)
plot_roc_curve(gb, x_test, y_test)
plt.savefig(plot_path)

# Save the model
model_path = 'artifacts_dir/models/model_{}_{}_{}.pckl'.format(n_estimators, max_depth, lr)
os.makedirs(os.path.dirname(model_path), exist_ok=True)
# The context manager guarantees the file is flushed and closed after dumping.
with open(model_path, 'wb') as model_file:
    pickle.dump(gb, model_file)

### 4. Revisit results and optimize model 

- Change notebook or algorithm based on the results

_______________________

### What is so problematic about such a workflow?

- It is very easy to lose track of runs and their results
- Sorting and comparing results requires yet another script
- Nothing is standardized; everyone does it differently
- Re-tuning the script overwrites previous versions (unless Git is used)

### What if we could...

- Define only one notebook and run it several times with different parameters?
- Save each individual notebook in order to revisit it later on?
- Keep track of each run and visualize results as well as sort those?
- Store the individual files that belong to each run?

### Thanks to the IT gods, there are solutions!

1. Papermill - Automated run of notebooks with varying parameters/functions
2. MLflow - Tracking, organising and packaging of Machine Learning (or any other) projects.

-----------