# Introducción a MLFlow y Databricks: acelerando el Machine Learning LifeCycle - Python Sevilla 2019

## MLFlow Tracking

### Basic example

In [1]:
import mlflow

In [2]:
# Point the MLflow client at the tracking server on localhost:5000 and
# select the 'test_1' experiment for the runs in this section.
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment('test_1')

In [3]:
# Open a run explicitly and keep a handle to it; it is closed by the
# mlflow.end_run() cell further down.
run =  mlflow.start_run()
# Alternative: `with mlflow.start_run() as run:` (the context-manager form
# used by the model functions later in this notebook).

In [4]:
# Record one parameter and one metric on the currently active run.
mlflow.log_param('param1', 1)
mlflow.log_metric('metric1', 2)

In [5]:
# Same two calls as the previous cell, issued again against the same run.
# NOTE(review): presumably kept to show that logging can be repeated within
# a run -- confirm this duplicate cell is intentional.
mlflow.log_param('param1', 1)
mlflow.log_metric('metric1', 2)

In [6]:
# Close the run that was opened with mlflow.start_run() above.
mlflow.end_run()

### Breast cancer: Scikit-learn

In [7]:
# Switch to the 'breast_cancer' experiment for the model runs that follow.
mlflow.set_experiment('breast_cancer')

In [8]:
import numpy as np
import pandas
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import warnings
# Blanket 'ignore' filter: every warning category is silenced from here on,
# including deprecation warnings raised by the libraries used below.
warnings.filterwarnings('ignore')

In [9]:
# Load the scikit-learn breast-cancer dataset and list the fields of the
# returned object.
cancer = load_breast_cancer()
cancer.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])

In [10]:
# Feature matrix and label vector as NumPy arrays; print both shapes as a
# quick sanity check.
X = np.array(cancer.data)
y = np.array(cancer.target)
print(f'X: {X.shape}, y: {y.shape}')

X: (569, 30), y: (569,)


In [11]:
# Train on 426 samples and hold out 143 for testing (426 + 143 = 569, the
# full dataset); random_state=0 makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(X, y, train_size=426, test_size=143, random_state=0)

In [12]:
# Feature scaling: fit the scaler on the training split only, then apply the
# same fitted transform to the test split.
# NOTE: x_train / x_test are overwritten with their scaled versions, so the
# unscaled splits are no longer available after this cell.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

In [17]:
# Model 1: Logistic Regression
def breast_cancer_lr(solver="lbfgs", C=1.0):
    """Fit a logistic regression inside a fresh MLflow run and track it.

    The run records the ``solver`` and ``C`` parameters, a "model type" tag,
    the accuracy on the module-level test split and the fitted model
    (artifact path "model"). Accuracy and the run id are also printed.

    Args:
        solver: Passed to ``LogisticRegression(solver=...)``.
        C: Passed to ``LogisticRegression(C=...)``.
    """
    import mlflow.sklearn
    from sklearn.linear_model import LogisticRegression

    with mlflow.start_run() as active:
        classifier = LogisticRegression(solver=solver, C=C)

        # Hyper-parameters first, in the same order as before.
        for key, value in (("solver", solver), ("C", C)):
            mlflow.log_param(key, value)
        mlflow.set_tag("model type", "sklearn - LogisticRegression")

        classifier.fit(x_train, y_train)
        test_accuracy = classifier.score(x_test, y_test)
        print("Accuracy: %s" % test_accuracy)
        mlflow.log_metric("accuracy", test_accuracy)

        mlflow.sklearn.log_model(classifier, "model")
        # `active` is the run opened by this `with` block, i.e. the active run.
        print("Model saved in run %s" % active.info.run_uuid)

In [18]:
# Three logistic-regression runs: defaults, liblinear solver, and liblinear
# with C=0.5.
breast_cancer_lr()
breast_cancer_lr(solver="liblinear")
breast_cancer_lr(solver="liblinear", C=0.5)

Accuracy: 0.958041958041958
Model saved in run b191ff233bb842468dbf1f46fcac5de7
Accuracy: 0.958041958041958
Model saved in run c67d75d18f9640f7a673fa1a5f2ab4a5
Accuracy: 0.972027972027972
Model saved in run ef593013b297488aa90b547cd400b617


In [19]:
# Model 2: Random Forest
def breast_cancer_rf(n_estimators=100, max_depth=2, criterion="gini", random_state=0):
    """Fit a random forest inside a fresh MLflow run and track it.

    The run records the hyper-parameters, a "model type" tag, the accuracy on
    the module-level test split and the fitted model (artifact path "model").
    Accuracy and the run id are also printed.

    Args:
        n_estimators: Number of trees in the forest.
        max_depth: Maximum depth of each tree.
        criterion: Split criterion passed to ``RandomForestClassifier``.
        random_state: Seed for the forest. It is logged with the run so that a
            tracked result can be reproduced from its parameters; pass
            ``None`` for an unseeded forest.
    """
    from sklearn.ensemble import RandomForestClassifier
    import mlflow.sklearn
    with mlflow.start_run() as run:
        clf = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            criterion=criterion,
            random_state=random_state,
        )
        mlflow.log_param("n_estimators", n_estimators)
        mlflow.log_param("max_depth", max_depth)
        mlflow.log_param("criterion", criterion)
        # Without the seed the logged params do not determine the result.
        mlflow.log_param("random_state", random_state)
        mlflow.set_tag("model type", "sklearn - RandomForest")
        clf.fit(x_train, y_train)
        accuracy = clf.score(x_test, y_test)
        print("Accuracy: %s" % accuracy)
        mlflow.log_metric("accuracy", accuracy)
        mlflow.sklearn.log_model(clf, "model")
        print("Model saved in run %s" % run.info.run_uuid)

In [20]:
# Three random-forest runs: defaults, max_depth=5, and 500 trees with the
# entropy criterion.
breast_cancer_rf()
breast_cancer_rf(max_depth=5)
breast_cancer_rf(n_estimators=500, criterion="entropy")

Accuracy: 0.965034965034965
Model saved in run b8fe5a2fdcc94cb8986c65413706ebfe
Accuracy: 0.965034965034965
Model saved in run 2d1e13a4e7af4a8ca79e97e6d7459434
Accuracy: 0.958041958041958
Model saved in run 44a805bc3816468291d086844cb4c428


In [53]:
# Model 3: Keras
from keras.callbacks import Callback
class LossHistory(Callback):
    """Keras callback that streams per-epoch metrics to the active MLflow run.

    After every epoch it logs the training loss and training accuracy reported
    by Keras, then evaluates the model on the module-level ``x_test`` /
    ``y_test`` split via ``validate_keras`` and logs precision, recall and
    accuracy for that split. Training losses are also kept in ``self.losses``.
    """

    def on_train_begin(self, logs=None):
        """Reset the list of per-epoch training losses."""
        self.losses = []

    def on_epoch_end(self, epoch, logs=None):
        """Log training and test-split metrics for ``epoch``.

        Args:
            epoch: Zero-based epoch index; used as the MLflow ``step``.
            logs: Metrics dict supplied by Keras; may be ``None``.
        """
        logs = logs or {}
        loss = logs.get('loss')
        # The key is 'accuracy' here; older Keras releases report it as 'acc'.
        acc = logs.get('accuracy', logs.get('acc'))
        # Skip missing values: mlflow.log_metric cannot take None.
        if loss is not None:
            mlflow.log_metric("loss", loss, step=epoch)
        if acc is not None:
            # This is the accuracy on the training data (fit() is called
            # without validation data), so it must not be named "val_accuracy".
            mlflow.log_metric("train_accuracy", acc, step=epoch)
        precision, recall, accuracy = validate_keras(self.model, x_test, y_test)
        mlflow.log_metric("precision", precision, step=epoch)
        mlflow.log_metric("recall", recall, step=epoch)
        mlflow.log_metric("accuracy", accuracy, step=epoch)
        self.losses.append(loss)
        
        
def validate_keras(model, x_test, y_test):
    """Compute precision, recall and accuracy of a binary classifier.

    Predictions are thresholded at 0.5; label 1 is treated as the positive
    class.

    Args:
        model: Object whose ``predict(x_test)`` returns an array of scores.
        x_test: Features passed to ``model.predict``.
        y_test: True 0/1 labels, one per sample.

    Returns:
        Tuple ``(precision, recall, accuracy)`` of floats. A ratio whose
        denominator is zero (e.g. precision when nothing is predicted
        positive) is reported as 0.0 instead of nan.
    """
    from sklearn.metrics import confusion_matrix

    y_pred = (model.predict(x_test) > 0.5).astype(int).ravel()
    # labels=[0, 1] forces a 2x2 matrix even when only one class occurs,
    # so the 4-way unpack below cannot fail.
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()

    def _ratio(numerator, denominator):
        # Guard against 0/0, which would otherwise yield nan.
        return float(numerator) / float(denominator) if denominator else 0.0

    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    accuracy = _ratio(tp + tn, tp + fp + tn + fn)

    return precision, recall, accuracy
    

def breast_cancer_keras(optimizer='adam',dropout=0.00, nb_epoch=20):
    """Train a small Keras MLP inside a fresh MLflow run and track it.

    The network is 30 inputs -> Dense(16, relu) -> Dropout -> Dense(16, relu)
    -> Dropout -> Dense(1, sigmoid), trained with binary cross-entropy on the
    module-level ``x_train`` / ``y_train``. Per-epoch metrics are logged by
    the ``LossHistory`` callback; the trained model is stored under the
    artifact path "model".

    Args:
        optimizer: Optimizer passed to ``model.compile``.
        dropout: Dropout rate applied after each hidden layer.
        nb_epoch: Number of training epochs.
    """
    import mlflow.keras
    from keras.models import Sequential
    from keras.layers import Dense, Dropout
    with mlflow.start_run():
        mlflow.set_tag("model type", "keras - MLP")
        # Log hyper-parameters before training so they are recorded even if
        # fit() fails; nb_epoch is included so runs that differ only in
        # training length can be told apart.
        mlflow.log_param("optimizer", optimizer)
        mlflow.log_param("dropout", dropout)
        mlflow.log_param("nb_epoch", nb_epoch)
        model = Sequential()
        # Adding the input layer and the first hidden layer
        model.add(Dense(units=16, kernel_initializer='uniform', activation='relu', input_dim=30))
        # Adding dropout to prevent overfitting
        model.add(Dropout(rate=dropout))
        # Adding the second hidden layer
        model.add(Dense(units=16, kernel_initializer='uniform', activation='relu'))
        # Adding dropout to prevent overfitting
        model.add(Dropout(rate=dropout))
        # Adding the output layer
        model.add(Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))
        # Compiling the ANN with the requested optimizer (it used to be
        # hard-coded to 'adam', silently ignoring the argument).
        model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
        history = LossHistory()
        # Test-split precision/recall/accuracy are logged per epoch by the
        # callback, so no separate evaluation is needed after fit().
        model.fit(x_train, y_train, batch_size=100, epochs=nb_epoch, callbacks=[history])
        mlflow.keras.log_model(model, "model")

In [50]:
# Keras run with the default arguments (adam, no dropout, 20 epochs).
breast_cancer_keras()

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [54]:
# Same network, trained for 100 epochs instead of 20.
breast_cancer_keras(nb_epoch=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [55]:
# 100 epochs with a dropout rate of 0.25 on both dropout layers.
breast_cancer_keras(dropout=0.25, nb_epoch=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
