<img src="https://databricks.com/wp-content/uploads/2019/10/model-registry-new.png" height = 1200 width = 800>

### Read and prep data

In [0]:
import pandas as pd
import numpy as np
import mlflow
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Read data: pull the churn feature table into pandas and discard the
# customerID column so it never reaches the feature matrices below.
data = (
    spark.table("sr_ibm_telco_churn.churn_features")
    .toPandas()
    .drop(columns=["customerID"])
)

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
train, test = train_test_split(data, test_size=0.30, random_state=206)
colLabel = 'churn'

# The predicted column is colLabel, the churn flag (0/1 in the sample rows displayed below).
# Everything else in the frame is used as a feature.
train_x = train.drop(columns=[colLabel])
test_x = test.drop(columns=[colLabel])
train_y = train[colLabel]
test_y = test[colLabel]

display(data)

seniorCitizen,tenure,monthlyCharges,totalCharges,churn,gender_Female,gender_Male,partner_No,partner_Yes,dependents_No,dependents_Yes,phoneService_No,phoneService_Yes,multipleLines_No,multipleLines_Nophoneservice,multipleLines_Yes,internetService_DSL,internetService_Fiberoptic,internetService_No,onlineSecurity_No,onlineSecurity_Nointernetservice,onlineSecurity_Yes,onlineBackup_No,onlineBackup_Nointernetservice,onlineBackup_Yes,deviceProtection_No,deviceProtection_Nointernetservice,deviceProtection_Yes,techSupport_No,techSupport_Nointernetservice,techSupport_Yes,streamingTV_No,streamingTV_Nointernetservice,streamingTV_Yes,streamingMovies_No,streamingMovies_Nointernetservice,streamingMovies_Yes,contract_Month-to-month,contract_Oneyear,contract_Twoyear,paperlessBilling_No,paperlessBilling_Yes,paymentMethod_Banktransfer-automatic,paymentMethod_Creditcard-automatic,paymentMethod_Electroniccheck,paymentMethod_Mailedcheck
0.0,1.0,29.85,29.85,0,1,0,0,1,1,0,1,0,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0
0.0,34.0,56.95,1889.5,0,0,1,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,1,0,0,0,0,1
0.0,2.0,53.85,108.15,1,0,1,1,0,1,0,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,0,1
0.0,45.0,42.3,1840.75,0,0,1,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0
0.0,2.0,70.7,151.65,1,1,0,1,0,1,0,0,1,1,0,0,0,1,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0
0.0,8.0,99.65,820.5,1,1,0,1,0,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,1,1,0,0,0,1,0,0,1,0
0.0,22.0,89.1,1949.4,0,0,1,1,0,0,1,0,1,0,0,1,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,0,0,1,1,0,0,1,0,0,0,1,0,1,0,0
0.0,10.0,29.75,301.9,0,1,0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,0,0,1
0.0,28.0,104.8,3046.05,1,1,0,0,1,1,0,0,1,0,0,1,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,1,0,0,1,0
0.0,62.0,56.15,3487.95,0,0,1,1,0,0,1,0,1,1,0,0,1,0,0,0,0,1,0,0,1,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,1,0,1,0,0,0


### Fit model and log with MLflow

##### Wrappers around your training code

In [0]:
# Set experiment (workspace path under which the runs below are recorded)
mlflow.set_experiment("/Users/rafi.kurlansik@databricks.com/first_churn_experiment")

# Hyperparameters are defined once so that the values logged to MLflow are
# always the values the classifier was actually built with.
tree_params = {"max_depth": 4, "max_leaf_nodes": 32}

# Begin training run
with mlflow.start_run(run_name="sklearn") as run:
    # run_id is kept as a notebook variable: the registry cell uses it to build
    # the "runs:/..." URI. `run.info.run_id` replaces the deprecated `run_uuid`.
    run_id = run.info.run_id
    print("MLflow:")
    print("  run_id:",run_id)
    print("  experiment_id:",run.info.experiment_id)

    # Fit model
    model = DecisionTreeClassifier(**tree_params)
    model.fit(train_x, train_y)
    predictions = model.predict(test_x)

    # Get metrics -- scikit-learn metrics take (y_true, y_pred) in that order
    acc = accuracy_score(test_y, predictions)
    print("Metrics:")
    print("  mean accuracy:",acc)

    # Log parameters and the evaluation metric
    mlflow.log_params(tree_params)
    mlflow.log_metric("mean_acc", acc)

    # Log the fitted estimator under the artifact path "sklearn-model"
    mlflow.sklearn.log_model(model, "sklearn-model")

##### With auto-logging

In [0]:
# Enable MLflow's scikit-learn autologging before the estimator is fit
mlflow.sklearn.autolog()

# Same tree settings as the manually logged run, but with no explicit
# mlflow.log_* calls in this cell
tree_settings = dict(max_depth=4, max_leaf_nodes=32)
model = DecisionTreeClassifier(**tree_settings)
model.fit(train_x, train_y)

##### With AUTO-auto-logging :)

In [0]:
# No mlflow call appears in this cell: any tracking of this fit is left to
# Databricks Autologging, which the text following this cell describes.
model = DecisionTreeClassifier(
    max_depth=4,
    max_leaf_nodes=32,
)
model.fit(train_x, train_y)

Wait, what?  What kind of magic is this?

**Databricks Autologging is a no-code solution that extends MLflow automatic logging to deliver automatic experiment tracking for machine learning training sessions on Databricks.** 

With Databricks Autologging, model parameters, metrics, files, and lineage information are automatically captured when you train models from a variety of popular machine learning libraries. Training sessions are recorded as MLflow tracking runs. Model files are also tracked so you can easily log them to the MLflow Model Registry and deploy them for real-time scoring with MLflow Model Serving.

##### Autologging options and configuration

In [0]:
# Gather the autologging switches in one mapping, then pass them to mlflow.autolog.
# The per-option notes paraphrase the MLflow documentation -- confirm them
# against the MLflow version installed on the cluster.
autolog_options = {
    "log_input_examples": False,               # do not store sample inputs with the model
    "log_model_signatures": True,              # record input/output signatures with the model
    "log_models": True,                        # save trained models as run artifacts
    "disable": False,                          # keep autologging switched on
    "exclusive": True,                         # keep autologged content out of user-created runs
    "disable_for_unsupported_versions": True,  # skip library versions MLflow has not been tested with
    "silent": True,                            # suppress autologging event logs and warnings
}
mlflow.autolog(**autolog_options)

### MLflow Model Registry

<img src="https://databricks.com/wp-content/uploads/2019/10/model-registry-new.png" height = 1200 width = 800>

##### Promote to Registry

In [0]:
import mlflow.pyfunc

# Grab the run ID from a prior run to promote artifact in tracking server to registry.
# The last URI segment must be the artifact path the training cell passed to
# mlflow.sklearn.log_model(), which is "sklearn-model" (not "model").
model_uri = f"runs:/{run_id}/sklearn-model"
model_details = mlflow.register_model(model_uri, "rk_churn")

##### Load from Registry

In [0]:
# Load the model version that was just registered and predict!
# The name and version come from model_details (returned by mlflow.register_model)
# rather than a hard-coded "1", so re-running the notebook scores with the
# version created in this session instead of a stale earlier one.
model = mlflow.pyfunc.load_model(
    f"models:/{model_details.name}/{model_details.version}"
)
model.predict(test_x)