**1. Start Local MLflow tracking server:** <br> `mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./mlruns`

Note: the MLflow Model Registry does not work unless run metadata is stored in a SQL database, which is why the command above uses a SQLite backend store.

Optional extra parameters: <br>
`--host 0.0.0.0 -p 5000 --gunicorn-opts "--timeout 180"`

`backend-store-uri` represents the location and type of database we want to use to store high level metadata associated with our runs. <br> `default-artifact-root` specifies a separate path where artifacts should be stored. A separate path is provided for artifacts because artifacts can be very large and therefore may need to be stored in a cloud-based data store such as S3 for some projects. 

**2. Visit MLflow UI url:** <br> `http://127.0.0.1:5000` <br>
Only the "Default" experiment will exist.

**3. Set Tracking URI:** <br>
`mlflow.set_tracking_uri('http://127.0.0.1:5000')` <br>
This is a very important step: it tells MLflow where the model tracking server is.

**4. Create experiment or use existing one**: <br>
`mlflow.set_experiment(_experiment_name)`

**5. Track things:** <br>
E.g. `mlflow.log_metrics(metrics)`

**6. Save model on MLflow Model Registry:** <br>
`model_uri = mlflow.get_artifact_uri("logistic_regression_model")` <br>
`MODEL_NAME = "logistic_regression_model"` <br>
`mv = mlflow.register_model(model_uri, MODEL_NAME)`

**7. Load a model and predict:** <br>
`mlflow.set_tracking_uri('http://127.0.0.1:5000')` <br>
`MODEL_NAME = "logistic_regression_model"` <br>
`version = "4"` <br>
`my_clf = mlflow.pyfunc.load_model(f"models:/{MODEL_NAME}/{version}")`

In [None]:
import os
import mlflow
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score

from sklearn.metrics import (
    accuracy_score,
    f1_score,
)

from feature_engine.encoding import (
    OrdinalEncoder,
    OneHotEncoder,
)

from feature_engine.transformation import (
    YeoJohnsonTransformer,
)

from sklearn.linear_model import LogisticRegression

# Let pandas display up to 25 columns before it starts truncating wide frames.
pd.set_option('display.max_columns', 25)

In [None]:
# Load the churn dataset from a CSV path given relative to the working directory.
data = pd.read_csv("../data/input_data/telco_customer_churn_1.csv")
# Preview the first three rows as the cell output.
data.head(3)

In [None]:
# Replace blank TotalCharges entries with the sentinel '-1' and convert the
# column from string to float.
# Whitespace is stripped first and only values that are then completely empty
# are replaced. The earlier `.str.replace(' ', '-1')` substituted every space
# character, so a padded value such as ' 29.85' turned into '-129.85' and a
# two-space blank into the unparseable '-1-1'. Any other non-numeric text
# still raises in `astype(float)` instead of being silently hidden.
data['TotalCharges'] = data['TotalCharges'].str.strip().replace('', '-1').astype(float)

# Train-Test Split

In [None]:
# Hold out 20% of the rows for testing. The customer identifier and the target
# column are removed from the feature matrix; the fixed seed keeps the split
# reproducible.
features = data.drop(columns=['customerID', 'Churn'])
target = data['Churn']

X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=0
)

# Categorical Features

In [None]:
# Columns handled by the one-hot encoder.
cat_vars_onehot = ['gender', 'Partner', 'Dependents', 'PhoneService', 'PaperlessBilling']
# Columns handled by the ordinal encoder with arbitrary integer codes.
cat_vars_ordinal_arbitrary = [
    'MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup',
    'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
    'Contract', 'PaymentMethod',
]

# Both encoders are fitted on the not-yet-encoded training frame only, so
# nothing is learned from the test split.
ordinal_encoder_arbitrary = OrdinalEncoder(
    encoding_method='arbitrary',
    variables=cat_vars_ordinal_arbitrary,
).fit(X_train, y_train)
onehot_encoder = OneHotEncoder(variables=cat_vars_onehot).fit(X_train)

# Apply the fitted encoders one after the other to both splits.
for encoder in (ordinal_encoder_arbitrary, onehot_encoder):
    X_train = encoder.transform(X_train)
    X_test = encoder.transform(X_test)

# Numerical Features

In [None]:
# Apply a Yeo-Johnson transform to the listed numeric columns. The transform
# is fitted on the training split and then reused unchanged on the test split.
num_vars_yeo_johnson = ['TotalCharges']

yeo_transformer = YeoJohnsonTransformer(variables=num_vars_yeo_johnson)
yeo_transformer.fit(X_train)

X_train = yeo_transformer.transform(X_train)
X_test = yeo_transformer.transform(X_test)

# Target

In [None]:
# Encode the Churn labels as integer class indices. The classes are learned
# from the training labels and the same mapping is applied to the test labels.
le = LabelEncoder()

y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

# Scaling

In [None]:
# Min-max scale every feature using ranges learned from the training split.
# The column labels are captured up front and reattached when the scaled
# output is wrapped back into DataFrames (which get a fresh default index).
feature_columns = X_train.columns

min_max_scaler = MinMaxScaler()
min_max_scaler.fit(X_train)

X_train = pd.DataFrame(min_max_scaler.transform(X_train), columns=feature_columns)
X_test = pd.DataFrame(min_max_scaler.transform(X_test), columns=feature_columns)

# ML

In [None]:
# Imported here so this cell is self-contained: cross_validate scores several
# metrics from a single set of fold fits.
from sklearn.model_selection import cross_validate

# MLflow: tell MLflow where the model tracking server is. Uncomment to set it
# explicitly; otherwise MLflow's default tracking-URI resolution applies.
# mlflow.set_tracking_uri(os.environ["MLFLOW_TRACKING_URI"])

# MLflow: registered-model name, artifact path of the logged model, and
# experiment name
MODEL_NAME = "logistic_regression_model"
_artifact_path = "logistic_regression_model"
_experiment_name = "churn-prediction"
mlflow.set_experiment(_experiment_name)

with mlflow.start_run() as run:

    # MLflow: print run specific info
    print(f"\nActive run_id: {run.info.run_id}")

    # Choose parameters
    param_C = 0.8
    param_max_iter = 200
    clf = LogisticRegression(C=param_C, max_iter=param_max_iter, random_state=0)

    # Train on the whole train set and evaluate on the test set.
    # F1 is macro-averaged so that it is the same metric as the cross-validated
    # "f1_macro" score it is printed and logged next to (the f1_score default
    # would be the binary F1 of the positive class only).
    clf.fit(X_train, y_train)
    y_test_pred = clf.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    test_f1 = f1_score(y_test, y_test_pred, average="macro")

    # Cross validation on the train set: one pass over the 5 folds yields both
    # scores instead of fitting every fold twice.
    cv_results = cross_validate(
        clf, X_train, y_train, cv=5, scoring=("accuracy", "f1_macro")
    )
    cv_accuracy = cv_results["test_accuracy"].mean()
    cv_f1 = cv_results["test_f1_macro"].mean()

    print(f"CV accuracy: {cv_accuracy:.2f}, Test accuracy: {test_accuracy:.2f}\n"
          f"CV f1: {cv_f1:.2f}, Test f1: {test_f1:.2f}")

    # MLflow: Log the metrics
    metrics = {"cv_accuracy": cv_accuracy, "cv_f1": cv_f1, "test_accuracy": test_accuracy, "test_f1": test_f1}
    mlflow.log_metrics(metrics)

    # MLflow: Log the parameters
    params = {"C": param_C, "max_iter": param_max_iter}
    mlflow.log_params(params)

    # MLflow: log the model
    mlflow.sklearn.log_model(clf, _artifact_path)

    # MLflow: save model on MLflow Model Registry.
    # A runs:/ URI is used (rather than the raw artifact location) so the
    # registered model version records the run that produced it.
    model_uri = f"runs:/{run.info.run_id}/{_artifact_path}"
    mv = mlflow.register_model(model_uri, MODEL_NAME)

In [None]:
# Load a registered model back from the MLflow Model Registry.
# mlflow.set_tracking_uri(os.environ["MLFLOW_TRACKING_URI"])

# "latest" asks the registry for the newest version registered under MODEL_NAME.
version = "latest"
model_version_uri = f"models:/{MODEL_NAME}/{version}"
my_clf = mlflow.pyfunc.load_model(model_version_uri)

In [None]:
# Score the held-out features with the reloaded model and print the predictions.
test_predictions = my_clf.predict(X_test)
print(test_predictions)

In [None]:
# Inspect the loaded model's metadata as a dictionary (shown as the cell output).
my_clf.metadata.to_dict()