In [None]:
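# The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality
# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
# Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.
#
# NOTE(review): the cells below load "../data/airbnb-cleaned-mlflow.csv" and predict the
# "price" column, not the wine-quality data cited above -- TODO confirm whether this
# citation still applies to the notebook.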

import os
import warnings
import sys

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet

In [None]:
# Seed NumPy's global random generator; train_test_split below is called
# without a random_state of its own.
np.random.seed(40)

# Read the cleaned Airbnb CSV (the path is relative to the notebook's working directory).
# The first three rows are dropped; the reason is not recorded here -- TODO confirm.
data = (
    pd.read_csv("../data/airbnb-cleaned-mlflow.csv")
    .iloc[3:]
)

# Split into training and test sets using train_test_split's default proportions
# (0.75 / 0.25).
train, test = train_test_split(data)

# The target column is "price"; every other column is used as a feature.
# The targets are kept as single-column DataFrames.
train_x = train.drop(columns=["price"])
test_x = test.drop(columns=["price"])
train_y = train.loc[:, ["price"]]
test_y = test.loc[:, ["price"]]

# Hyper-parameters of the baseline ElasticNet model (also printed with the metrics later).
alpha = 1.0
l1_ratio = 0.1

lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)


In [None]:
# Fit the baseline ElasticNet model on the training split.
lr.fit(train_x, train_y)

In [None]:
def eval_metrics(actual, pred):
    """Compute regression error metrics for a set of predictions.

    Args:
        actual: Ground-truth target values.
        pred: Predicted target values, aligned with ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error and the R^2 score, in that order.
    """
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse), mean_absolute_error(actual, pred), r2_score(actual, pred)

# Predict on the held-out rows. The variable keeps its historical name even
# though the model predicts the "price" column.
predicted_qualities = lr.predict(test_x)

rmse, mae, r2 = eval_metrics(test_y, predicted_qualities)

# Report the hyper-parameters followed by the three test-set metrics.
print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
print(
    "\n".join(
        [
            "  RMSE: %s" % rmse,
            "  MAE: %s" % mae,
            "  R2: %s" % r2,
        ]
    )
)

## Cross Validation via GridSearchCV

In [None]:
from sklearn import svm, datasets
from sklearn.model_selection import GridSearchCV

# Candidate values for the two ElasticNet hyper-parameters; every combination
# is evaluated with 5-fold cross validation.
parameters = {
    "alpha": (1.0, 0.5, 0.1),
    "l1_ratio": [0.1, 0.5, 1.0],
}

# return_train_score=True adds the mean/std train scores to cv_results_.
cv = GridSearchCV(
    estimator=lr,
    param_grid=parameters,
    cv=5,
    return_train_score=True,
)
cv.fit(train_x, train_y)

## Log the results of the grid search to the tracking server

In [None]:
# Display the raw results of the fitted grid search (the same dict that track() reads below).
cv.cv_results_

In [None]:
import mlflow
import mlflow.sklearn
import os

def track(clf, name):
    """Log every candidate of a fitted grid search as its own MLflow run.

    For each candidate the number of folds, the candidate's hyper-parameter
    values and its cross-validation scores are logged. For the best candidate
    the refitted best estimator is additionally logged as a model, and the
    complete results table (sorted by rank) is written to
    ``./<name>/cv_results.csv`` and logged as an artifact.

    Args:
        clf: A fitted grid-search object exposing ``param_grid`` (a dict),
            ``cv``, ``cv_results_``, ``best_index_`` and ``best_estimator_``.
        name: Name of the local directory (created under the current working
            directory if missing) that receives ``cv_results.csv``.

    Returns:
        None. Progress is printed to stdout.
    """
    cv_results = clf.cv_results_
    param_names = list(clf.param_grid.keys())
    best_index = clf.best_index_

    # Train scores are only present when the search was run with
    # return_train_score=True, so each metric is logged only if available.
    metric_names = (
        "rank_test_score",
        "mean_train_score",
        "std_train_score",
        "mean_test_score",
        "std_test_score",
    )

    for i in range(len(cv_results["rank_test_score"])):
        is_best = i == best_index
        run_name = "run %d (best run):" % i if is_best else "run %d:" % i
        print(run_name)

        with mlflow.start_run(run_name=run_name, nested=True):
            mlflow.log_param("folds", clf.cv)
            for param in param_names:
                value = cv_results["param_%s" % param][i]
                print("  -", param, value)
                mlflow.log_param(param, value)

            for metric in metric_names:
                if metric in cv_results:
                    mlflow.log_metric(metric, cv_results[metric][i])

            if is_best:
                # Use the estimator of the search object that was passed in,
                # not whatever notebook global happens to be called `cv`.
                mlflow.sklearn.log_model(clf.best_estimator_, "model")

                local_path = os.path.join(".", str(name))
                local_csv = os.path.join(local_path, "cv_results.csv")
                os.makedirs(local_path, exist_ok=True)
                pd.DataFrame(cv_results).sort_values(by="rank_test_score").to_csv(
                    local_csv, index=False
                )
                mlflow.log_artifact(local_csv, "cv_results")
    print("Done")

**Local tracking server**

```bash
cd /opt/mlflow-tracking-server/
mkdir -p backend
mkdir -p artifacts
mlflow server --backend-store-uri ./backend --default-artifact-root ./artifacts/  --host 0.0.0.0
```

**In the project folder**

```bash
ln -s /opt/mlflow-tracking-server/artifacts artifacts
```

In [None]:
# Point the MLflow client at the locally started tracking server.
# NOTE(review): assumes the server listens on port 5000 -- TODO confirm.
mlflow.set_tracking_uri("http://0.0.0.0:5000")

# Select the experiment under which the runs are recorded.
experiment="airbnb-jupyter"
mlflow.set_experiment(experiment)

# Log one run per grid-search candidate; "airbnb" is the local directory name
# that track() uses for the exported cv_results.csv.
track(cv, "airbnb")

In [None]:
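# Alternative: log to a Databricks tracking server instead of the local one.
# Uncomment the lines below to use it ("westeu" is presumably a Databricks
# profile name -- TODO confirm).
#mlflow.set_tracking_uri("databricks://westeu")

#experiment="/Shared/experiments/airbnb-jupyter"
#mlflow.set_experiment(experiment)

# track() requires the local output directory name as its second argument.
#track(cv, "airbnb")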