In [12]:
import logging
import shutil
from pathlib import Path

import mlflow
import mlflow.sklearn
import numpy as np
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [13]:
# Configure root logging so only warnings and above are emitted, then create
# the module-level logger used by the cells below.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

In [24]:
def eval_metrics(actual, pred):
    """Score predictions against ground truth with three regression metrics.

    Args:
        actual: Ground-truth target values.
        pred: Predicted values to compare against ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple:
        root mean squared error (square root of scikit-learn's
        ``mean_squared_error``), mean absolute error, and the R^2
        coefficient of determination.
    """
    # RMSE is derived from the MSE so it is expressed in the target's own units.
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse), mean_absolute_error(actual, pred), r2_score(actual, pred)


# Seed NumPy's global RNG for reproducibility. train_test_split below is called
# without random_state, so it draws from this global RNG.
np.random.seed(40)

# read the csv file from the local directory (needs to have been imported / pulled with DVC)
try:
    data = pd.read_csv("winequality-red.csv", sep=",")
except Exception as e:
    logger.exception(
        "Unable to find CSV file! Have you imported it with DVC? Error: %s", e
    )
    # Re-raise rather than calling exit(-1): `exit` is an interactive-shell helper,
    # not a way to abort a notebook cell. Re-raising stops execution right here and
    # keeps the original error (and traceback) visible to the reader.
    raise

# split the data into training and test sets (0.75 and 0.25, the train_test_split defaults)
train, test = train_test_split(data)

# the column to be predicted is "quality" which is a scalar from [3, 9]
train_x = train.drop(["quality"], axis=1)
test_x = test.drop(["quality"], axis=1)
# double brackets keep the target as a single-column DataFrame
train_y = train[["quality"]]
test_y = test[["quality"]]

# start training loop to test 10 different hyperparameter settings
for x in range(10):

    # set hyperparameters based on loop variable (0.01 to 0.91);
    # alpha and l1_ratio are deliberately swept together
    alpha = 0.01 + x / 10
    l1_ratio = 0.01 + x / 10

    # one MLflow run per hyperparameter setting; the context manager ends the run
    # when the block exits, including when an exception is raised inside it
    with mlflow.start_run():
        # instantiate the model
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        # train the model with the training data
        lr.fit(train_x, train_y)
        # evaluate the model on the test data
        predicted_qualities = lr.predict(test_x)
        # calculate evaluation metrics based on prediction results
        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        print("Training loop #%s" % (x + 1))
        print("  Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("    RMSE: %s" % rmse)
        print("    MAE: %s" % mae)
        print("    R2: %s" % r2)

        # log the two experiment params and the three evaluation metrics to this run
        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)

        mlflow.log_metric("RMSE", rmse)
        mlflow.log_metric("MAE", mae)
        mlflow.log_metric("R2", r2)

        # TODO: store the model using mlflow

# No explicit mlflow.end_run() is needed here: every run above was opened with
# `with mlflow.start_run()` and is already closed by the time the loop finishes.

Training loop #1
  Elasticnet model (alpha=0.010000, l1_ratio=0.010000):
    RMSE: 0.6732505914716177
    MAE: 0.5159917822903961
    R2: 0.3577749864802763
Training loop #2
  Elasticnet model (alpha=0.110000, l1_ratio=0.110000):
    RMSE: 0.7155677792916657
    MAE: 0.5486214155650978
    R2: 0.27450356450652713
Training loop #3
  Elasticnet model (alpha=0.210000, l1_ratio=0.210000):
    RMSE: 0.7343933838507652
    MAE: 0.565233046671688
    R2: 0.23582778896422052
Training loop #4
  Elasticnet model (alpha=0.310000, l1_ratio=0.310000):
    RMSE: 0.7458420569398436
    MAE: 0.577102262443361
    R2: 0.2118162673652979
Training loop #5
  Elasticnet model (alpha=0.410000, l1_ratio=0.410000):
    RMSE: 0.767168079876864
    MAE: 0.5995366837903091
    R2: 0.16609845519895972
Training loop #6
  Elasticnet model (alpha=0.510000, l1_ratio=0.510000):
    RMSE: 0.7965201447169763
    MAE: 0.6307567529438781
    R2: 0.10106713762891473
Training loop #7
  Elasticnet model (alpha=0.610000, l1_r

MlflowException: Path 'models/ElasticNet' already exists and is not empty

In [None]:
# Persist the most recently fitted model (`lr`, left over from the last loop
# iteration) to a fixed local directory.
# mlflow.sklearn.save_model raises MlflowException if the target directory already
# exists and is not empty, so remove any copy written by an earlier execution first;
# this keeps the cell re-runnable. NOTE: this overwrites the previously saved model.
MODEL_PATH = "models/ElasticNet"
if Path(MODEL_PATH).is_dir():
    shutil.rmtree(MODEL_PATH)
mlflow.sklearn.save_model(lr, MODEL_PATH)