In [1]:
import mlflow
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Source of the red-wine quality dataset; the file is semicolon-delimited.
csv_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
# Read the remote CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer=csv_url, sep=";")
# Preview the first five rows as the cell output.
data.head(n=5)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [2]:
# Hold out a test set. The seed is pinned so the split is identical on every
# Restart & Run All; without it each run shuffles the rows differently and the
# reported metrics are not reproducible.
train, test = train_test_split(data, random_state=42)
# Features: every column except the "quality" target.
train_x = train.drop(["quality"], axis=1)
test_x = test.drop(["quality"], axis=1)
# Target, selected with a list so it stays a single-column DataFrame.
train_y = train[["quality"]]
test_y = test[["quality"]]
# Display the four splits as the cell output.
train_x, test_x, train_y, test_y

(      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
 381            13.7             0.415         0.68             2.9      0.085   
 1316            5.4             0.740         0.00             1.2      0.041   
 868             6.8             0.560         0.22             1.8      0.074   
 1246            7.4             0.740         0.07             1.7      0.086   
 485            10.2             0.670         0.39             1.9      0.054   
 ...             ...               ...          ...             ...        ...   
 442            15.6             0.685         0.76             3.7      0.100   
 1233           10.2             0.230         0.37             2.2      0.057   
 429            12.8             0.840         0.63             2.4      0.088   
 1060           11.6             0.230         0.57             1.8      0.074   
 460             9.2             0.430         0.52             2.3      0.083   
 
       free su

In [3]:
def eval_metrics(actual, pred):
    """Score predictions against ground truth.

    Args:
        actual: True target values.
        pred: Predicted target values.

    Returns:
        A ``(rmse, mae, r2)`` tuple: the square root of the mean squared
        error, the mean absolute error, and the R^2 score.
    """
    mse = mean_squared_error(actual, pred)
    return (
        np.sqrt(mse),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )
# Hyperparameters for the ElasticNet fit
alpha = 0.5
l1_ratio = 0.5
# Build the regressor, then train it on the training split
lr = ElasticNet(
    alpha=alpha,
    l1_ratio=l1_ratio,
    random_state=42,
)
lr.fit(train_x, train_y)
# Score the predictions made on the held-out split
predicted_qualities = lr.predict(test_x)
rmse, mae, r2 = eval_metrics(test_y, predicted_qualities)
# Report the hyperparameters followed by one metric per line
print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
print(
    "  RMSE: %s" % rmse,
    "  MAE: %s" % mae,
    "  R2: %s" % r2,
    sep="\n",
)

Elasticnet model (alpha=0.500000, l1_ratio=0.500000):
  RMSE: 0.7274192805429398
  MAE: 0.6014619988593539
  R2: 0.14232244073206324


In [None]:
# MLflow: record this run's hyperparameters, metrics and fitted model.
experiment_name = "test"
# Create the experiment *before* selecting it. set_experiment() creates a
# missing experiment on its own (with the default artifact location), so when
# it ran first the create_experiment() call always failed with "already
# exists" -- silently, behind a bare except -- and the S3 artifact location
# was never applied. An experiment that already exists is left untouched.
if mlflow.get_experiment_by_name(experiment_name) is None:
    mlflow.create_experiment(experiment_name, artifact_location="s3://mlflow")
mlflow.set_experiment(experiment_name)
with mlflow.start_run():
    # Hyperparameters used for the ElasticNet fit.
    mlflow.log_param("alpha", alpha)
    mlflow.log_param("l1_ratio", l1_ratio)
    # Evaluation metrics computed on the held-out split.
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2", r2)
    mlflow.log_metric("mae", mae)
    # Persist the fitted estimator under the "model" artifact path.
    mlflow.sklearn.log_model(lr, "model")
    # NOTE(review): log_artifact expects an existing local path; nothing
    # visible in this notebook creates "wine-quality" -- confirm it exists.
    mlflow.log_artifact("wine-quality")
