In [1]:
import prefect
from prefect import task, Flow, Parameter, Client
from prefect.run_configs import KubernetesRun
from prefect.schedules import IntervalSchedule
from prefect.storage import S3

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet

from datetime import timedelta
import time
import getpass

import numpy as np
import pandas as pd

import mlflow
import requests

In [2]:
def fetch_data(csv_url=None, sep=";"):
    """Load a delimited text file into a pandas DataFrame.

    Args:
        csv_url: Path or URL handed to ``pd.read_csv``. When omitted, the
            UCI "winequality-red.csv" file is downloaded, which is what this
            function always did before the parameter existed.
        sep: Field delimiter. Defaults to ";" because that is the separator
            the default wine-quality file is read with.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    if csv_url is None:
        # Default source: fetched over the network on every call.
        csv_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
    return pd.read_csv(csv_url, sep=sep)

def train_model(data, alpha=0.5, l1_ratio=0.5, random_state=42):
    """Fit an ElasticNet regressor on ``data`` and print hold-out metrics.

    Args:
        data: DataFrame with a "quality" column used as the regression
            target; every other column is used as a feature.
        alpha: ``alpha`` argument forwarded to ``ElasticNet``.
        l1_ratio: ``l1_ratio`` argument forwarded to ``ElasticNet``.
        random_state: Seed forwarded to ``train_test_split`` so that repeated
            runs produce the same partition and therefore the same metrics.
            Pass ``None`` to get a different random split on each call.

    Returns:
        None. RMSE, MAE and R2 on the held-out rows are printed to stdout.
    """
    # Seed the split: without it the reported metrics change on every run.
    train, test = train_test_split(data, random_state=random_state)

    # "quality" is the target; drop it to obtain the feature matrices.
    train_x = train.drop(["quality"], axis=1)
    test_x = test.drop(["quality"], axis=1)
    train_y = train[["quality"]]
    test_y = test[["quality"]]

    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)
    predicted_qualities = lr.predict(test_x)

    # Score the predictions against the held-out targets.
    (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

    print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    print("  RMSE: %s" % rmse)
    print("  MAE: %s" % mae)
    print("  R2: %s" % r2)
    
def eval_metrics(actual, pred):
    """Score regression predictions against the true targets.

    Args:
        actual: Ground-truth target values.
        pred: Predicted values for the same rows.

    Returns:
        A ``(rmse, mae, r2)`` tuple: the square root of the mean squared
        error, the mean absolute error, and the R2 score.
    """
    squared_error = mean_squared_error(actual, pred)
    return (
        np.sqrt(squared_error),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )


In [3]:
# Load the dataset via fetch_data() and fit the baseline ElasticNet model with
# train_model()'s default hyperparameters; train_model() prints RMSE, MAE and
# R2 rather than returning them.
# NOTE(review): `data` stays bound at notebook level -- presumably later cells
# reuse it; confirm before renaming.
data = fetch_data()
train_model(data)

Elasticnet model (alpha=0.500000, l1_ratio=0.500000):
  RMSE: 0.7402936992830257
  MAE: 0.5972094574803722
  R2: 0.1343630371218656
