### Inspired by the [e2e-wine-kfp-mlflow example](https://github.com/canonical/kubeflow-examples/tree/main/e2e-wine-kfp-mlflow)

# Install Python packages

In [None]:
!pip install pyarrow scikit-learn pandas joblib

# Preprocess data

In [None]:
def preprocess(file_path, output_file):
    """Convert a semicolon-separated CSV file into a Parquet file.

    The first row of the CSV is used as the header. Every column name is
    lower-cased and its spaces are replaced by underscores before the
    table is written out.

    Args:
        file_path: Location of the CSV file to read.
        output_file: Destination of the Parquet file to write.
    """
    import pandas

    def _normalise(label):
        # e.g. "Some Name" -> "some_name"
        return label.lower().replace(' ', '_')

    table = pandas.read_csv(file_path, sep=";", header=0)
    table.columns = list(map(_normalise, table.columns))
    table.to_parquet(output_file)

In [None]:
# Convert the raw CSV into the Parquet file that the training step reads.
preprocess(
    file_path='data/winequality-red.csv',
    output_file='data/preprocessed.parquet',
)

# Train model

In [None]:
def trainning(file_path, *, target_column='quality', alpha=0.5, l1_ratio=0.5,
              test_size=.25):
    """Fit an ElasticNet model on a preprocessed Parquet dataset.

    The dataset is split with a fixed seed, stratified on the target
    column. Only the training portion is used for fitting; the held-out
    portion is discarded by this function.

    Args:
        file_path: Location of the Parquet file to load.
        target_column: Name of the column to predict; all other columns
            are used as features.
        alpha: ``alpha`` argument passed to ``ElasticNet``.
        l1_ratio: ``l1_ratio`` argument passed to ``ElasticNet``.
        test_size: Share of the rows held out by ``train_test_split``.

    Returns:
        The fitted ``ElasticNet`` estimator.
    """
    # Imports stay inside the function so it remains self-contained.
    import pandas as pd
    from sklearn.linear_model import ElasticNet
    from sklearn.model_selection import train_test_split

    df = pd.read_parquet(file_path)
    features = df.drop(columns=[target_column])
    target = df[target_column]

    # The split is kept (with its fixed seed) so the model is fitted on the
    # same rows as before; the test portions are not needed here.
    train_x, _, train_y, _ = train_test_split(
        features,
        target,
        test_size=test_size,
        random_state=1337,
        stratify=target,
    )

    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)
    return lr

In [None]:
# Fit the model on the preprocessed data and keep it in `lr`.
lr = trainning(file_path='data/preprocessed.parquet')

# Save the trained model

In [None]:
from pathlib import Path

from joblib import dump, load

# joblib.dump does not create missing parent directories, so make sure the
# target folder exists before writing the model file.
Path('model').mkdir(parents=True, exist_ok=True)

dump(lr, 'model/wine-rater.joblib')

# (Optional) Load the saved model and test it

In [None]:
# Reload the persisted estimator and score one hand-written sample: a single
# row of 11 feature values (presumably in the column order used for training).
wine_rater = load(filename='model/wine-rater.joblib')
wine_rater.predict(
    X=[[5.6, 0.31, 0.37, 1.4, 0.074, 12.0, 96.0, 0.9954, 3.32, 0.58, 9.2]]
)