In [1]:
import os
import sys
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings("ignore")
from urllib.parse import urlparse

from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error

import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature

In [2]:
import logging

# Configure the root logger at WARNING level and create a logger named after
# this module for the notebook's own messages.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

In [13]:
def evaluate(Y_test, Y_pred):
    """Score a set of regression predictions against the true targets.

    Args:
        Y_test: Ground-truth target values.
        Y_pred: Predicted values, aligned with ``Y_test``.

    Returns:
        A ``(r2, mae, rmse)`` tuple: the R^2 score, the mean absolute error,
        and the square root of the mean squared error.
    """
    mse = mean_squared_error(Y_test, Y_pred)
    return (
        r2_score(Y_test, Y_pred),
        mean_absolute_error(Y_test, Y_pred),
        np.sqrt(mse),
    )

In [4]:
# Red-wine quality CSV hosted in the MLflow repository; fields are ';'-separated.
csv_url = "https://raw.githubusercontent.com/mlflow/mlflow/master/tests/datasets/winequality-red.csv"
try:
    df = pd.read_csv(csv_url, sep=";")
except Exception as e:
    # "%s" (lowercase) is the valid placeholder; the previous "%S" made the
    # logging call itself fail while formatting, hiding the real error.
    logger.exception("FAILED TO READ THE DATA %s", e)
    # Re-raise so the notebook stops here: swallowing the error would leave
    # `df` undefined and surface later as an unrelated NameError.
    raise

In [5]:
# Preview the first rows to sanity-check that the ';'-separated file was parsed into columns.
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [7]:
# Fix the shuffle seed so that Restart & Run All reproduces the same split, and
# therefore the same metrics. Without it every run trains and scores on
# different rows and logged results cannot be compared between runs.
train, test = train_test_split(df, random_state=42)

In [14]:
# Separate the feature columns from the target column ("quality").
X_train = train.drop(["quality"], axis=1)
X_test = test.drop(["quality"], axis=1)
Y_train = train["quality"]
Y_test = test["quality"]

# ElasticNet hyperparameters (passed as `alpha` and `l1_ratio` below).
alpha = 0.5
l1 = 0.5

with mlflow.start_run():
    # Fit on the training split and score on the held-out split.
    lr = ElasticNet(alpha=alpha, l1_ratio=l1)
    lr.fit(X_train, Y_train)

    Y_preds = lr.predict(X_test)

    (r2, mae, rmse) = evaluate(Y_test, Y_preds)

    print("ElasticNet model(alpha={:f}),l1_ratio={:f}".format(alpha, l1))
    print("R2_score: %s" % r2)
    print("mae: %s" % mae)
    print("rmse: %s" % rmse)

    # Hyperparameters are inputs of the run, so they are logged as params.
    mlflow.log_param("alpha", alpha)
    mlflow.log_param("l1_ratio", l1)

    # Evaluation scores are numeric results of the run, so they are logged as
    # metrics rather than params; this keeps them numeric in the tracking
    # store instead of being recorded as parameter strings.
    mlflow.log_metric("r2_score", r2)
    mlflow.log_metric("mae", mae)
    mlflow.log_metric("rmse", rmse)

    # Infer the model's input/output schema from the training features and the
    # predictions the fitted model produces for them.
    predictions = lr.predict(X_train)
    signature = infer_signature(X_train, predictions)

    # URI scheme of the configured tracking backend (e.g. "file").
    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme

    if tracking_url_type_store != "file":
        # Register the model under a name only when the backend is not a plain
        # file store. NOTE(review): presumably because the model registry is
        # unavailable with a file store; confirm for the MLflow version in use.
        mlflow.sklearn.log_model(
            lr,
            "model",
            registered_model_name="ElasticNetWineModel",
            signature=signature,
        )
    else:
        mlflow.sklearn.log_model(lr, "model", signature=signature)

ElasticNet model(alpha=0.500000),l1_ratio=0.500000
R2_score: 0.1432779709965163
mae: 0.5934327396647645
rmse: 0.735507831842441


In [10]:
import sys


ValueError: could not convert string to float: '--f=c:\\Users\\User\\AppData\\Roaming\\jupyter\\runtime\\kernel-v2-6476QivQZbdllV7x.json'

In [11]:
# Debug aid: show the arguments the kernel process was started with. Under
# Jupyter these belong to ipykernel_launcher (its script path plus a
# '--f=<connection file>' flag), so they cannot be parsed as numeric values.
# NOTE(review): this looks like leftover debugging of the ValueError shown in
# the previous cell's output; consider removing both cells.
print(sys.argv)

['c:\\Users\\User\\anaconda3\\Lib\\site-packages\\ipykernel_launcher.py', '--f=c:\\Users\\User\\AppData\\Roaming\\jupyter\\runtime\\kernel-v2-6476QivQZbdllV7x.json']
