In [9]:
%%writefile example_mlflow.py

import os
import sys
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from urllib.parse import urlparse
import mlflow
from mlflow.models.signature import infer_signature
import mlflow.sklearn
import logging
logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)

def eval_metrics(actual, pred):
    """Score regression predictions against the ground truth.

    Args:
        actual: Observed target values.
        pred: Predicted target values, aligned with ``actual``.

    Returns:
        tuple: ``(rmse, mae, r2)`` -- root mean squared error, mean absolute
        error and the R^2 score, in that order.
    """
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse), mean_absolute_error(actual, pred), r2_score(actual, pred)

if __name__=="__main__":
    # Script entry point: train an ElasticNet regressor on the red-wine quality
    # CSV and record its parameters, metrics and fitted model in an MLflow run.
    # Usage: python example_mlflow.py [alpha] [l1_ratio]

    csv_url = (
            "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"
        )

    try:
        data = pd.read_csv(csv_url, sep=";")
    except Exception as e:
        # Log the failure (with traceback) and stop: nothing below can run
        # without `data`, so continuing would only fail later with a NameError.
        logger.exception("unable to download the training dataset: %s", e)
        sys.exit(1)

    ### splitting data
    # train_test_split is called without random_state, so it draws from NumPy's
    # global RNG; seeding it here keeps the split repeatable.
    np.random.seed(40)
    train, test = train_test_split(data)

    ### getting x and y
    # "quality" is the prediction target; every other column is a feature.
    train_x = train.drop(['quality'], axis=1)
    test_x = test.drop(['quality'], axis=1)
    train_y = train[['quality']]
    test_y = test[['quality']]

    ### alpha and l1_ratio
    # Optional positional CLI arguments; both default to 0.5 when omitted.
    alpha = float(sys.argv[1]) if len(sys.argv)>1 else 0.5
    l1_ratio = float(sys.argv[2]) if len(sys.argv)>2 else 0.5

    ### starting mlflow tracking
    with mlflow.start_run():
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)
        pred = lr.predict(test_x)

        (rmse, mae, r2) = eval_metrics(test_y, pred)
        print(f"Elastic Net model: alpha{alpha}, l1_ratio:{l1_ratio}")
        print(f'RMSE: {rmse}')
        print(f"MAE :{mae}")
        print(f"R2: {r2}")

        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("mae", mae)
        mlflow.log_metric("r2", r2)

        # The model signature is inferred from the training features and the
        # model's predictions on them.
        train_pred = lr.predict(train_x)
        signature = infer_signature(train_x, train_pred)

        # Register the model by name only when the tracking URI is not a local
        # file store.
        # NOTE(review): presumably the file store lacks model-registry support -- confirm.
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        if tracking_url_type_store=='file':
            mlflow.sklearn.log_model(lr, "model", signature=signature)
        else:
            mlflow.sklearn.log_model(lr, "model", registered_model_name="ElasticNetWineModel", signature=signature)

Overwriting example_mlflow.py


In [36]:
import sys
import os
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from mlflow.models.signature import infer_signature
import mlflow.sklearn
from sklearn.datasets import load_iris
from urllib.parse import urlparse

In [18]:
# Load the iris dataset and pull out the feature matrix and the class labels.
data = load_iris()
feat, target = data['data'], data['target']

In [23]:
# Wrap the raw feature array in a DataFrame (columns get default integer labels).
df = pd.DataFrame(data=feat)

In [24]:
# Preview the first five rows of the feature frame.
df.head(n=5)

Unnamed: 0,0,1,2,3
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [25]:
# Attach the class labels as a "target" column, then preview the result.
df = df.assign(target=target)
df.head(n=5)

Unnamed: 0,0,1,2,3,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [28]:
# Seed NumPy's global RNG: train_test_split is called without random_state,
# so it draws from that global state and the split stays repeatable.
np.random.seed(40)

# Separate the label column from the features.
y = df['target']
X = df.drop(columns=['target'])

# Hold out 20% of the rows for testing.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# One shape per line: train features, test features, train labels, test labels.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, sep="\n")

(120, 4)
(30, 4)
(120,)
(30,)


In [45]:
def eval_metrics(actual, pred):
    """Score classification predictions against the true labels.

    Precision, recall and F1 are all computed with micro averaging.

    Args:
        actual: True class labels.
        pred: Predicted class labels, aligned with ``actual``.

    Returns:
        tuple: ``(accuracy, precision, recall, f1)`` in that order.
    """
    micro = {"average": "micro"}
    return (
        accuracy_score(actual, pred),
        precision_score(actual, pred, **micro),
        recall_score(actual, pred, **micro),
        f1_score(actual, pred, **micro),
    )

In [58]:
# Hyperparameters for the candidate classifiers.
C_logistic, max_depth, C_svm = 0.8, 4, 0.9

# Maps a model name to a configured (unfitted) estimator instance.
model_dispatcher = dict(
    logistic_regression=LogisticRegression(C=C_logistic),
    Decision_Tree=DecisionTreeClassifier(max_depth=max_depth),
    SVM=SVC(C=C_svm),
)

In [60]:
# Key into model_dispatcher selecting which classifier this run trains.
train_model = "SVM"

# (param name, value) of the hyperparameter that configures each candidate model,
# so the run records the setting of the model that was actually trained rather
# than always logging the SVM's C.
hyperparams = {
    "logistic_regression": ("C_logistic", C_logistic),
    "Decision_Tree": ("max_depth", max_depth),
    "SVM": ("C_svm", C_svm),
}

with mlflow.start_run():
    model = model_dispatcher[train_model]
    model.fit(x_train, y_train)
    pred = model.predict(x_test)

    # metrics
    acc, prec, rec, f1 = eval_metrics(y_test, pred)

    param_name, param_value = hyperparams[train_model]
    mlflow.log_param(param_name, param_value)
    # Metric key was previously misspelled as "accuarcy".
    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("Micro precision", prec)
    mlflow.log_metric("Micro Recall", rec)
    mlflow.log_metric("Micro F1 score", f1)

    # Register the model by name only when the tracking URI is not a local
    # file store.
    # NOTE(review): presumably the file store lacks model-registry support -- confirm.
    tracking_url_mlfow = urlparse(mlflow.get_tracking_uri()).scheme
    if tracking_url_mlfow=="file":
        mlflow.sklearn.log_model(model, train_model)
    else:
        # The registered name is derived from the selected model; the original
        # interpolated an undefined variable here and raised NameError.
        mlflow.sklearn.log_model(model, train_model, registered_model_name=f"Iris_{train_model}")