In [None]:
# Import credentials
from resource_credentials import (subscription_key, 
    resource_group_name, 
    workspace_name)

from azure.ai.ml import MLClient

In [None]:
# Getting client
from azure.ai.ml.entities import AzureBlobDatastore, AccountKeyConfiguration
from azure.identity import DefaultAzureCredential

# Build the Azure ML workspace client from the values imported from
# resource_credentials. Note that `subscription_key` is what gets passed as
# the subscription id.
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_key,
    resource_group_name=resource_group_name,
    workspace_name=workspace_name,
)

### Preparing the data

In [None]:
import pandas as pd

# Load the diabetes CSV (path is relative to the notebook's working
# directory) and preview the first rows as the cell output.
diabetes_csv_path = "./src/diabetes.csv"

print("Reading diabetes data")
df = pd.read_csv(diabetes_csv_path)
df.head()


In [None]:
# Separate the frame into a feature matrix X and a label vector y, both as
# plain NumPy arrays (via `.values`).
print("Splitting data...")

feature_columns = [
    'Pregnancies',
    'PlasmaGlucose',
    'DiastolicBloodPressure',
    'TricepsThickness',
    'SerumInsulin',
    'BMI',
    'DiabetesPedigree',
    'Age',
]
label_column = 'Diabetic'

X = df[feature_columns].values
y = df[label_column].values

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split identical between runs.
split_parts = train_test_split(X, y, test_size=0.30, random_state=0)
X_train, X_test, y_train, y_test = split_parts

### Create MLflow experiment

In [None]:
import mlflow

# Track in Azure: look up this workspace and use the MLflow tracking URI it
# exposes.
workspace = ml_client.workspaces.get(ml_client.workspace_name)
mlflow_tracking_uri = workspace.mlflow_tracking_uri
mlflow.set_tracking_uri(mlflow_tracking_uri)

# Select the experiment by name for the runs started in the cells below.
experiment_name = "mlflow-experiment-diabetes"
mlflow.set_experiment(experiment_name)

### Train model and track with MLflow autologging


In [None]:
from sklearn.linear_model import LogisticRegression

# Fit a logistic regression inside an MLflow run with scikit-learn
# autologging enabled; no explicit log_* calls are made in this cell.
with mlflow.start_run():
    mlflow.sklearn.autolog()

    # C is the inverse of the regularization rate (0.1).
    model = LogisticRegression(C=1 / 0.1, solver="liblinear")
    model.fit(X_train, y_train)


### MLflow manual tracking

In [None]:
# Turn scikit-learn autologging off so that the following runs record only
# the parameters, metrics, and artifacts that are logged explicitly.
mlflow.sklearn.autolog(disable=True)

In [None]:

from sklearn.linear_model import LogisticRegression
import numpy as np

# Same logistic regression as before, but with the parameter and metric
# logged by hand instead of through autologging.
regularization_rate = 0.1

with mlflow.start_run():
    # scikit-learn's C is the inverse of the regularization rate.
    model = LogisticRegression(C=1 / regularization_rate, solver="liblinear")
    model.fit(X_train, y_train)

    # Accuracy = fraction of test rows where the prediction equals the label.
    y_hat = model.predict(X_test)
    acc = np.mean(y_hat == y_test)

    mlflow.log_param("regularization_rate", regularization_rate)
    mlflow.log_metric("Accuracy", acc)

### Logging artifacts

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt
import numpy as np

# Train a decision tree inside an MLflow run, then log the estimator name,
# its test accuracy, and an ROC-curve image as a run artifact.
with mlflow.start_run():
    # Seed the tree: scikit-learn randomly permutes the features at each
    # split, so an unseeded tree can produce a different accuracy and ROC
    # curve on every execution. 0 matches the seed used for train_test_split.
    model = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)

    y_hat = model.predict(X_test)
    acc = np.average(y_hat == y_test)

    # plot ROC curve
    # Column 1 of predict_proba is used as the score for the positive class
    # (assumes the labels are 0/1 with 1 = diabetic -- confirm against data).
    y_scores = model.predict_proba(X_test)
    fpr, tpr, thresholds = roc_curve(y_test, y_scores[:, 1])

    # Draw on an explicit figure/axes pair so that the file saved below is
    # this figure, independent of pyplot's implicit "current figure" state.
    roc_curve_path = "ROC-Curve.png"
    fig, ax = plt.subplots(figsize=(6, 4))
    # Plot the diagonal 50% line
    ax.plot([0, 1], [0, 1], 'k--')
    # Plot the FPR and TPR achieved by our model
    ax.plot(fpr, tpr)
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve')
    fig.savefig(roc_curve_path)

    mlflow.log_param("estimator", "DecisionTreeClassifier")
    mlflow.log_metric("Accuracy", acc)
    # Upload the image written above to the run's artifact store.
    mlflow.log_artifact(roc_curve_path)