In [None]:
# Coordinates of the Azure ML workspace that the MLClient below connects to.
# NOTE(review): these look like tutorial values -- substitute your own before running.
subscription_id = "59a62e46-b799-4da2-8314-f56ef5acf82b"  # Azure subscription GUID
resource_group = "rg-azuremltraining"  # resource group passed to MLClient
workspace = "dummy-workspace"  # workspace name passed to MLClient

In [None]:
from azure.ai.ml import MLClient
from azure.identity import DefaultAzureCredential

# Build a workspace-scoped client. Arguments are passed by keyword so it is
# obvious which identifier fills which MLClient parameter.
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace,
)

In [None]:
# Upload the data
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Local CSV (relative to this notebook) that is registered as a data asset.
local = "../data/diabetes.csv"

# Describe the asset: a single file (URI_FILE) named "diabetes_dummy".
data = Data(
    name="diabetes_dummy",
    description="diabetes file",
    type=AssetTypes.URI_FILE,
    path=local,
)

# Last expression of the cell, so the object returned by the service is shown as output.
ml_client.data.create_or_update(data)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Look up version 1 of the registered "diabetes_dummy" asset and read it via its path.
diabetes_asset = ml_client.data.get(name="diabetes_dummy", version=1)
df = pd.read_csv(diabetes_asset.path)

# Some boilerplate ML code
feature_columns = [
    "Pregnancies",
    "PlasmaGlucose",
    "DiastolicBloodPressure",
    "TricepsThickness",
    "SerumInsulin",
    "BMI",
    "DiabetesPedigree",
    "Age",
]
X = df[feature_columns]
y = df["Diabetic"]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0, test_size=0.30
)

# fit() returns the estimator itself, so `model` is the fitted classifier.
classifier = LogisticRegression(C=1 / 0.1, solver="liblinear")
model = classifier.fit(X_train, y_train)

# Accuracy on the held-out rows.
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(acc)

None of this is being tracked, which is not very MLOps: the accuracy is only printed and the model exists only in this kernel. Instead, we'll keep track of model parameters, performance, etc. in Azure ML through MLflow.

Make sure the `azureml-mlflow` and `mlflow` packages are installed (e.g. `pip install azureml-mlflow mlflow`).

In [None]:
# Set up experiment tracking
import mlflow

# Only needed when running locally (not necessary on a Compute Instance): uncomment the
# two lines below to point MLflow at the workspace's tracking URI.
# mlflow_tracking_uri = ml_client.workspaces.get(ml_client.workspace_name).mlflow_tracking_uri
# mlflow.set_tracking_uri(mlflow_tracking_uri)
# Select (by name) the experiment that subsequent runs are recorded under.
experiment_name = "my_experiment"
mlflow.set_experiment(experiment_name)

In [None]:
# Train inside an MLflow run so that everything logged below is attached to it.
with mlflow.start_run():
    # Let MLflow's scikit-learn integration log what it captures for the fit() call below.
    mlflow.sklearn.autolog()
    model = LogisticRegression(C=1 / 0.1, solver="liblinear").fit(X_train, y_train)
    # Score the model trained in THIS run instead of reusing `acc` left over from an
    # earlier cell, so the logged metric always belongs to the logged model.
    acc = accuracy_score(y_test, model.predict(X_test))
    # Optionally, log your own parameters and metrics.
    # Derive the estimator name from the model itself so the label cannot drift
    # from the code (it was previously hard-coded to the wrong class name).
    mlflow.log_param("estimator", type(model).__name__)
    mlflow.log_metric("Accuracy test", acc)

The metrics are now logged to an experiment run in Azure ML! By default, autologging also saves the trained model with the run.