First, we fetch the data, split it into train and test sets, and save both as Parquet files:

In [None]:
import shap
from sklearn.model_selection import train_test_split


# Fetch the adult dataset through shap: a feature frame and its labels.
X, y = shap.datasets.adult()
print("Data fetched")

# Encode every label as an int (truthy -> 1, falsy -> 0) and attach it as an
# extra column on a copy of the features, leaving X itself untouched.
target_feature = "income"
y = [int(bool(label)) for label in y]
full_data = X.assign(**{target_feature: y})

# Hold out 4000 rows as the test split, stratified on the label column and
# using a fixed random_state.
data_train, data_test = train_test_split(
    full_data,
    test_size=4000,
    random_state=96132,
    stratify=full_data[target_feature],
)

# Write both splits to Parquet files, leaving the row indices out.
print("Saving to files")
data_train.to_parquet("adult_train.parquet", index=False)
data_test.to_parquet("adult_test.parquet", index=False)

Train a simple model on the data:

In [None]:
# Split the training frame into the feature columns and the label column.
X_train = data_train.drop(columns=target_feature)
y_train = data_train[target_feature]

In [None]:
import mlflow
import mlflow.sklearn

import pandas as pd
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier (liblinear solver) on the training
# split; fit() returns the estimator itself, so `model` is the fitted model.
model = LogisticRegression(solver="liblinear").fit(X_train, y_train)
model  # last expression: keeps the fitted estimator as the cell's output

Now create an MLClient:

In [None]:
import os

# Azure ML workspace coordinates used by the later cells. Each value can be
# overridden with an environment variable so the notebook can target another
# workspace without being edited; the literals below are only the fallback
# defaults used when the corresponding variable is not set.
subscription_id = os.environ.get(
    "AZURE_SUBSCRIPTION_ID", "589c7ae9-223e-45e3-a191-98433e0821a9"
)
resource_group = os.environ.get("AZURE_RESOURCE_GROUP", "amlisdkv2-rg-1638871354")
workspace_name = os.environ.get("AZURE_WORKSPACE_NAME", "amlisdkv21638871354")

In [None]:
from azure.ml import MLClient
from azure.identity import DefaultAzureCredential
# Build the Azure credential (with the shared-token-cache credential excluded)
# and create the client for the workspace identified by subscription_id,
# resource_group and workspace_name.
azure_credential = DefaultAzureCredential(exclude_shared_token_cache_credential=True)
ml_client = MLClient(
    credential=azure_credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
    logging_enable=True,
)

Upload the datasets:

In [None]:
from azure.ml.entities import Data

# Describe the local training Parquet file as a Data entity (name, path, version).
train_dataset = Data(name="Adult_Train_from_Notebook", local_path="adult_train.parquet", version="1")

In [None]:
# Submit the training Data entity through the client's create-or-update call;
# as the cell's last expression, the returned value is shown as the output.
ml_client.data.create_or_update(train_dataset)

In [None]:
# Describe the local test Parquet file as a Data entity, then submit it through
# the client's create-or-update call (whose return value is the cell output).
test_dataset = Data(name="Adult_Test_from_Notebook", local_path="adult_test.parquet", version="1")
ml_client.data.create_or_update(test_dataset)

Now, register the model:

In [None]:
from azureml.core import Workspace


# Open the workspace through the v1 SDK, using the same subscription, resource
# group and workspace name as the MLClient; auth is left as None.
v1_workspace = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
    auth=None,
)

# Point MLflow at the tracking URI reported by the workspace, then select the
# experiment that the model logging will be recorded under.
mlflow.set_tracking_uri(v1_workspace.get_mlflow_tracking_uri())
mlflow.set_experiment("nb_model_creation")

In [None]:
# Log the fitted model and register it as "nb_classifier_mlflow" inside an
# explicit MLflow run. Without the `with` block, log_model would implicitly
# start a run and leave it active for the rest of the kernel session; the
# context manager ends the run as soon as the model has been logged.
with mlflow.start_run():
    model_info = mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="my_path",
        registered_model_name="nb_classifier_mlflow",
    )

# Last expression: display whatever log_model returned as the cell output.
model_info

In [None]:
# Show the version of the mlflow package imported in this kernel.
mlflow.__version__