Based on Labs:

1. Environments: [Work with environments](https://github.com/MicrosoftLearning/mslearn-azure-ml/blob/main/Labs/04/Work%20with%20environments.ipynb)

2. AutoML: [Classification with Automated Machine Learning](https://github.com/MicrosoftLearning/mslearn-azure-ml/blob/main/Labs/06/Classification%20with%20Automated%20Machine%20Learning.ipynb)

In [None]:
# Import the required libraries
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient

# Workspace coordinates. Replace the subscription placeholder with your own
# subscription ID before running this cell.
subscription_id = "your-subscription-id"
resource_group = "ml-corp-test-rg"
workspace_name = "ml-corp-test-ws"

# Authenticate and build the workspace-scoped client. Keyword arguments make
# it explicit which value is which.
credential = DefaultAzureCredential()
ml_client = MLClient(
    credential=credential,
    subscription_id=subscription_id,
    resource_group_name=resource_group,
    workspace_name=workspace_name,
)

# Fetch the workspace to confirm the connection works, then print its details
# one per line.
workspace = ml_client.workspaces.get(name=ml_client.workspace_name)
print(
    ml_client.workspace_name,
    workspace.resource_group,
    workspace.location,
    # Public MLClient property; avoids reaching into the private
    # `connections._subscription_id` attribute, which may change between SDK releases.
    ml_client.subscription_id,
    sep="\n",
)

ml-workspace-corp-test-swn-001
rg-corp-data-ai-platform-001
switzerlandnorth
7dae9e0f-de34-4921-91be-67945119f760


In [6]:
%%writefile diabetes-training.py
# import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve 

# load the diabetes dataset
print("Loading Data...")

diabetes = pd.read_csv('./Users/boris.zaikin/diabetes-data/diabetes.csv')


# separate features and labels
X, y = diabetes[['Pregnancies','PlasmaGlucose','DiastolicBloodPressure','TricepsThickness','SerumInsulin','BMI','DiabetesPedigree','Age']].values, diabetes['Diabetic'].values

# split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)

# set regularization hyperparameter
reg = 0.01

# train a logistic regression model
print('Training a logistic regression model with regularization rate of', reg)
model = LogisticRegression(C=1/reg, solver="liblinear").fit(X_train, y_train)

# calculate accuracy
y_hat = model.predict(X_test)
acc = np.average(y_hat == y_test)
print('Accuracy:', acc)

# calculate AUC
y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test,y_scores[:,1])
print('AUC: ' + str(auc))




Overwriting diabetes-training.py


Create the Conda specification file for the custom environment

In [26]:
%%writefile conda-env.yml
# Conda specification for the custom training environment.
name: basic-env-cpu
channels:
  - conda-forge
dependencies:
  # Only the Python version is pinned; the packages below carry no version pins.
  - python=3.11
  - scikit-learn  # imported by diabetes-training.py
  - pandas  # imported by diabetes-training.py
  - numpy  # imported by diabetes-training.py
  - matplotlib  # NOTE(review): not imported by diabetes-training.py as shown; confirm it is needed

Overwriting conda-env.yml


In [31]:
from azure.ai.ml import command
from azure.ai.ml.entities import Environment

# Define a custom environment: a base Docker image plus the Conda
# dependencies listed in conda-env.yml.
env_docker_image = Environment(
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    name="docker-image-example",
    conda_file="conda-env.yml",
    description="Environment created from a Docker image."
)
# Keep the entity returned by the service: it carries the version that was
# actually registered, so the job below never references a version that does
# not exist (fresh workspace) or an outdated one (after the definition changed).
registered_env = ml_client.environments.create_or_update(env_docker_image)

# Job that runs the training script in the custom environment
job = command(
    code="./",
    command="python diabetes-training.py",
    environment=f"{registered_env.name}:{registered_env.version}",
    compute="ml-compute-ntb",
    display_name="diabetes-pythonv2-train",
    experiment_name="diabetes-training"
)

# Alternative: configure the job with a curated (already prepared) environment
# job = command(
#     code="./",
#     command="python diabetes-training.py",
#     environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
#     compute="ml-compute-ntb",
#     display_name="diabetes-pythonv2-train",
#     experiment_name="diabetes-training"
# )


# submit job and print the link for following its progress
returned_job = ml_client.create_or_update(job)
aml_url = returned_job.studio_url
print("Monitor your job at", aml_url)

Uploading code (0.8 MBs): 100%|██████████| 804027/804027 [00:00<00:00, 18458110.27it/s]






In [20]:
# Print the name and version of every environment returned for the workspace
for env in ml_client.environments.list():
    print(env.name, env.version)

AzureML-AI-Studio-Development None
AzureML-VowpalWabbit-8.8.0 None
AzureML-Triton None
AzureML-PyTorch-1.3-CPU None
AzureML-Designer-Score None
AzureML-ACPT-pytorch-1.13-py38-cuda11.7-gpu None
AzureML-ACPT-pytorch-1.12-py38-cuda11.6-gpu None
AzureML-ACPT-pytorch-1.12-py39-cuda11.6-gpu None
AzureML-ACPT-pytorch-1.11-py38-cuda11.5-gpu None
AzureML-ACPT-pytorch-1.11-py38-cuda11.3-gpu None
AzureML-responsibleai-0.21-ubuntu20.04-py38-cpu None
AzureML-responsibleai-0.20-ubuntu20.04-py38-cpu None
AzureML-pytorch-1.10-ubuntu18.04-py38-cuda11-gpu None
AzureML-pytorch-1.9-ubuntu18.04-py37-cuda11-gpu None
AzureML-pytorch-1.8-ubuntu18.04-py37-cuda11-gpu None
AzureML-pytorch-1.7-ubuntu18.04-py37-cuda11-gpu None
AzureML-tensorflow-2.6-ubuntu20.04-py38-cuda11-gpu None
AzureML-tensorflow-2.7-ubuntu20.04-py38-cuda11-gpu None
AzureML-tensorflow-2.4-ubuntu18.04-py37-cuda11-gpu None
AzureML-tensorflow-2.5-ubuntu20.04-py38-cuda11-gpu None
AzureML-sklearn-0.24-ubuntu18.04-py37-cpu None
AzureML-lightgbm-3.2-

In [23]:
# Look up version 1 of the AzureML TensorFlow 2.5 GPU environment and show
# its description and tags
env = ml_client.environments.get(
    name="AzureML-tensorflow-2.5-ubuntu20.04-py38-cuda11-gpu",
    version=1,
)
print(env.description, env.tags)

An environment for deep learning with Tensorflow containing the Azure ML SDK and additional python packages. {'TensorFlow': '2.5', 'GPU': 'Cuda11', 'OS': 'Ubuntu20.04', 'Training': '', 'Preview': ''}


### Configure and run the AutoML job

In [None]:
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import Input

# Describe the training input: an MLTable referenced by the asset URI
# "azureml:diabetes-tab:1" (asset name "diabetes-tab", version 1)
my_training_data_input = Input(
    path="azureml:diabetes-tab:1",
    type=AssetTypes.MLTABLE,
)
print(my_training_data_input)


In [None]:
from azure.ai.ml import automl

# configure the classification job
classification_job = automl.classification(
    compute="ml-compute-cluster-corp-test-swn",
    # Group the AutoML runs under their own descriptive experiment rather than
    # under the name of a compute target.
    experiment_name="diabetes-automl-classification",
    training_data=my_training_data_input,
    target_column_name="Diabetic",
    primary_metric="accuracy",
    n_cross_validations=5,
    enable_model_explainability=True
)

# set the limits (optional)
# classification_job.set_limits(
#     timeout_minutes=260, 
#     trial_timeout_minutes=40, 
#     max_trials=5,
#     enable_early_termination=True,
# )

# set the training properties (optional): exclude LogisticRegression from the
# candidate algorithms and request ONNX-compatible models
classification_job.set_training(
    blocked_training_algorithms=["LogisticRegression"], 
    enable_onnx_compatible_models=True
)

# submit the job and print the link for following its progress, as done for
# the command job
status_job = ml_client.create_or_update(classification_job)
print("Monitor your job at", status_job.studio_url)


