In [None]:
# Import credentials
from resource_credentials import (subscription_key, 
    resource_group_name, 
    workspace_name,
    datastore_name,
    storage_account_name,
    blob_container_name,
    storage_account_key,
    data_asset_name)

from azure.ai.ml import MLClient
from azure.ai.ml.entities import Workspace
from azure.identity import DefaultAzureCredential

## Create Workspace

In [None]:
# Client scoped to the subscription and resource group only
# (no workspace_name is supplied at this point).
ml_client = MLClient(
    resource_group_name=resource_group_name,
    subscription_id=subscription_key,
    credential=DefaultAzureCredential(),
)

# Describe the workspace, start its creation and keep the operation's result
ws = ml_client.workspaces.begin_create(
    Workspace(name=workspace_name, location="eastus")
).result()
print(ws)

### List workspaces

In [None]:
# Print the name of every workspace the client can list
current_workspaces = ml_client.workspaces.list()
for listed_workspace in current_workspaces:
    print(listed_workspace.name)

## Create and register a Datastore

In [None]:
from azure.ai.ml.entities import AzureBlobDatastore, AccountKeyConfiguration

# Re-create the client, this time bound to the workspace
ml_client = MLClient(
    workspace_name=workspace_name,
    resource_group_name=resource_group_name,
    subscription_id=subscription_key,
    credential=DefaultAzureCredential(),
)

# Blob datastore definition, authenticated with the storage account key
datastore = AzureBlobDatastore(
    credentials=AccountKeyConfiguration(account_key=storage_account_key),
    container_name=blob_container_name,
    account_name=storage_account_name,
    name=datastore_name,
)

# Register the datastore and keep the returned entity
datastore = ml_client.create_or_update(datastore)

### Add a dataset to a datastore

In [None]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Fetch the datastore by name
datastore = ml_client.datastores.get(datastore_name)

# azureml:// URI of the CSV file inside that datastore
path = f"azureml://datastores/{datastore_name}/paths/loan-data/Loan+Approval+Prediction.csv"

# Single-file data asset pointing at the URI above
data_asset = Data(
    path=path,
    type=AssetTypes.URI_FILE,
    name=data_asset_name,
)

ml_client.data.create_or_update(data_asset)

### Train a model using jobs

In [None]:
# Workspace-scoped ML client used by the job cells below
ml_client = MLClient(
    workspace_name=workspace_name,
    resource_group_name=resource_group_name,
    subscription_id=subscription_key,
    credential=DefaultAzureCredential(),
)

In [None]:
%%writefile src/diabetes-training.py
# import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

# read the diabetes data from the working directory
print("Loading Data...")
diabetes = pd.read_csv('diabetes.csv')

# feature columns and label column, pulled out as arrays
feature_columns = [
    'Pregnancies', 'PlasmaGlucose', 'DiastolicBloodPressure', 'TricepsThickness',
    'SerumInsulin', 'BMI', 'DiabetesPedigree', 'Age',
]
X = diabetes[feature_columns].values
y = diabetes['Diabetic'].values

# hold out 30% of the rows for testing (fixed random_state)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)

# regularization rate; the model receives its inverse as C
reg = 0.01

# fit the logistic regression model on the training split
print('Training a logistic regression model with regularization rate of', reg)
model = LogisticRegression(C=1/reg, solver="liblinear")
model.fit(X_train, y_train)

# accuracy: share of test predictions equal to the true labels
y_hat = model.predict(X_test)
acc = np.average(y_hat == y_test)
print('Accuracy:', acc)

# AUC computed from column 1 of the predicted probabilities
y_scores = model.predict_proba(X_test)
auc = roc_auc_score(y_test, y_scores[:, 1])
print(f'AUC: {auc}')


In [None]:
from azure.ai.ml import command

# Command job: run the training script from ./src on the named compute
job = command(
    experiment_name="diabetes-training",
    display_name="diabetes-pythonv2-train",
    compute="compute-instance-sdk",
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    command="python diabetes-training.py",
    code="./src",
)

# Submit the job and print its Studio URL
returned_job = ml_client.create_or_update(job)
aml_url = returned_job.studio_url
print(f"Monitor your job at {aml_url}")

### Creating data asset from local folder

In [None]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import command


# Local CSV file to register as a single-file data asset
data_asset_path = "./src/diabetes.csv"

data = Data(
    name="diabetes-local",
    type=AssetTypes.URI_FILE,
    path=data_asset_path,
)

ml_client.data.create_or_update(data)

### Creating ML Table data asset from local folder

In [None]:
from azure.ai.ml.entities import Data
from azure.ai.ml.constants import AssetTypes

# Local folder to register as an MLTable data asset
data_asset_path = "./src/ml-table"

data = Data(
    name="diabetes-table",
    type=AssetTypes.MLTABLE,
    path=data_asset_path,
)

ml_client.data.create_or_update(data)

### Using data in job command

In [None]:
import os

# make sure the directory that holds the script files exists
script_folder = 'src'
os.makedirs(name=script_folder, exist_ok=True)
print(f'{script_folder} folder created')

In [None]:
%%writefile $script_folder/move-data.py
# import libraries
import argparse
import pandas as pd
import numpy as np
from pathlib import Path

def main(args):
    """Copy the input CSV into the output location as ``diabetes.csv``.

    Args:
        args: Parsed command-line arguments exposing ``input_data`` (path of
            the CSV to read) and ``output_datastore`` (folder to write into).
    """
    # read data
    df = get_data(args.input_data)

    # make sure the destination folder exists before writing into it
    output_dir = Path(args.output_datastore)
    output_dir.mkdir(parents=True, exist_ok=True)

    # to_csv returns None when given a path, so its result is not kept
    df.to_csv(output_dir / "diabetes.csv", index=False)

# function that reads the data
def get_data(path):
    """Read the CSV at ``path`` into a DataFrame, print its row count and return it."""
    frame = pd.read_csv(path)

    # report how many rows were loaded
    print(f'Analyzing {len(frame)} rows of data')

    return frame

def parse_args():
    """Parse the script's command-line arguments.

    Both ``--input_data`` and ``--output_datastore`` are required, so a missing
    flag is reported by argparse (usage message, exit code 2) instead of
    surfacing later as a ``None`` path.

    Returns:
        argparse.Namespace with string attributes ``input_data`` and
        ``output_datastore``.
    """
    # setup arg parser
    parser = argparse.ArgumentParser(
        description="Copy an input CSV file into an output folder."
    )

    # add arguments
    parser.add_argument("--input_data", dest='input_data',
                        type=str, required=True,
                        help="path of the CSV file to read")
    parser.add_argument("--output_datastore", dest='output_datastore',
                        type=str, required=True,
                        help="folder the CSV file is written to")

    # parse and return args
    return parser.parse_args()

# script entry point
if __name__ == "__main__":
    separator = "*" * 60

    # opening spacer and separator line in the logs
    print("\n\n")
    print(separator)

    # parse the command line and run the main function
    cli_args = parse_args()
    main(cli_args)

    # closing separator line and spacer in the logs
    print(separator)
    print("\n\n")

In [None]:
from azure.ai.ml import Input, Output
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import command

# Input: version 1 of the registered "diabetes-local" file asset
my_job_inputs = dict(
    local_data=Input(path="azureml:diabetes-local:1", type=AssetTypes.URI_FILE),
)

# Output: a folder under the datastore's "diabetes-data" path
my_job_outputs = dict(
    datastore_data=Output(
        path=f"azureml://datastores/{datastore_name}/paths/diabetes-data",
        type=AssetTypes.URI_FOLDER,
    ),
)

# Command job: pass the input and output to move-data.py as CLI arguments
job = command(
    experiment_name="move-diabetes-data",
    display_name="move-diabetes-data",
    compute="compute-instance-sdk",
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    outputs=my_job_outputs,
    inputs=my_job_inputs,
    command="python move-data.py --input_data ${{inputs.local_data}} --output_datastore ${{outputs.datastore_data}}",
    code="./src",
)

# Submit the job and print its Studio URL
returned_job = ml_client.create_or_update(job)
aml_url = returned_job.studio_url
print(f"Monitor your job at {aml_url}")

### List Azure ML environments

In [None]:
# Print each listed environment's name with its latest version
envs = ml_client.environments.list()
for listed_env in envs:
    print(f"{listed_env.name} - Version: {listed_env.latest_version}")

In [None]:
# Fetch version "38" of one environment by name and print it
env = ml_client.environments.get(
    version="38",
    name="AzureML-pytorch-1.10-ubuntu18.04-py38-cuda11-gpu",
)
print(env)

### Create a custom environment with conda dependencies

In [None]:
%%writefile src/conda-env.yml
# Conda environment specification; the "diabetes-training-env" Environment
# defined in this notebook passes this file as its conda_file.
name: basic-env-cpu
channels:
  - conda-forge
dependencies:
  # Only the Python version is constrained; the packages below carry no
  # version constraint.
  - python=3.11
  - scikit-learn
  - pandas
  - numpy
  - matplotlib

In [None]:
from azure.ai.ml.entities import Environment

# Custom environment: base Docker image plus the conda spec written above
env = Environment(
    conda_file="./src/conda-env.yml",
    image="mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04",
    name="diabetes-training-env",
)

ml_client.environments.create_or_update(env)

In [None]:
# Run a script with the new environment
from azure.ai.ml import command

# Command job: same training script, run in the custom environment
job = command(
    experiment_name="diabetes-training-with-custom-env",
    display_name="diabetes-pythonv2-train",
    compute="compute-instance-sdk",
    environment="diabetes-training-env@latest",
    command="python diabetes-training.py",
    code="./src",
)

# Submit the job and print its Studio URL
returned_job = ml_client.create_or_update(job)
aml_url = returned_job.studio_url
print(f"Monitor your job at {aml_url}")

## Automated ML

### Prepare data

In [None]:
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml import Input

# job input referencing version 1 of the "diabetes-table" MLTable data asset
training_data = Input(
    path="azureml:diabetes-table:1",
    type=AssetTypes.MLTABLE,
)

### Run AutoML experiment

In [30]:
from azure.ai.ml import automl

# Configure the AutoML classification job.
# Fix: the original was missing the comma after display_name, which made the
# whole cell a SyntaxError.
classification_job = automl.classification(
    training_data=training_data,
    target_column_name="Diabetic",
    compute="compute-instance-sdk",
    primary_metric="accuracy",
    experiment_name="diabetes-auto-ml",
    display_name="diabetes-auto-ml",
    n_cross_validations=5,
    enable_model_explainability=True,
)

# set the limits (optional)
classification_job.set_limits(
    timeout_minutes=60,
    trial_timeout_minutes=20,
    max_trials=5,
    enable_early_termination=True,
)

# set the training properties (optional)
classification_job.set_training(
    blocked_training_algorithms=["LogisticRegression"],
    enable_onnx_compatible_models=True,
)

# Featurization is not configured here; call
# classification_job.set_featurization(...) to customise it.

# submit the AutoML job to the backend
returned_job = ml_client.jobs.create_or_update(classification_job)

# print the Studio URL of the submitted job
aml_url = returned_job.studio_url
print("Monitor your job at", aml_url)

Monitor your job at https://ml.azure.com/runs/loving_fig_3y7d50k8b4?wsid=/subscriptions/1eba97e5-2de8-4817-b3d4-dc76880fb329/resourcegroups/DP-100-Certification/workspaces/dp100-certification-sdk&tid=60dd1145-8a3f-4e60-86a0-3db7d9a8b09b
