In [7]:
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential
from azure.ai.ml import MLClient

# Try the default credential chain first. Requesting a management-plane token
# up front makes a misconfigured chain fail here instead of on the first
# workspace call.
try:
    credential = DefaultAzureCredential()
    credential.get_token('https://management.azure.com/.default')
except Exception:
    # Any failure above falls back to an interactive browser sign-in.
    credential = InteractiveBrowserCredential()

# Workspace details are resolved by from_config from a config file.
client = MLClient.from_config(credential=credential)

Found the config file in: /config.json


In [None]:

import pandas as pd 
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve
from pathlib import Path
from argparse import ArgumentParser
import mlflow
import matplotlib.pyplot as plt
def parser():
    """Parse the training script's command-line options.

    Recognised options:
        --input_directory (str): value later handed to ``pd.read_csv``; no default.
        --max_depth (int): defaults to 2.
        --min_samples (int): defaults to 2.
        --test_size (float): no default (``None`` when omitted).
        --random_state (int): defaults to 42.

    Returns:
        argparse.Namespace: the values parsed from ``sys.argv``.
    """
    cli = ArgumentParser()
    cli.add_argument('--input_directory', dest='input_directory', type=str)
    cli.add_argument('--max_depth', dest='max_depth', type=int, default=2)
    cli.add_argument('--min_samples', dest='min_samples', type=int, default=2)
    cli.add_argument('--test_size', dest='test_size', type=float)
    cli.add_argument('--random_state', dest='random_state', type=int, default=42)
    return cli.parse_args()

def train(args):
    """Train and evaluate a decision tree classifier, logging results to MLflow.

    Args:
        args: Parsed CLI namespace providing ``input_directory`` (passed
            directly to ``pd.read_csv``), ``max_depth``, ``min_samples``,
            ``test_size`` and ``random_state``.

    Side effects:
        * Logs the metrics ``accuracy`` and ``roc_auc`` to the active MLflow run.
        * Writes ``roc_curve.png`` to the working directory and logs it as an
          MLflow artifact.
    """
    mlflow.autolog()

    df = pd.read_csv(args.input_directory)
    X = df.drop('Outcome', axis=1)
    y = df['Outcome']

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=args.test_size, random_state=args.random_state
    )

    # Seed the tree as well as the split so that repeated runs with the same
    # arguments produce the same model.
    DT = DecisionTreeClassifier(
        max_depth=args.max_depth,
        min_samples_split=args.min_samples,
        random_state=args.random_state,
    )
    model = DT.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    y_pred_prob = model.predict_proba(X_test)[:, 1]

    # Log the hold-out metrics under explicit, stable names. A sweep that
    # optimises a metric called 'accuracy' needs the script to log exactly
    # that name; autologging alone does not use it.
    accuracy = accuracy_score(y_test, y_pred)
    mlflow.log_metric('accuracy', accuracy)
    roc_auc = roc_auc_score(y_test, y_pred_prob)
    mlflow.log_metric('roc_auc', roc_auc)

    # Draw on a dedicated figure (instead of the implicit pyplot one) so that
    # repeated calls do not stack curves on the same axes, and close it after.
    fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
    fig, ax = plt.subplots()
    ax.plot(fpr, tpr)
    ax.set_xlabel('FPR')
    ax.set_ylabel('TPR')
    ax.set_title('ROC Curve')
    fig.savefig('roc_curve.png')
    plt.close(fig)

    # Attach the plot to the run; otherwise it only exists in the working
    # directory of the process that ran the script.
    mlflow.log_artifact('roc_curve.png')

if __name__ == '__main__':
    # Script entry point: read the CLI options, then run training with them.
    args = parser()
    train(args)



 


In [12]:
# TODO: add mlflow to the YAML
from azure.ai.ml.entities import Data 
from azure.ai.ml.constants import AssetTypes
# Register the CSV at `path` as a named URI_FILE data asset in the workspace.
path = '/home/azureuser/cloudfiles/code/src/scripts/diabetes.csv'
data = Data(
    name='diabetes-dataasset-hyperparam',
    description='data_loaded',
    type=AssetTypes.URI_FILE,
    path=path,
)
client.data.create_or_update(data)

Data({'skip_validation': False, 'mltable_schema_url': None, 'referenced_uris': None, 'type': 'uri_file', 'is_anonymous': False, 'auto_increment_version': False, 'auto_delete_setting': None, 'name': 'diabetes-dataasset-hyperparam', 'description': 'data_loaded', 'tags': {}, 'properties': {}, 'print_as_yaml': True, 'id': '/subscriptions/7b1b43ca-4b64-43cf-9446-edb35a04d7d1/resourceGroups/rg01databricks/providers/Microsoft.MachineLearningServices/workspaces/ws-azureml-01/data/diabetes-dataasset-hyperparam/versions/1', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/satyakebakshi951/code', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f700ef925c0>, 'serialize': <msrest.serialization.Serializer object at 0x7f700ef923b0>, 'version': '1', 'latest_version': None, 'path': 'azureml://subscriptions/7b1b43ca-4b64-43cf-9446-edb35a04d7d1/resourcegroups/rg01databricks/workspaces/ws-azureml-01/datastores/workspaceblobstore/pa

In [19]:
from azure.ai.ml.entities import AmlCompute
# Definition of the 'amlcompute1' cluster: 1 to 2 STANDARD_D2_V2 nodes.
compute = AmlCompute(
    name='amlcompute1',
    type='amlcompute',
    size='STANDARD_D2_V2',
    tier='LowPriority',
    min_instances=1,
    max_instances=2,
)
# begin_create_or_update returns a poller for the long-running operation;
# this cell does not wait on it.
client.compute.begin_create_or_update(compute)

<azure.core.polling._poller.LROPoller at 0x7f700ef92aa0>

In [None]:
# Imported in this cell so it runs on a fresh kernel; elsewhere in the
# notebook these names are only imported by a later cell.
from azure.ai.ml import Input, command

# Example command job definition. It is only built here, not submitted.
job = command(
    code="./src",
    # Inputs are referenced with the ${{inputs.<name>}} expression syntax.
    # The data asset is bound to --training_data and the regularisation rate
    # to its own flag (previously --training_data received reg_rate through a
    # placeholder without '$', and diabetes_data was never passed).
    # NOTE(review): assumes train.py accepts --reg_rate; confirm against the script.
    command="python train.py --training_data ${{inputs.diabetes_data}} --reg_rate ${{inputs.reg_rate}}",
    inputs={
        "diabetes_data": Input(
            type=AssetTypes.URI_FILE,
            path="azureml:diabetes-data:1"
            ),
        "reg_rate": 0.01,
    },
    environment="AzureML-sklearn-0.24-ubuntu18.04-py37-cpu@latest",
    compute="aml-cluster",
    display_name="diabetes-train-mlflow",
    experiment_name="diabetes-training",
    tags={"model_type": "LogisticRegression"}
    )

In [34]:
from azure.ai.ml import command 
from azure.ai.ml import Input
# Fixed (non-swept) inputs for a single baseline run of the training script.
inputs = {
    'diabetes_data': Input(
        type=AssetTypes.URI_FILE,
        path='azureml:diabetes-dataasset-hyperparam:1',
    ),
    'max_depth': 5,
    'min_samples': 5,
    'test_size': 0.5,
}

# Command job that runs training_script.py with the inputs above.
job = command(
    display_name='non-hyperparameter-search-diabetes',
    description='hyperparameter tuning for diabetes dataset',
    code='/home/azureuser/cloudfiles/code/src/scripts/',
    command='python training_script.py --input_directory ${{inputs.diabetes_data}}  --max_depth ${{inputs.max_depth}} --min_samples ${{inputs.min_samples}} --test_size ${{inputs.test_size}} --random_state=42',
    environment='env:4',
    compute='amlcompute1',
    # Same values as `inputs`; a shallow copy keeps the job's mapping separate
    # from the notebook-level dict.
    inputs=dict(inputs),
)

# Submit the job and print the link to its run page in the studio.
submitted_job = client.create_or_update(job)
print(submitted_job.studio_url)

https://ml.azure.com/runs/sweet_hamster_qvdk29xmdn?wsid=/subscriptions/7b1b43ca-4b64-43cf-9446-edb35a04d7d1/resourcegroups/rg01databricks/workspaces/ws-azureml-01&tid=a287f42c-46eb-424f-ab40-cd784a7b423c


In [31]:
# Display the fixed max_depth value stored in the `inputs` dict.
inputs['max_depth']


5

In [42]:
from azure.ai.ml.sweep import Choice
# Re-invoke the baseline job with discrete search spaces in place of the
# fixed input values.
command_job_for_sweep = job(
    max_depth=Choice([2, 3, 4, 5]),
    min_samples=Choice([2, 3, 4, 5]),
    # NOTE(review): test_size changes the evaluation split between trials, so
    # trial metrics may not be directly comparable; confirm this is intended.
    test_size=Choice([0.2, 0.3, 0.4]),
)

# NOTE(review): primary_metric presumably has to match a metric name logged by
# the training script; verify that 'accuracy' is actually logged.
sweep_job = command_job_for_sweep.sweep(
    sampling_algorithm='bayesian',
    primary_metric='accuracy',
    goal='maximize',
    compute='amlcompute1',
)
sweep_job.experiment_name = 'sweep-diabetes-hyperparameter'
sweep_job.set_limits(max_total_trials=4, max_concurrent_trials=2, timeout=1200)

# Submit the sweep and show the link to its run page in the studio.
sweep_submit = client.create_or_update(sweep_job)
sweep_submit.studio_url


'https://ml.azure.com/runs/dynamic_match_frvb070lzh?wsid=/subscriptions/7b1b43ca-4b64-43cf-9446-edb35a04d7d1/resourcegroups/rg01databricks/workspaces/ws-azureml-01&tid=a287f42c-46eb-424f-ab40-cd784a7b423c'