# Creating Pipeline

In [100]:
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file
ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

Ready to use Azure ML 1.26.0 to work with precigout


In [101]:
from azureml.core import Dataset

default_ds = ws.get_default_datastore()

if 'House_pricing' not in ws.datasets:
    print('Dataset is not registered')
else:
    print('Dataset already registered.')

Dataset already registered.


In [102]:
import os
# Create a folder for the pipeline step files
experiment_folder = 'housePrice_pipeline'
os.makedirs(experiment_folder, exist_ok=True)

print(experiment_folder)

housePrice_pipeline


In [103]:
%%writefile $experiment_folder/prep_housePrice.py
# Import libraries
import os
import argparse
import numpy as np
import pandas as pd
from azureml.core import Run
from sklearn.preprocessing import MinMaxScaler

# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument("--input-data", type=str, dest='raw_dataset_id', help='raw dataset')
parser.add_argument('--prepped-data', type=str, dest='prepped_data', default='prepped_data', help='Folder for results')
args = parser.parse_args()
save_folder = args.prepped_data

# Get the experiment run context
run = Run.get_context()

# load the data (passed as an input dataset)
print("Loading Data...")
housePrice = run.input_datasets['raw_data'].to_pandas_dataframe()

# remove nulls and creating dummies
housePrice = housePrice.replace('NA',np.nan)
housePrice = housePrice.drop(columns=["MiscFeature","Fence","PoolQC","FireplaceQu","Alley","LotFrontage"])
housePrice = housePrice.dropna(axis=0)

dataprep = pd.get_dummies(housePrice, columns=['MSSubClass','Neighborhood',"MSZoning","Street","LotShape","LandContour",'Utilities','LotConfig','LandSlope','Condition1','Condition2','BldgType','HouseStyle','RoofStyle','RoofMatl','Exterior1st','Exterior2nd','MasVnrType','ExterQual','ExterCond','Foundation','BsmtQual','BsmtCond','BsmtExposure','BsmtFinType1','BsmtFinType2','Heating','HeatingQC','CentralAir','Electrical','KitchenQual','Functional','GarageType','GarageFinish','GarageQual','GarageCond','PavedDrive' ,'SaleType','SaleCondition'])

# Save the prepped data
print("Saving Data...")
os.makedirs(save_folder, exist_ok=True)
save_path = os.path.join(save_folder,'dataHP.csv')
dataprep.to_csv(save_path, index=False, header=True)

# End the run
run.complete()

Overwriting housePrice_pipeline/prep_housePrice.py


In [104]:
%%writefile $experiment_folder/train_housePrice.py
# Import libraries
from azureml.core import Run, Model
import argparse
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_auc_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt

# Get the experiment run context
run = Run.get_context()

# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument("--training-folder", type=str, dest='training_folder', help='training data folder')
args = parser.parse_args()
training_folder = args.training_folder

# Hyperparameters
parser.add_argument('--learning_rate', type=float, dest='learning_rate', default=0.1, help='learning rate')
parser.add_argument('--n_estimators', type=int, dest='n_estimators', default=100, help='number of estimators')

# Add arguments to args collection
args = parser.parse_args()

# Log Hyperparameter values
run.log('learning_rate',  np.float(args.learning_rate))
run.log('n_estimators',  np.int(args.n_estimators))

# load the prepared data file in the training folder
print("Loading Data...")
file_path = os.path.join(training_folder,'dataHP.csv')
housePrice = pd.read_csv(file_path)

# Separate features and labels 
X, y = housePrice[['Id', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt',
       'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
       'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF',
       'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd',
       'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea',
       'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch',
       'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold',
       'SalePrice', 'MSSubClass_20', 'MSSubClass_30', 'MSSubClass_40',
       'MSSubClass_45', 'MSSubClass_50', 'MSSubClass_60', 'MSSubClass_70',
       'MSSubClass_75', 'MSSubClass_80', 'MSSubClass_85', 'MSSubClass_90',
       'MSSubClass_120', 'MSSubClass_160', 'MSSubClass_180',
       'MSSubClass_190', 'Neighborhood_Blmngtn', 'Neighborhood_Blueste',
       'Neighborhood_BrDale', 'Neighborhood_BrkSide',
       'Neighborhood_ClearCr', 'Neighborhood_CollgCr',
       'Neighborhood_Crawfor', 'Neighborhood_Edwards',
       'Neighborhood_Gilbert', 'Neighborhood_IDOTRR',
       'Neighborhood_MeadowV', 'Neighborhood_Mitchel',
       'Neighborhood_NAmes', 'Neighborhood_NPkVill',
       'Neighborhood_NWAmes', 'Neighborhood_NoRidge',
       'Neighborhood_NridgHt', 'Neighborhood_OldTown',
       'Neighborhood_SWISU', 'Neighborhood_Sawyer',
       'Neighborhood_SawyerW', 'Neighborhood_Somerst',
       'Neighborhood_StoneBr', 'Neighborhood_Timber',
       'Neighborhood_Veenker', 'MSZoning_C (all)', 'MSZoning_FV',
       'MSZoning_RH', 'MSZoning_RL', 'MSZoning_RM', 'Street_Grvl',
       'Street_Pave', 'LotShape_IR1', 'LotShape_IR2', 'LotShape_IR3',
       'LotShape_Reg', 'LandContour_Bnk', 'LandContour_HLS',
       'LandContour_Low', 'LandContour_Lvl', 'Utilities_AllPub',
       'Utilities_NoSeWa', 'LotConfig_Corner', 'LotConfig_CulDSac',
       'LotConfig_FR2', 'LotConfig_FR3', 'LotConfig_Inside',
       'LandSlope_Gtl', 'LandSlope_Mod', 'LandSlope_Sev',
       'Condition1_Artery', 'Condition1_Feedr', 'Condition1_Norm',
       'Condition1_PosA', 'Condition1_PosN', 'Condition1_RRAe',
       'Condition1_RRAn', 'Condition1_RRNe', 'Condition1_RRNn',
       'Condition2_Artery', 'Condition2_Feedr', 'Condition2_Norm',
       'Condition2_PosA', 'Condition2_PosN', 'Condition2_RRAe',
       'Condition2_RRAn', 'Condition2_RRNn', 'BldgType_1Fam',
       'BldgType_2fmCon', 'BldgType_Duplex', 'BldgType_Twnhs',
       'BldgType_TwnhsE', 'HouseStyle_1.5Fin', 'HouseStyle_1.5Unf',
       'HouseStyle_1Story', 'HouseStyle_2.5Fin', 'HouseStyle_2.5Unf',
       'HouseStyle_2Story', 'HouseStyle_SFoyer', 'HouseStyle_SLvl',
       'RoofStyle_Flat', 'RoofStyle_Gable', 'RoofStyle_Gambrel',
       'RoofStyle_Hip', 'RoofStyle_Mansard', 'RoofStyle_Shed',
       'RoofMatl_ClyTile', 'RoofMatl_CompShg', 'RoofMatl_Membran',
       'RoofMatl_Metal', 'RoofMatl_Roll', 'RoofMatl_Tar&Grv',
       'RoofMatl_WdShake', 'RoofMatl_WdShngl', 'Exterior1st_AsbShng',
       'Exterior1st_BrkComm', 'Exterior1st_BrkFace', 'Exterior1st_CBlock',
       'Exterior1st_CemntBd', 'Exterior1st_HdBoard',
       'Exterior1st_ImStucc', 'Exterior1st_MetalSd',
       'Exterior1st_Plywood', 'Exterior1st_Stone', 'Exterior1st_Stucco',
       'Exterior1st_VinylSd', 'Exterior1st_Wd Sdng',
       'Exterior1st_WdShing', 'Exterior2nd_AsbShng',
       'Exterior2nd_AsphShn', 'Exterior2nd_Brk Cmn',
       'Exterior2nd_BrkFace', 'Exterior2nd_CBlock', 'Exterior2nd_CmentBd',
       'Exterior2nd_HdBoard', 'Exterior2nd_ImStucc',
       'Exterior2nd_MetalSd', 'Exterior2nd_Other', 'Exterior2nd_Plywood',
       'Exterior2nd_Stone', 'Exterior2nd_Stucco', 'Exterior2nd_VinylSd',
       'Exterior2nd_Wd Sdng', 'Exterior2nd_Wd Shng', 'MasVnrType_BrkCmn',
       'MasVnrType_BrkFace', 'MasVnrType_None', 'MasVnrType_Stone',
       'ExterQual_Ex', 'ExterQual_Fa', 'ExterQual_Gd', 'ExterQual_TA',
       'ExterCond_Ex', 'ExterCond_Fa', 'ExterCond_Gd', 'ExterCond_TA',
       'Foundation_BrkTil', 'Foundation_CBlock', 'Foundation_PConc',
       'Foundation_Stone', 'Foundation_Wood', 'BsmtQual_Ex',
       'BsmtQual_Fa', 'BsmtQual_Gd', 'BsmtQual_TA', 'BsmtCond_Fa',
       'BsmtCond_Gd', 'BsmtCond_Po', 'BsmtCond_TA', 'BsmtExposure_Av',
       'BsmtExposure_Gd', 'BsmtExposure_Mn', 'BsmtExposure_No',
       'BsmtFinType1_ALQ', 'BsmtFinType1_BLQ', 'BsmtFinType1_GLQ',
       'BsmtFinType1_LwQ', 'BsmtFinType1_Rec', 'BsmtFinType1_Unf',
       'BsmtFinType2_ALQ', 'BsmtFinType2_BLQ', 'BsmtFinType2_GLQ',
       'BsmtFinType2_LwQ', 'BsmtFinType2_Rec', 'BsmtFinType2_Unf',
       'Heating_GasA', 'Heating_GasW', 'Heating_Grav', 'Heating_OthW',
       'HeatingQC_Ex', 'HeatingQC_Fa', 'HeatingQC_Gd', 'HeatingQC_Po',
       'HeatingQC_TA', 'Electrical_FuseA',
       'Electrical_FuseF', 'Electrical_FuseP', 'Electrical_Mix',
       'Electrical_SBrkr', 'KitchenQual_Ex', 'KitchenQual_Fa',
       'KitchenQual_Gd', 'KitchenQual_TA', 'Functional_Maj1',
       'Functional_Maj2', 'Functional_Min1', 'Functional_Min2',
       'Functional_Mod', 'Functional_Sev', 'Functional_Typ',
       'GarageType_2Types', 'GarageType_Attchd', 'GarageType_Basment',
       'GarageType_BuiltIn', 'GarageType_CarPort', 'GarageType_Detchd',
       'GarageFinish_Fin', 'GarageFinish_RFn', 'GarageFinish_Unf',
       'GarageQual_Ex', 'GarageQual_Fa', 'GarageQual_Gd', 'GarageQual_Po',
       'GarageQual_TA', 'GarageCond_Ex', 'GarageCond_Fa', 'GarageCond_Gd',
       'GarageCond_Po', 'GarageCond_TA', 'PavedDrive_N', 'PavedDrive_P',
       'PavedDrive_Y', 'SaleType_COD', 'SaleType_CWD', 'SaleType_Con',
       'SaleType_ConLD', 'SaleType_ConLI', 'SaleType_ConLw',
       'SaleType_New', 'SaleType_Oth', 'SaleType_WD',
       'SaleCondition_Abnorml', 'SaleCondition_AdjLand',
       'SaleCondition_Alloca', 'SaleCondition_Family',
       'SaleCondition_Normal', 'SaleCondition_Partial']].values, housePrice['SalePrice'].values

# Split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)

# Train a Gradient Boosting classification model
print('Training a gradient boosting classification model...')
model = DecisionTreeClassifier().fit(X_train, y_train)

# calculate accuracy
y_hat = model.predict(X_test)
acc = np.average(y_hat == y_test)
print('Accuracy:', acc)
run.log('Accuracy', np.float(acc))



# Save the trained model in the outputs folder
print("Saving model...")
os.makedirs('outputs', exist_ok=True)
model_file = os.path.join('outputs', 'housePrice_model.pkl')
joblib.dump(value=model, filename=model_file)

# Register the model
print('Registering model...')
Model.register(workspace=run.experiment.workspace,
               model_path = model_file,
               model_name = 'housePrice_model',
               tags={'Training context':'Pipeline'},
               properties={ 'Accuracy': np.float(acc)})


run.complete()

Overwriting housePrice_pipeline/train_housePrice.py


In [105]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "PreCC"

try:
    # Check for existing compute target
    pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # If it doesn't already exist, create it
    try:
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', max_nodes=2)
        pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        pipeline_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        print(ex)
    

Found existing cluster, use it.


In [106]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration

# Create a Python environment for the experiment
housePrice_env = Environment("housePrice-pipeline-env")
housePrice_env.python.user_managed_dependencies = False # Let Azure ML manage dependencies
housePrice_env.docker.enabled = True # Use a docker container

# Create a set of package dependencies
housePrice_packages = CondaDependencies.create(conda_packages=['scikit-learn','ipykernel','matplotlib','pandas','pip'],
                                             pip_packages=['azureml-defaults','azureml-dataprep[pandas]','pyarrow'])

# Add the dependencies to the environment
housePrice_env.python.conda_dependencies = housePrice_packages

# Register the environment 
housePrice_env.register(workspace=ws)
registered_env = Environment.get(ws, 'housePrice-pipeline-env')

# Create a new runconfig object for the pipeline
pipeline_run_config = RunConfiguration()

# Use the compute you created above. 
pipeline_run_config.target = pipeline_cluster

# Assign the environment to the run configuration
pipeline_run_config.environment = registered_env

print ("Run configuration created.")

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


Run configuration created.


In [107]:
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep

# Get the training dataset
housePrice_ds = ws.datasets.get("House_pricing")

# Create a PipelineData (temporary Data Reference) for the model folder
prepped_data_folder = PipelineData("prepped_data_folder", datastore=ws.get_default_datastore())

# Step 1, Run the data prep script
prep_step = PythonScriptStep(name = "Prepare Data",
                                source_directory = experiment_folder,
                                script_name = "prep_housePrice.py",
                                arguments = ['--input-data', housePrice_ds.as_named_input('raw_data'),
                                             '--prepped-data', prepped_data_folder],
                                outputs=[prepped_data_folder],
                                compute_target = pipeline_cluster,
                                runconfig = pipeline_run_config,
                                allow_reuse = True)

# Step 2, run the training script
train_step = PythonScriptStep(name = "Train and Register Model",
                                source_directory = experiment_folder,
                                script_name = "train_housePrice.py",
                                arguments = ['--training-folder', prepped_data_folder],
                                inputs=[prepped_data_folder],
                                compute_target = pipeline_cluster,
                                runconfig = pipeline_run_config,
                                allow_reuse = True)

print("Pipeline steps defined")

Pipeline steps defined


In [108]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails

# Construct the pipeline
pipeline_steps = [prep_step, train_step]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
print("Pipeline is built.")

# Create an experiment and run the pipeline
experiment = Experiment(workspace=ws, name = 'gprecigout')
pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
print("Pipeline submitted for execution.")
RunDetails(pipeline_run).show()
pipeline_run.wait_for_completion(show_output=True)

Pipeline is built.
Created step Prepare Data [4d50d18b][89ccc437-444d-4e71-a7c3-1bcbdc986c8b], (This step will run and generate new outputs)
Created step Train and Register Model [c0c6958a][f4c6e5b6-69f6-4387-ac3f-3060f2911f64], (This step will run and generate new outputs)
Submitted PipelineRun 0f166599-d9af-4d22-9412-393ebf77e4c8
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/0f166599-d9af-4d22-9412-393ebf77e4c8?wsid=/subscriptions/caaae8aa-a0a7-44db-a52c-2690c23ed8c4/resourcegroups/st2aic-bd1-sg3/workspaces/precigout&tid=413600cf-bd4e-4c7c-8a61-69e73cddf731
Pipeline submitted for execution.


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: 0f166599-d9af-4d22-9412-393ebf77e4c8
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/0f166599-d9af-4d22-9412-393ebf77e4c8?wsid=/subscriptions/caaae8aa-a0a7-44db-a52c-2690c23ed8c4/resourcegroups/st2aic-bd1-sg3/workspaces/precigout&tid=413600cf-bd4e-4c7c-8a61-69e73cddf731
PipelineRun Status: Running


StepRunId: 8f9b6d26-92ad-4068-bc9e-9fd64fc648a1
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/8f9b6d26-92ad-4068-bc9e-9fd64fc648a1?wsid=/subscriptions/caaae8aa-a0a7-44db-a52c-2690c23ed8c4/resourcegroups/st2aic-bd1-sg3/workspaces/precigout&tid=413600cf-bd4e-4c7c-8a61-69e73cddf731
StepRun( Prepare Data ) Status: NotStarted
StepRun( Prepare Data ) Status: Running

Streaming azureml-logs/55_azureml-execution-tvmps_ff5bf5181dd24b1220c68edac54932efaf3190c103a55e7587d1b4a474f50068_d.txt
2021-04-25T21:06:06Z Successfully mounted a/an Blobfuse File System at /mnt/batch/tasks/shared/LS_root/jobs/precigout/azureml/8f9b6d26-92ad-4068-bc9e-9fd64fc648a1/

'Finished'

In [109]:
for run in pipeline_run.get_children():
    print(run.name, ':')
    metrics = run.get_metrics()
    for metric_name in metrics:
        print('\t',metric_name, ":", metrics[metric_name])

Train and Register Model :
	 learning_rate : 0.1
	 n_estimators : 100
	 Accuracy : 0.42039800995024873
Prepare Data :


In [110]:
from azureml.core import Model

for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name in model.tags:
        tag = model.tags[tag_name]
        print ('\t',tag_name, ':', tag)
    for prop_name in model.properties:
        prop = model.properties[prop_name]
        print ('\t',prop_name, ':', prop)
    print('\n')

housePrice_model version: 1
	 Training context : Pipeline
	 Accuracy : 0.42039800995024873




In [111]:
# Publish the pipeline from the run
published_pipeline = pipeline_run.publish_pipeline(
    name="housePrice-training-pipeline", description="Trains housePrice model", version="1.0")

published_pipeline

Name,Id,Status,Endpoint
housePrice-training-pipeline,0eccd662-f0f3-4aab-ba06-6d8eb65a1c74,Active,REST Endpoint


In [112]:
rest_endpoint = published_pipeline.endpoint
print(rest_endpoint)

https://westus.api.azureml.ms/pipelines/v1.0/subscriptions/caaae8aa-a0a7-44db-a52c-2690c23ed8c4/resourceGroups/st2aic-bd1-sg3/providers/Microsoft.MachineLearningServices/workspaces/precigout/PipelineRuns/PipelineSubmit/0eccd662-f0f3-4aab-ba06-6d8eb65a1c74


In [113]:
from azureml.core.authentication import InteractiveLoginAuthentication

interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()
print("Authentication header ready.")

Authentication header ready.


In [114]:
import requests

experiment_name = 'mslearn-housePrice-pipeline'

rest_endpoint = published_pipeline.endpoint
response = requests.post(rest_endpoint, 
                         headers=auth_header, 
                         json={"ExperimentName": experiment_name})
run_id = response.json()["Id"]
run_id

'7763b2ea-7253-40a3-92de-dfa0a1df25fd'

In [115]:
from azureml.pipeline.core.run import PipelineRun

published_pipeline_run = PipelineRun(ws.experiments[experiment_name], run_id)
published_pipeline_run.wait_for_completion(show_output=True)

PipelineRunId: 7763b2ea-7253-40a3-92de-dfa0a1df25fd
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/7763b2ea-7253-40a3-92de-dfa0a1df25fd?wsid=/subscriptions/caaae8aa-a0a7-44db-a52c-2690c23ed8c4/resourcegroups/st2aic-bd1-sg3/workspaces/precigout&tid=413600cf-bd4e-4c7c-8a61-69e73cddf731
PipelineRun Status: NotStarted
PipelineRun Status: Running

PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': '7763b2ea-7253-40a3-92de-dfa0a1df25fd', 'status': 'Completed', 'startTimeUtc': '2021-04-25T21:07:47.651504Z', 'endTimeUtc': '2021-04-25T21:07:49.599947Z', 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'Unavailable', 'runType': 'HTTP', 'azureml.parameters': '{}', 'azureml.pipelineid': '0eccd662-f0f3-4aab-ba06-6d8eb65a1c74'}, 'inputDatasets': [], 'outputDatasets': [], 'logFiles': {'logs/azureml/executionlogs.txt': 'https://precigout3984143882.blob.core.windows.net/azureml/ExperimentRun/dcid.7763b2ea-7253-40a3-92de-dfa0a1df25fd/logs/azurem

'Finished'

In [116]:
pipeline_experiment = ws.experiments.get('mslearn-housePrice-pipeline')
latest_run = list(pipeline_experiment.get_runs())[0]

latest_run.get_details()

{'runId': '7763b2ea-7253-40a3-92de-dfa0a1df25fd',
 'status': 'Completed',
 'startTimeUtc': '2021-04-25T21:07:47.651504Z',
 'endTimeUtc': '2021-04-25T21:07:49.599947Z',
 'properties': {'azureml.runsource': 'azureml.PipelineRun',
  'runSource': 'Unavailable',
  'runType': 'HTTP',
  'azureml.parameters': '{}',
  'azureml.pipelineid': '0eccd662-f0f3-4aab-ba06-6d8eb65a1c74'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'logs/azureml/executionlogs.txt': 'https://precigout3984143882.blob.core.windows.net/azureml/ExperimentRun/dcid.7763b2ea-7253-40a3-92de-dfa0a1df25fd/logs/azureml/executionlogs.txt?sv=2019-02-02&sr=b&sig=eO80IyxlePNDZW6kyYb0kFpCjs00wA0LC%2BFClznrnrg%3D&st=2021-04-25T20%3A57%3A50Z&se=2021-04-26T05%3A07%3A50Z&sp=r',
  'logs/azureml/stderrlogs.txt': 'https://precigout3984143882.blob.core.windows.net/azureml/ExperimentRun/dcid.7763b2ea-7253-40a3-92de-dfa0a1df25fd/logs/azureml/stderrlogs.txt?sv=2019-02-02&sr=b&sig=YcQYKsVJvWOsr6zOe4OBavBZkEb3yozxEX35fkPoG70%3D&st=202

In [117]:
# Tuning hyper parameters

In [118]:
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.train.hyperdrive import GridParameterSampling, HyperDriveConfig, PrimaryMetricGoal, choice
from azureml.widgets import RunDetails

# Create a Python environment for the experiment
sklearn_env = Environment("sklearn-env")

# Ensure the required packages are installed (we need scikit-learn, Azure ML defaults, and Azure ML dataprep)
packages = CondaDependencies.create(conda_packages=['scikit-learn','pip'],
                                    pip_packages=['azureml-defaults','azureml-dataprep[pandas]'])
sklearn_env.python.conda_dependencies = packages

# Get the training dataset
housePrice_ds = ws.datasets.get("House_pricing")

# Create a script config
script_config = ScriptRunConfig(source_directory=experiment_folder,
                                script='housePrice_training.py',
                                # Add non-hyperparameter arguments -in this case, the training dataset
                                arguments = ['--input-data', housePrice_td.as_named_input('training_data')],
                                environment=sklearn_env,
                                compute_target = pipeline_cluster)

# Sample a range of parameter values
params = GridParameterSampling(
    {
        # Hyperdrive will try 6 combinations, adding these as script arguments
        '--learning_rate': choice(0.01, 0.1, 1.0),
        '--n_estimators' : choice(10, 100)
    }
)

# Configure hyperdrive settings
hyperdrive = HyperDriveConfig(run_config=script_config, 
                          hyperparameter_sampling=params, 
                          policy=None, # No early stopping policy
                          primary_metric_name='AUC', # Find the highest AUC metric
                          primary_metric_goal=PrimaryMetricGoal.MAXIMIZE, 
                          max_total_runs=6, # Restict the experiment to 6 iterations
                          max_concurrent_runs=2) # Run up to 2 iterations in parallel

# Run the experiment
experiment = Experiment(workspace=ws, name='mslearn-housePrice-hyperdrive')
run = experiment.submit(config=hyperdrive)

# Show the status in the notebook as the experiment runs
RunDetails(run).show()
run.wait_for_completion()

NameError: name 'diabetes_ds' is not defined

In [None]:
# Print all child runs, sorted by the primary metric
for child_run in run.get_children_sorted_by_primary_metric():
    print(child_run)

# Get the best run, and its metrics and arguments
best_run = run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
script_arguments = best_run.get_details() ['runDefinition']['arguments']
print('Best Run Id: ', best_run.id)
print(' -AUC:', best_run_metrics['AUC'])
print(' -Accuracy:', best_run_metrics['Accuracy'])
print(' -Arguments:',script_arguments)