In [None]:
# ------------------------------------------------------------------------------
# This is generated from https://ml.azure.com/visualinterface/authoring/Normal/a2d91bee-b325-48c9-acf7-77e79a400e55?wsid=/subscriptions/af3877c2-18a2-4ce2-b67c-a8e21e968128/resourcegroups/amldemo/workspaces/amldemo-ml-ws&tid=72f988bf-86f1-41af-91ab-2d7cd011db47
# To run this code, install the SDK with this command:
# !pip install "azure-ml-component[notebooks]" --extra-index-url https://azuremlsdktestpypi.azureedge.net/modulesdkpreview --upgrade
# For a more detailed guide to setting up your environment, see: https://github.com/Azure/DesignerPrivatePreviewFeatures/blob/master/azure-ml-components/samples/setup-environment.ipynb
# ------------------------------------------------------------------------------

In [None]:
from azureml.core import Dataset
from azureml.core import Datastore
from azureml.core import Workspace
from azure.ml.component import Pipeline, Component, dsl

In [None]:
# configure aml workspace
# `ws` is the workspace handle used by the following cells to load components and datasets.
# NOTE(review): from_config() presumably reads a local workspace config file — confirm one is present before running.
ws = Workspace.from_config()

In [None]:
# get components
# Load the designer components used by the pipeline in a single batch call.
# The i-th name on the left is bound to the component loaded for the i-th selector,
# so the two lists below must stay in the same order.
(
    azureml_split_data_func,
    azureml_evaluate_model_func,
    azureml_select_columns_in_dataset_func,
    azureml_train_model_func,
    azureml_clean_missing_data_func,
    azureml_decision_forest_regression_func,
    azureml_linear_regression_func,
    azureml_tune_model_hyperparameters_func,
    azureml_boosted_decision_tree_regression_func,
    azureml_score_model_func,
) = Component.batch_load(
    ws,
    selectors=[
        'azureml://Split Data',
        'azureml://Evaluate Model',
        'azureml://Select Columns in Dataset',
        'azureml://Train Model',
        'azureml://Clean Missing Data',
        'azureml://Decision Forest Regression',
        'azureml://Linear Regression',
        'azureml://Tune Model Hyperparameters',
        'azureml://Boosted Decision Tree Regression',
        'azureml://Score Model',
    ],
)

In [None]:
# get dataset
# Name under which the automobile price data is registered in the workspace.
AUTOMOBILE_DATASET_NAME = 'automobile_price_data_raw'

# Read `ws.datasets` once so the membership test and the lookup use the same
# object instead of querying the workspace twice.
registered_datasets = ws.datasets
if AUTOMOBILE_DATASET_NAME in registered_datasets:
    automobile_price_data_raw = registered_datasets[AUTOMOBILE_DATASET_NAME]
else:
    # Not registered yet: create a file dataset from the 'azureml_globaldatasets'
    # datastore and register it. register() returns the registered dataset, so
    # no second lookup through `ws.datasets` is needed.
    datastore = Datastore.get(ws, 'azureml_globaldatasets')
    dataset = Dataset.File.from_files((datastore, 'GenericCSV/Automobile_price_data_(Raw)'))
    automobile_price_data_raw = dataset.register(
        workspace=ws,
        name=AUTOMOBILE_DATASET_NAME,
        description='Clean missing data module required. Prices of various automobiles against make, model and technical specifications')

In [None]:
# define pipeline
# Builds the 'AML Demo Car Price' graph from the components loaded earlier in the notebook:
#   select columns -> clean missing data -> split rows
#   -> three regression branches (linear regression, boosted decision tree, tuned decision forest)
#   -> each branch scored on the second split output -> two pairwise evaluations.
# Every node is given the same run settings: node_count=1 and an identical docker configuration.
# The function takes no parameters and has no return statement.
# NOTE(review): explanations are written as `#` comments rather than a docstring so that
# nothing the @dsl.pipeline decorator might introspect is changed.
@dsl.pipeline(name='AML Demo Car Price', description='Pipeline created on 20211118', default_compute_target='amldemo-cluster', default_datastore='workspaceblobstore')
def generated_pipeline():
    # Keep only the 11 listed columns of the raw dataset; 'price' is the label column used by the training nodes below.
    azureml_select_columns_in_dataset_0 = azureml_select_columns_in_dataset_func(
        dataset=automobile_price_data_raw,
        select_columns='[{"KeepInputDataOrder":true,"ColumnNames":["symboling","make","num-of-doors","body-style","engine-type","engine-size","horsepower","city-mpg","highway-mpg","price","num-of-cylinders"]}]')
    azureml_select_columns_in_dataset_0.runsettings.resource_layout.configure(node_count=1)
    azureml_select_columns_in_dataset_0.runsettings.docker_configuration.configure(use_docker=True, shared_volumes=True, shm_size='2g', arguments='[]')
    
    # Clean all columns using the 'Custom substitution value' mode with replacement value '0'.
    azureml_clean_missing_data_0 = azureml_clean_missing_data_func(
        dataset=azureml_select_columns_in_dataset_0.outputs.results_dataset,
        columns_to_be_cleaned='["AllColumns"]',
        minimum_missing_value_ratio=0.0,
        maximum_missing_value_ratio=1.0,
        cleaning_mode='Custom substitution value',
        replacement_value='0',
        generate_missing_value_indicator_column=False,
        cols_with_all_missing_values='Remove')
    azureml_clean_missing_data_0.runsettings.resource_layout.configure(node_count=1)
    azureml_clean_missing_data_0.runsettings.docker_configuration.configure(use_docker=True, shared_volumes=True, shm_size='2g', arguments='[]')
    
    # Randomized row split (seed 123) with a 0.80 fraction of rows in the first output.
    # results_dataset1 feeds the training/tuning nodes and results_dataset2 feeds the scoring nodes.
    # NOTE(review): regular_expression / relational_expression presumably belong to other
    # splitting modes and are unused with 'Split Rows' — TODO confirm.
    azureml_split_data_0 = azureml_split_data_func(
        dataset=azureml_clean_missing_data_0.outputs.cleaned_dataset,
        splitting_mode='Split Rows',
        fraction_of_rows_in_the_first_output_dataset=0.80,
        randomized_split=True,
        random_seed=123,
        stratified_split='False',
        regular_expression='\"column name" ^start',
        relational_expression='\"column name" > 3')
    azureml_split_data_0.runsettings.resource_layout.configure(node_count=1)
    azureml_split_data_0.runsettings.docker_configuration.configure(use_docker=True, shared_volumes=True, shm_size='2g', arguments='[]')
    
    # Branch 1: untrained linear regression model ('Ordinary Least Squares', 'SingleParameter' trainer mode).
    # NOTE(review): the learning-rate/epoch arguments and the range_for_* arguments look like
    # settings for other solution methods or trainer modes — confirm they are ignored here.
    azureml_linear_regression_0 = azureml_linear_regression_func(
        solution_method='Ordinary Least Squares',
        l2_regularization_weight=0.001,
        include_intercept_term=True,
        random_number_seed=None,
        create_trainer_mode='SingleParameter',
        learning_rate=0.1,
        number_of_epochs_over_which_algorithm_iterates_through_examples=10,
        l2_regularization_term_weight=0.001,
        range_for_learning_rate='0.025; 0.05; 0.1; 0.2',
        range_for_number_of_epochs_over_which_algorithm_iterates_through_examples='1; 10; 100',
        range_for_l2_regularization_term_weight='0.001; 0.01; 0.1',
        should_input_instances_be_normalized=True,
        decrease_learning_rate_as_iterations_progress=True)
    azureml_linear_regression_0.runsettings.resource_layout.configure(node_count=1)
    azureml_linear_regression_0.runsettings.docker_configuration.configure(use_docker=True, shared_volumes=True, shm_size='2g', arguments='[]')
    
    # Train the linear regression model on the first split output with 'price' as the label column.
    azureml_train_model_0 = azureml_train_model_func(
        dataset=azureml_split_data_0.outputs.results_dataset1,
        untrained_model=azureml_linear_regression_0.outputs.untrained_model,
        label_column='[{"KeepInputDataOrder":true,"ColumnNames":["price"]}]',
        model_explanations=False)
    azureml_train_model_0.runsettings.resource_layout.configure(node_count=1)
    azureml_train_model_0.runsettings.docker_configuration.configure(use_docker=True, shared_volumes=True, shm_size='2g', arguments='[]')
    
    # Score the trained linear regression model on the second split output.
    azureml_score_model_0 = azureml_score_model_func(
        dataset=azureml_split_data_0.outputs.results_dataset2,
        trained_model=azureml_train_model_0.outputs.trained_model,
        append_score_columns_to_output=True)
    azureml_score_model_0.runsettings.resource_layout.configure(node_count=1)
    azureml_score_model_0.runsettings.docker_configuration.configure(use_docker=True, shared_volumes=True, shm_size='2g', arguments='[]')
    
    # Branch 2: untrained boosted decision tree regression model ('SingleParameter' trainer mode, seed 123).
    # NOTE(review): the range_for_* arguments presumably only apply to a parameter-range trainer mode — TODO confirm.
    azureml_boosted_decision_tree_regression_0 = azureml_boosted_decision_tree_regression_func(
        create_trainer_mode='SingleParameter',
        maximum_number_of_leaves_per_tree=20,
        minimum_number_of_training_instances_required_to_form_a_leaf=10,
        the_learning_rate=0.2,
        total_number_of_trees_constructed=100,
        random_number_seed=123,
        range_for_maximum_number_of_leaves_per_tree='2; 8; 32; 128',
        range_for_minimum_number_of_training_instances_required_to_form_a_leaf='1; 10; 50',
        range_for_learning_rate='0.025; 0.05; 0.1; 0.2; 0.4',
        range_for_total_number_of_trees_constructed='20; 100; 500')
    azureml_boosted_decision_tree_regression_0.runsettings.resource_layout.configure(node_count=1)
    azureml_boosted_decision_tree_regression_0.runsettings.docker_configuration.configure(use_docker=True, shared_volumes=True, shm_size='2g', arguments='[]')
    
    # Train the boosted decision tree model on the first split output with 'price' as the label column.
    azureml_train_model_1 = azureml_train_model_func(
        untrained_model=azureml_boosted_decision_tree_regression_0.outputs.untrained_model,
        dataset=azureml_split_data_0.outputs.results_dataset1,
        label_column='[{"KeepInputDataOrder":true,"ColumnNames":["price"]}]',
        model_explanations=False)
    azureml_train_model_1.runsettings.resource_layout.configure(node_count=1)
    azureml_train_model_1.runsettings.docker_configuration.configure(use_docker=True, shared_volumes=True, shm_size='2g', arguments='[]')
    
    # Score the trained boosted decision tree model on the second split output.
    azureml_score_model_1 = azureml_score_model_func(
        dataset=azureml_split_data_0.outputs.results_dataset2,
        trained_model=azureml_train_model_1.outputs.trained_model,
        append_score_columns_to_output=True)
    azureml_score_model_1.runsettings.resource_layout.configure(node_count=1)
    azureml_score_model_1.runsettings.docker_configuration.configure(use_docker=True, shared_volumes=True, shm_size='2g', arguments='[]')
    
    # Evaluation 1: linear regression scores (scored_dataset) vs. boosted decision tree scores (scored_dataset_to_compare).
    azureml_evaluate_model_0 = azureml_evaluate_model_func(
        scored_dataset=azureml_score_model_0.outputs.scored_dataset,
        scored_dataset_to_compare=azureml_score_model_1.outputs.scored_dataset)
    azureml_evaluate_model_0.runsettings.resource_layout.configure(node_count=1)
    azureml_evaluate_model_0.runsettings.docker_configuration.configure(use_docker=True, shared_volumes=True, shm_size='2g', arguments='[]')
    
    # Branch 3: untrained decision forest regression model; it is passed to the hyperparameter tuning node below
    # instead of a plain Train Model node.
    azureml_decision_forest_regression_0 = azureml_decision_forest_regression_func(
        create_trainer_mode='SingleParameter',
        number_of_decision_trees=8,
        maximum_depth_of_the_decision_trees=32,
        minimum_number_of_samples_per_leaf_node=1,
        resampling_method='Bagging Resampling',
        range_for_number_of_decision_trees='1; 8; 32',
        range_for_the_maximum_depth_of_the_decision_trees='1; 16; 64',
        range_for_the_minimum_number_of_samples_per_leaf_node='1; 4; 16')
    azureml_decision_forest_regression_0.runsettings.resource_layout.configure(node_count=1)
    azureml_decision_forest_regression_0.runsettings.docker_configuration.configure(use_docker=True, shared_volumes=True, shm_size='2g', arguments='[]')
    
    # Tune the decision forest on the first split output: 'Random sweep', at most 5 runs, seed 0,
    # 'price' as the label column and 'Mean absolute error' as the regression metric.
    # NOTE(review): the classification metric is presumably unused for a regression model — TODO confirm.
    azureml_tune_model_hyperparameters_0 = azureml_tune_model_hyperparameters_func(
        training_dataset=azureml_split_data_0.outputs.results_dataset1,
        untrained_model=azureml_decision_forest_regression_0.outputs.untrained_model,
        specify_parameter_sweeping_mode='Random sweep',
        maximum_number_of_runs_on_random_sweep=5,
        random_seed=0,
        name_or_numerical_index_of_the_label_column='[{"KeepInputDataOrder":true,"ColumnNames":["price"]}]',
        metric_for_measuring_performance_for_classification='Accuracy',
        metric_for_measuring_performance_for_regression='Mean absolute error')
    azureml_tune_model_hyperparameters_0.runsettings.resource_layout.configure(node_count=1)
    azureml_tune_model_hyperparameters_0.runsettings.docker_configuration.configure(use_docker=True, shared_volumes=True, shm_size='2g', arguments='[]')
    
    # Score the tuning node's trained_best_model output on the second split output.
    azureml_score_model_2 = azureml_score_model_func(
        dataset=azureml_split_data_0.outputs.results_dataset2,
        trained_model=azureml_tune_model_hyperparameters_0.outputs.trained_best_model,
        append_score_columns_to_output=True)
    azureml_score_model_2.runsettings.resource_layout.configure(node_count=1)
    azureml_score_model_2.runsettings.docker_configuration.configure(use_docker=True, shared_volumes=True, shm_size='2g', arguments='[]')
    
    # Evaluation 2: boosted decision tree scores (scored_dataset) vs. tuned decision forest scores (scored_dataset_to_compare).
    azureml_evaluate_model_1 = azureml_evaluate_model_func(
        scored_dataset_to_compare=azureml_score_model_2.outputs.scored_dataset,
        scored_dataset=azureml_score_model_1.outputs.scored_dataset)
    azureml_evaluate_model_1.runsettings.resource_layout.configure(node_count=1)
    azureml_evaluate_model_1.runsettings.docker_configuration.configure(use_docker=True, shared_volumes=True, shm_size='2g', arguments='[]')

In [None]:
# create a pipeline
# Calling the decorated function yields the object that the later cells validate and submit;
# no run is started by this cell.
pipeline = generated_pipeline()

In [None]:
# validate pipeline and visualize the graph
# Run this before submitting so that problems in the graph are reported without starting a run.
pipeline.validate()

In [None]:
# submit a pipeline run (uncomment the line below to start the run and wait for it to complete)
# pipeline.submit(experiment_name='sample-experiment-name').wait_for_completion()