In [None]:
from azure.ml import MLClient
from azure.identity import DefaultAzureCredential

In [None]:
subscription_id = "15ae9cb6-95c1-483d-a0e3-b1a1a3b06324"
resource_group = "ray"
workspace = "ray"

ml_client = MLClient(DefaultAzureCredential(), subscription_id, resource_group, workspace)

In [None]:
ml_client.jobs.

SyntaxError: invalid syntax (868553302.py, line 1)

## Command Job

In [None]:
from azure.ml import command, Input, Output, PyTorchDistribution
from azure.ml.entities import ResourceConfiguration, Environment 
prep = command(
  code='src',
  command=
    "python startDask.py "
    "--script prep-nyctaxi.py "
    "--nyc_taxi_dataset ${{inputs.nyc_taxi_dataset}} "
    "--output_folder ${{outputs.output_folder}}",
  inputs={
    'nyc_taxi_dataset': Input(
        path= 'wasbs://datasets@azuremlexamples.blob.core.windows.net/nyctaxi/',
        mode= 'ro_mount')},
  outputs={
    'output_folder':Output(
      type= 'uri_folder')},
  environment=Environment( 
    image= 'mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04',
    conda_file= 'conda.yml'),
  compute= 'daniel-big',
  resources=ResourceConfiguration(instance_count= 4),
  distribution=PyTorchDistribution(),
  experiment_name= 'dask-nyctaxi-pipeline-example',
  description= 'This sample shows how to run a distributed DASK job on AzureML. The 24GB NYC Taxi dataset is read in CSV format by a 4 node DASK cluster, processed and then written as job output in parquet format.'
)

In [31]:
train = command(
  code= 'src',
  command=
    "python train-xgboost.py "
    "--nyc_taxi_parquet ${{inputs.nyc_taxi_parquet}} "
    "--model ${{outputs.model}} "
    "--tree_method ${{inputs.tree_method}} "
    "--learning_rate ${{inputs.learning_rate}} "
    "--gamma ${{inputs.gamma}} "
    "--max_depth ${{inputs.max_depth}} "
    "--num_boost_round ${{inputs.num_boost_round}} ",
  inputs={
    "nyc_taxi_parquet": Input(
      path='azureml:azureml_polite_loquat_c3x4fj4l4m_output_data_output_folder:1',
      mode='ro_mount'),
    "tree_method": "auto",
    "learning_rate": 0.3,
    "gamma": 1,
    "max_depth": 7,
    "num_boost_round": 20,
  },
  outputs={
    "model": Output(type='mlflow_model')
  },
  environment=Environment(
    image= "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04",
    conda_file= "conda.yml"),
  compute= "daniel-big",
  experiment_name= "dask-nyctaxi-example"
)

In [47]:
from azure.ml import command, Input, Output
from azure.ml.entities import Environment

test = command(
  code= 'src',
  command=
    "cp -r ${{inputs.model_in}}/* ${{outputs.model_out}} ",
  inputs={
    "model_in": Input(
      path='azureml:azureml_yellow_nail_h3t6w6r57k_output_model:1',
      mode='ro_mount')
  },
  outputs={
    "model_out": Output(type='mlflow_model')
  },
  environment=Environment(
    image= "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04"),
  compute= "daniel-big",
  experiment_name= "dask-nyctaxi-example"
)

In [48]:
ml_client.jobs.create_or_update(test)

Experiment,Name,Type,Status,Details Page
dask-nyctaxi-example,loving_avocado_9jzd5msrzn,command,Starting,Link to Azure Machine Learning studio


## Pipeline Job

In [45]:
from azure.ml.dsl import pipeline

@pipeline()
def prep_train_test(dataset: Input):
  prep_job = prep(nyc_taxi_dataset=dataset)
  train_job = train(nyc_taxi_parquet=prep_job.outputs.output_folder,
                    tree_method='auto',
                    learning_rate= 0.3,
                    gamma= 1,
                    max_depth= 7,
                    num_boost_round= 12)
  test_job = test(model_in=train_job.outputs.model)
  return dict(model=test_job.outputs.model_out)

In [46]:
nyc_raw_data = Input(path= 'wasbs://datasets@azuremlexamples.blob.core.windows.net/nyctaxi/')

pipeline_job = prep_train_test(dataset=nyc_raw_data)

ml_client.jobs.create_or_update(pipeline_job)

HttpResponseError: (UserError) Component 628a22ae-a1de-ab0f-b37f-36ce7df1963c has invalid spec, details: In command "cp -r ${{inputs.model}}/* ${{outputs.model}}", input with name 'model' not found.
Code: UserError
Message: Component 628a22ae-a1de-ab0f-b37f-36ce7df1963c has invalid spec, details: In command "cp -r ${{inputs.model}}/* ${{outputs.model}}", input with name 'model' not found.
Additional Information:Type: ComponentName
Info: {
    "value": "managementfrontend"
}Type: Correlation
Info: {
    "value": {
        "operation": "b60e688d701a8b49960dd3bfd6f36be6",
        "request": "d95c31175ad15eaf"
    }
}Type: Environment
Info: {
    "value": "eastus2"
}Type: Location
Info: {
    "value": "eastus2"
}Type: Time
Info: {
    "value": "2022-05-01T15:13:23.2077451+00:00"
}Type: InnerError
Info: {
    "value": {
        "code": "BadArgument",
        "innerError": {
            "code": "ArgumentInvalid",
            "innerError": {
                "code": "InvalidComponent",
                "innerError": {
                    "code": "InvalidComponentSpec",
                    "innerError": null
                }
            }
        }
    }
}

## Sweep Job

In [None]:
from azure.ml.dsl import pipeline
from azure.ml.sweep import Choice, Uniform

@pipeline()
def prep_sweep_test(dataset: Input):
  prep_job = prep(nyc_taxi_dataset=dataset)

  train_job = train(nyc_taxi_parquet=prep_job.outputs.output_folder,
                    tree_method=Choice(['auto', 'exact', 'approx', 'hist']),
                    learning_rate=Uniform(0, 1),
                    gamma= Choice(range(7)),
                    max_depth= Choice(range(4,8)),
                    num_boost_round= 20,)

  sweep_job = train_job.sweep(primary_metric='test-rmse',
                              goal='minimize',
                              sampling_algorithm='bayesian',
                              compute='daniel-big')

  sweep_job.set_limits(max_concurrent_trials=5,
                      max_total_trials=25)

  test_job = test(model=sweep_job.outputs.model)
  return {}

In [None]:
nyc_raw_data = Input(path= 'wasbs://datasets@azuremlexamples.blob.core.windows.net/nyctaxi/')

pipeline_job = prep_sweep_test(dataset=nyc_raw_data)

ml_client.jobs.create_or_update(pipeline_job)

Experiment,Name,Type,Status,Details Page
nyctaxi,sad_candle_bmxm6nnv0w,pipeline,Preparing,Link to Azure Machine Learning studio


## Standalone Sweep Job

In [None]:
from azure.ml.sweep import Choice, Uniform, LogUniform

train_job = train(tree_method=Choice(['auto', 'exact', 'approx', 'hist']),
                  learning_rate=Uniform(0, 1),
                  gamma= Choice(range(7)),
                  max_depth= Choice(range(4,8)),
                  num_boost_round= 20,)

In [None]:
from azure.ml.sweep import BayesianSamplingAlgorithm

sweep_job = train_job.sweep(primary_metric='test-rmse',
                            goal='minimize',
                            sampling_algorithm=BayesianSamplingAlgorithm(),
                            compute='daniel-big')

sweep_job.set_limits(max_concurrent_trials=5,
                     max_total_trials=25)


In [None]:
ml_client.jobs.create_or_update(sweep_job)

AssetException: Error with code: /home/azureuser/localfiles/git/azureml-examples/tutorials/e2e-dask-sweep/sdfsjl not found, local_path or path must be a path to a file or directory.

In [None]:
foo = ml_client.jobs.get('14aff5b3-fc20-4d54-8350-074be94d0951')

In [None]:
sweep2 = foo(tree_method=Choice(['auto', 'exact', 'approx', 'hist']),
              learning_rate=Uniform(0, 1),
              gamma= Choice(range(7)),
              max_depth= Choice(range(4,8)),
              num_boost_round= 20,)
sweep2.goal='minimize'



TypeError: 'SweepJob' object is not callable

In [None]:
from azure.ml.dsl import pipeline
from azure.ml.sweep import Choice, Uniform

@pipeline()
def just_sweep(parquet_dataset: Input):

  train_job = train(nyc_taxi_parquet=parquet_dataset,
                    tree_method=Choice(['auto', 'exact', 'approx', 'hist']),
                    learning_rate=Uniform(0, 1),
                    gamma= Choice(range(7)),
                    max_depth= Choice(range(4,8)),
                    num_boost_round= 20,)

  sweep_job = train_job.sweep(primary_metric='test-rmse',
                              goal='minimize',
                              sampling_algorithm='bayesian',
                              compute='daniel-big')

  sweep_job.set_limits(max_concurrent_trials=5,
                      max_total_trials=25)

  
  return dict(model=sweep_job.outputs.model)


nyc_parquet = Input(path='azureml:azureml_polite_loquat_c3x4fj4l4m_output_data_output_folder:1')

pipeline_job = just_sweep(parquet_dataset=nyc_parquet)

ml_client.jobs.create_or_update(pipeline_job)

Experiment,Name,Type,Status,Details Page
nyctaxi,happy_bread_2qlqn224rp,pipeline,Preparing,Link to Azure Machine Learning studio


## Test Sweep Output

In [None]:
from azure.ml import command, Input
from azure.ml.entities import Environment

test = command(
  code= 'src',
  command=
    "find ${{inputs.model}} ",
  inputs={
    "model": Input(
      path='azureml:azureml_yellow_nail_h3t6w6r57k_output_model:1',
      mode='ro_mount')
  },
  environment=Environment(
    image= "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04"),
  compute= "daniel-big",
  experiment_name= "dask-nyctaxi-example"
)

In [None]:
ml_client.jobs.create_or_update(test)

Experiment,Name,Type,Status,Details Page
dask-nyctaxi-example,affable_oxygen_y7nm88zsld,command,Starting,Link to Azure Machine Learning studio


In [None]:
from azure.ml.dsl import pipeline
from azure.ml.sweep import Choice, Uniform

@pipeline()
def sweep_and_test(parquet_dataset: Input):

  train_job = train(nyc_taxi_parquet=parquet_dataset,
                    tree_method=Choice(['auto', 'exact', 'approx', 'hist']),
                    learning_rate=Uniform(0, 1),
                    gamma= Choice(range(7)),
                    max_depth= Choice(range(4,8)),
                    num_boost_round= 20,)

  sweep_job = train_job.sweep(primary_metric='test-rmse',
                              goal='minimize',
                              sampling_algorithm='bayesian',
                              compute='daniel-big')

  sweep_job.set_limits(max_concurrent_trials=5,
                      max_total_trials=25)

  test_job = test(model=sweep_job.outputs.model)
  
  return dict()


nyc_parquet = Input(path='azureml:azureml_polite_loquat_c3x4fj4l4m_output_data_output_folder:1')

pipeline_job = sweep_and_test(parquet_dataset=nyc_parquet)

ml_client.jobs.create_or_update(pipeline_job)

Experiment,Name,Type,Status,Details Page
nyctaxi,sharp_cassava_5d3nv4qkmd,pipeline,Preparing,Link to Azure Machine Learning studio


In [None]:
import pandas as pd 

df = pd.DataFrame({'a':[1,2,3]})

In [None]:
df

Unnamed: 0,a
0,1
1,2
2,3
