In [27]:
from azureml.core import Workspace, Experiment

# Connect to the Azure ML workspace using the configuration found under ./config.
print("Accessing the workspace form job...")
ws = Workspace.from_config("./config")

# Look up the registered dataset by name; later cells pass it to the training script.
print("Accessing the Adult Income dataset...")
input_ds = ws.datasets.get("IncomeTrunc")

# Fail fast with a clear message if the lookup came back empty. Without this
# check a missing dataset would only show up later, as an attribute error on
# `input_ds.as_named_input(...)` when the ScriptRunConfig is built.
if input_ds is None:
    raise ValueError(
        'Dataset "IncomeTrunc" was not found in the workspace; '
        "register it before running the rest of this notebook."
    )

Accessing the workspace form job...
Accessing the Adult Income dataset...


In [28]:
from azureml.core import Environment
from azureml.core.environment import CondaDependencies

# Packages to be resolved through conda and through pip, respectively.
conda_pkgs = ['scikit-learn', 'pip', 'pandas']
pip_pkgs = ['azureml-defaults', 'azureml-interpret']

# Build the dependency specification, then attach it to a fresh named environment.
myenv_dep = CondaDependencies.create(conda_packages=conda_pkgs,
                                     pip_packages=pip_pkgs)
myenv = Environment(name="Myenviron")
myenv.python.conda_dependencies = myenv_dep

print("Registering the environment...")
# Kept as the cell's last expression so the notebook displays what register() returns.
myenv.register(ws)

Registering the environment...


{
    "assetId": "azureml://locations/westus/workspaces/54b058d1-e724-4986-aece-5eb3256cb2e6/environments/Myenviron/versions/1",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20240304.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "Myenviron",
    "python": {


In [29]:
from azureml.core.compute import AmlCompute

# Name of the compute cluster to reuse, or to create when it does not exist yet.
cluster_name = "my-cluster-001"

print("Accessing the compute cluster...")

# Read the workspace's compute targets once and reuse the result for both the
# membership test and the lookup, so the two always see the same snapshot.
# NOTE(review): the property is presumably re-evaluated against the service on
# every access, in which case this also saves a round trip -- confirm.
existing_targets = ws.compute_targets

if cluster_name in existing_targets:
    cluster = existing_targets[cluster_name]
    print(cluster_name, ", compute cluster found. Using it...")
else:
    print("Creating the compute cluster with name: ", cluster_name)
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D11_V2",
        max_nodes=2,
    )
    cluster = AmlCompute.create(ws, cluster_name, compute_config)
    # Block until provisioning has finished before later cells use the cluster.
    cluster.wait_for_completion()

Accessing the compute cluster...
my-cluster-001 , compute cluster found. Using it...


In [30]:
from azureml.core import ScriptRunConfig
print("Creating the ScriptRunConfig....")

# Command-line arguments for the script: the dataset is handed over as the
# value of --input-data, under the input name 'raw_data'.
script_args = ['--input-data', input_ds.as_named_input('raw_data')]

# Bundle the script, its arguments, the environment and the compute target
# into a single run configuration.
script_config = ScriptRunConfig(
    source_directory=".",
    script="360 - Model explain script.py",
    arguments=script_args,
    environment=myenv,
    compute_target=cluster,
)

Creating the ScriptRunConfig....


In [31]:
# Experiment under which the run is recorded in the workspace.
experiment_name = "Explainer_Exp001"

print("Creating the experiment")
new_experiment = Experiment(name=experiment_name, workspace=ws)

print("Submitting the experiment...")
new_run = new_experiment.submit(config=script_config)

# Stream the run's output into the notebook while waiting; kept as the cell's
# last expression so the returned value is displayed as well.
new_run.wait_for_completion(show_output=True)

Creating the experiment
Submitting the experiment...
RunId: Explainer_Exp001_1712522192_b88d3ebd
Web View: https://ml.azure.com/runs/Explainer_Exp001_1712522192_b88d3ebd?wsid=/subscriptions/49e690c2-0fc4-42a6-ba64-9ea0fa04ddda/resourcegroups/AzureMLsdkgroup/workspaces/slavasdkworkspace&tid=77487836-da45-4554-ad65-1775dca67ca5

Streaming user_logs/std_log.txt

{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset', 'runId': 'Explainer_Exp001_1712522192_b88d3ebd'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset', 'runId': 'Explainer_Exp001_1712522192_b88d3ebd', 'run_id': 'Explainer_Exp001_1712522192_b88d3ebd'}
Could not import lightgbm, required if using LGBMExplainableModel
Cleaning up all outstanding Run operations, waiting 300.0

{'runId': 'Explainer_Exp001_1712522192_b88d3ebd',
 'target': 'my-cluster-001',
 'status': 'Completed',
 'startTimeUtc': '2024-04-07T20:37:02.123388Z',
 'endTimeUtc': '2024-04-07T20:37:44.411534Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'amlctrain',
  '_azureml.ClusterName': 'my-cluster-001',
  'ContentSnapshotId': 'f793bac1-5b6b-4096-8726-a595249258c6',
  'azureml.git.repository_uri': 'https://github.com/CalesSla/Azure-SDK-dp.git',
  'mlflow.source.git.repoURL': 'https://github.com/CalesSla/Azure-SDK-dp.git',
  'azureml.git.branch': 'master',
  'mlflow.source.git.branch': 'master',
  'azureml.git.commit': '53b7ae1ec0822ad3dcc9c3af12ae415546982621',
  'mlflow.source.git.commit': '53b7ae1ec0822ad3dcc9c3af12ae415546982621',
  'azureml.git.dirty': 'True',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [{'dataset': {'id': 'e98bfdd6-06eb-47b2-8698-d762cfb9c047'}, 'consumptionDetails'