In [5]:
from azureml.core import Workspace
from dotenv import find_dotenv, load_dotenv

# Locate a .env file with find_dotenv() and hand it to load_dotenv();
# the call's return value is not needed, so it is discarded into `_`.
_ = load_dotenv(dotenv_path=find_dotenv())

# Open the Azure ML workspace using the configuration found at ./config
ws = Workspace.from_config(path="./config")

In [6]:
# Build a custom environment and register it with the workspace
from azureml.core import Environment
from azureml.core.environment import CondaDependencies

# Conda packages requested for the environment
myenv_dep = CondaDependencies.create(conda_packages=["scikit-learn", "pandas"])

# Attach the dependency set to a fresh environment definition
myenv = Environment(name="MyEnvironmentAuto")
myenv.python.conda_dependencies = myenv_dep

# Register in workspace `ws`; being the last expression of the cell, the
# value returned by register() is what the notebook displays as output.
myenv.register(ws)

{
    "assetId": "azureml://locations/westus/workspaces/54b058d1-e724-4986-aece-5eb3256cb2e6/environments/MyEnvironmentAuto/versions/2",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20240304.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "MyEnvironmentAuto",


In [7]:
# Create (or reuse) a compute cluster for the pipeline
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

cluster_name = "pipeline-cluster"

# Provisioning settings, only applied when the cluster has to be created.
compute_config = AmlCompute.provisioning_configuration(
    vm_size="STANDARD_D11_V2",
    max_nodes=2
)

try:
    # Reuse the cluster when it is already present in the workspace, so that
    # re-running this cell (e.g. Restart Kernel -> Run All) does not try to
    # provision a second time under the same name.
    compute_cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # No compute target with that name yet: provision a new one.
    compute_cluster = ComputeTarget.create(ws, cluster_name, compute_config)

# Block until the provisioning operation has finished.
compute_cluster.wait_for_completion()

Provisioning operation finished, operation "Succeeded"


In [8]:
# Define the pipeline steps, build the pipeline and submit it as an experiment
from azureml.core import Experiment
from azureml.core.runconfig import RunConfiguration
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep

# Run configuration shared by both steps: run on the compute cluster, inside
# the custom environment registered earlier in the notebook.
run_config = RunConfiguration()
run_config.target = compute_cluster
run_config.environment = myenv

# Define inputs and outputs
# `ws.datasets.get` returns None for an unknown name; fail here with a clear
# message instead of an AttributeError on `input_ds.as_named_input` below.
input_ds = ws.datasets.get("Defaults")
if input_ds is None:
    raise ValueError(
        'Dataset "Defaults" was not found in the workspace; '
        "register it before building the pipeline."
    )

# Intermediate location on the default datastore: written by step 01 and
# read by step 02.
dataFolder = PipelineData("datafolder", datastore=ws.get_default_datastore())

# NOTE(review): source_directory="." is the notebook's working directory;
# confirm nothing sensitive (e.g. a .env file) ends up in the step snapshot.

# Step 01 - Data Preparation
# Reads the dataset under the name "raw_data" and receives the output folder
# through the --datafolder argument.
dataPrep_step = PythonScriptStep(name="01 Data Preparation",
                                 source_directory=".",
                                 script_name="220 - Dataprep Pipeline.py",
                                 inputs=[input_ds.as_named_input("raw_data")],
                                 outputs=[dataFolder],
                                 runconfig=run_config,
                                 arguments=["--datafolder", dataFolder])

# Step 02 - Train the model
# Consumes the folder produced by step 01, passed again via --datafolder.
train_step = PythonScriptStep(name="02 Train the Model",
                              source_directory=".",
                              script_name="220 - Training Pipeline.py",
                              inputs=[dataFolder],
                              runconfig=run_config,
                              arguments=["--datafolder", dataFolder])

# Configure and build the pipeline
steps = [dataPrep_step, train_step]
new_pipeline = Pipeline(workspace=ws, steps=steps)

# Create the experiment and run the pipeline
new_experiment = Experiment(workspace=ws, name="PipelineExp01")
new_pipeline_run = new_experiment.submit(new_pipeline)

# Stream the run's output into the notebook until the pipeline completes.
# If the streaming is interrupted, the run keeps executing on the compute
# target (see https://aka.ms/aml-docs-cancel-run for how to cancel it).
new_pipeline_run.wait_for_completion(show_output=True)

Created step 01 Data Preparation [b7f8994f][afa1209e-7c29-41fb-8ff5-bbd27e8036d6], (This step will run and generate new outputs)
Created step 02 Train the Model [556c447f][bec7298e-ca10-4989-b6f3-602f5771bc92], (This step will run and generate new outputs)
Submitted PipelineRun 0935b6ed-10c5-42e3-936d-4eef8fa82e1f
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/0935b6ed-10c5-42e3-936d-4eef8fa82e1f?wsid=/subscriptions/49e690c2-0fc4-42a6-ba64-9ea0fa04ddda/resourcegroups/AzureMLsdkgroup/workspaces/slavasdkworkspace&tid=77487836-da45-4554-ad65-1775dca67ca5
PipelineRunId: 0935b6ed-10c5-42e3-936d-4eef8fa82e1f
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/0935b6ed-10c5-42e3-936d-4eef8fa82e1f?wsid=/subscriptions/49e690c2-0fc4-42a6-ba64-9ea0fa04ddda/resourcegroups/AzureMLsdkgroup/workspaces/slavasdkworkspace&tid=77487836-da45-4554-ad65-1775dca67ca5
PipelineRun Status: Running


StepRunId: 99dcc163-3488-4096-9d34-b6501fb0e94d
Link to Azure Machine Learning Por

Ran into a deserialization error. Ignoring since this is failsafe deserialization
Traceback (most recent call last):
  File "c:\Users\User\anaconda3\envs\newtensorflow\lib\site-packages\msrest\serialization.py", line 1509, in failsafe_deserialize
    return self(target_obj, data, content_type=content_type)
  File "c:\Users\User\anaconda3\envs\newtensorflow\lib\site-packages\msrest\serialization.py", line 1375, in __call__
    data = self._unpack_content(response_data, content_type)
  File "c:\Users\User\anaconda3\envs\newtensorflow\lib\site-packages\msrest\serialization.py", line 1543, in _unpack_content
    raise ValueError("This pipeline didn't have the RawDeserializer policy; can't deserialize")
ValueError: This pipeline didn't have the RawDeserializer policy; can't deserialize
Ran into a deserialization error. Ignoring since this is failsafe deserialization
Traceback (most recent call last):
  File "c:\Users\User\anaconda3\envs\newtensorflow\lib\site-packages\msrest\serialization.p

StepRun( 01 Data Preparation ) Status: Running


Ran into a deserialization error. Ignoring since this is failsafe deserialization
Traceback (most recent call last):
  File "c:\Users\User\anaconda3\envs\newtensorflow\lib\site-packages\msrest\serialization.py", line 1509, in failsafe_deserialize
    return self(target_obj, data, content_type=content_type)
  File "c:\Users\User\anaconda3\envs\newtensorflow\lib\site-packages\msrest\serialization.py", line 1375, in __call__
    data = self._unpack_content(response_data, content_type)
  File "c:\Users\User\anaconda3\envs\newtensorflow\lib\site-packages\msrest\serialization.py", line 1543, in _unpack_content
    raise ValueError("This pipeline didn't have the RawDeserializer policy; can't deserialize")
ValueError: This pipeline didn't have the RawDeserializer policy; can't deserialize
Ran into a deserialization error. Ignoring since this is failsafe deserialization
Traceback (most recent call last):
  File "c:\Users\User\anaconda3\envs\newtensorflow\lib\site-packages\msrest\serialization.p

ExperimentExecutionException: ExperimentExecutionException:
	Message: The output streaming for the run interrupted.
But the run is still executing on the compute target. 
Details for canceling the run can be found here: https://aka.ms/aml-docs-cancel-run
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "The output streaming for the run interrupted.\nBut the run is still executing on the compute target. \nDetails for canceling the run can be found here: https://aka.ms/aml-docs-cancel-run"
    }
}