In [1]:
from azureml.core import Workspace, Datastore, Experiment, Environment
from azureml.core.runconfig import RunConfiguration
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline

In [2]:
pip install azureml-pipeline

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Initialize workspace
ws = Workspace.from_config("Users/mypersonall3099/Online_retail/config.json")

# Define compute target
compute_name = "OnlineRetail"
compute_target = ws.compute_targets[compute_name]

In [4]:
#vm_size = "Standard_E4ds_v4"

# Define environment
env = Environment.from_conda_specification(name="myenv", file_path="environment.yaml")
env.register(workspace=ws)


{
    "assetId": "azureml://locations/eastus2/workspaces/7ce76f32-f5fe-4dd3-b900-96fb92b58ebb/environments/myenv/versions/3",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20240709.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "myenv",
    "python": {
       

In [6]:
from azureml.core import Environment

env = Environment.get(workspace=ws, name="myenv")
print(env.python.conda_dependencies.serialize_to_string())


name: myenv
channels:
- defaults
dependencies:
- python=3.7
- scikit-learn
- statsmodels
- pip
- pip:
  - pmdarima
  - azureml-sdk
  - azureml-dataset-runtime[fuse, pandas]
  - joblib
  - scikit-learn



In [7]:
#Declaring environment
# aml_config = RunConfiguration()
# aml_config.target = compute_target 

# #env = Environment.from_conda_specification(name="myenv", file_path="environment.yaml")


# Define run configuration
aml_config = RunConfiguration()
aml_config.target = compute_target
aml_config.environment = env

In [8]:
print(env.environment_variables)

{'EXAMPLE_ENV_VAR': 'EXAMPLE_VALUE'}


In [9]:
try:
    import sklearn
    import joblib
    print("scikit-learn version:", sklearn.__version__)
    print("joblib version:", joblib.__version__)
except ImportError as e:
    print(f"Import error: {e}")


scikit-learn version: 1.5.0
joblib version: 1.4.2


In [21]:
#Pipeline
read_data = 'data_wrangling.py'
prep = 'preprocessing.py'
model = 'modelling.py'

#Script initialization
py_script_run_read = PythonScriptStep(
                script_name = read_data,
                compute_target = compute_target,
                arguments=['--input-data','online_retail_utf8.csv'],
                runconfig = aml_config,
                allow_reuse=False)

py_script_run_prep = PythonScriptStep(
                script_name = prep,
                compute_target=compute_target,
                arguments=['--prep','wranggled.csv'],
                runconfig = aml_config,
                allow_reuse=False)

py_script_run_model = PythonScriptStep(
                script_name = model,
                compute_target=compute_target,
                arguments=['--train','preprocessed.csv'],
                runconfig = aml_config,
                allow_reuse=False)

pipeline_steps = [py_script_run_read, py_script_run_prep, py_script_run_model]
pipeline_1 = Pipeline(workspace=ws, steps = pipeline_steps)

In [22]:
# # Define pipeline steps
# read_data = 'data_wrangling.py'
# prep = 'preprocessing.py'
# model = 'modelling.py'

# py_script_run_read = PythonScriptStep(
#     script_name=read_data,
#     compute_target=compute_target,
#     arguments=['--input-data', 'online_retail_utf8.csv'],
#     runconfig=aml_config,
#     allow_reuse=False
# )

# py_script_run_prep = PythonScriptStep(
#     script_name=prep,
#     compute_target=compute_target,
#     arguments=['--prep', 'wrangled.csv'],
#     runconfig=aml_config,
#     allow_reuse=False
# )

# py_script_run_model = PythonScriptStep(
#     script_name=model,
#     compute_target=compute_target,
#     arguments=['--train', 'preprocessed.csv'],
#     runconfig=aml_config,
#     allow_reuse=False
# )

# # Create and submit pipeline
# pipeline_steps = [py_script_run_read, py_script_run_prep, py_script_run_model]
# pipeline = Pipeline(workspace=ws, steps=pipeline_steps)


In [23]:
# Experiment

# pipeline_run = Experiment(ws, "First_run_7").submit(pipeline_1)
# pipeline_run.wait_for_completion(show_output=True)

experiment_name = "Run3"
experiment = Experiment(ws, experiment_name)
pipeline_run = experiment.submit(pipeline_1)
pipeline_run.wait_for_completion(show_output=True)

Created step data_wrangling.py [68164aab][5038f9d7-cfda-4cfe-a252-c8a716d89e23], (This step will run and generate new outputs)
Created step preprocessing.py [2995ba0a][8ed19844-29a6-4c9d-aaf0-335ac9c1a44e], (This step will run and generate new outputs)
Created step modelling.py [52ea4d6b][73e20fa1-88a4-40f1-8fec-24d33f64d482], (This step will run and generate new outputs)
Submitted PipelineRun 1f79c964-efa3-4e29-803c-0f80dfee5b9a
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/1f79c964-efa3-4e29-803c-0f80dfee5b9a?wsid=/subscriptions/d8c4fe8f-f720-48ac-86e8-880576d9cd7a/resourcegroups/OnlineRetail/workspaces/OnlineRetail&tid=f56f1f69-458e-427b-bada-4cba658f7917
PipelineRunId: 1f79c964-efa3-4e29-803c-0f80dfee5b9a
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/1f79c964-efa3-4e29-803c-0f80dfee5b9a?wsid=/subscriptions/d8c4fe8f-f720-48ac-86e8-880576d9cd7a/resourcegroups/OnlineRetail/workspaces/OnlineRetail&tid=f56f1f69-458e-427b-bada-4cba658f7917
PipelineRu

ActivityFailedException: ActivityFailedException:
	Message: Activity Failed:
{
    "error": {
        "code": "UserError",
        "message": "Pipeline has failed child jobs. For more details and logs, please go to the job detail page and check the child jobs.",
        "messageFormat": "Pipeline has failed child jobs. {0}",
        "messageParameters": {},
        "referenceCode": "PipelineHasStepJobFailed",
        "details": []
    },
    "environment": "eastus2",
    "location": "eastus2",
    "time": "2024-08-18T08:37:15.681433Z",
    "componentName": ""
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Activity Failed:\n{\n    \"error\": {\n        \"code\": \"UserError\",\n        \"message\": \"Pipeline has failed child jobs. For more details and logs, please go to the job detail page and check the child jobs.\",\n        \"messageFormat\": \"Pipeline has failed child jobs. {0}\",\n        \"messageParameters\": {},\n        \"referenceCode\": \"PipelineHasStepJobFailed\",\n        \"details\": []\n    },\n    \"environment\": \"eastus2\",\n    \"location\": \"eastus2\",\n    \"time\": \"2024-08-18T08:37:15.681433Z\",\n    \"componentName\": \"\"\n}"
    }
}

In [None]:
# Experiment 2

pipeline_run = Experiment(ws, "Second_run").submit(pipeline_1)
pipeline_run.wait_for_completion(show_output=True)