In [2]:
from azureml.core import Workspace, Datastore, Experiment, Environment
from azureml.core.runconfig import RunConfiguration
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import Pipeline

In [3]:
pip install azureml-pipeline

Note: you may need to restart the kernel to use updated packages.


In [4]:
# Initialize workspace
ws = Workspace.from_config("Users/mypersonall3099/Online_retail/config.json")

# Define compute target
compute_name = "OnlineRetail"
compute_target = ws.compute_targets[compute_name]

In [5]:
#vm_size = "Standard_E4ds_v4"

# Define environment
env = Environment.from_conda_specification(name="myenv", file_path="environment.yaml")
env.register(workspace=ws)


{
    "assetId": "azureml://locations/eastus2/workspaces/7ce76f32-f5fe-4dd3-b900-96fb92b58ebb/environments/myenv/versions/3",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20240709.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "myenv",
    "python": {
       

In [6]:
from azureml.core import Environment

env = Environment.get(workspace=ws, name="myenv")
print(env.python.conda_dependencies.serialize_to_string())


name: myenv
channels:
- defaults
dependencies:
- python=3.7
- scikit-learn
- statsmodels
- pip
- pip:
  - pmdarima
  - azureml-sdk
  - azureml-dataset-runtime[fuse, pandas]
  - joblib
  - scikit-learn



In [7]:
#Declaring environment
# aml_config = RunConfiguration()
# aml_config.target = compute_target 

# #env = Environment.from_conda_specification(name="myenv", file_path="environment.yaml")


# Define run configuration
aml_config = RunConfiguration()
aml_config.target = compute_target
aml_config.environment = env

In [8]:
print(env.environment_variables)

{'EXAMPLE_ENV_VAR': 'EXAMPLE_VALUE'}


In [9]:
try:
    import sklearn
    import joblib
    print("scikit-learn version:", sklearn.__version__)
    print("joblib version:", joblib.__version__)
except ImportError as e:
    print(f"Import error: {e}")


scikit-learn version: 1.5.0
joblib version: 1.4.2


In [98]:
# #Pipeline
# read_data = 'data_wrangling.py'
# prep = 'preprocessing.py'
# model = 'modelling.py'

# #Script initialization
# py_script_run_read = PythonScriptStep(
#                 script_name = read_data,
#                 compute_target = compute_target,
#                 arguments=['--input-data','online_retail_II.csv'],
#                 runconfig = aml_config,
#                 allow_reuse=False)

# py_script_run_prep = PythonScriptStep(
#                 script_name = prep,
#                 compute_target=compute_target,
#                 arguments=['--prep','wranggled.csv'],
#                 runconfig = aml_config,
#                 allow_reuse=False)

# # py_script_run_model = PythonScriptStep(
# #                 script_name = model,
#                 compute_target=compute_target,
#                 arguments=['--train','preprocessed.csv'],
#                 runconfig = aml_config,
#                 allow_reuse=False)

# pipeline_steps = [py_script_run_read, py_script_run_prep, py_script_run_model]
# pipeline_1 = Pipeline(workspace=ws, steps = pipeline_steps)

In [10]:
# Define pipeline steps
read_data = 'data_wrangling.py'
prep = 'preprocessing.py'
model = 'modelling.py'

py_script_run_read = PythonScriptStep(
    script_name=read_data,
    compute_target=compute_target,
    arguments=['--input-data', 'online_retail_II.csv'],
    runconfig=aml_config,
    allow_reuse=False
)

py_script_run_prep = PythonScriptStep(
    script_name=prep,
    compute_target=compute_target,
    arguments=['--prep', 'wrangled.csv'],
    runconfig=aml_config,
    allow_reuse=False
)

py_script_run_model = PythonScriptStep(
    script_name=model,
    compute_target=compute_target,
    arguments=['--train', 'preprocessed.csv'],
    runconfig=aml_config,
    allow_reuse=False
)

# Create and submit pipeline
pipeline_steps = [py_script_run_read, py_script_run_prep, py_script_run_model]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)


In [12]:
# Experiment

# pipeline_run = Experiment(ws, "First_run_7").submit(pipeline_1)
# pipeline_run.wait_for_completion(show_output=True)

experiment_name = "Run1"
experiment = Experiment(ws, experiment_name)
pipeline_run = experiment.submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)

Submitted PipelineRun 62d5bd6a-9a58-47a9-bf9e-913a99769d5f
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/62d5bd6a-9a58-47a9-bf9e-913a99769d5f?wsid=/subscriptions/d8c4fe8f-f720-48ac-86e8-880576d9cd7a/resourcegroups/OnlineRetail/workspaces/OnlineRetail&tid=f56f1f69-458e-427b-bada-4cba658f7917
PipelineRunId: 62d5bd6a-9a58-47a9-bf9e-913a99769d5f
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/62d5bd6a-9a58-47a9-bf9e-913a99769d5f?wsid=/subscriptions/d8c4fe8f-f720-48ac-86e8-880576d9cd7a/resourcegroups/OnlineRetail/workspaces/OnlineRetail&tid=f56f1f69-458e-427b-bada-4cba658f7917
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: 48897db4-60a6-491d-8fc0-472c4fc819af
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/48897db4-60a6-491d-8fc0-472c4fc819af?wsid=/subscriptions/d8c4fe8f-f720-48ac-86e8-880576d9cd7a/resourcegroups/OnlineRetail/workspaces/OnlineRetail&tid=f56f1f69-458e-427b-bada-4cba658f7917
StepRun( preprocessing.p

ActivityFailedException: ActivityFailedException:
	Message: Activity Failed:
{
    "error": {
        "code": "UserError",
        "message": "Execution failed. User process '/azureml-envs/azureml_366da3323363019eaa2f3dfe7d52dbd9/bin/python' exited with status code 1. Please check log file 'user_logs/std_log.txt' for error details. Error:   File \"/azureml-envs/azureml_366da3323363019eaa2f3dfe7d52dbd9/lib/python3.7/site-packages/azureml/data/_loggerfactory.py\", line 140, in wrapper\n    return func(*args, **kwargs)\n  File \"/azureml-envs/azureml_366da3323363019eaa2f3dfe7d52dbd9/lib/python3.7/site-packages/azureml/data/dataset_factory.py\", line 354, in from_delimited_files\n    infer_column_types=infer_column_types or _is_inference_required(set_column_types, validate))\n  File \"/azureml-envs/azureml_366da3323363019eaa2f3dfe7d52dbd9/lib/python3.7/site-packages/azureml/data/dataset_factory.py\", line 1189, in _transform_and_validate\n    _validate_has_data(dataflow, 'Failed to validate the data.'\n  File \"/azureml-envs/azureml_366da3323363019eaa2f3dfe7d52dbd9/lib/python3.7/site-packages/azureml/data/dataset_error_handling.py\", line 69, in _validate_has_data\n    raise DatasetValidationError(error_message + '\\n' + e.compliant_message, exception=e)\nazureml.data.dataset_error_handling.DatasetValidationError: DatasetValidationError:\n\tMessage: Failed to validate the data.If data is inaccessible, please set validate to False.\nThe requested stream was not found. Please make sure the request uri is correct.| session_id=d5ae4b03-2b70-49e9-a694-934511f4abf4\n\tInnerException None\n\tErrorResponse \n{\n    \"error\": {\n        \"code\": \"UserError\",\n        \"message\": \"Failed to validate the data.If data is inaccessible, please set validate to False.\\nThe requested stream was not found. Please make sure the request uri is correct.| session_id=d5ae4b03-2b70-49e9-a694-934511f4abf4\"\n    }\n}\n\n",
        "messageParameters": {},
        "details": []
    },
    "time": "0001-01-01T00:00:00.000Z",
    "componentName": "CommonRuntime"
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Activity Failed:\n{\n    \"error\": {\n        \"code\": \"UserError\",\n        \"message\": \"Execution failed. User process '/azureml-envs/azureml_366da3323363019eaa2f3dfe7d52dbd9/bin/python' exited with status code 1. Please check log file 'user_logs/std_log.txt' for error details. Error:   File \\\"/azureml-envs/azureml_366da3323363019eaa2f3dfe7d52dbd9/lib/python3.7/site-packages/azureml/data/_loggerfactory.py\\\", line 140, in wrapper\\n    return func(*args, **kwargs)\\n  File \\\"/azureml-envs/azureml_366da3323363019eaa2f3dfe7d52dbd9/lib/python3.7/site-packages/azureml/data/dataset_factory.py\\\", line 354, in from_delimited_files\\n    infer_column_types=infer_column_types or _is_inference_required(set_column_types, validate))\\n  File \\\"/azureml-envs/azureml_366da3323363019eaa2f3dfe7d52dbd9/lib/python3.7/site-packages/azureml/data/dataset_factory.py\\\", line 1189, in _transform_and_validate\\n    _validate_has_data(dataflow, 'Failed to validate the data.'\\n  File \\\"/azureml-envs/azureml_366da3323363019eaa2f3dfe7d52dbd9/lib/python3.7/site-packages/azureml/data/dataset_error_handling.py\\\", line 69, in _validate_has_data\\n    raise DatasetValidationError(error_message + '\\\\n' + e.compliant_message, exception=e)\\nazureml.data.dataset_error_handling.DatasetValidationError: DatasetValidationError:\\n\\tMessage: Failed to validate the data.If data is inaccessible, please set validate to False.\\nThe requested stream was not found. Please make sure the request uri is correct.| session_id=d5ae4b03-2b70-49e9-a694-934511f4abf4\\n\\tInnerException None\\n\\tErrorResponse \\n{\\n    \\\"error\\\": {\\n        \\\"code\\\": \\\"UserError\\\",\\n        \\\"message\\\": \\\"Failed to validate the data.If data is inaccessible, please set validate to False.\\\\nThe requested stream was not found. Please make sure the request uri is correct.| session_id=d5ae4b03-2b70-49e9-a694-934511f4abf4\\\"\\n    }\\n}\\n\\n\",\n        \"messageParameters\": {},\n        \"details\": []\n    },\n    \"time\": \"0001-01-01T00:00:00.000Z\",\n    \"componentName\": \"CommonRuntime\"\n}"
    }
}

In [None]:
# Experiment 2

pipeline_run = Experiment(ws, "Second_run").submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)

In [1]:
from azureml.core import Workspace, Datastore

# Connect to the workspace
ws = Workspace.from_config()

# Get the datastore
datastore = Datastore.get(ws, 'workspaceblobstore')

# Print details
print("Datastore:", datastore.name)
print("Blob Container Name:", datastore.container_name)
print("Storage Account Name:", datastore.account_name)


Datastore: workspaceblobstore
Blob Container Name: azureml-blobstore-7ce76f32-f5fe-4dd3-b900-96fb92b58ebb
Storage Account Name: onlineretail3582987100


In [21]:
from azureml.core import Workspace, Datastore, Dataset
import pandas as pd


data_store_name = 'workspaceblobstore'
container_name = 'azureml-blobstore-7ce76f32-f5fe-4dd3-b900-96fb92b58ebb'
raw_data_file_path = 'online_retail_II.csv' 

datastore = Datastore.get(ws, data_store_name)
dataset = Dataset.Tabular.from_delimited_files(path=[(datastore, raw_data_file_path)], validate=False)
df = dataset.to_pandas_dataframe()
#df = pd.read_csv(raw_data_file_path, encoding='latin1')

