In [15]:
from azureml.core import Workspace, Dataset, Datastore
import pandas as pd
import numpy as np
import os
from datetime import datetime, date, timedelta
import sklearn
import joblib
import math
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core import Run
import azureml.core
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.compute import ComputeTarget
from azureml.pipeline.steps import PythonScriptStep
from azureml.core import Experiment, Environment
from azureml.core.runconfig import RunConfiguration
from azureml.pipeline.core import Pipeline

In [16]:
# Connect to the Azure ML workspace described by the local workspace config file.
# Every later cell that talks to the service goes through this `ws` handle.
ws = Workspace.from_config()

In [17]:
# NOTE(review): `vm_szie` looks like a typo for `vm_size`, and it is not referenced
# anywhere else in the visible notebook — confirm before renaming or removing it.
vm_szie = "Standard_DS3_v2"
# Name of a compute target expected to already exist in the workspace.
compute_name = "mlopscompute"
# Look the target up by name; this indexing fails if no compute with that name is registered.
compute_target = ws.compute_targets[compute_name]

In [18]:
# Declare the run configuration (compute target + environment) that is passed
# to every pipeline step via `runconfig=aml_config`.
aml_config = RunConfiguration()
aml_config.target = compute_target

# True  -> use a registered curated environment by name.
# False -> let Azure ML build an environment from the conda/pip spec below.
USE_CURATEDENV = True
if USE_CURATEDENV:
    curated_env = Environment.get(workspace= ws,
    name = "AzureML-sklearn-0.24-ubuntu18.04-py37-cpu")
    aml_config.environment = curated_env
else:
    # Dependencies are managed by Azure ML rather than by the user.
    aml_config.environment.python.user_managed_dependencies = False
    aml_config.environment.python.conda_dependencies = CondaDependencies.create(
        # The conda package is named 'scikit-learn'; the previous spelling
        # 'sckit-learn' does not exist and would break the environment build.
        conda_packages=['pandas', 'scikit-learn'],
        pip_packages=['azureml-sdk', 'azureml-dataset-runtime[fuse, pandas]', 'seaborn'],
        pin_sdk_version=False
    )

In [19]:
import os

# Sanity check: report, for each pipeline script, whether it is present
# relative to the notebook's current working directory.
scripts = ["data_wrangling.py", "preprocessing.py", "modeling.py"]
for script in scripts:
    status = "exists" if os.path.exists(script) else "does not exist"
    print(f"{script} {status}.")


data_wrangling.py exists.
preprocessing.py exists.
modeling.py exists.


In [20]:

from azureml.core import Workspace, Dataset, Datastore

# Reuse the workspace handle already loaded from the config file instead of
# hard-coding the subscription ID, resource group and workspace name in the
# notebook. The identifier variables are still defined (read from `ws`) so any
# cell that refers to them keeps working.
subscription_id = ws.subscription_id
resource_group = ws.resource_group
workspace_name = ws.name

workspace = ws

# Read diabetes.csv from the workspace's blob datastore as a tabular dataset
# and pull it into pandas for a quick look at the first rows.
datastore = Datastore.get(workspace, "workspaceblobstore")
dataset = Dataset.Tabular.from_delimited_files(path=(datastore, 'diabetes.csv'))
df = dataset.to_pandas_dataframe()
df.head()

{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe'}
{'infer_column_types': 'False', 'activity': 'to_pandas_dataframe', 'activityApp': 'TabularDataset'}


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,True
1,1,85,66,29,0,26.6,0.351,31,False
2,8,183,64,0,0,23.3,0.672,32,True
3,1,89,66,23,94,28.1,0.167,21,False
4,0,137,40,35,168,43.1,2.288,33,True


In [21]:
# Build the three-step pipeline: data wrangling -> preprocessing -> modeling.
# Each step runs one script on the shared compute target with the shared
# run configuration; `allow_reuse=False` forces every step to re-run on each
# submission instead of reusing a previous run's results.
read_data = "data_wrangling.py"
prep = "preprocessing.py"
model = "modeling.py"

py_script_read_run = PythonScriptStep(
    script_name=read_data,
    compute_target=compute_target,
    arguments=['--input-data', "diabetes.csv"],
    runconfig=aml_config,
    allow_reuse=False
)

py_script_prep_run = PythonScriptStep(
    script_name=prep,
    compute_target=compute_target,
    arguments=['--prep', "wrangled.csv"],
    runconfig=aml_config,
    allow_reuse=False
)

py_script_model_run = PythonScriptStep(
    script_name=model,
    compute_target=compute_target,
    arguments=['--train', "preprocessed.csv"],
    runconfig=aml_config,
    allow_reuse=False
)

# The steps declare no inputs/outputs linking them, so the pipeline has no
# data dependency to order them by and may schedule them concurrently.
# Presumably each script consumes the file written by the previous stage
# (wrangled.csv, preprocessed.csv), so chain them explicitly.
py_script_prep_run.run_after(py_script_read_run)
py_script_model_run.run_after(py_script_prep_run)

pipeline_step = [py_script_read_run, py_script_prep_run, py_script_model_run]
# Pass the list of steps directly rather than wrapping it in another list.
pipeline1 = Pipeline(workspace=ws, steps=pipeline_step)

In [13]:
from azureml.core import Workspace, Datastore

# Load the workspace handle from the local config file.
ws = Workspace.from_config()

# Fetch the blob datastore by name and confirm that the lookup succeeded.
blob_store_name = 'workspaceblobstore'
datastore = Datastore.get(ws, datastore_name=blob_store_name)
print(f"Datastore {datastore.name} retrieved successfully.")


Datastore workspaceblobstore retrieved successfully.


In [17]:
# Display the datastore object so its details appear as the cell output.
datastore

{
  "name": "workspaceblobstore",
  "container_name": "azureml-blobstore-f3092cd2-e464-4473-8d27-80b38c751698",
  "account_name": "blobstoreagemlops",
  "protocol": "https",
  "endpoint": "core.windows.net"
}

In [22]:
#Experiment1
pipeline_run1 = Experiment(ws, "First_run_exp").submit(pipeline1)
pipeline_run1.wait_for_completion(show_output=True)

Created step data_wrangling.py [167b1e4e][8713c338-7c8b-45c2-82e1-48c11b43ce57], (This step will run and generate new outputs)
Created step preprocessing.py [ee19fea7][e0c111b0-259c-4f30-8327-2488567f310c], (This step will run and generate new outputs)
Created step modeling.py [a6c88c0c][161214b3-f1ac-4e23-9099-d93a30d4327a], (This step will run and generate new outputs)
Submitted PipelineRun 9ebf337a-4277-4433-b6cb-4b1852aa0445
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/9ebf337a-4277-4433-b6cb-4b1852aa0445?wsid=/subscriptions/7b8ef4c6-77cc-453a-81db-0c0c47f97eca/resourcegroups/mlops-learn/workspaces/mlops-wp&tid=7309352a-ba9a-4107-91a8-52826b57ccd6
PipelineRunId: 9ebf337a-4277-4433-b6cb-4b1852aa0445
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/9ebf337a-4277-4433-b6cb-4b1852aa0445?wsid=/subscriptions/7b8ef4c6-77cc-453a-81db-0c0c47f97eca/resourcegroups/mlops-learn/workspaces/mlops-wp&tid=7309352a-ba9a-4107-91a8-52826b57ccd6
PipelineRun Status: N

'Finished'