<a href="https://colab.research.google.com/github/FranciscoOcampoPredictiva/azureml_course/blob/main/Lecture_8_Automate_Model_Training_in_AzureML_SDK_(Pipeline_Run).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1) Installation and Setup

In [None]:
# Install azureml SDK package
! pip install -q azureml-sdk

In [None]:
# Import the class
from azureml.core import Workspace

In [None]:
# Build the Workspace object from the connection details stored in the config file
workspace_config_path = '/content/config.json'
ws = Workspace.from_config(path=workspace_config_path)

# 2) Running the Experiment

## Creating the custom environment

In [None]:
from azureml.core import Environment
from azureml.core.environment import CondaDependencies

In [None]:
# Start from a new, named environment definition
myenv = Environment(name="MyEnvironment")

# Enable Docker for the environment and mark its Python dependencies as
# not user-managed
myenv.docker.enabled = True
myenv.python.user_managed_dependencies = False



In [None]:
# Packages the environment needs, split by the installer that provides them
conda_pkgs = ['scikit-learn', 'pandas']
pip_pkgs = ['azureml-sdk']

# Build the dependencies object and attach it to the environment
myenv_dep = CondaDependencies.create(conda_packages=conda_pkgs, pip_packages=pip_pkgs)
myenv.python.conda_dependencies = myenv_dep

In [None]:
# Store the environment definition in the workspace; the value returned by
# register() is the last expression, so it is shown as the cell output
myenv.register(workspace=ws)

{
    "assetId": "azureml://locations/westus/workspaces/20729af0-6f54-4124-b7dd-3c59794d2397/environments/MyEnvironment/versions/1",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20220729.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": true,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "MyEnvironment",
    "pyth

## Provision the compute cluster

In [None]:
# Name used for the compute cluster in the workspace
cluster_name = "Compute-Cluster"

In [None]:
# Import the classes
from azureml.core.compute import AmlCompute, ComputeTarget

In [None]:
# Provisioning settings for the cluster: the VM size and the maximum node count
cluster_settings = {'vm_size': 'Standard_DS3_v2', 'max_nodes': 2}
compute_config = AmlCompute.provisioning_configuration(**cluster_settings)

In [None]:
# Get the compute cluster, creating it only when it does not exist yet.
# Looking the target up first keeps this cell re-runnable (Restart & Run All)
# once the cluster has been provisioned by an earlier run.
from azureml.core.compute_target import ComputeTargetException

try:
    # Raises ComputeTargetException when no compute target has this name
    compute_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print(f'Found existing compute target "{cluster_name}"; reusing it.')
except ComputeTargetException:
    compute_cluster = ComputeTarget.create(workspace=ws,
                                           name=cluster_name,
                                           provisioning_configuration=compute_config)

# Block until provisioning has finished, printing progress
compute_cluster.wait_for_completion(show_output=True)

InProgress.
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


## Run configuration steps

In [None]:
from azureml.core.runconfig import RunConfiguration
# Run configuration handed to the pipeline steps: it carries the custom
# environment and the compute cluster assigned as the target
run_config = RunConfiguration()
run_config.environment = myenv
run_config.target = compute_cluster

## Defining the pipeline steps

In [None]:
# Importing the classes
from azureml.pipeline.steps import PythonScriptStep
from azureml.pipeline.core import PipelineData

# Access the dataset registered in the workspace.
# ws.datasets is a name -> dataset mapping, so .get() returns None for an
# unknown name; fail here with a clear message instead of letting a later
# cell break with an AttributeError on None.
dataset_name = 'Churn Modelling Data SDK'
registered_datasets = ws.datasets
input_ds = registered_datasets.get(dataset_name)
if input_ds is None:
    raise ValueError(
        f"Dataset '{dataset_name}' is not registered in the workspace. "
        f"Registered datasets: {sorted(registered_datasets)}"
    )

# Pipeline data object: the intermediate output passed between the steps,
# stored on the workspace's default datastore
dataFolder = PipelineData(name='datafolder', datastore=ws.get_default_datastore())

In [None]:
# Step 1 - Data Processing
# Runs data_processing_script.py from the current directory. The dataset is
# passed as the named input 'raw_data', dataFolder is declared as the step
# output, and the same object is given to the script via --datafolder.
dataPrep_step = PythonScriptStep(
    name='Data Processing',
    script_name='data_processing_script.py',
    source_directory='.',
    arguments=['--datafolder', dataFolder],
    inputs=[input_ds.as_named_input('raw_data')],
    outputs=[dataFolder],
    runconfig=run_config,
)

In [None]:
# Step 2 - Training the Model
# Runs model_training_script.py from the current directory. dataFolder is
# declared as the step input and is given to the script via --datafolder.
train_step = PythonScriptStep(
    name='Model Training',
    script_name='model_training_script.py',
    source_directory='.',
    arguments=['--datafolder', dataFolder],
    inputs=[dataFolder],
    runconfig=run_config,
)

## Configure and Build the Pipeline

In [None]:
from azureml.pipeline.core import Pipeline

# The step objects that make up the pipeline
steps = [dataPrep_step, train_step]

# Assemble the pipeline in the workspace from those steps
new_pipeline = Pipeline(workspace=ws, steps=steps)

## Creating experiment and running the pipeline

In [None]:
from azureml.core import Experiment

# Experiment under which the pipeline run is submitted
new_experiment = Experiment(workspace=ws, name='PipelineExperiment')

# Submit the pipeline, then block until the run ends while printing its
# output; the value returned by wait_for_completion is shown as the cell output
new_pipeline_run = new_experiment.submit(new_pipeline)
new_pipeline_run.wait_for_completion(show_output=True)

Created step Data Processing [def2c3e5][c2a2d3aa-eb4c-4de5-93af-ac26619c1dab], (This step will run and generate new outputs)
Created step Model Training [9f4de622][9c28d934-e8f3-4e19-a173-bf0ea8a87078], (This step will run and generate new outputs)
Submitted PipelineRun 1d4ca0c9-efb2-4842-b0b7-faa82a3d52f8
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/1d4ca0c9-efb2-4842-b0b7-faa82a3d52f8?wsid=/subscriptions/76d4f7e1-3c6a-41a3-968f-043a0cb83503/resourcegroups/azureml-sdk-rg/workspaces/azureml-sdk-ws&tid=aca956d5-1716-486c-9340-ffedc6d009ae
PipelineRunId: 1d4ca0c9-efb2-4842-b0b7-faa82a3d52f8
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/1d4ca0c9-efb2-4842-b0b7-faa82a3d52f8?wsid=/subscriptions/76d4f7e1-3c6a-41a3-968f-043a0cb83503/resourcegroups/azureml-sdk-rg/workspaces/azureml-sdk-ws&tid=aca956d5-1716-486c-9340-ffedc6d009ae
PipelineRun Status: Running


StepRunId: ffa1ee7e-5d49-4f60-b29f-c612d6304b66
Link to Azure Machine Learning Portal: https://ml.

'Finished'