# Experiment with parameters for a Ridge Regression Model on the Diabetes Dataset

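This notebook experiments with different parameters for training a ridge regression model on the Diabetes dataset. It stages the training scripts in a local folder, then builds and runs an Azure Machine Learning pipeline that trains the model and registers it in the workspace.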

In [1]:
# Change out of the experimentation directory so that the relative paths used
# later in this notebook (e.g. 'data/diabetes.csv') resolve from the parent folder.
# NOTE: `%cd ..` is relative to the current directory, so re-running this cell
# without restarting the kernel moves up one more level each time.
%cd ..

C:\Users\brysmith\Source\Repos\MLOpsPython


In [2]:
import azureml.core
from azureml.core import Workspace

In [3]:
# Load the workspace from the saved config file.
# NOTE(review): presumably the config file must be discoverable from the current
# working directory (which the `%cd ..` cell changes) — confirm where it is stored.
ws = Workspace.from_config()

In [14]:
import os, shutil

# Folder that collects every file submitted with the experiment
training_folder = 'diabetes-training'
os.makedirs(training_folder, exist_ok=True)

# Stage the dataset inside the experiment folder
data_target = os.path.join(training_folder, "diabetes.csv")
shutil.copy('data/diabetes.csv', data_target)

# Stage the shared training functions; this call stays last so that the
# destination path it returns is what the cell displays
train_target = os.path.join(training_folder, "train.py")
shutil.copy('diabetes_regression/training/train.py', train_target)

'diabetes-training\\train.py'

In [6]:
%%writefile $training_folder/parameters.json
{
    "training":
    {
        "alpha": 0.3
    },
    "evaluation":
    {

    },
    "scoring":
    {
        
    }
}


Overwriting diabetes-training/parameters.json


In [26]:
%%writefile $training_folder/diabetes_training.py
# Import libraries
from azureml.core import Run
import pandas as pd
import shutil
import joblib

from train import split_data, train_model

# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument('--output_folder', type=str, dest='output_folder', default="diabetes_model", help='output folder')
args = parser.parse_args()
output_folder = args.output_folder

# Get the experiment run context
run = Run.get_context()

# load the diabetes dataset
print("Loading Data...")
train_df = pd.read_csv('diabetes.csv')

data = split_data(train_df)

# Specify the parameters to test
with open("parameters.json") as f:
    pars = json.load(f)
    train_args = pars["training"]

# Log parameters
for k, v in train_args.items():
    run.log(k, v)

model, metrics = train_model(data, train_args)

# Log metrics
for k, v in metrics.items():
    run.log(k, v)

# Save the parameters file to the outputs folder
os.makedirs(output_folder, exist_ok=True)
shutil.copy('parameters.json', os.path.join(output_folder, 'parameters.json'))
joblib.dump(value=model, filename= output_folder + "/model.pkl")
    
run.complete()

Overwriting diabetes-training/diabetes_training.py


In [23]:
%%writefile $training_folder/register_diabetes.py
# Import libraries
import argparse
import joblib
from azureml.core import Workspace, Model, Run

# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument('--model_folder', type=str, dest='model_folder', default="diabetes_model", help='model location')
args = parser.parse_args()
model_folder = args.model_folder

# Get the experiment run context
run = Run.get_context()

# load the model
print("Loading model from " + model_folder)
model_file = model_folder + "/model.pkl"
model = joblib.load(model_file)

Model.register(workspace=run.experiment.workspace,
               model_path = model_file,
               model_name = 'diabetes_model',
               tags={'Training context':'Pipeline'})

run.complete()

Overwriting diabetes-training/register_diabetes.py


In [9]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "aml-cluster"

try:
    # Look the cluster up by name in the workspace
    pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # Lookup failed: provision a new cluster under that name
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
        idle_seconds_before_scaledown=1800,
    )
    pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster reports completion, streaming its status
pipeline_cluster.wait_for_completion(show_output=True)

Creating
Succeeded
AmlCompute wait for completion finished
Minimum number of nodes requested have been provisioned


In [10]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import RunConfiguration

# Package dependencies the experiment needs
diabetes_packages = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pandas'],
    pip_packages=['azureml-sdk'],
)

# Python environment for the experiment: docker-based, with dependencies
# managed by Azure ML rather than by the user
diabetes_env = Environment("diabetes-pipeline-env")
diabetes_env.docker.enabled = True
diabetes_env.python.user_managed_dependencies = False
diabetes_env.python.conda_dependencies = diabetes_packages

# Register the environment in the workspace, then fetch it back by name
diabetes_env.register(workspace=ws)
registered_env = Environment.get(ws, 'diabetes-pipeline-env')

# Run configuration for the pipeline: the compute created earlier in this
# notebook plus the registered environment
pipeline_run_config = RunConfiguration()
pipeline_run_config.target = pipeline_cluster
pipeline_run_config.environment = registered_env

print("Run configuration created.")

Run configuration created.


In [27]:
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.train.estimator import Estimator

# Dataset lookup is left disabled; the training script reads the diabetes.csv
# file that was copied into the training folder instead.
#diabetes_ds = ws.datasets.get("diabetes dataset")

# PipelineData (temporary data reference) on the default datastore; it is the
# output of the training step and the input of the registration step
default_store = ws.get_default_datastore()
model_folder = PipelineData("model_folder", datastore=default_store)

# Estimator that runs the training script from the training folder
estimator = Estimator(
    source_directory=training_folder,
    compute_target=pipeline_cluster,
    environment_definition=pipeline_run_config.environment,
    entry_script='diabetes_training.py',
)

# Step 1: train the model and write it into model_folder
train_step = EstimatorStep(
    name="Train Model",
    estimator=estimator,
    estimator_entry_script_arguments=['--output_folder', model_folder],
    outputs=[model_folder],
    compute_target=pipeline_cluster,
    allow_reuse=True,
)

# Step 2: register the model found in model_folder
register_step = PythonScriptStep(
    name="Register Model",
    source_directory=training_folder,
    script_name="register_diabetes.py",
    arguments=['--model_folder', model_folder],
    inputs=[model_folder],
    compute_target=pipeline_cluster,
    runconfig=pipeline_run_config,
    allow_reuse=True,
)

print("Pipeline steps defined")

Pipeline steps defined


In [28]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails

# Assemble the training and registration steps into a pipeline
pipeline_steps = [train_step, register_step]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
print("Pipeline is built.")

# Submit the pipeline under a named experiment, requesting regenerated outputs
experiment = Experiment(workspace=ws, name='diabetes-training-pipeline')
pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
print("Pipeline submitted for execution.")

# Show the monitoring widget, then block until the run ends; the wait call stays
# last so that its return value is what the cell displays
run_widget = RunDetails(pipeline_run)
run_widget.show()
pipeline_run.wait_for_completion()



Pipeline is built.
Created step Train Model [f5c73851][18728f1f-bc86-4767-bfc2-3aa1032c0ba9], (This step will run and generate new outputs)
Created step Register Model [98ff322a][a35e33b1-fab1-45bc-8f1e-4e9df587d639], (This step will run and generate new outputs)
Submitted PipelineRun 61cc0698-933d-4a1b-954d-d3c0045f6dbb
Link to Azure Portal: https://mlworkspace.azure.ai/portal/subscriptions/48d404f6-3a69-4552-a210-b1afe5537cc1/resourceGroups/mlopsohrg/providers/Microsoft.MachineLearningServices/workspaces/mlopsoh-ws/experiments/diabetes-training-pipeline/runs/61cc0698-933d-4a1b-954d-d3c0045f6dbb
Pipeline submitted for execution.


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: 61cc0698-933d-4a1b-954d-d3c0045f6dbb
Link to Portal: https://mlworkspace.azure.ai/portal/subscriptions/48d404f6-3a69-4552-a210-b1afe5537cc1/resourceGroups/mlopsohrg/providers/Microsoft.MachineLearningServices/workspaces/mlopsoh-ws/experiments/diabetes-training-pipeline/runs/61cc0698-933d-4a1b-954d-d3c0045f6dbb
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: d6c05b43-8582-4e6c-aee9-0196ffc64634
Link to Portal: https://mlworkspace.azure.ai/portal/subscriptions/48d404f6-3a69-4552-a210-b1afe5537cc1/resourceGroups/mlopsohrg/providers/Microsoft.MachineLearningServices/workspaces/mlopsoh-ws/experiments/diabetes-training-pipeline/runs/d6c05b43-8582-4e6c-aee9-0196ffc64634
StepRun( Train Model ) Status: NotStarted
StepRun( Train Model ) Status: Running

Streaming azureml-logs/55_azureml-execution-tvmps_058956e2b18c5cbe96f17d2694f3bc08560b1d64dc64a8d98214df439a8dd3c6_d.txt
2020-03-03T21:45:38Z Starting output-watcher...
2020-03-03T21:45:38Z IsDedicatedComput




StepRunId: 49dcaa65-8f24-452e-99bd-4b458f9aa95b
Link to Portal: https://mlworkspace.azure.ai/portal/subscriptions/48d404f6-3a69-4552-a210-b1afe5537cc1/resourceGroups/mlopsohrg/providers/Microsoft.MachineLearningServices/workspaces/mlopsoh-ws/experiments/diabetes-training-pipeline/runs/49dcaa65-8f24-452e-99bd-4b458f9aa95b
StepRun( Register Model ) Status: NotStarted
StepRun( Register Model ) Status: Running

Streaming azureml-logs/55_azureml-execution-tvmps_058956e2b18c5cbe96f17d2694f3bc08560b1d64dc64a8d98214df439a8dd3c6_d.txt
2020-03-03T21:47:09Z Starting output-watcher...
2020-03-03T21:47:09Z IsDedicatedCompute == True, won't poll for Low Pri Preemption
Login Succeeded
Using default tag: latest
latest: Pulling from azureml/azureml_d98378851aa287fc3ea388278015dcf6
Digest: sha256:e3df99acf8c13db41600df4c75a4cc312d3974c8abffe335545d6ef675bf8022
Status: Image is up to date for mlopsohws46ec5f38.azurecr.io/azureml/azureml_d98378851aa287fc3ea388278015dcf6:latest
4d22500e0304c424c9cb39e8e



PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': '61cc0698-933d-4a1b-954d-d3c0045f6dbb', 'status': 'Completed', 'startTimeUtc': '2020-03-03T21:44:42.792356Z', 'endTimeUtc': '2020-03-03T21:48:00.994573Z', 'properties': {'azureml.runsource': 'azureml.PipelineRun', 'runSource': 'SDK', 'runType': 'SDK', 'azureml.parameters': '{}'}, 'inputDatasets': [], 'logFiles': {'logs/azureml/executionlogs.txt': 'https://mlopsohws3623368663.blob.core.windows.net/azureml/ExperimentRun/dcid.61cc0698-933d-4a1b-954d-d3c0045f6dbb/logs/azureml/executionlogs.txt?sv=2019-02-02&sr=b&sig=0C79LU1qNR%2B30%2F6pNyZPlY%2BNpQWE7vxxKbu8%2F78ujj0%3D&st=2020-03-03T21%3A38%3A04Z&se=2020-03-04T05%3A48%3A04Z&sp=r', 'logs/azureml/stderrlogs.txt': 'https://mlopsohws3623368663.blob.core.windows.net/azureml/ExperimentRun/dcid.61cc0698-933d-4a1b-954d-d3c0045f6dbb/logs/azureml/stderrlogs.txt?sv=2019-02-02&sr=b&sig=CuR0vlYXJ2UVmyNyfwAlHuANnWZoB0TaudFDRCpm%2FpU%3D&st=2020-03-03T21%3A38%3A04Z&se=2020-03-04T05%3A

'Finished'

In [29]:
from azureml.core import Model

# Print every model in the workspace with its version, tags and properties
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name, tag in model.tags.items():
        print('\t', tag_name, ':', tag)
    for prop_name, prop in model.properties.items():
        print('\t', prop_name, ':', prop)
    print('\n')

diabetes_model version: 1
	 Training context : Pipeline


