In [5]:
import azureml.core
from azureml.core import Workspace
import os, shutil

# Connect to the workspace via Workspace.from_config() and report the SDK
# version together with the workspace name as a quick sanity check.
ws = Workspace.from_config()
print('Azure ML Version: {}. Workspace Name: {}'.format(azureml.core.VERSION, ws.name))


Azure ML Version: 1.53.0. Workspace Name: mlw-dp100-labs


In [10]:
# Prepare the folder that will hold the training script and its data file.
training_folder = 'diabetes-training'

# Both directories are created idempotently so the cell can be re-run safely.
os.makedirs('data', exist_ok=True)
os.makedirs(training_folder, exist_ok=True)

# Place a copy of the dataset next to the training script; the copy's path is
# the cell's displayed result.
shutil.copy(
    'data/diabetes.csv',
    os.path.join(training_folder, 'diabetes.csv'),
)

'diabetes-training/diabetes.csv'

In [None]:
%%writefile training_folder/diabetes_training.py

from azureml.core import Run 
import pandas as pd
import numpy as np
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, roc_curve

run = Run.get_context()
print('run context:\n',run)

print('Loading Data')
data = pd.read_csv('./diabetes.csv')
X, y = data[['Pregnancies','PlasmaGlucose','DiastolicBloodPressure','TricepsThickness','SerumInsulin','BMI','DiabetesPedigree','Age']].values, data['Diabetic'].values
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.3, random_state=0)

# Regularization state
reg = 0.01

# Train Logistic Regression Model
print('Training a logistic regression model with a regularization rate of',reg)
run.log('Regularization Rate',np.float(reg))
model = LogisticRegression(C=1/reg, solver='liblinear').fit(X_train, y_train)

predicted = model.predict(X_test)
accuracy = np.average(predicted==y_test)
print('Accuracy:',accuracy)
run.log('Accuracy',accuracy)

# Calculating AUC
pred_probs = model.predict_proba(X_test)
auc = roc_auc_score(y_test, pred_probs[:,1])
print('AUC:',auc)
run.log('AUC',np.float(auc))

# Saving the trained model 
os.makedirs('output',exist_ok=True)
joblib.dump(value=model, filename='outputs/diabetes_model.pkl')

run.complete()


In [None]:
# ScriptRunConfig needs a compute target, so look one up or provision it here.
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = 'diabetes-compute-cluster'

try:
    # Reuse the compute target when one with this name is already present.
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # Lookup failed: try to provision a new cluster under the same name.
    try:
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_DS11_v2',
            max_nodes=2,
        )
        training_cluster = ComputeTarget.create(ws, cluster_name, provisioning_config)
        training_cluster.wait_for_completion(show_output=True)
    except Exception as err:
        # Best effort: the failure is reported but not re-raised.
        print(err)
    

In [None]:
# Running the training script as an experiment
from azureml.core import Experiment, ScriptRunConfig, Environment
from azureml.core.runconfig import DockerConfiguration
from azureml.widgets import RunDetails 

# Creating python environment from the conda specification file.
# NOTE(review): environment.yml is not created by any cell shown here; it must
# already exist inside the training folder.
env = Environment.from_conda_specification('environment','./diabetes-training/environment.yml')

# `script` is resolved relative to the source directory, so it is just the file
# name. The Docker settings keyword is `docker_runtime_config`.
script_config = ScriptRunConfig(source_directory=training_folder,
                                script='diabetes_training.py',
                                environment=env,
                                docker_runtime_config=DockerConfiguration(use_docker=True),
                                compute_target=cluster_name)

# submit the experiment (Experiment exposes `submit`, not `run`)
experiment_name = 'mslearn-train-diabetes'
experiment = Experiment(workspace=ws, name=experiment_name)
run = experiment.submit(config=script_config)
RunDetails(run).show()

# Block until the run finishes so later cells can read its metrics and files.
run.wait_for_completion()




In [None]:
# Show every metric recorded by the run, one "name value" pair per line.
metrics = run.get_metrics()
for metric_name in metrics:
    print(metric_name, metrics[metric_name])

# Then list the files associated with the run.
print('Files:')
for file_name in run.get_file_names():
    print(file_name)

In [None]:
# Register the model file produced by the run, then list registered models.
from azureml.core import Model

run.register_model(
    model_path='outputs/diabetes_model.pkl',
    model_name='diabetes_model',
    tags={'Training context':'Script'},
    # The run's logged metrics are stored as properties on the model.
    properties={
        'AUC':run.get_metrics()['AUC'],
        'Accuracy':run.get_metrics()['Accuracy'],
    },
)

# For each model in the workspace: name and version, followed by its tags and
# properties (tab-indented), then a blank separator.
for registered in Model.list(ws):
    print(registered.name, 'version:',registered.version)
    for tag_name, tag in registered.tags.items():
        print('\t', tag_name,':', tag)
    for prop_name, prop in registered.properties.items():
        print('\t', prop_name,':', prop)
    print('\n')

In [None]:
%% writefile diabetes-training/flex_diabetes_training.py
# To create a parameterized training script where the user has the ability to choose the regularization
# rate, we can use argparse. 
import argparse
import pandas as pd
import numpy as np
from azureml.core import Run
from azureml.core.script_run_config import ScriptRunConfig
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.linear_model import LogisticRegression
import joblib
import os

# Get experiment run context
Run.get_context() # config json file already exists 

# Set regularization rate
parser = argparse.ArgumentParser()
parser.add_argument('--reg-rate', default=0.01, type=float, dest='reg')
args = parser.parse_args()
reg = args.reg

# Load the dataset 
data = pd.read_csv('data/diabetes.csv')
print('Loaded data.')

# seperate features and label
X, y = data[['Pregnancies','PlasmaGlucose','DiastolicBloodPressure','TricepsThickness','SerumInsulin','BMI','DiabetesPedigree','Age']].values, data['Diabetic'].values
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3, random_state=0)

# Train a logistic model
run.log('Regularization Rate', np.float(reg))
model = LogisticRegression(solver = 'liblinear', C=1/reg).fit(X_train, y_train)

# Calculate accuracy
predicted = model.predict(X_test)
accuracy = np.average(predicted==y_test)
run.log('Accuracy', np.float(accuracy))

# Calculate AUC 
# Get probabilities and compare them to get the area under curve 
predicted_probs = model.predict_proba(X_test)
auc = roc_auc_score(y_test, predicted_probs[:,1])
run.log('AUC',np.float(auc))

# Create output file if it doesn't exist 
os.makedirs('outputs',exist_ok=True)
# Save the model 
joblib.dump(value=model, filename='outputs/flex_diabetes_model.pkl')

run.complete()


In [None]:
# Run the script with arguments
from azureml.core import Experiment, ScriptRunConfig
from azureml.core.environment import Environment
from azureml.core.runconfig import DockerConfiguration
from azureml.widgets import RunDetails

# Regularization rate forwarded to the script's --reg-rate option. The option
# requires a value, so it is kept in a notebook variable: change it here and
# re-run the cell to train with a different rate without editing the script.
reg_rate = 0.01

# create python environment from the conda specification file
# NOTE(review): environment.yml is not created by any cell shown here; it must
# already exist inside the training folder.
env = Environment.from_conda_specification('flex_model_env', './diabetes-training/environment.yml')

# `script` is the file name (with extension) relative to source_directory, and
# the environment keyword is `environment`.
script_run_config = ScriptRunConfig(source_directory='diabetes-training',
                                    script='flex_diabetes_training.py',
                                    arguments=['--reg-rate', reg_rate],
                                    environment=env,
                                    docker_runtime_config=DockerConfiguration(use_docker=True),
                                    compute_target=cluster_name)

# submit the experiment 
experiment_name = 'mslearn-flex-train-diabetes'
experiment = Experiment(workspace=ws, name=experiment_name)
run = experiment.submit(config=script_run_config)

RunDetails(run).show()
# Block until the run finishes so later cells can read its metrics and files.
run.wait_for_completion()


In [None]:
# Show the metrics logged by the parameterized run, one "name value" pair per line.
metrics = run.get_metrics()
# `metrics` is indexed by name elsewhere in this notebook, i.e. it is a
# mapping: iterate its items rather than calling it.
for key, value in metrics.items():
    print(key, value)

# List the files associated with the run.
for file in run.get_file_names():
    print(file)

In [None]:
# Register the parameterized run's model file, then list registered models.
from azureml.core import Model 

run.register_model(
    model_path = 'outputs/flex_diabetes_model.pkl',
    model_name='diabetes_model',
    tags = {'Training context':'Parameterized script'},
    # The run's logged metrics are stored as properties on the model.
    properties={
        'AUC': run.get_metrics()['AUC'],
        'Accuracy':run.get_metrics()['Accuracy'],
        'Regularization Rate':run.get_metrics()['Regularization Rate'],
    },
)

# For each model in the workspace: name and version, followed by its tags and
# properties (tab-indented), then a blank separator.
for registered in Model.list(ws):
    print(registered.name,'version',registered.version)
    for tag_name, tag in registered.tags.items():
        print('\t', tag_name,':', tag)
    for prop_name, prop in registered.properties.items():
        print('\t', prop_name,':',prop)
    print('\n')
            