# Exécution d’un script d’entraînement sur une compute target (VM basique) avec logging des métriques de performances dans Azure Machine Learning Services (AMLS)

## Création du Workspace

In [1]:
import azureml.core
from azureml.core import Workspace, Environment
from azureml.core.compute import ComputeTarget, AmlCompute
import os

In [2]:
from pathlib import Path  # python3 only
from dotenv import load_dotenv

# First call: no explicit path, so python-dotenv uses its default lookup.
load_dotenv()

# Second call: explicitly load ./auth.env (presumably holds the Azure identifiers
# read in the next cell — confirm). Its return value is what the cell displays.
env_path = Path('.').joinpath('auth.env')
load_dotenv(dotenv_path=env_path)

True

In [3]:
# Azure identifiers are read from environment variables so they never appear in the
# notebook itself; a variable that is not set yields None.
SUBSCRIPTION_ID, RESOURCE_GROUP = (
    os.environ.get(var_name) for var_name in ('SUBSCRIPTION_ID', 'RESOURCE_GROUP')
)

In [4]:
# Report which azureml-core SDK release this kernel is running
sdk_version = azureml.core.VERSION
print("Azure ML SDK Version: ", sdk_version)

Azure ML SDK Version:  1.0.85


In [5]:
# List the workspaces visible in the resource group.
# Nothing is loaded here: an empty dict simply means no workspace exists yet.
existing_workspaces = Workspace.list(subscription_id=SUBSCRIPTION_ID,
                                     resource_group=RESOURCE_GROUP)
print(existing_workspaces)

{}


In [6]:
# Create the Azure ML workspace inside the given resource group.
# exist_ok=True keeps the cell re-runnable (Restart & Run All): when a workspace with
# this name already exists the call no longer fails.
ws = Workspace.create(name='veille_workspace',
                      subscription_id=SUBSCRIPTION_ID,
                      resource_group=RESOURCE_GROUP,
                      create_resource_group=False,  # do not create the resource group
                      location='westeurope',
                      exist_ok=True)
# Alternative for later sessions, if a workspace config file has been saved locally:
# ws = Workspace.from_config()

Deploying StorageAccount with name veillewostorage43bd69280.
Deploying AppInsights with name veillewoinsights49a5b94f.
Deployed AppInsights with name veillewoinsights49a5b94f. Took 6.58 seconds.
Deploying KeyVault with name veillewokeyvault1c784130.
Deployed KeyVault with name veillewokeyvault1c784130. Took 21.35 seconds.
Deployed StorageAccount with name veillewostorage43bd69280. Took 24.49 seconds.
Deploying Workspace with name veille_workspace.
Deployed Workspace with name veille_workspace. Took 20.4 seconds.


## Choose an Environment (for information)

In [20]:
# Fetch the environment registered under the name "AzureML-Minimal" in the workspace
env = Environment.get(name="AzureML-Minimal", workspace=ws)

In [21]:
# Environments available in the workspace, indexed by name
envs = Environment.list(workspace=ws)

# Print the conda specification of every environment whose name starts with "AzureML".
# The loop variables are deliberately NOT called `env`, so the Environment object
# stored in `env` by the previous cell is not overwritten with a plain string.
for env_name, env_definition in envs.items():
    if env_name.startswith("AzureML"):
        print("Name",env_name)
        print("packages", env_definition.python.conda_dependencies.serialize_to_string())

Name AzureML-Tutorial
packages channels:
- anaconda
- conda-forge
dependencies:
- python=3.6.2
- pip:
  - azureml-core==1.2.0
  - azureml-defaults==1.2.0
  - azureml-telemetry==1.2.0
  - azureml-train-restclients-hyperdrive==1.2.0
  - azureml-train-core==1.2.0
  - azureml-widgets==1.2.0
  - azureml-pipeline-core==1.2.0
  - azureml-pipeline-steps==1.2.0
  - azureml-opendatasets==1.2.0
  - azureml-automl-core==1.2.0
  - azureml-automl-runtime==1.2.0
  - azureml-train-automl-client==1.2.0
  - azureml-train-automl-runtime==1.2.0
  - azureml-train-automl==1.2.0
  - azureml-train==1.2.0
  - azureml-sdk==1.2.0
  - azureml-interpret==1.2.0
  - azureml-tensorboard==1.2.0
  - azureml-mlflow==1.2.0
  - mlflow
  - sklearn-pandas
- pandas
- numpy
- tqdm
- scikit-learn
- matplotlib
name: azureml_2044e6d12036fa0f0c4d02b0ee45feb5

Name AzureML-Minimal
packages channels:
- conda-forge
dependencies:
- python=3.6.2
- pip:
  - azureml-core==1.2.0
  - azureml-defaults==1.2.0
name: azureml_8efa3cc24935ba94d

## With Compute Target

In [22]:
# Name given to the compute target; it has to be unique inside the workspace
compute_name = "aml-cluster"

In [23]:
# Describe the AmlCompute cluster to provision: dedicated STANDARD_DS12_V2 machines,
# between 0 and 4 nodes.
compute_config = AmlCompute.provisioning_configuration(
    vm_size='STANDARD_DS12_V2',
    vm_priority='dedicated',
    min_nodes=0,
    max_nodes=4,
)

In [24]:
from azureml.core.compute_target import ComputeTargetException

# Reuse the compute target when it already exists so this cell can be re-run safely;
# only provision a new cluster when the lookup by name fails.
try:
    aml_cluster = ComputeTarget(workspace=ws, name=compute_name)
    print('Found existing compute target:', compute_name)
except ComputeTargetException:
    aml_cluster = ComputeTarget.create(ws, compute_name, compute_config)

# Block until provisioning has finished, printing progress
aml_cluster.wait_for_completion(show_output=True)

Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


In [25]:
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Location of the pip requirements file. The default is the original author's local
# path; set the REQUIREMENTS_PATH environment variable to point elsewhere instead of
# editing the notebook.
requirements_path = os.getenv(
    'REQUIREMENTS_PATH',
    "C:/Users/Utilisateur/Downloads/experiment_folder/requirements.txt")

# Build the training environment straight from the pip requirements file
# (the former empty Environment(name="myenv") was overwritten immediately and is gone).
myenv = Environment.from_pip_requirements(name="myenv", file_path=requirements_path)

# Enable Docker for this environment
myenv.docker.enabled = True

In [26]:
from azureml.core import Experiment

# Handle on the 'training_experiment' experiment of the workspace.
# Nothing is run here; runs are submitted to it in the following cells.
experiment = Experiment(ws, 'training_experiment')

In [27]:
from azureml.core import ScriptRunConfig
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

# Run training.py from the current directory
src = ScriptRunConfig(script='training.py', source_directory='.')

# Use the custom environment and target the cluster created in the previous step
run_settings = src.run_config
run_settings.environment = myenv
run_settings.target = aml_cluster.name

# Submit to the experiment; the run object is the cell's displayed value
run = experiment.submit(src)
run

Experiment,Id,Type,Status,Details Page,Docs Page
training_experiment,training_experiment_1585297243_e49fc2fb,azureml.scriptrun,Starting,Link to Azure Machine Learning studio,Link to Documentation


## Use an Estimator

In [None]:
from azureml.core import Experiment
from azureml.train.estimator import Estimator

# Same job expressed with the Estimator API: script, environment and compute target
# are all passed to a single constructor.
# NOTE(review): the entry script here is 'train.py' whereas the ScriptRunConfig cell
# submits 'training.py' — confirm which file name is correct.
estimator = Estimator(
    compute_target=aml_cluster,
    environment_definition=myenv,
    source_directory='.',
    entry_script='train.py',
)

In [None]:
# Submit the estimator to the current experiment and keep the returned run handle
run = experiment.submit(estimator)

In [None]:
# The run was submitted to a remote compute target, so block until it finishes and
# stream its output. run.complete() is meant for interactive runs opened with
# start_logging(), not for submitted runs.
run.wait_for_completion(show_output=True)

## Local

In [28]:
# scikit-learn pieces for the local experiment; the diabetes dataset is a well-known
# small dataset that comes with scikit-learn
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in 0.23:
# prefer the standalone joblib package and fall back only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib



In [32]:
# Load the diabetes data and hold out 20% of it for testing (fixed seed)
X, y = load_diabetes(return_X_y = True)
columns = ['age', 'gender', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6']  # not used in this cell
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
data = {
   "train":{"X": X_train, "y": y_train},
   "test":{"X": X_test, "y": y_test}
}

# joblib.dump does not create missing directories, so make sure the target exists
output_dir = '../outputs'
os.makedirs(output_dir, exist_ok=True)

# Fit one Ridge regression per regularisation strength and save each model
alphas = [0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1]
for alpha in alphas:
    reg = Ridge(alpha = alpha)
    reg.fit(data['train']['X'], data['train']['y'])
    preds = reg.predict(data['test']['X'])
    # scikit-learn metrics take (y_true, y_pred); MSE is symmetric so the value is unchanged
    print('Mean Squared Error is', mean_squared_error(data['test']['y'], preds))
    joblib.dump(value = reg, filename = output_dir + '/model' + str(alpha) + '.pkl')

Mean Squared Error is 3435.8446889270103
Mean Squared Error is 3431.970965592588
Mean Squared Error is 3424.900315896017
Mean Squared Error is 3416.9720007734845
Mean Squared Error is 3408.9153122589287
Mean Squared Error is 3401.015921934314
Mean Squared Error is 3393.3986528075425
Mean Squared Error is 3386.1193944543174
Mean Squared Error is 3379.2010675133292
Mean Squared Error is 3372.649627810032


In [33]:
from azureml.core import Experiment

# Point `experiment` at the experiment that collects runs driven from this notebook
experiment = Experiment(ws, "train-within-notebook")

# Open an interactive run on it; snapshot_directory=None presumably skips the
# source snapshot — confirm against the SDK documentation
run = experiment.start_logging(snapshot_directory=None)

In [34]:
# Train one Ridge model per alpha and record parameters, metrics and model files
# on the interactive run opened in the previous cell.
alphas = [0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09,0.1]

# joblib.dump does not create missing directories, so make sure 'outputs' exists
os.makedirs('outputs', exist_ok=True)

for alpha in alphas:
    # Log the algorithm parameter alpha to the run
    run.log('alpha', alpha)

    # Create, fit, and test the scikit-learn Ridge regression model
    regression_model = Ridge(alpha=alpha)
    regression_model.fit(data['train']['X'], data['train']['y'])
    preds = regression_model.predict(data['test']['X'])

    # Compute the Mean Squared Error once, then send it to the notebook and to the run
    mse = mean_squared_error(data['test']['y'], preds)
    print('Mean Squared Error is', mse)
    run.log('mse', mse)

    # Save the model to the outputs directory for capture
    model_file_name = 'outputs/model' + str(alpha) + '.pkl'
    joblib.dump(value=regression_model, filename=model_file_name)

    # Upload the model file explicitly into the run's artifacts
    run.upload_file(name=model_file_name, path_or_stream=model_file_name)

# Complete the run only once, after every alpha has been logged. Inside the loop
# it would mark the run as completed right after the first iteration.
run.complete()

Mean Squared Error is 3435.8446889270103
Mean Squared Error is 3431.970965592588
Mean Squared Error is 3424.900315896017
Mean Squared Error is 3416.9720007734845
Mean Squared Error is 3408.9153122589287
Mean Squared Error is 3401.015921934314
Mean Squared Error is 3393.3986528075425
Mean Squared Error is 3386.1193944543174
Mean Squared Error is 3379.2010675133292
Mean Squared Error is 3372.649627810032
